From 786480ec9d503681d27cb5b8bef4fd94e84e81a9 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 22 Apr 2024 15:41:13 +0200 Subject: [PATCH 01/53] Scorecards development in progress --- modules/Scorecards/R/tmp/LoadMetrics.R | 72 +-- modules/Scorecards/Scorecards.R | 610 ------------------- modules/Scorecards/Scorecards_calculations.R | 484 +++++++++++++++ modules/Scorecards/Scorecards_plotting.R | 222 +++++++ modules/Statistics/Statistics.R | 48 +- tools/check_recipe.R | 6 +- 6 files changed, 751 insertions(+), 691 deletions(-) delete mode 100644 modules/Scorecards/Scorecards.R create mode 100644 modules/Scorecards/Scorecards_calculations.R create mode 100644 modules/Scorecards/Scorecards_plotting.R diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index 84e44028..155a569b 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -28,7 +28,6 @@ #' reference. = 'ERA5', #' var = 'tas', #' period = '1993-2016' -#' metrics = c('mean_bias', 'enscorr', 'rpss', 'crpss', 'enssprerr'), #' start_months = sprintf("%02d", 1:12), #' calib_method = 'raw', #' input_path = '/esarchive/scratch/nmilders/scorecards_data/input_data') @@ -37,9 +36,9 @@ #'@import multiApply #'@export -LoadMetrics <- function(input_path, system, reference, var, period, - metrics, start_months, calib_method = NULL, - inf_to_na = FALSE) { +LoadMetrics <- function(input_path, system, reference, var, period, data_type, + # metrics, + start_months, calib_method = NULL) { # Initial checks ## system @@ -62,9 +61,9 @@ LoadMetrics <- function(input_path, system, reference, var, period, var <- var[1] } ## metrics - if (!is.character(metrics)) { - stop("Parameter 'metrics' cannot be NULL.") - } + # if (!is.character(metrics)) { + # stop("Parameter 'metrics' cannot be NULL.") + # } ## start_months if (is.character(start_months)) { warning("Parameter 'start_months' must be a numeric vector indicating ", @@ -96,18 +95,13 @@ LoadMetrics <- function(input_path, system, reference, var, period, system <- gsub('.','', system, fixed = T) reference <- gsub('.','', reference, fixed = T) - all_metrics <- sapply(system, function(x) NULL) - names(all_metrics) <- system ## Load data for each system + all_metrics <- NULL for (sys in 1:length(system)) { - ## Define empty list to saved data - by_reference <- sapply(reference, function(x) NULL) - names(by_reference) <- reference ## Load data for each reference + by_reference <- NULL for (ref in 1:length(reference)) { ## Call function to load metrics data - met_by_smonth <- NULL - for (met in metrics) { result <- .loadmetrics(input_path = input_path, system = system[sys], reference = reference[ref], @@ -115,27 +109,18 @@ LoadMetrics <- function(input_path, system, reference, var, period, period = period, start_months = start_months, calib_method = calib_method, - metric = met) + data_type = data_type) result_attr <- attributes(result) - met_by_smonth <- abind::abind(met_by_smonth, result, along = length(dim(result)) + 1) - } - attributes(met_by_smonth) <- result_attr[-1] - # names(dim(met_by_smonth)) <- c(names(result_attr$dim), 'metric') - - dim(met_by_smonth) <- c(dim(result), metric = length(metrics)) - ## Save metric data as array in reference list - by_reference[[reference[ref]]] <- met_by_smonth - ## Remove -Inf from crpss data if variable is precipitation - if (inf_to_na) { - by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA - } + + by_reference <- abind::abind(by_reference, result, along = 
length(dim(result)) + 1) + dim(by_reference) <- c(dim(result), reference = length(reference)) } ## close loop on reference - ## Save reference data in list of system - all_metrics[[system[sys]]] <- by_reference + all_metrics <- abind::abind(all_metrics, by_reference, along = length(dim(by_reference)) + 1) + dim(all_metrics) <- c(dim(by_reference), system = length(system)) + # attributes(all_metrics) <- result_attr[-1] } ## close loop on system - attributes(all_metrics)$metrics <- metrics attributes(all_metrics)$start_months <- start_months return(all_metrics) @@ -145,13 +130,13 @@ LoadMetrics <- function(input_path, system, reference, var, period, .loadmetrics <- function(input_path, system, reference, var, period, start_months, - calib_method, metric) { + calib_method, data_type) { ## Load data for each start date allfiles <- sapply(start_months, function(m) { paste0(input_path, "/", system, "/", reference, "/", calib_method, "/", var, "/scorecards_", system, "_", reference, "_", - var, "_", metric, "_", period, "_s", m, # mod.pressure, + var, "_", data_type, "_", period, "_s", m, ".nc")}) allfiles_exist <- sapply(allfiles, file.exists) @@ -170,40 +155,23 @@ LoadMetrics <- function(input_path, system, reference, var, period, } num_dims[i] <- max(allfiledims[i,]) # We take the largest dimension } - # dims: [metric, longitude, latitude, time, smonth] - # or [metric, region, time, smonth] - + # Loop for file dim(allfiles) <- c(dat = 1, sdate = length(allfiles)) array_met_by_sdate <- Apply(data = allfiles, target_dims = 'dat', fun = function(x) { if (file.exists(x)) { - res <- easyNCDF::NcToArray(x, vars_to_read = metric, unlist = T, + res <- easyNCDF::NcToArray(x, vars_to_read = data_type, unlist = T, drop_var_dim = T) names(dim(res)) <- NULL } else { - res <- array(dim = c(length(metrics), allfiledims[-1,1])) + res <- array(dim = c(length(data_type), allfiledims[-1,1])) names(dim(res)) <- NULL } res})$output1 dim(array_met_by_sdate) <- c(allfiledims[-1,1], sdate = length(allfiles)) - # Attributes - # Read attributes from the first existing file - if ("region" %in% rownames(allfiledims)) { - file_for_att <- ncdf4::nc_open(allfiles[allfiles_exist[1]]) - region <- ncdf4::ncatt_get(file_for_att, 'region') - ncdf4::nc_close(file_for_att) - attributes(array_met_by_sdate)$region <- region - } else { - lon <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'longitude', - unlist = T, drop_var_dim = T) - lat <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'latitude', - unlist = T, drop_var_dim = T) - attributes(array_met_by_sdate)$lon <- lon - attributes(array_met_by_sdate)$lat <- lat - } return(array_met_by_sdate) } diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R deleted file mode 100644 index 37aa421c..00000000 --- a/modules/Scorecards/Scorecards.R +++ /dev/null @@ -1,610 +0,0 @@ -############################################################################### -##################### SCORECARDS MODULE FOR SUNSET SUITE ###################### -############################################################################### - -##### Load source functions ##### -source('modules/Scorecards/R/tmp/LoadMetrics.R') -source('modules/Scorecards/R/tmp/WeightedMetrics.R') -source('modules/Scorecards/R/tmp/Utils.R') -source('modules/Scorecards/R/tmp/SCTransform.R') -source('modules/Scorecards/R/tmp/ScorecardsSingle.R') -source('modules/Scorecards/R/tmp/ScorecardsMulti.R') -source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R') 
-source('modules/Scorecards/R/tmp/VizScorecard.R') - -## Temporary for new ESviz function -source('modules/Scorecards/R/tmp/ColorBarContinuous.R') -source('modules/Scorecards/R/tmp/ClimPalette.R') -.IsColor <- s2dv:::.IsColor -.FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs - -## Define function -Scorecards <- function(recipe) { - - ## Parameters for loading data files - skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") - stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") - output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") - dir.create(output.path, recursive = T, showWarnings = F) - system <- recipe$Analysis$Datasets$System$name - reference <- recipe$Analysis$Datasets$Reference$name - var <- recipe$Analysis$Variables$name - start.year <- as.numeric(recipe$Analysis$Time$hcst_start) - end.year <- as.numeric(recipe$Analysis$Time$hcst_end) - forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max - calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - - if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { - start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) - } else { - start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, - split = ", | |,")[[1]]) - if(!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))) %in% start.months){ - error(recipe$Run$logger,"Requested start dates for scorecards must be loaded") - } - } - - start.months <- sprintf("%02d", start.months) - period <- paste0(start.year, "-", end.year) - - ## Parameters for data aggregation - regions <- recipe$Analysis$Workflow$Scorecards$regions - for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} - - metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation - metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) - metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) - ncores <- 1 # recipe$Analysis$ncores - - if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){ - alpha <- 0.05 - } else { - alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha - } - - if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){ - inf.to.na <- FALSE - } else { - inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na - } - - if(is.null(recipe$Analysis$remove_NAs)){ - na.rm <- FALSE - } else { - na.rm <- recipe$Analysis$remove_NAs - } - - ## Parameters for scorecard layout - table.label <- recipe$Analysis$Workflow$Scorecards$table_label - fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label - col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width - col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width - legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks - legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width - - if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ - plot.legend <- TRUE - } else { - plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$columns_width)){ - columns.width <- 1.2 - } else { - columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)){ - legend.white.space <- 6 - } else { - legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space - } - - 
if(is.null(recipe$Analysis$Workflow$Scorecards$legend_height)){ - legend.height <- 50 - } else { - legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$label_scale)){ - label.scale <- 1.4 - } else { - label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)){ - round.decimal <- 2 - } else { - round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$font_size)){ - font.size <- 1.1 - } else { - font.size <- recipe$Analysis$Workflow$Scorecards$font_size - } - - ## Define if difference scorecard is to be plotted - if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ - calculate.diff <- FALSE - } else { - calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff - } - - ####### SKILL AGGREGATION ####### - if(metric.aggregation == 'skill'){ - - ## Load data files - loaded_metrics <- LoadMetrics(input_path = skill.input.path, - system = system, - reference = reference, - var = var, - metrics = metrics.visualize, - period = period, - start_months = start.months, - calib_method = calib.method, - inf_to_na = inf.to.na - ) - - ## Spatial Aggregation of metrics - if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ - - ### Convert loaded metrics to array for already aggregated data - metrics.dim <- attributes(loaded_metrics)$metrics - forecast.months.dim <- forecast.months - start.months.dim <- attributes(loaded_metrics)$start_months - regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) - - aggregated_metrics <- array(dim = c(system = length(loaded_metrics), - reference = length(loaded_metrics[[1]]), - metric = length(metrics.dim), - time = length(forecast.months.dim), - sdate = length(start.months.dim), - region = length(regions.dim))) - - - for (sys in 1:length(names(loaded_metrics))){ - for (ref in 1:length(names(loaded_metrics[[sys]]))){ - aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) - } - } - - ## Add attributes - attributes(aggregated_metrics)$metrics <- metrics.load - attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics)$start_months - attributes(aggregated_metrics)$forecast.months <- forecast.months - attributes(aggregated_metrics)$regions <- regions - attributes(aggregated_metrics)$system.name <- names(loaded_metrics) - attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]]) - - - } else { - ## Calculate weighted mean of spatial aggregation - aggregated_metrics <- WeightedMetrics(loaded_metrics, - regions = regions, - forecast.months = forecast.months, - metric.aggregation = metric.aggregation, - ncores = ncores) - } ## close if on region - metrics_significance <- NULL - - } ## close if on skill - - ###### SCORE AGGREGATION ###### - if(metric.aggregation == 'score'){ - - lon_dim <- 'longitude' - lat_dim <- 'latitude' - time_dim <- 'syear' - memb_dim <- 'ensemble' - - ## Define arrays to filled with data - aggregated_metrics <- array(data = NA, - dim = c(system = length(system), - reference = length(reference), - time = length(forecast.months), - sdate = length(start.months), - region = length(regions), - metric = length(metrics.visualize))) - - metrics_significance <- array(data = NA, - dim = c(system = length(system), - reference = length(reference), - time = length(forecast.months), - 
sdate = length(start.months), - region = length(regions), - metric = length(metrics.visualize))) - - ## Load and aggregated data for each metric - for (sys in 1:length(system)){ - for (ref in 1:length(reference)){ - for (met in metrics.visualize) { - - if(met == 'rpss'){ - ## Load data from saved files - rps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'rps_syear') - - rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'rps_clim_syear') - - ## Spatially aggregate data - rps_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_syear)$lon), - lat = as.vector(attributes(rps_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - rps_clim_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_clim_syear)$lon), - lat = as.vector(attributes(rps_clim_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' - names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' - - ## Calculate significance - sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Temporally aggregate data - rps_syear <- Apply(data = rps_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - rps_clim_syear <- Apply(data = rps_clim_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate RPSS from aggregated RPS and RPS_clim - rpss <- 1 - rps_syear / rps_clim_syear - - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'sdate','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'sdate','region')) - - } ## close if on rpss - - if(met == 'crpss'){ - - ## Load data from saved files - crps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'crps_syear') - - crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'crps_clim_syear') - - ## Spatially aggregate data - crps_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_syear)$lon), - lat = as.vector(attributes(crps_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - crps_clim_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_clim_syear)$lon), - lat = 
as.vector(attributes(crps_clim_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' - names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' - - ## Calculate significance - sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Temporally aggregate data - crps_syear <- Apply(data = crps_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - crps_clim_syear <- Apply(data = crps_clim_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate CRPSS from aggregated CRPS and CRPS_clim - crpss <- 1 - crps_syear / crps_clim_syear - - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'sdate','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'sdate','region')) - - } ## close if on crpss - - if(met == 'enscorr'){ - ## Load data from saved files - cov <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'cov') - - std_hcst <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'std_hcst') - - std_obs <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'std_obs') - - - n_eff <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'n_eff') - - ## Calculate spatial aggregation - cov <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = cov, - region = regions[[X]], - lon = as.vector(attributes(cov)$lon), - lat = as.vector(attributes(cov)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(cov))[length(dim(cov))] <- 'region' - - - std_hcst <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_hcst, - region = regions[[X]], - lon = as.vector(attributes(std_hcst)$lon), - lat = as.vector(attributes(std_hcst)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' - - std_obs <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_obs, - region = regions[[X]], - lon = as.vector(attributes(std_obs)$lon), - lat = as.vector(attributes(std_obs)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_obs))[length(dim(std_obs))] <- 'region' - - n_eff <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = n_eff, - region = regions[[X]], - lon = as.vector(attributes(n_eff)$lon), - lat = as.vector(attributes(n_eff)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - 
names(dim(n_eff))[length(dim(n_eff))] <- 'region' - - ## Calculate correlation - enscorr <- cov / (std_hcst * std_obs) - - ## Calculate significance of corr - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) - t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) - - sign_corr<- array(data = NA, - dim = c(time = length(forecast.months), - sdate = length(start.months), - region = length(regions))) - - for (time in 1:dim(sign_corr)[['time']]){ - for (mon in 1:dim(sign_corr)[['sdate']]){ - for (reg in 1:dim(sign_corr)[['region']]){ - - if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, reg])) == FALSE - && t[time, mon, reg] >= t_alpha2_n2[time, mon, reg]){ - sign_corr[time, mon, reg] <- TRUE - } else { - sign_corr[time, mon, reg] <- FALSE - } - } - } - } - - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'sdate','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'sdate','region')) - - } ## close if on enscorr - - if(met == 'mean_bias'){ - - mean_bias <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'mean_bias') - - ## Calculate spatial aggregation - mean_bias <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = mean_bias, - region = regions[[X]], - lon = as.vector(attributes(mean_bias)$lon), - lat = as.vector(attributes(mean_bias)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' - - ## Save metric result in array - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'sdate','region')) - - } ## close on mean_bias - - if(met == 'enssprerr'){ - - enssprerr <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'enssprerr') - - ## Calculate spatial aggregation - enssprerr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = enssprerr, - region = regions[[X]], - lon = as.vector(attributes(enssprerr)$lon), - lat = as.vector(attributes(enssprerr)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(enssprerr))[length(dim(enssprerr))] <- 'region' - - ## Save metric result in array - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'sdate','region')) - - } ## close on enssprerr - - } ## close loop on metric - } ## close if on reference - } ## close if on system - - ## Include metric attributes - attributes(aggregated_metrics)$metrics <- metrics.visualize - - ## Set NAs to False - metrics_significance[is.na(metrics_significance)] <- FALSE - - } ## close if on score - - - ####### PLOT SCORECARDS ########## - - ## Create simple scorecard tables - ## (one system only) - ## Metrics input must be in the same order as function SC_spatial_aggregation - scorecard_single <- ScorecardsSingle(data = aggregated_metrics, - sign = metrics_significance, - system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - start.months = start.months, - 
forecast.months = forecast.months, - region.names = names(regions), - metrics = metrics.visualize, - table.label = table.label, - fileout.label = fileout.label, - plot.legend = plot.legend, - legend.breaks = legend.breaks, - legend.white.space = legend.white.space, - legend.width = legend.width, - legend.height = legend.height, - label.scale = label.scale, - col1.width = col1.width, - col2.width = col2.width, - columns.width = columns.width, - font.size = font.size, - round.decimal = round.decimal, - output.path = output.path) - - ## Create multi system/reference scorecard tables - ## (multiple systems with one reference or one system with multiple references) - ## Metrics input must be in the same order as function SC_spatial_aggregation - if(length(system) > 1 || length(reference) > 1){ - scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, - sign = metrics_significance, - system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - start.months = start.months, - forecast.months = forecast.months, - region.names = names(regions), - metrics = metrics.visualize, - table.label = table.label, - fileout.label = fileout.label, - plot.legend = plot.legend, - legend.breaks = legend.breaks, - legend.white.space = legend.white.space, - legend.width = legend.width, - legend.height = legend.height, - label.scale = label.scale, - col1.width = col1.width, - col2.width = col2.width, - columns.width = columns.width, - font.size = font.size, - round.decimal = round.decimal, - output.path = output.path) - } ## close if - - - if(calculate.diff == TRUE){ - if(length(system) == 2 || length(reference) == 2){ - scorecard_diff <- ScorecardsSystemDiff(data = aggregated_metrics, - system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - start.months = start.months, - forecast.months = forecast.months, - region.names = attributes(regions)$names, - metrics = metrics.visualize, - table.label = table.label, - fileout.label = fileout.label, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - output.path = output.path) - } else {stop ("Difference scorecard can only be computed with two systems or two references.")} - } ## close if on calculate.diff - -} - diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R new file mode 100644 index 00000000..a5fe67ba --- /dev/null +++ b/modules/Scorecards/Scorecards_calculations.R @@ -0,0 +1,484 @@ +############################################################################### +##################### SCORECARDS MODULE FOR SUNSET SUITE ###################### +############################################################################### + +##### Load source functions ##### +# source('modules/Scorecards/R/tmp/LoadMetrics.R') +# source('modules/Scorecards/R/tmp/WeightedMetrics.R') + +## Define function +Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, recipe) { + + ## Parameters for saving output data files + output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") + dir.create(output.path, recursive = T, showWarnings = F) + + system <- recipe$Analysis$Datasets$System$name + reference <- recipe$Analysis$Datasets$Reference$name + var <- recipe$Analysis$Variables$name + start.year <- as.numeric(recipe$Analysis$Time$hcst_start) + end.year <- as.numeric(recipe$Analysis$Time$hcst_end) + forecast.months <- recipe$Analysis$Time$ftime_min : 
recipe$Analysis$Time$ftime_max
+  start.months <- substr(recipe$Analysis$Time$sdate, 1,2)
+  period <- paste0(start.year, "-", end.year)
+
+  file.name.metrics <- paste0('scorecards_aggr_skill_',system,'_',reference,'_',var,'_',period,'_s',start.months,'.RDS')
+  file.name.sign <- paste0('scorecards_aggr_sign_',system,'_',reference,'_',var,'_',period,'_s',start.months,'.RDS')
+
+  ## Parameters for data aggregation
+  regions <- recipe$Analysis$Workflow$Scorecards$regions
+  for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])}
+
+  metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation
+  metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,"))
+  ncores <- recipe$Analysis$ncores
+
+  if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){
+    alpha <- 0.05
+  } else {
+    alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha
+  }
+
+  if(is.null(recipe$Analysis$remove_NAs)){
+    na.rm <- FALSE
+  } else {
+    na.rm <- recipe$Analysis$remove_NAs
+  }
+
+  if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){
+    inf.to.na <- FALSE
+  } else {
+    inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na
+  }
+
+  ##### TO DO: Need to include condition for removing -INF #####
+  # if (inf_to_na) {
+  #   by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA
+  # }
+
+
+  ############################# SKILL AGGREGATION #############################
+  if(metric.aggregation == 'skill'){
+
+    # ## Load data files
+    # loaded_metrics <- LoadMetrics(input_path = skill.input.path,
+    #                               system = system,
+    #                               reference = reference,
+    #                               var = var,
+    #                               metrics = metrics,
+    #                               period = period,
+    #                               start_months = start.months,
+    #                               calib_method = calib.method,
+    #                               inf_to_na = inf.to.na
+    #                               )
+
+    # ## Remove -Inf from crpss data if variable is precipitation ## from loadMetrics function
+    # if (inf_to_na) {
+    #   by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA
+    # }
+
+    ## Spatial Aggregation of metrics
+    if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){
+
+      ### Convert loaded metrics to array for already aggregated data
+      metrics.dim <- attributes(loaded_metrics)$metrics
+      forecast.months.dim <- forecast.months
+      start.months.dim <- attributes(loaded_metrics)$start_months
+      regions.dim <- regions
+
+      aggr_metrics <- array(dim = c(system = length(loaded_metrics),
+                                    reference = length(loaded_metrics[[1]]),
+                                    metric = length(metrics.dim),
+                                    time = length(forecast.months.dim),
+                                    sdate = length(start.months.dim),
+                                    region = length(regions.dim)))
+
+      for (sys in 1:length(names(loaded_metrics))){
+        for (ref in 1:length(names(loaded_metrics[[sys]]))){
+          aggr_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region'))
+        }
+      }
+
+      ## Add attributes
+      attributes(aggr_metrics)$metrics <- metrics
+      attributes(aggr_metrics)$start.months <- attributes(loaded_metrics)$start_months
+      attributes(aggr_metrics)$forecast.months <- forecast.months
+      attributes(aggr_metrics)$regions <- regions
+      attributes(aggr_metrics)$system.name <- names(loaded_metrics)
+      attributes(aggr_metrics)$reference.name <- names(loaded_metrics[[1]])
+
+    } else {
+      ## Calculate weighted mean of spatial aggregation
+      aggr_metrics <- WeightedMetrics(loaded_metrics,
+                                      regions = regions,
+                                      forecast.months = forecast.months,
+                                      metric.aggregation = metric.aggregation,
+                                      ncores = ncores)
+    } ## close if on region
+    aggr_significance <- NULL
+
+  } ## close if on skill
+
+  ############################# SCORE AGGREGATION #############################
+  if(metric.aggregation == 'score'){
+
+    lon_dim <- 'longitude'
+    lat_dim <- 'latitude'
+    time_dim <- 'syear'
+    memb_dim <- 'ensemble'
+    lon <- as.numeric(data$hcst$coords$longitude)
+    lat <- as.numeric(data$hcst$coords$latitude)
+
+    ## Define arrays to be filled with data
+    aggr_metrics <- array(data = NA,
+                          dim = c(time = length(forecast.months),
+                                  region = length(regions),
+                                  metric = length(metrics)))
+
+    aggr_significance <- array(data = NA,
+                               dim = c(time = length(forecast.months),
+                                       region = length(regions),
+                                       metric = length(metrics)))
+
+    ## Spatially aggregate data for each metric
+    for (met in metrics) {
+
+      if(met == 'rpss'){
+
+        rps_syear <- sapply(X = 1:length(regions),
+                            FUN = function(X) {
+                              WeightedMean(data = skill_metrics$rps_syear,
+                                           region = regions[[X]],
+                                           lon = lon, londim = lon_dim,
+                                           lat = lat, latdim = lat_dim,
+                                           na.rm = na.rm)
+                            }, simplify = 'array')
+
+        rps_clim_syear <- sapply(X = 1:length(regions),
+                                 FUN = function(X) {
+                                   WeightedMean(data = skill_metrics$rps_clim_syear,
+                                                region = regions[[X]],
+                                                lon = lon, londim = lon_dim,
+                                                lat = lat, latdim = lat_dim,
+                                                na.rm = na.rm)
+                                 }, simplify = 'array')
+
+        ## Include name of region dimension
+        names(dim(rps_syear))[length(dim(rps_syear))] <- 'region'
+        names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region'
+
+        ## Remove 'var' dimension
+        rps_syear <- Subset(rps_syear, 'var', 1, drop = 'selected')
+        rps_clim_syear <- Subset(rps_clim_syear, 'var', 1, drop = 'selected')
+
+        ## Calculate significance
+        sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear,
+                                    time_dim = time_dim, test.type = 'two.sided',
+                                    alpha = alpha, pval = FALSE, sign = TRUE,
+                                    ncores = NULL)$sign
+
+        ## Average over 'syear' dimension
+        rps_syear <- Apply(data = rps_syear,
+                           target_dims = time_dim,
+                           fun = 'mean', ncores = ncores)$output1
+
+        rps_clim_syear <- Apply(data = rps_clim_syear,
+                                target_dims = time_dim,
+                                fun = 'mean', ncores = ncores)$output1
+
+        ## Calculate RPSS from aggregated RPS and RPS_clim
+        rpss <- 1 - rps_syear / rps_clim_syear
+
+        ## Save metric result in arrays
+        aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'region'))
+        aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'region'))
+
+      } ## close if on rpss
+
+      if(met == 'crpss'){
+
+        crps_syear <- sapply(X = 1:length(regions),
+                             FUN = function(X) {
+                               WeightedMean(data = skill_metrics$crps_syear,
+                                            region = regions[[X]],
+                                            lon = lon, londim = lon_dim,
+                                            lat = lat, latdim = lat_dim,
+                                            na.rm = na.rm)
+                             }, simplify = 'array')
+
+        crps_clim_syear <- sapply(X = 1:length(regions),
+                                  FUN = function(X) {
+                                    WeightedMean(data = skill_metrics$crps_clim_syear,
+                                                 region = regions[[X]],
+                                                 lon = lon, londim = lon_dim,
+                                                 lat = lat, latdim = lat_dim,
+                                                 na.rm = na.rm)
+                                  }, simplify = 'array')
+
+        ## Include name of region dimension
+        names(dim(crps_syear))[length(dim(crps_syear))] <- 'region'
+        names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region'
+
+        ## Remove 'var' dimension
+        crps_syear <- Subset(crps_syear, 'var', 1, drop = 'selected')
+        crps_clim_syear <- Subset(crps_clim_syear, 'var', 1, drop = 'selected')
+
+        ## Calculate significance
+        sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear,
+                                     time_dim = time_dim, test.type = 'two.sided',
+                                     alpha = alpha, pval = FALSE, sign = TRUE,
+                                     ncores = NULL)$sign
+
+        ## Average over 'syear' dimension
+        crps_syear <- Apply(data = crps_syear,
+                            target_dims = time_dim,
+                            fun = 'mean', ncores =
ncores)$output1 + + crps_clim_syear <- Apply(data = crps_clim_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate CRPSS from aggregated CRPS and CRPS_clim + crpss <- 1 - crps_syear / crps_clim_syear + + ## Save metric result in arrays + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'region')) + + } ## close if on crpss + + if(met == 'enscorr'){ + + cov <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$cov, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + std_hcst <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$std_hcst, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + std_obs <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$std_obs, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + n_eff <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$n_eff, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(cov))[length(dim(cov))] <- 'region' + names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' + names(dim(std_obs))[length(dim(std_obs))] <- 'region' + names(dim(n_eff))[length(dim(n_eff))] <- 'region' + + ## Remove 'var' dimension + cov <- Subset(cov, 'var', 1, drop = 'selected') + std_hcst <- Subset(std_hcst, 'var', 1, drop = 'selected') + std_obs <- Subset(std_obs, 'var', 1, drop = 'selected') + n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') + + ## Calculate correlation + enscorr <- cov / (std_hcst * std_obs) + + ## Calculate significance of corr + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) + + sign_corr<- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions))) + + for (time in 1:dim(sign_corr)[['time']]){ + for (reg in 1:dim(sign_corr)[['region']]){ + + if (anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE + && t[time, reg] >= t_alpha2_n2[time, reg]){ + sign_corr[time, reg] <- TRUE + } else { + sign_corr[time, reg] <- FALSE + } + } + } + + ## Save metric result in arrays + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + + } ## close if on enscorr + + if(met == 'mean_bias'){ + + ## Calculate ensemble mean + hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') + obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') + + ## Aggregate data over regions + hcst_data_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = hcst_data_ens, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + obs_data_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = obs_data_ens, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm 
= na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(hcst_data_aggr))[length(dim(hcst_data_aggr))] <- 'region' + names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' + + ## Remove unnecessary dimension + hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') + obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') + + ## Calculate significance + pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), + target_dims = c('syear'), ncores = ncores, + fun = function(x,y){t.test(as.vector(x),as.vector(y))})$p.value + + sign_mean_bias <- pval_mean_bias <= alpha + + ## Calculate aggregated mean bias metric + mean_bias <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$mean_bias, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' + + ## Remove 'var' dimension + mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected') + + ## Save metric result in array + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_mean_bias, order = c('time', 'region')) + + } ## close on mean_bias + + if(met == 'enssprerr'){ + + ## Calculate metric + spread <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$spread, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + error <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$rms, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(spread))[length(dim(spread))] <- 'region' + names(dim(error))[length(dim(error))] <- 'region' + + ## Remove 'var' dimension + spread <- Subset(spread, 'var', 1, drop = 'selected') + error <- Subset(error, 'var', 1, drop = 'selected') + + enssprerr <- spread / error + + # ## Significance calculation + # + # # Effective sample size + # # exp <- data$hcst$data + # # ens_exp <- MeanDims(data$hcst$data, dims = 'ensemble') + # # enospr <- sum(Eno(exp - InsertDim(ens_exp, length(dim(exp)), dim(exp)['ensemble']), "ensemble")) + # # enodif <- .Eno(ens_exp - ens_obs, na.action = na.pass) + # + # # Removing eno at the moment + # # F <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1)) + # F <- spread^2 / error^2 + # # if (!is.na(F) & !is.na(enospr) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + # # p.val <- pf(F, enospr - 1, enodif - 1) + # + # pval_enssprerr <- Apply(data = list(x = F, y = data$hcst$data), ### DOES NOT WORK + # target_dims = c('time'), + # fun = function(x, y) + # {pf(x, dim(y)['syear'] -1 ,dim(y)['syear'] -1)}) + # + # pval_enssprerr <- pf(F,dim(data$hcst$data)['syear'] -1 ,dim(data$hcst$data)['syear'] -1) + # pval_enssprerr <- 2 * min(pval_enssprerr, 1 - pval_enssprerr) + # + # sign_enssprerr <- pval_enssprerr <= alpha + + ## Save metric result in array + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'region')) + # aggr_significance[ , , which(metrics == met)] <- 
s2dv::Reorder(data = sign_corr, order = c('time', 'region'))
+
+      } ## close on enssprerr
+
+    } ## close loop on metric
+
+    ## Set NAs to False
+    aggr_significance[is.na(aggr_significance)] <- FALSE
+
+  } ## close if on score
+
+  ## Include 'var' dimension to be able to save array
+  if(!'var' %in% names(dim(aggr_metrics))){
+    aggr_metrics <- InsertDim(aggr_metrics, 1, 1, name = 'var')
+    aggr_significance <- InsertDim(aggr_significance, 1, 1, name = 'var')
+  }
+
+  ## Include attributes
+  attributes(aggr_metrics)$metrics <- metrics
+  attributes(aggr_metrics)$forecast.months <- forecast.months
+  attributes(aggr_metrics)$regions <- regions
+  attributes(aggr_metrics)$system.name <- system
+  attributes(aggr_metrics)$reference.name <- reference
+
+  aggr_scorecards <- list(aggr_metrics = aggr_metrics, aggr_significance = aggr_significance)
+
+  ## Save metric data arrays
+  recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
+                                  "/outputs/Scorecards/")
+
+  save_metrics(recipe = recipe, metrics = aggr_scorecards,
+               data_cube = data$hcst, agg = 'region',
+               module = "scorecard")
+}
+
diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R
new file mode 100644
index 00000000..ff00e55c
--- /dev/null
+++ b/modules/Scorecards/Scorecards_plotting.R
@@ -0,0 +1,222 @@
+###############################################################################
+##################### SCORECARDS MODULE FOR SUNSET SUITE ######################
+###############################################################################
+
+##### Load source functions #####
+source('modules/Scorecards/R/tmp/LoadMetrics.R')
+source('modules/Scorecards/R/tmp/Utils.R')
+source('modules/Scorecards/R/tmp/SCTransform.R')
+source('modules/Scorecards/R/tmp/ScorecardsSingle.R')
+source('modules/Scorecards/R/tmp/ScorecardsMulti.R')
+source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R')
+source('modules/Scorecards/R/tmp/VizScorecard.R')
+
+## Temporary for new ESviz function
+source('modules/Scorecards/R/tmp/ColorBarContinuous.R')
+source('modules/Scorecards/R/tmp/ClimPalette.R')
+.IsColor <- s2dv:::.IsColor
+.FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs
+
+## Define function
+Scorecards_plotting <- function(recipe) {
+
+  ## Parameters for loading data files
+  input.path <- paste0(recipe$Run$output_dir, "/outputs/Scorecards/")
+  output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/")
+  dir.create(output.path, recursive = T, showWarnings = F)
+
+  system <- recipe$Analysis$Datasets$System$name
+  reference <- recipe$Analysis$Datasets$Reference$name
+  var <- recipe$Analysis$Variables$name
+  start.year <- as.numeric(recipe$Analysis$Time$hcst_start)
+  end.year <- as.numeric(recipe$Analysis$Time$hcst_end)
+  forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max
+  calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method)
+  metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,"))
+
+  if (is.null(recipe$Analysis$Workflow$Scorecards$start_months) ||
+      recipe$Analysis$Workflow$Scorecards$start_months == 'all') {
+    start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))
+  } else {
+    start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months,
+                                        split = ", | |,")[[1]])
+    if(!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) %in% start.months)){
+      error(recipe$Run$logger, "Requested start dates for scorecards must be loaded")
+    }
+  }
+  start.months <- sprintf("%02d", start.months)
+  period
<- paste0(start.year, "-", end.year) + + ## Parameters for data aggregation + regions <- recipe$Analysis$Workflow$Scorecards$regions + for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} + + ## Parameters for scorecard layout + table.label <- recipe$Analysis$Workflow$Scorecards$table_label + fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label + col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width + col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width + legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks + legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width + + if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ + plot.legend <- TRUE + } else { + plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$columns_width)){ + columns.width <- 1.2 + } else { + columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)){ + legend.white.space <- 6 + } else { + legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$legend_height)){ + legend.height <- 50 + } else { + legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$label_scale)){ + label.scale <- 1.4 + } else { + label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)){ + round.decimal <- 2 + } else { + round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$font_size)){ + font.size <- 1.1 + } else { + font.size <- recipe$Analysis$Workflow$Scorecards$font_size + } + + ## Define if difference scorecard is to be plotted + if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ + calculate.diff <- FALSE + } else { + calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff + } + + ####### Load data files ####### + aggregated_metrics <- LoadMetrics(input_path = input.path, + system = system, + reference = reference, + var = var, + data_type = "aggr_metrics", + period = period, + start_months = as.numeric(start.months), + calib_method = calib.method) + + attributes(aggregated_metrics)$metrics <- metrics + # attributes(aggregated_metrics)$forecast.months <- forecast.months + # attributes(aggregated_metrics)$regions <- regions + # attributes(aggregated_metrics)$system.name <- system + # attributes(aggregated_metrics)$reference.name <- reference + + aggregated_significance <- LoadMetrics(input_path = input.path, + system = system, + reference = reference, + var = var, + data_type = "aggr_significance", + period = period, + start_months = as.numeric(start.months), + calib_method = calib.method) + + aggregated_significance <- aggregated_significance == 1 + + ####### PLOT SCORECARDS ########## + + ## Create simple scorecard tables + ## (one system only) + ## Metrics input must be in the same order as function SC_spatial_aggregation + scorecard_single <- ScorecardsSingle(data = aggregated_metrics, + sign = aggregated_significance, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = names(regions), + metrics = metrics, + table.label = table.label, + fileout.label = fileout.label, + plot.legend = 
plot.legend, + legend.breaks = legend.breaks, + legend.white.space = legend.white.space, + legend.width = legend.width, + legend.height = legend.height, + label.scale = label.scale, + col1.width = col1.width, + col2.width = col2.width, + columns.width = columns.width, + font.size = font.size, + round.decimal = round.decimal, + output.path = output.path) + + ## Create multi system/reference scorecard tables + ## (multiple systems with one reference or one system with multiple references) + ## Metrics input must be in the same order as function SC_spatial_aggregation + if(length(system) > 1 || length(reference) > 1){ + scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, + sign = aggregated_significance, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = names(regions), + metrics = metrics, + table.label = table.label, + fileout.label = fileout.label, + plot.legend = plot.legend, + legend.breaks = legend.breaks, + legend.white.space = legend.white.space, + legend.width = legend.width, + legend.height = legend.height, + label.scale = label.scale, + col1.width = col1.width, + col2.width = col2.width, + columns.width = columns.width, + font.size = font.size, + round.decimal = round.decimal, + output.path = output.path) + } ## close if + + + if(calculate.diff == TRUE){ + if(length(system) == 2 || length(reference) == 2){ + scorecard_diff <- ScorecardsSystemDiff(data = aggregated_metrics, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = attributes(regions)$names, + metrics = metrics, + table.label = table.label, + fileout.label = fileout.label, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + output.path = output.path) + } else {stop ("Difference scorecard can only be computed with two systems or two references.")} + } ## close if on calculate.diff + +} + diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 085bcdc5..ed0af72a 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -5,29 +5,16 @@ Statistics <- function(recipe, data, agg = 'global') { # agg: data aggregation time_dim <- 'syear' + memb_dim <- 'ensemble' ncores <- recipe$Analysis$ncores ## Calculate ensemble mean - hcst_ensmean <- Apply(data$hcst$data, - target_dims = 'ensemble', - fun = 'mean')$output1 - obs_ensmean <- Apply(data$obs$data, - target_dims = 'ensemble', - fun = 'mean')$output1 - - ## Remove unwanted dimensions - ## TODO: Apply .drop_dims() function instead? 
- hcst_ensmean <- Subset(hcst_ensmean, - along = c('dat', 'sday', 'sweek'), - indices = list(1, 1, 1), - drop = 'selected') - obs_ensmean <- Subset(obs_ensmean, - along = c('dat', 'sday', 'sweek'), - indices = list(1, 1, 1), - drop = 'selected') + hcst_ensmean <- MeanDims(data$hcst$data, dims = memb_dim) + obs_ensmean <- MeanDims(data$obs$data, dims = memb_dim) statistics_list <- tolower(recipe$Analysis$Workflow$Statistics$metric) statistics <- list() + # Compute statistics in the list for (stat in strsplit(statistics_list, ", | |,")[[1]]) { if (stat %in% c('cov', 'covariance')) { @@ -37,7 +24,7 @@ Statistics <- function(recipe, data, agg = 'global') { fun = function(x,y) {cov(as.vector(x),as.vector(y), use = "everything", method = "pearson")})$output1 - + covariance <- .drop_dims(covariance) statistics[[ stat ]] <- covariance } ## close if on covariance if (stat %in% c('std', 'standard_deviation')) { @@ -45,37 +32,46 @@ Statistics <- function(recipe, data, agg = 'global') { std_hcst <- Apply(data = hcst_ensmean, target_dims = c(time_dim), fun = 'sd')$output1 - std_obs <- Apply(data = obs_ensmean, target_dims = c(time_dim), fun = 'sd')$output1 - + std_hcst <- .drop_dims(std_hcst) + std_obs <- .drop_dims(std_obs) statistics[['std_hcst']] <- std_hcst statistics[['std_obs']] <- std_obs - } ## close if on std if (stat %in% c('var', 'variance')) { ## Calculate variance var_hcst <- (Apply(data = hcst_ensmean, target_dims = c(time_dim), fun = 'sd')$output1)^2 - var_obs <- (Apply(data = obs_ensmean, target_dims = c(time_dim), fun = 'sd')$output1)^2 - + var_hcst <- .drop_dims(var_hcst) + var_obs <- .drop_dims(var_obs) statistics[['var_hcst']] <- var_hcst statistics[['var_obs']] <- var_obs } ## close if on variance - if (stat == 'n_eff') { + if (stat == 'n_eff') { ## Calculate degrees of freedom n_eff <- s2dv::Eno(data = obs_ensmean, time_dim = time_dim, na.action = na.pass, ncores = ncores) + n_eff <- .drop_dims(n_eff) statistics[['n_eff']] <- n_eff - } ## close on n_eff - } + } ## close on n_eff + if (stat == 'spread') { + C_cov <- stats:::C_cov + spread <- sqrt(Apply(Apply(data = data$hcst$data, + target_dims = c(memb_dim), + fun = 'var')$output1, + fun = 'mean', target_dims = time_dim)$output1) + spread <- .drop_dims(spread) + statistics[['spread']] <- spread + } ## close on spread + } ## close on stat info(recipe$Run$logger, "##### STATISTICS COMPUTATION COMPLETE #####") .log_memory_usage(recipe$Run$logger, when = "After statistics computation") diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 01896877..ab77743b 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -100,7 +100,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Datasets$Multimodel) || (is.logical(recipe$Analysis$Datasets$Multimodel) && !(recipe$Analysis$Datasets$Multimodel))) { - recipe$Analysis$Datasets$Multimodel <- list(execute = FALSE) + recipe$Analysis$Datasets$Multimodel$execute <- FALSE } if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'false') { multimodel <- FALSE @@ -134,7 +134,7 @@ check_recipe <- function(recipe) { } } } else { - recipe$Analysis$Datasets$Multimodel <- FALSE + recipe$Analysis$Datasets$Multimodel$execute <- FALSE } # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || @@ -509,7 +509,7 @@ check_recipe <- function(recipe) { # Skill AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", "rps_syear", "rpss", "frps", "frpss", "crps", "crps_syear", - "crpss", "bss10", "bss90", + "crpss", "bss10", "bss90", "rms", 
"mean_bias", "mean_bias_ss", "enssprerr", "rps_clim", "rps_clim_syear", "crps_clim", "crps_clim_syear", "enscorr_specs", "frps_specs", "rpss_specs", -- GitLab From 309a7d1c9b83e0fbcb3ce6cf6b78257e548ac0bf Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 26 Apr 2024 17:11:45 +0200 Subject: [PATCH 02/53] reading var.units from recipe --- modules/Scorecards/R/tmp/LoadMetrics.R | 4 -- modules/Scorecards/R/tmp/ScorecardsMulti.R | 33 +++++---------- modules/Scorecards/R/tmp/ScorecardsSingle.R | 31 +++++--------- .../Scorecards/R/tmp/ScorecardsSystemDiff.R | 36 +++++++---------- modules/Scorecards/Scorecards_calculations.R | 4 +- modules/Scorecards/Scorecards_plotting.R | 16 +++++++- modules/Scorecards/execute_scorecards.R | 2 +- modules/Skill/Skill.R | 2 +- modules/Statistics/Statistics.R | 5 ++- tools/check_recipe.R | 40 +++++++++++++------ 10 files changed, 84 insertions(+), 89 deletions(-) diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index 155a569b..fe19dc85 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -60,10 +60,6 @@ LoadMetrics <- function(input_path, system, reference, var, period, data_type, "will be used.") var <- var[1] } - ## metrics - # if (!is.character(metrics)) { - # stop("Parameter 'metrics' cannot be NULL.") - # } ## start_months if (is.character(start_months)) { warning("Parameter 'start_months' must be a numeric vector indicating ", diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 7278eb16..1633eb86 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -7,11 +7,11 @@ #'@param sign is an array with the same dimensions as data indicting the #' significance of the metrics, with either true, false or null. #'@param system a vector of character strings defining the systems following the -#' archive.yml format from verification suite +#' archive.yml format for SUNSET. #'@param reference a vector of character strings defining the references -#' following the archive.yml format from verification suite -#'@param var a character string following the format from -#' variable-dictionary.yml from verification suite +#' following the archive.yml format for SUNSET. +#'@param var a character string indicating the variable. +#'@param var.units a character sting indicating the the variable units. 
#'@param start.year a numeric indicating the start year of the reference period
 #'@param end.year a numeric indicating the end year of the reference period
 #'@param start.date a vector of character strings indicating the start months
@@ -56,20 +56,21 @@
 #' system.name = c('ECMWF-SEAS5','DWD-GFCS2.1'),
 #' reference.name = 'ERA5',
 #' var = 'tas',
+#' var.units = 'C',
 #' start.year = 1993,
 #' end.year = 2016,
 #' start.months = 1:12,
 #' forecast.months = 1:6,
 #' region.names = c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH'),
 #' metrics = c('mean_bias', 'enscorr', 'rpss','crpss', 'enssprerr'),
-#' table.label = '(Interpolation = to system, Aggregation level = skill, Cross-validation = terciles)',
+#' table.label = '(Interpolation = to system, Cross-validation = True)',
 #' fileout.label = '_crossval-terciles_agg-skill',
-#' output.path = '/esarchive/scratch/nmilders/scorecards_images/testing'
+#' output.path = './scorecards'
 #' )
 
-ScorecardsMulti <- function(data, sign, system, reference, var, start.year,
-                            end.year, start.months, forecast.months,
+ScorecardsMulti <- function(data, sign, system, reference, var, var.units,
+                            start.year, end.year, start.months, forecast.months,
                             region.names, metrics, plot.legend = TRUE,
                             legend.breaks = NULL, legend.white.space = NULL,
                             legend.width = 555, legend.height = 50,
@@ -108,7 +109,6 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year,
     attributes(sign)$metrics <- metrics
   }
-  
   ## Transform data for scorecards by forecast month (types 11 & 12)
   if(length(start.months) >= length(forecast.months)){
 
@@ -133,25 +133,14 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year,
   }
   sys_dict <- read_yaml("conf/archive.yml")[[filesystem]]
   var_dict <- read_yaml("conf/variable-dictionary.yml")$vars
-  
+
   ## Get scorecards table display names from configuration files
   var.name <- var_dict[[var]]$long_name
-  if ('name' %in% names(recipe$Analysis$Variables)){
-    if (recipe$Analysis$Variables$name == var) {
-      var.units <- recipe$Analysis$Variables$units
-    }
-  } else {
-    for (i in 1:length(recipe$Analysis$Variables)) {
-      if (recipe$Analysis$Variables[[i]]$name == var) {
-        var.units <- recipe$Analysis$Variables[[i]]$units
-      }
-    }
-  }
   if (is.null(var.units)) {
     var.units <- var_dict[[var]]$units
   }
-  
+
   system.name <- NULL
   reference.name <- NULL
 
diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R
index 9fd44545..26e2eeb2 100644
--- a/modules/Scorecards/R/tmp/ScorecardsSingle.R
+++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R
@@ -7,11 +7,11 @@
 #'@param sign is an array with the same dimensions as data indicating the
 #' significance of the metrics, with either true, false or null.
 #'@param system a vector of character strings defining the systems following the
-#' archive.yml format from verification suite
+#' archive.yml format for SUNSET.
 #'@param reference a vector of character strings defining the references
-#' following the archive.yml format from verification suite
-#'@param var a character string following the format from
-#' variable-dictionary.yml from verification suite
+#' following the archive.yml format for SUNSET.
+#'@param var a character string indicating the variable.
+#'@param var.units a character string indicating the variable units.
#'@param start.year a numeric indicating the start year of the reference period
 #'@param end.year a numeric indicating the end year of the reference period
 #'@param start.date a vector of character strings indicating the start months
@@ -54,19 +54,20 @@
 #' system.name = c('ECMWF-SEAS5','DWD-GFCS2.1'),
 #' reference.name = 'ERA5',
 #' var = 'tas',
+#' var.units = 'C',
 #' start.year = 1993,
 #' end.year = 2016,
 #' start.months = 1:12,
 #' forecast.months = 1:6,
 #' region.names = c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH'),
 #' metrics = c('mean_bias', 'enscorr', 'rpss','crpss', 'enssprerr'),
-#' table.label = '(Interpolation = to system, Aggregation level = skill, Cross-validation = terciles)',
+#' table.label = '(Interpolation = to system, Cross-validation = True)',
 #' fileout.label = '_crossval-terciles_agg-skill',
-#' output.path = '/esarchive/scratch/nmilders/scorecards_images/test'
+#' output.path = './scorecards'
 #' )
 #'@export
-ScorecardsSingle <- function(data, sign, system, reference, var, start.year,
-                             end.year, start.months, forecast.months,
+ScorecardsSingle <- function(data, sign, system, reference, var, var.units,
+                             start.year, end.year, start.months, forecast.months,
                              region.names, metrics, plot.legend = TRUE,
                              legend.breaks = NULL, legend.white.space = NULL,
                              legend.width = 550, legend.height = 50,
@@ -149,17 +150,6 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year,
 
   ## Get scorecards table display names from configuration files
   var.name <- var_dict[[var]]$long_name
-  
-  if ('name' %in% names(recipe$Analysis$Variables)){
-    if (recipe$Analysis$Variables$name == var) {
-      var.units <- recipe$Analysis$Variables$units
-    }
-  } else {
-    for (i in 1:length(recipe$Analysis$Variables)) {
-      if (recipe$Analysis$Variables[[i]]$name == var) {
-        var.units <- recipe$Analysis$Variables[[i]]$units
-      }
-    }
-  }
+
   if (is.null(var.units)) {
     var.units <- var_dict[[var]]$units
   }
@@ -205,9 +195,6 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year,
       stopifnot(identical(names(dim(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('metric','time','sdate','region')))
       temp_data <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected')
       pos_bias <- which(metrics == 'mean_bias')
-      if(var == 'psl'){
-        temp_data[pos_bias,,,] <- temp_data[pos_bias,,,]/100
-      }
       breaks_bias <- .SCBiasBreaks(Subset(temp_data, along = 'metric', indices = pos_bias))
     }
diff --git a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R
index 88e9de15..2d17f94c 100644
--- a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R
+++ b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R
@@ -5,11 +5,11 @@
 #'@param input_data is an array of spatially aggregated metrics containing the
 #' following dimensions: system, reference, metric, time, sdate, region.
 #'@param system a vector of character strings defining the systems following the
-#' archive.yml format from verification suite
+#' archive.yml format for SUNSET.
 #'@param reference a vector of character strings defining the references
-#' following the archive.yml format from verification suite
-#'@param var a character string following the format from
-#' variable-dictionary.yml from verification suite
+#' following the archive.yml format for SUNSET.
+#'@param var a character string indicating the variable.
+#'@param var.units a character string indicating the variable units.
#'@param start.year a numeric indicating the start year of the reference period #'@param end.year a numeric indicating the end year of the reference period #'@param start.date a vector of character strings indicating the start months @@ -32,6 +32,7 @@ #' system.name = c('ECMWF-SEAS5','DWD-GFCS2.1'), #' reference.name = 'ERA5', #' var = 'tas', +#' var.units = 'C', #' start.year = 1993, #' end.year = 2016, #' start.months = 1:12, @@ -44,22 +45,12 @@ #' ) -ScorecardsSystemDiff <- function(data, - system, - reference, - var, - start.year, - end.year, - start.months, - forecast.months, - region.names, - metrics, - table.label = NULL, - fileout.label = NULL, - legend.white.space = NULL, - col1.width = NULL, - col2.width = NULL, - output.path){ +ScorecardsSystemDiff <- function(data, system, reference, var, var.units, + start.year, end.year, start.months, + forecast.months, region.names, metrics, + table.label = NULL, fileout.label = NULL, + legend.white.space = NULL, col1.width = NULL, + col2.width = NULL, output.path){ ## Checks to apply: # first dimension in aggregated_metrics is system and second dimension is reference @@ -121,7 +112,10 @@ ScorecardsSystemDiff <- function(data, ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name - var.units <- var_dict[[var]]$units ## TODO: Get units from recipe or elsewhere + + if (is.null(var.units)) { + var.units <- var_dict[[var]]$units + } system.name <- NULL reference.name <- NULL diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index a5fe67ba..f54cfbc2 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -464,13 +464,13 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci aggr_significance <- InsertDim(aggr_significance, 1, 1, name = 'var') } - ## Include attributes + ## Include attributes (necessary?) 
attributes(aggr_metrics)$metrics <- metrics attributes(aggr_metrics)$forecast.months <- forecast.months attributes(aggr_metrics)$regions <- regions attributes(aggr_metrics)$system.name <- system attributes(aggr_metrics)$reference.name <- reference - + aggr_scorecards <- list(aggr_metrics = aggr_metrics, aggr_significance = aggr_significance) ## Save metric data arrays diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index ff00e55c..c2793bf1 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -107,6 +107,18 @@ Scorecards_plotting <- function(recipe) { calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff } + if ('name' %in% names(recipe$Analysis$Variables)){ + if (recipe$Analysis$Variables$name == var) { + var.units <- recipe$Analysis$Variables$units + } + } else { + for (i in 1:length(recipe$Analysis$Variables)) { + if (recipe$Analysis$Variables[[i]]$name == var) { + var.units <- recipe$Analysis$Variables[[i]]$units + } + } + } + ####### Load data files ####### aggregated_metrics <- LoadMetrics(input_path = input.path, system = system, @@ -144,6 +156,7 @@ Scorecards_plotting <- function(recipe) { system = system, reference = reference, var = var, + var.units = var.units, start.year = start.year, end.year = end.year, start.months = start.months, @@ -173,7 +186,7 @@ Scorecards_plotting <- function(recipe) { sign = aggregated_significance, system = system, reference = reference, - var = var, + var.units = var.units, start.year = start.year, end.year = end.year, start.months = start.months, @@ -203,6 +216,7 @@ Scorecards_plotting <- function(recipe) { system = system, reference = reference, var = var, + var.units = var.units, start.year = start.year, end.year = end.year, start.months = start.months, diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R index e4dcd4ec..6a8de1bc 100644 --- a/modules/Scorecards/execute_scorecards.R +++ b/modules/Scorecards/execute_scorecards.R @@ -32,6 +32,6 @@ for (variable in 1:length(recipe$Analysis$Variables)) { scorecard_recipe$Analysis$Variables <- recipe$Analysis$Variables[[variable]] # Plot Scorecards - Scorecards(scorecard_recipe) + Scorecards_plotting(scorecard_recipe) } print("##### SCORECARDS SAVED TO THE OUTPUT DIRECTORY #####") diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index aa22838a..d59f29d7 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -205,7 +205,7 @@ Skill <- function(recipe, data, agg = 'global') { skill_metrics[[ metric ]] <- skill$crpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'rms') { - source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RMS.R") + # source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RMS.R") hcst_mean <- Apply(list(data$hcst$data), target_dims = memb_dim, fun = mean, na.rm = na.rm, ncores = ncores)$output1 hcst_mean <- InsertDim(hcst_mean, pos = 1, len = 1, name = memb_dim) diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index ed0af72a..d3b805f8 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -3,7 +3,7 @@ Statistics <- function(recipe, data, agg = 'global') { # data$obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml # agg: data aggregation - + time_dim <- 'syear' memb_dim <- 'ensemble' ncores <- recipe$Analysis$ncores @@ -29,6 +29,7 @@ Statistics <- function(recipe, 
data, agg = 'global') { } ## close if on covariance if (stat %in% c('std', 'standard_deviation')) { ## Calculate standard deviation + print() std_hcst <- Apply(data = hcst_ensmean, target_dims = c(time_dim), fun = 'sd')$output1 @@ -66,7 +67,7 @@ Statistics <- function(recipe, data, agg = 'global') { C_cov <- stats:::C_cov spread <- sqrt(Apply(Apply(data = data$hcst$data, target_dims = c(memb_dim), - fun = 'var')$output1, + fun = stats::var)$output1, fun = 'mean', target_dims = time_dim)$output1) spread <- .drop_dims(spread) statistics[['spread']] <- spread diff --git a/tools/check_recipe.R b/tools/check_recipe.R index ab77743b..7e7f33cb 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -98,9 +98,8 @@ check_recipe <- function(recipe) { } # Check multimodel if (is.null(recipe$Analysis$Datasets$Multimodel) || - (is.logical(recipe$Analysis$Datasets$Multimodel) && - !(recipe$Analysis$Datasets$Multimodel))) { - recipe$Analysis$Datasets$Multimodel$execute <- FALSE + isFALSE(recipe$Analysis$Datasets$Multimodel)) { + recipe$Analysis$Datasets$Multimodel <- list(execute = FALSE) } if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'false') { multimodel <- FALSE @@ -134,7 +133,7 @@ check_recipe <- function(recipe) { } } } else { - recipe$Analysis$Datasets$Multimodel$execute <- FALSE + recipe$Analysis$Datasets$Multimodel$execute <- FALSE } # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || @@ -217,12 +216,12 @@ check_recipe <- function(recipe) { "The 'Regrid' element must specify the 'method' and 'type'.") error_status <- TRUE } - + if (recipe$Analysis$Regrid$type == 'to_system' && multimodel) { - error(recipe$Run$logger, - paste0("The 'Regrid$type' cannot be 'to_system' if ", - "'Multimodel$execute' is yes/true or both.")) - error_status <- TRUE + error(recipe$Run$logger, + paste0("The 'Regrid$type' cannot be 'to_system' if ", + "'Multimodel$execute' is yes/true or both.")) + error_status <- TRUE } # TODO: Add Workflow checks? # ... 
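
The behaviour that the Multimodel hunks above converge on is easier to see
outside the diff. A minimal standalone sketch follows; the helper name
normalise_multimodel is hypothetical, and the recipe fields are assumed to be
the nested lists returned by yaml::read_yaml:

## Hypothetical helper mirroring the Multimodel normalisation in
## tools/check_recipe.R: an absent (NULL) or plainly FALSE field is
## replaced by a list with execute = FALSE, so downstream code can
## always read recipe$Analysis$Datasets$Multimodel$execute.
normalise_multimodel <- function(multimodel) {
  if (is.null(multimodel) || isFALSE(multimodel)) {
    multimodel <- list(execute = FALSE)
  }
  multimodel
}

str(normalise_multimodel(NULL))    # list(execute = FALSE)
str(normalise_multimodel(FALSE))   # list(execute = FALSE)
str(normalise_multimodel(list(execute = 'both')))  # returned unchanged
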
@@ -648,8 +647,14 @@ check_recipe <- function(recipe) {
     }
   }
   # Scorecards
+  if (!is.null(recipe$Analysis$Workflow$Statistics)) {
+    statistics <- strsplit(recipe$Analysis$Workflow$Statistics$metric,
+                           ", | |,")[[1]]
+  } else {
+    statistics <- NULL
+  }
   if ("Scorecards" %in% names(recipe$Analysis$Workflow)) {
-    if(recipe$Analysis$Workflow$Scorecards$execute == TRUE){
+    if (recipe$Analysis$Workflow$Scorecards$execute == TRUE) {
       if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) {
         error(recipe$Run$logger,
               "Parameter 'metric' must be defined under 'Scorecards'.")
@@ -674,12 +679,21 @@ check_recipe <- function(recipe) {
           requested_metrics <- c(requested_metrics, 'crps_syear')
         }
       }
-      if ('enscorr' %in% tolower(sc_metrics)) {
-        recipe$Analysis$Workflow$Statistics <- c('std', 'cov', 'n_eff')
+      if ('enscorr' %in% tolower(sc_metrics) &&
+          !all(c('std', 'cov', 'n_eff') %in% statistics)) {
+        error(recipe$Run$logger,
+              paste("For 'enscorr' to be plotted in the Scorecards with",
+                    "the 'score' aggregation, the Statistics module must",
+                    "be called and the statistics 'std', 'cov' and",
+                    "'n_eff' are required."))
+        error_status <- TRUE
       }
-      recipe$Analysis$Workflow$Skill$metric <- requested_metrics
+      recipe$Analysis$Workflow$Skill$metric <- paste0(requested_metrics,
+                                                      collapse = " ")
     }
     if (tolower(recipe$Analysis$Output_format) != 'scorecards') {
+      warn(recipe$Run$logger,
+           "Scorecards requested: setting output format as 'Scorecards'")
       recipe$Analysis$Output_format <- 'scorecards'
     }
     if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) {
-- 
GitLab


From c47fb7179056b3498a46dd9da879ecfa7adb6841 Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Tue, 7 May 2024 17:06:11 +0200
Subject: [PATCH 03/53] Adjustments for scorecards with launcher

---
 modules/Scorecards/Scorecards_calculations.R | 129 +++++++------------
 modules/Scorecards/Scorecards_plotting.R     |  13 +-
 modules/Scorecards/execute_scorecards.R      |   4 +-
 modules/Statistics/Statistics.R              |   1 -
 4 files changed, 52 insertions(+), 95 deletions(-)

diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R
index f54cfbc2..468a72d9 100644
--- a/modules/Scorecards/Scorecards_calculations.R
+++ b/modules/Scorecards/Scorecards_calculations.R
@@ -1,14 +1,17 @@
 ###############################################################################
-##################### SCORECARDS MODULE FOR SUNSET SUITE ######################
+############# Spatial aggregation calculations for scorecards #################
 ###############################################################################
 
-##### Load source functions #####
-# source('modules/Scorecards/R/tmp/LoadMetrics.R')
-# source('modules/Scorecards/R/tmp/WeightedMetrics.R')
+## This function spatially aggregates the skill metrics and statistics for the
+## scorecards. The aggregated statistical significance is also calculated for the
+## metrics when 'score' aggregation is requested. The aggregation regions are
+## defined in the general recipe under the scorecards section.
## Define function Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, recipe) { + ## TO DO: need to test with NAO data + ## Parameters for saving output data files output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) @@ -21,9 +24,6 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max start.months <- substr(recipe$Analysis$Time$sdate, 1,2) period <- paste0(start.year, "-", end.year) - - file.name.metrics <- paste0('scorecards_aggr_skill_',system,'_',reference,'_',var,'_',period,'_s',start.months,'.RDS') - file.name.sign <- paste0('scorecards_aggr_sign_',system,'_',reference,'_',var,'_',period,'_s',start.months,'.RDS') ## Parameters for data aggregation regions <- recipe$Analysis$Workflow$Scorecards$regions @@ -51,72 +51,49 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na } - ##### TO DO: Need to include condition for removing -INF ##### - # if (inf_to_na) { - # by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA - # } + lon_dim <- 'longitude' + lat_dim <- 'latitude' + time_dim <- 'syear' + memb_dim <- 'ensemble' + lon <- as.numeric(data$hcst$coords$longitude) + lat <- as.numeric(data$hcst$coords$latitude) + ## Define arrays to filled with data + aggr_metrics <- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions), + metric = length(metrics))) + + aggr_significance <- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions), + metric = length(metrics))) ############################# SKILL AGGREGATION ############################# if(metric.aggregation == 'skill'){ - - # ## Load data files - # loaded_metrics <- LoadMetrics(input_path = skill.input.path, - # system = system, - # reference = reference, - # var = var, - # metrics = metrics, - # period = period, - # start_months = start.months, - # calib_method = calib.method, - # inf_to_na = inf.to.na - # ) - - # ## Remove -Inf from crpss data if variable is precipitation ## from loadMetrics function - # if (inf_to_na) { - # by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA - # } - - ## Spatial Aggregation of metrics - if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ - - ### Convert loaded metrics to array for already aggregated data - metrics.dim <- attributes(loaded_metrics)$metrics - forecast.months.dim <- forecast.months - start.months.dim <- attributes(loaded_metrics)$start_months - regions.dim <- regions - aggr_metrics <- array(dim = c(system = length(loaded_metrics), - reference = length(loaded_metrics[[1]]), - metric = length(metrics.dim), - time = length(forecast.months.dim), - sdate = length(start.months.dim), - region = length(regions.dim))) - - - for (sys in 1:length(names(loaded_metrics))){ - for (ref in 1:length(names(loaded_metrics[[sys]]))){ - aggr_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) + ## Calculate weighted mean of spatial aggregation + for(met in metrics){ + result <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics[[met]], + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(result))[length(dim(result))] <- 'region' + 
result <-Subset(result, 'var', 1, drop = 'selected') + + if(met =='crpss' && inf_to_na == TRUE){ + result[result == -Inf] <- NA } - } - - ## Add attributes - attributes(aggr_metrics)$metrics <- metrics.load - attributes(aggr_metrics)$start.months <- attributes(loaded_metrics)$start_months - attributes(aggr_metrics)$forecast.months <- forecast.months - attributes(aggr_metrics)$regions <- regions - attributes(aggr_metrics)$system.name <- names(loaded_metrics) - attributes(aggr_metrics)$reference.name <- names(loaded_metrics[[1]]) - + + aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = result, order = c('time', 'region')) - } else { - ## Calculate weighted mean of spatial aggregation - aggr_metrics <- WeightedMetrics(loaded_metrics, - regions = regions, - forecast.months = forecast.months, - metric.aggregation = metric.aggregation, - ncores = ncores) - } ## close if on region + } ##close on met + aggr_significance <- NULL } ## close if on skill @@ -124,24 +101,6 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci ############################# SCORE AGGREGATION ############################# if(metric.aggregation == 'score'){ - lon_dim <- 'longitude' - lat_dim <- 'latitude' - time_dim <- 'syear' - memb_dim <- 'ensemble' - lon <- as.numeric(data$hcst$coords$longitude) - lat <- as.numeric(data$hcst$coords$latitude) - - ## Define arrays to filled with data - aggr_metrics <- array(data = NA, - dim = c(time = length(forecast.months), - region = length(regions), - metric = length(metrics))) - - aggr_significance <- array(data = NA, - dim = c(time = length(forecast.months), - region = length(regions), - metric = length(metrics))) - ## Spatially aggregate data for each metric for (met in metrics) { @@ -464,7 +423,7 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci aggr_significance <- InsertDim(aggr_significance, 1, 1, name = 'var') } - ## Include attributes (necessary?) 
+ ## Include attributes attributes(aggr_metrics)$metrics <- metrics attributes(aggr_metrics)$forecast.months <- forecast.months attributes(aggr_metrics)$regions <- regions diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index c2793bf1..ead6294e 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -25,8 +25,9 @@ Scorecards_plotting <- function(recipe) { output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) - system <- recipe$Analysis$Datasets$System$name - reference <- recipe$Analysis$Datasets$Reference$name + system <- as.vector(unlist(recipe$Analysis$Datasets$System)) + reference <- as.vector(unlist(recipe$Analysis$Datasets$Reference)) + var <- recipe$Analysis$Variables$name start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) @@ -107,6 +108,7 @@ Scorecards_plotting <- function(recipe) { calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff } + if ('name' %in% names(recipe$Analysis$Variables)){ if (recipe$Analysis$Variables$name == var) { var.units <- recipe$Analysis$Variables$units @@ -129,11 +131,7 @@ Scorecards_plotting <- function(recipe) { start_months = as.numeric(start.months), calib_method = calib.method) - attributes(aggregated_metrics)$metrics <- metrics - # attributes(aggregated_metrics)$forecast.months <- forecast.months - # attributes(aggregated_metrics)$regions <- regions - # attributes(aggregated_metrics)$system.name <- system - # attributes(aggregated_metrics)$reference.name <- reference + attributes(aggregated_metrics)$metrics <- metrics aggregated_significance <- LoadMetrics(input_path = input.path, system = system, @@ -146,6 +144,7 @@ Scorecards_plotting <- function(recipe) { aggregated_significance <- aggregated_significance == 1 + ####### PLOT SCORECARDS ########## ## Create simple scorecard tables diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R index 6a8de1bc..0999b8ec 100644 --- a/modules/Scorecards/execute_scorecards.R +++ b/modules/Scorecards/execute_scorecards.R @@ -1,5 +1,5 @@ source('tools/libs.R') -source('modules/Scorecards/Scorecards.R') +source('modules/Scorecards/Scorecards_plotting.R') args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] @@ -31,7 +31,7 @@ for (variable in 1:length(recipe$Analysis$Variables)) { as.vector(unlist(recipe$Analysis$Datasets$Reference)) scorecard_recipe$Analysis$Variables <- recipe$Analysis$Variables[[variable]] - # Plot Scorecards + ## Plot Scorecards Scorecards_plotting(scorecard_recipe) } print("##### SCORECARDS SAVED TO THE OUTPUT DIRECTORY #####") diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index d3b805f8..6fa5d03c 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -29,7 +29,6 @@ Statistics <- function(recipe, data, agg = 'global') { } ## close if on covariance if (stat %in% c('std', 'standard_deviation')) { ## Calculate standard deviation - print() std_hcst <- Apply(data = hcst_ensmean, target_dims = c(time_dim), fun = 'sd')$output1 -- GitLab From e105a27c90e233585f3b7f4fa84bf9cf751bcdac Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 10 May 2024 16:38:23 +0200 Subject: [PATCH 04/53] include indices condition for scorecards --- modules/Scorecards/Scorecards_calculations.R | 662 ++++++++++--------- 1 file changed, 339 insertions(+), 323 deletions(-) 
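
Patch 04 makes Scorecards_calculations.R branch on whether the metrics are
still on a latitude-longitude grid or already carry a 'region' dimension. For
the gridded branch, the aggregation pattern that the diff below repeats for
every metric can be sketched in isolation. This is a simplified sketch, not
part of the patch: 'field', 'regions', 'lon' and 'lat' are illustrative names
for one gridded metric, the named list of c(lon.min, lon.max, lat.min,
lat.max) vectors from the recipe, and the coordinate vectors:

library(ClimProjDiags)  # WeightedMean(), Subset()
library(s2dv)           # Reorder()

## Area-weighted mean of 'field' over each requested region
aggr <- sapply(X = 1:length(regions),
               FUN = function(X) {
                 WeightedMean(data = field, region = regions[[X]],
                              lon = lon, londim = 'longitude',
                              lat = lat, latdim = 'latitude',
                              na.rm = FALSE)
               }, simplify = 'array')
names(dim(aggr))[length(dim(aggr))] <- 'region'    # sapply leaves it unnamed
aggr <- Subset(aggr, 'var', 1, drop = 'selected')  # drop the singleton 'var'
aggr <- Reorder(aggr, c('time', 'region'))         # layout used by the scorecards
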
diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index 468a72d9..5ba0a569 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -51,12 +51,6 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na } - lon_dim <- 'longitude' - lat_dim <- 'latitude' - time_dim <- 'syear' - memb_dim <- 'ensemble' - lon <- as.numeric(data$hcst$coords$longitude) - lat <- as.numeric(data$hcst$coords$latitude) ## Define arrays to filled with data aggr_metrics <- array(data = NA, @@ -69,353 +63,375 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci region = length(regions), metric = length(metrics))) - ############################# SKILL AGGREGATION ############################# - if(metric.aggregation == 'skill'){ - - ## Calculate weighted mean of spatial aggregation - for(met in metrics){ - result <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics[[met]], - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(result))[length(dim(result))] <- 'region' - result <-Subset(result, 'var', 1, drop = 'selected') - - if(met =='crpss' && inf_to_na == TRUE){ - result[result == -Inf] <- NA - } - - aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = result, order = c('time', 'region')) + + ## For data that is already aggregated by region + if ("region" %in% names(dim(skill_metrics[[1]]))) { + + aggr_metrics <- NULL + + for(met in metrics){ + skill_metrics[[met]] <- Reorder(skill_metrics[[met]], c("time", "region", "var")) + aggr_metrics <- abind(aggr_metrics, skill_metrics[[met]], along=3) + } - } ##close on met - - aggr_significance <- NULL + names(dim(aggr_metrics)) <- c("time", "region", "metric") - } ## close if on skill + } else { - ############################# SCORE AGGREGATION ############################# - if(metric.aggregation == 'score'){ + lon_dim <- 'longitude' + lat_dim <- 'latitude' + time_dim <- 'syear' + memb_dim <- 'ensemble' + lon <- as.numeric(data$hcst$coords$longitude) + lat <- as.numeric(data$hcst$coords$latitude) - ## Spatially aggregate data for each metric - for (met in metrics) { - - if(met == 'rpss'){ - - rps_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics$rps_syear, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - rps_clim_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics$rps_clim_syear, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' - names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' - - ## Remove 'var' dimension - rps_syear <-Subset(rps_syear, 'var', 1, drop = 'selected') - rps_clim_syear <-Subset(rps_clim_syear, 'var', 1, drop = 'selected') - - ## Calculate significance - sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Average over 'syear' dimension - rps_syear <- Apply(data = rps_syear, - target_dims = time_dim, - fun = 
'mean', ncores = ncores)$output1 - - rps_clim_syear <- Apply(data = rps_clim_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate RPSS from aggregated RPS and RPS_clim - rpss <- 1 - rps_syear / rps_clim_syear - - ## Save metric result in arrays - aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'region')) + ############################# SKILL AGGREGATION ############################# + if(metric.aggregation == 'skill'){ + + ## Calculate weighted mean of spatial aggregation + for(met in metrics){ + result <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics[[met]], + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(result))[length(dim(result))] <- 'region' + result <-Subset(result, 'var', 1, drop = 'selected') + + if(met =='crpss' && inf_to_na == TRUE){ + result[result == -Inf] <- NA + } + + aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = result, order = c('time', 'region')) - } ## close if on rpss + } ##close on met + + } ## close if on skill + + ############################# SCORE AGGREGATION ############################# + if(metric.aggregation == 'score'){ - if(met == 'crpss'){ - - crps_syear <- sapply(X = 1:length(regions), + ## Spatially aggregate data for each metric + for (met in metrics) { + + if(met == 'rpss'){ + + rps_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$rps_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + rps_clim_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$rps_clim_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' + names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' + + ## Remove 'var' dimension + rps_syear <-Subset(rps_syear, 'var', 1, drop = 'selected') + rps_clim_syear <-Subset(rps_clim_syear, 'var', 1, drop = 'selected') + + ## Calculate significance + sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, + time_dim = time_dim, test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Average over 'syear' dimension + rps_syear <- Apply(data = rps_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + rps_clim_syear <- Apply(data = rps_clim_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate RPSS from aggregated RPS and RPS_clim + rpss <- 1 - rps_syear / rps_clim_syear + + ## Save metric result in arrays + aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'region')) + + } ## close if on rpss + + if(met == 'crpss'){ + + crps_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$crps_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + crps_clim_syear <- sapply(X = 
1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$crps_clim_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' + names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' + + ## Remove 'var' dimension + crps_syear <-Subset(crps_syear, 'var', 1, drop = 'selected') + crps_clim_syear <-Subset(crps_clim_syear, 'var', 1, drop = 'selected') + + ## Calculate significance + sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, + time_dim = time_dim, test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Average over 'syear' dimension + crps_syear <- Apply(data = crps_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + crps_clim_syear <- Apply(data = crps_clim_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate CRPSS from aggregated CRPS and CRPS_clim + crpss <- 1 - crps_syear / crps_clim_syear + + ## Save metric result in arrays + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'region')) + + } ## close if on crpss + + if(met == 'enscorr'){ + + cov <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$cov, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + std_hcst <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = skill_metrics$crps_syear, + WeightedMean(data = statistics$std_hcst, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - - crps_clim_syear <- sapply(X = 1:length(regions), + + std_obs <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$std_obs, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + n_eff <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$n_eff, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(cov))[length(dim(cov))] <- 'region' + names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' + names(dim(std_obs))[length(dim(std_obs))] <- 'region' + names(dim(n_eff))[length(dim(n_eff))] <- 'region' + + ## Remove 'var' dimension + cov <- Subset(cov, 'var', 1, drop = 'selected') + std_hcst <- Subset(std_hcst, 'var', 1, drop = 'selected') + std_obs <- Subset(std_obs, 'var', 1, drop = 'selected') + n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') + + ## Calculate correlation + enscorr <- cov / (std_hcst * std_obs) + + ## Calculate significance of corr + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) + + sign_corr<- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions))) + + for (time in 1:dim(sign_corr)[['time']]){ + for (reg in 1:dim(sign_corr)[['region']]){ + + if (anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE + && t[time, reg] >= t_alpha2_n2[time, reg]){ + sign_corr[time, reg] <- TRUE + } else { 
+ sign_corr[time, reg] <- FALSE + } + } + } + + ## Save metric result in arrays + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + + } ## close if on enscorr + + if(met == 'mean_bias'){ + + ## Calculate ensemble mean + hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') + obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') + + ## Aggregate data over regions + hcst_data_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = hcst_data_ens, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + obs_data_aggr <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = skill_metrics$crps_clim_syear, + WeightedMean(data = obs_data_ens, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - - ## Include name of region dimension - names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' - names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' - - ## Remove 'var' dimension - crps_syear <-Subset(crps_syear, 'var', 1, drop = 'selected') - crps_clim_syear <-Subset(crps_clim_syear, 'var', 1, drop = 'selected') - - ## Calculate significance - sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Average over 'syear' dimension - crps_syear <- Apply(data = crps_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - crps_clim_syear <- Apply(data = crps_clim_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate CRPSS from aggregated CRPS and CRPS_clim - crpss <- 1 - crps_syear / crps_clim_syear - - ## Save metric result in arrays - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'region')) - - } ## close if on crpss - - if(met == 'enscorr'){ - - cov <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = statistics$cov, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - std_hcst <- sapply(X = 1:length(regions), + + ## Include name of region dimension + names(dim(hcst_data_aggr))[length(dim(hcst_data_aggr))] <- 'region' + names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' + + ## Remove unnecessary dimension + hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') + obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') + + ## Calculate significance + pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), + target_dims = c('syear'), ncores = ncores, + fun = function(x,y){t.test(as.vector(x),as.vector(y))})$p.value + + sign_mean_bias <- pval_mean_bias <= alpha + + ## Calculate aggregated mean bias metric + mean_bias <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$mean_bias, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include 
name of region dimension + names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' + + ## Remove 'var' dimension + mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected') + + ## Save metric result in array + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_mean_bias, order = c('time', 'region')) + + } ## close on mean_bias + + if(met == 'enssprerr'){ + + ## Calculate metric + spread <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$std_hcst, + WeightedMean(data = statistics$spread, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) - }, simplify = 'array') - - std_obs <- sapply(X = 1:length(regions), + }, simplify = 'array') + + error <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$std_obs, + WeightedMean(data = skill_metrics$rms, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) - }, simplify = 'array') - - n_eff <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = statistics$n_eff, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(cov))[length(dim(cov))] <- 'region' - names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' - names(dim(std_obs))[length(dim(std_obs))] <- 'region' - names(dim(n_eff))[length(dim(n_eff))] <- 'region' - - ## Remove 'var' dimension - cov <- Subset(cov, 'var', 1, drop = 'selected') - std_hcst <- Subset(std_hcst, 'var', 1, drop = 'selected') - std_obs <- Subset(std_obs, 'var', 1, drop = 'selected') - n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') - - ## Calculate correlation - enscorr <- cov / (std_hcst * std_obs) - - ## Calculate significance of corr - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) - t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) - - sign_corr<- array(data = NA, - dim = c(time = length(forecast.months), - region = length(regions))) - - for (time in 1:dim(sign_corr)[['time']]){ - for (reg in 1:dim(sign_corr)[['region']]){ - - if (anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE - && t[time, reg] >= t_alpha2_n2[time, reg]){ - sign_corr[time, reg] <- TRUE - } else { - sign_corr[time, reg] <- FALSE - } - } - } - - ## Save metric result in arrays - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) - - } ## close if on enscorr - - if(met == 'mean_bias'){ - - ## Calculate ensemble mean - hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') - obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') - - ## Aggregate data over regions - hcst_data_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = hcst_data_ens, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - obs_data_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = obs_data_ens, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(hcst_data_aggr))[length(dim(hcst_data_aggr))] 
<- 'region' - names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' - - ## Remove unnecessary dimension - hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') - obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') - - ## Calculate significance - pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), - target_dims = c('syear'), ncores = ncores, - fun = function(x,y){t.test(as.vector(x),as.vector(y))})$p.value - - sign_mean_bias <- pval_mean_bias <= alpha - - ## Calculate aggregated mean bias metric - mean_bias <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics$mean_bias, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' - - ## Remove 'var' dimension - mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected') - - ## Save metric result in array - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_mean_bias, order = c('time', 'region')) - - } ## close on mean_bias - - if(met == 'enssprerr'){ - - ## Calculate metric - spread <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = statistics$spread, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - error <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics$rms, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(spread))[length(dim(spread))] <- 'region' - names(dim(error))[length(dim(error))] <- 'region' - - ## Remove 'var' dimension - spread <- Subset(spread, 'var', 1, drop = 'selected') - error <- Subset(error, 'var', 1, drop = 'selected') - - enssprerr <- spread / error - - # ## Significance calculation - # - # # Effective sample size - # # exp <- data$hcst$data - # # ens_exp <- MeanDims(data$hcst$data, dims = 'ensemble') - # # enospr <- sum(Eno(exp - InsertDim(ens_exp, length(dim(exp)), dim(exp)['ensemble']), "ensemble")) - # # enodif <- .Eno(ens_exp - ens_obs, na.action = na.pass) - # - # # Removing eno at the moment - # # F <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1)) - # F <- spread^2 / error^2 - # # if (!is.na(F) & !is.na(enospr) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { - # # p.val <- pf(F, enospr - 1, enodif - 1) - # - # pval_enssprerr <- Apply(data = list(x = F, y = data$hcst$data), ### DOES NOT WORK - # target_dims = c('time'), - # fun = function(x, y) - # {pf(x, dim(y)['syear'] -1 ,dim(y)['syear'] -1)}) - # - # pval_enssprerr <- pf(F,dim(data$hcst$data)['syear'] -1 ,dim(data$hcst$data)['syear'] -1) - # pval_enssprerr <- 2 * min(pval_enssprerr, 1 - pval_enssprerr) - # - # sign_enssprerr <- pval_enssprerr <= alpha - - ## Save metric result in array - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'region')) - # aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + }, simplify = 'array') + + ## Include name of region dimension + 
names(dim(spread))[length(dim(spread))] <- 'region' + names(dim(error))[length(dim(error))] <- 'region' + + ## Remove 'var' dimension + spread <- Subset(spread, 'var', 1, drop = 'selected') + error <- Subset(error, 'var', 1, drop = 'selected') + + enssprerr <- spread / error + + # ## Significance calculation + # + # # Effective sample size + # # exp <- data$hcst$data + # # ens_exp <- MeanDims(data$hcst$data, dims = 'ensemble') + # # enospr <- sum(Eno(exp - InsertDim(ens_exp, length(dim(exp)), dim(exp)['ensemble']), "ensemble")) + # # enodif <- .Eno(ens_exp - ens_obs, na.action = na.pass) + # + # # Removing eno at the moment + # # F <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1)) + # F <- spread^2 / error^2 + # # if (!is.na(F) & !is.na(enospr) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + # # p.val <- pf(F, enospr - 1, enodif - 1) + # + # pval_enssprerr <- Apply(data = list(x = F, y = data$hcst$data), ### DOES NOT WORK + # target_dims = c('time'), + # fun = function(x, y) + # {pf(x, dim(y)['syear'] -1 ,dim(y)['syear'] -1)}) + # + # pval_enssprerr <- pf(F,dim(data$hcst$data)['syear'] -1 ,dim(data$hcst$data)['syear'] -1) + # pval_enssprerr <- 2 * min(pval_enssprerr, 1 - pval_enssprerr) + # + # sign_enssprerr <- pval_enssprerr <= alpha + + ## Save metric result in array + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'region')) + # aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + + } ## close on enssprerr - } ## close on enssprerr + } ## close loop on metric - } ## close loop on metric - - ## Set NAs to False - aggr_significance[is.na(aggr_significance)] <- FALSE - - } ## close if on score + } ## close if on score + + } ## close if on region + + ## Set NAs to False + aggr_significance[is.na(aggr_significance)] <- FALSE ## Include 'var' dimension to be able to save array if(!'var' %in% names(dim(aggr_metrics))){ -- GitLab From 4f75c9a4cd88256ee82fb1b98d8c4f429f1a3cad Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 13 May 2024 11:34:21 +0200 Subject: [PATCH 05/53] cleaning code --- modules/Scorecards/R/tmp/WeightedMetrics.R | 110 ------------------- modules/Scorecards/Scorecards_calculations.R | 97 +++++++++------- modules/Scorecards/Scorecards_plotting.R | 17 ++- 3 files changed, 70 insertions(+), 154 deletions(-) delete mode 100644 modules/Scorecards/R/tmp/WeightedMetrics.R diff --git a/modules/Scorecards/R/tmp/WeightedMetrics.R b/modules/Scorecards/R/tmp/WeightedMetrics.R deleted file mode 100644 index 08267db4..00000000 --- a/modules/Scorecards/R/tmp/WeightedMetrics.R +++ /dev/null @@ -1,110 +0,0 @@ -#' Scorecards spatial aggregation of loaded metrics -#' -#'@description Scorecards function to perform the spatial aggregation of the -#' loaded metrics for the specified regions. -#' -#'@param loaded_metrics is a list of arrays containing the metrics loaded by the -#' function SC_load_metrics. -#'@param region is a named list of vectors containing the desired regions to -#' analyze. For each region the following should be specified in this order: -#' lon_min, lon_max, lat_min, lat_max. -#'@param metric.aggregation a character indicating whether the skill score RPS -#' and CRPSS are calculated from aggregated scores or aggregated skill score -#' directly, either 'score' or 'skill' respectively -#'@param ncores is the number of cores to use for the calculation. 
-#' -#'@return An array with the following dimensions: system, reference, metrics, -#' time, sdate, region. -#' -#'@examples -#'regions <- list('global' = c(lon.min = 0, lon.max = 360, lat.min = -90, lat.max = 90), -#' 'europe' = c(lon.min = -10, lon.max = 40, lat.min = 30, lat.max = 70)) -#'aggregated_metrics <- WeightedMetrics(loaded_metrics, -#' regions = regions, -#' metric.aggregation = 'skill', -#' ncores = 4) -#'@import multiApply -#'@importFrom ClimProjDiags WeightedMean -#'@importFrom s2dv Reorder -#'@export -WeightedMetrics <- function(loaded_metrics, regions, forecast.months, - metric.aggregation, ncores = NULL, na.rm = TRUE) { - ## Initial checks - # loaded_metrics - if (any(sapply(loaded_metrics, function(x) { - sapply(x, function(y) {is.null(dim(y))}) - }))) { - stop(paste0("Parameter 'loaded_metrics' must be a list of lists of arrays ", - "with named dimensions.")) - } - # regions - if (!all(sapply(regions, is.numeric))) { - stop(paste0("Parameter 'regions' must be a named list of vectors ", - "containing the desired regions to analyze.")) - } - # metric.aggregation - if (!is.character(metric.aggregation)) { - stop("Parameter 'metric.aggregation' must be a character indicating.") - } - # ncores - if (!is.numeric(ncores)) { - stop("Parameter 'ncores' must be an integer.") - } - - ## Get metric names - ## TO DO: check all metric are in the same order for all sys - metrics <- attributes(loaded_metrics)$metrics - start.months <- attributes(loaded_metrics)$start_months - - all_metric_means <- array(dim = c(metric = length(metrics), - time = length(forecast.months), - sdate = length(start.months), - region = length(regions), - reference = length(loaded_metrics[[1]]), - system = length(loaded_metrics))) - - ## Loop over system - for (sys in 1:length(loaded_metrics)) { - ## Loop over reference - for (ref in 1:length(loaded_metrics[[sys]])) { - dimnames <- names(dim(loaded_metrics[[sys]][[ref]])) - lon_dim_name <- dimnames[which(dimnames %in% .KnownLonNames())] - lat_dim_name <- dimnames[which(dimnames %in% .KnownLatNames())] - ## Get latitude and longitude from attributes of loaded metrics - ## Loop over region - for (reg in 1:length(regions)) { - ## Calculate weighted means for defined regions for each system and reference - weighted.mean <- WeightedMean(data = loaded_metrics[[sys]][[ref]], - lon = as.vector(attributes(loaded_metrics[[sys]][[ref]])$lon), - lat = as.vector(attributes(loaded_metrics[[sys]][[ref]])$lat), - region = regions[[reg]], - londim = lon_dim_name, - latdim = lat_dim_name, - na.rm = na.rm, - ncores = ncores) - - if (!all(names(dim(weighted.mean)) == c('metric', 'time', 'sdate'))) { - weighted.mean <- Reorder(weighted.mean, c('metric', 'time', 'sdate')) - } - - all_metric_means[, , , reg, ref, sys] <- weighted.mean - - } ## close loop on region - } ## close loop on reference - } ## close loop on system - - ## reorder dimensions in array - all_metric_means <- s2dv::Reorder(all_metric_means, c('system','reference','metric','time','sdate','region')) - - ## Add attributes - attributes(all_metric_means)$metrics <- metrics - attributes(all_metric_means)$start.months <- start.months - attributes(all_metric_means)$forecast.months <- forecast.months - attributes(all_metric_means)$regions <- regions - attributes(all_metric_means)$system.name <- names(loaded_metrics) - attributes(all_metric_means)$reference.name <- names(loaded_metrics[[1]]) - - return(all_metric_means) - -} ## close function - diff --git a/modules/Scorecards/Scorecards_calculations.R 
b/modules/Scorecards/Scorecards_calculations.R index 5ba0a569..ca39dd01 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -5,12 +5,12 @@ ## This function spatially aggregates the skill metrics and statistics for the ## scorecards. The aggregated statistical significance is also calculate for the ## metrics when 'score' aggregate is requested. The aggregation regions are -## defined in the general recipe under the scorecards section. +## defined in the recipe under the scorecards section. This function reads from +## the atomic recipes. ## Define function -Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, recipe) { - - ## TO DO: need to test with NAO data +Scorecards_calculations <- function(data, skill_metrics, + statistics = NULL, recipe) { ## Parameters for saving output data files output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") @@ -21,7 +21,7 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci var <- recipe$Analysis$Variables$name start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) - forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max + forecast.months <- recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max start.months <- substr(recipe$Analysis$Time$sdate, 1,2) period <- paste0(start.year, "-", end.year) @@ -30,7 +30,8 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation - metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), + ", | |,")) ncores <- recipe$Analysis$ncores if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){ @@ -51,13 +52,7 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na } - - ## Define arrays to filled with data - aggr_metrics <- array(data = NA, - dim = c(time = length(forecast.months), - region = length(regions), - metric = length(metrics))) - + ## Define array to filled with data aggr_significance <- array(data = NA, dim = c(time = length(forecast.months), region = length(regions), @@ -70,13 +65,20 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci aggr_metrics <- NULL for(met in metrics){ - skill_metrics[[met]] <- Reorder(skill_metrics[[met]], c("time", "region", "var")) + skill_metrics[[met]] <- Reorder(skill_metrics[[met]], + c("time", "region", "var")) aggr_metrics <- abind(aggr_metrics, skill_metrics[[met]], along=3) } names(dim(aggr_metrics)) <- c("time", "region", "metric") } else { + + ## Define arrays to filled with data + aggr_metrics <- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions), + metric = length(metrics))) lon_dim <- 'longitude' lat_dim <- 'latitude' @@ -85,7 +87,7 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci lon <- as.numeric(data$hcst$coords$longitude) lat <- as.numeric(data$hcst$coords$latitude) - ############################# SKILL AGGREGATION ############################# + ## Skill aggregation if(metric.aggregation == 'skill'){ ## Calculate weighted mean of spatial aggregation @@ -102,17 +104,18 @@ 
Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci names(dim(result))[length(dim(result))] <- 'region' result <-Subset(result, 'var', 1, drop = 'selected') - if(met =='crpss' && inf_to_na == TRUE){ + if(met =='crpss' && inf.to.na == TRUE){ result[result == -Inf] <- NA } - aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = result, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = result, + order = c('time', 'region')) - } ##close on met + } ## close on met } ## close if on skill - ############################# SCORE AGGREGATION ############################# + ## Score Aggregation if(metric.aggregation == 'score'){ ## Spatially aggregate data for each metric @@ -143,12 +146,13 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' ## Remove 'var' dimension - rps_syear <-Subset(rps_syear, 'var', 1, drop = 'selected') - rps_clim_syear <-Subset(rps_clim_syear, 'var', 1, drop = 'selected') + rps_syear <- Subset(rps_syear, 'var', 1, drop = 'selected') + rps_clim_syear <- Subset(rps_clim_syear, 'var', 1, drop = 'selected') ## Calculate significance sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', + time_dim = time_dim, + test.type = 'two.sided', alpha = alpha, pval = FALSE, sign = TRUE, ncores = NULL)$sign @@ -165,8 +169,10 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci rpss <- 1 - rps_syear / rps_clim_syear ## Save metric result in arrays - aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = rpss, + order = c('time', 'region')) + aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_rpss, + order = c('time', 'region')) } ## close if on rpss @@ -195,12 +201,13 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' ## Remove 'var' dimension - crps_syear <-Subset(crps_syear, 'var', 1, drop = 'selected') - crps_clim_syear <-Subset(crps_clim_syear, 'var', 1, drop = 'selected') + crps_syear <- Subset(crps_syear, 'var', 1, drop = 'selected') + crps_clim_syear <- Subset(crps_clim_syear, 'var', 1, drop = 'selected') ## Calculate significance sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', + time_dim = time_dim, + test.type = 'two.sided', alpha = alpha, pval = FALSE, sign = TRUE, ncores = NULL)$sign @@ -217,8 +224,10 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci crpss <- 1 - crps_syear / crps_clim_syear ## Save metric result in arrays - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = crpss, + order = c('time', 'region')) + aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_crpss, + order = c('time', 'region')) } ## close if on crpss @@ -296,8 +305,10 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci } ## Save metric result in arrays - aggr_metrics[ , , 
which(metrics == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = enscorr, + order = c('time', 'region')) + aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr, + order = c('time', 'region')) } ## close if on enscorr @@ -331,13 +342,16 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' ## Remove unnecessary dimension - hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') - obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') + hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), + list(1,1,1,1) , drop = 'selected') + obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), + list(1,1,1,1) , drop = 'selected') ## Calculate significance pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), target_dims = c('syear'), ncores = ncores, - fun = function(x,y){t.test(as.vector(x),as.vector(y))})$p.value + fun = function(x,y) + {t.test(as.vector(x),as.vector(y))})$p.value sign_mean_bias <- pval_mean_bias <= alpha @@ -358,8 +372,10 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected') ## Save metric result in array - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_mean_bias, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = mean_bias, + order = c('time', 'region')) + aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_mean_bias, + order = c('time', 'region')) } ## close on mean_bias @@ -419,8 +435,10 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci # sign_enssprerr <- pval_enssprerr <= alpha ## Save metric result in array - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'region')) - # aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = enssprerr, + order = c('time', 'region')) + # aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr, + # order = c('time', 'region')) } ## close on enssprerr @@ -446,7 +464,8 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci attributes(aggr_metrics)$system.name <- system attributes(aggr_metrics)$reference.name <- reference - aggr_scorecards <- list(aggr_metrics = aggr_metrics, aggr_significance = aggr_significance) + aggr_scorecards <- list(aggr_metrics = aggr_metrics, + aggr_significance = aggr_significance) ## Save metric data arrays recipe$Run$output_dir <- paste0(recipe$Run$output_dir, diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index ead6294e..fd59b36f 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -1,7 +1,11 @@ ############################################################################### -##################### SCORECARDS MODULE FOR SUNSET SUITE ###################### 
+##################### Scorecard visualization plotting ####################### ############################################################################### +## This function loads the saved netcdf files containing the spatially +## aggregated skill metrics for the scorecards and plots the scorecard +## visualizations. The function reads from the general recipe. + ##### Load source functions ##### source('modules/Scorecards/R/tmp/LoadMetrics.R') source('modules/Scorecards/R/tmp/Utils.R') @@ -31,11 +35,13 @@ Scorecards_plotting <- function(recipe) { var <- recipe$Analysis$Variables$name start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) - forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max + forecast.months <- recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), + ", | |,")) - if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { + if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || + is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) } else { start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, @@ -228,7 +234,8 @@ Scorecards_plotting <- function(recipe) { col1.width = col1.width, col2.width = col2.width, output.path = output.path) - } else {stop ("Difference scorecard can only be computed with two systems or two references.")} + } else {stop ( + "Difference scorecard can only be computed with two systems or two references.")} } ## close if on calculate.diff } -- GitLab From ea8dc32ed903abd4711d12b9af02c5365c01b07f Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 13 May 2024 11:38:14 +0200 Subject: [PATCH 06/53] Cleaning LoadMetrics function --- modules/Scorecards/R/tmp/LoadMetrics.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index fe19dc85..2a70eff9 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -30,14 +30,13 @@ #' period = '1993-2016' #' start_months = sprintf("%02d", 1:12), #' calib_method = 'raw', -#' input_path = '/esarchive/scratch/nmilders/scorecards_data/input_data') +#' input_path = './scorecards_data/input_data') #'} #'@import easyNCDF #'@import multiApply #'@export LoadMetrics <- function(input_path, system, reference, var, period, data_type, - # metrics, start_months, calib_method = NULL) { # Initial checks @@ -109,12 +108,13 @@ LoadMetrics <- function(input_path, system, reference, var, period, data_type, result_attr <- attributes(result) - by_reference <- abind::abind(by_reference, result, along = length(dim(result)) + 1) + by_reference <- abind::abind(by_reference, result, + along = length(dim(result)) + 1) dim(by_reference) <- c(dim(result), reference = length(reference)) } ## close loop on reference - all_metrics <- abind::abind(all_metrics, by_reference, along = length(dim(by_reference)) + 1) + all_metrics <- abind::abind(all_metrics, by_reference, + along = length(dim(by_reference)) + 1) dim(all_metrics) <- c(dim(by_reference), system = length(system)) - # 
attributes(all_metrics) <- result_attr[-1] } ## close loop on system attributes(all_metrics)$start_months <- start_months -- GitLab From 75fe3e5981b0e113e6aeca9614284dc9e418b061 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 31 May 2024 16:50:01 +0200 Subject: [PATCH 07/53] included mean_bias and spread-to-error functions which compute significance --- modules/Skill/R/tmp/Bias.R | 216 +++++++++++++++++++++++++++++++++ modules/Skill/R/tmp/SprErr.R | 226 +++++++++++++++++++++++++++++++++++ modules/Skill/Skill.R | 40 ++++--- 3 files changed, 464 insertions(+), 18 deletions(-) create mode 100644 modules/Skill/R/tmp/Bias.R create mode 100644 modules/Skill/R/tmp/SprErr.R diff --git a/modules/Skill/R/tmp/Bias.R b/modules/Skill/R/tmp/Bias.R new file mode 100644 index 00000000..098a678e --- /dev/null +++ b/modules/Skill/R/tmp/Bias.R @@ -0,0 +1,216 @@ +#'Compute the Mean Bias +#' +#'The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference +#'between the ensemble mean forecast and the observations. It is a deterministic +#'metric. Positive values indicate that the forecasts are on average too high +#'and negative values indicate that the forecasts are on average too low. +#'It also allows to compute the Absolute Mean Bias or bias without temporal +#'mean. If there is more than one dataset, the result will be computed for each +#'pair of exp and obs data. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +#' 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter +#' 'exp' is already the ensemble mean. The default value is NULL. +#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or +#' kept (FALSE) for computation. The default value is FALSE. +#'@param absolute A logical value indicating whether to compute the absolute +#' bias. The default value is FALSE. +#'@param time_mean A logical value indicating whether to compute the temporal +#' mean of the bias. The default value is TRUE. +#'@param alpha A numeric or NULL (default) to indicate the significance level using Weltch test. Only available when absolute is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number +#'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If alpha is specified, and absolute is FALSE, the result is a list with two elements, the bias as describe above and the significance as logical array with the same dimensions. 
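+#'
+#'@details
+#'When 'alpha' is provided and 'absolute' is FALSE, the significance of the
+#'mean bias is assessed with a two-sided Welch's t-test between the observed
+#'and forecast time series. A minimal base-R sketch of that test follows;
+#''exp_ts' and 'obs_ts' are hypothetical example vectors, not part of this
+#'function's interface:
+#'\preformatted{
+#'exp_ts <- rnorm(30)
+#'obs_ts <- rnorm(30)
+#'pval <- t.test(x = obs_ts, y = exp_ts, alternative = "two.sided")$p.value
+#'sig <- pval <= 0.05
+#'}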
+#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) +#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE, + absolute = FALSE, time_mean = TRUE, alpha = NULL, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop("Parameter 'exp' must be a numeric array.") + if (!is.array(obs) | !is.numeric(obs)) + stop("Parameter 'obs' must be a numeric array.") + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop("Parameter 'time_dim' must be a character string.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## absolute + if (!is.logical(absolute) | length(absolute) > 1) { + stop("Parameter 'absolute' must be one logical value.") + } + ## time_mean + if (!is.logical(time_mean) | length(time_mean) > 1) { + stop("Parameter 'time_mean' must be one logical value.") + } + ## alpha + if (!is.null(alpha)) { + if (!is.numeric(alpha) | length(alpha) > 1) { + stop("Parameter 'alpha' must be null or a numeric value.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + 
###############################
+  
+  ## Ensemble mean
+  if (!is.null(memb_dim)) {
+    exp <- MeanDims(exp, memb_dim, na.rm = na.rm)
+  }
+  
+  ## (Mean) Bias
+  bias <- Apply(data = list(exp, obs),
+                target_dims = c(time_dim, dat_dim),
+                fun = .Bias,
+                time_dim = time_dim,
+                dat_dim = dat_dim,
+                na.rm = na.rm,
+                absolute = absolute,
+                time_mean = time_mean,
+                alpha = alpha,
+                ncores = ncores)
+  
+  if (is.null(alpha)) {
+    bias <- bias$output1
+  }
+  return(bias)
+}
+
+
+.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE,
+                  absolute = FALSE, time_mean = TRUE, alpha = NULL) {
+  # exp and obs: [sdate, (dat)]
+  if (is.null(dat_dim)) {
+    bias <- exp - obs
+    
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+    
+    if (isTRUE(time_mean)) {
+      bias <- mean(bias, na.rm = na.rm)
+    }
+    
+    if (!is.null(alpha)) {
+      if (!absolute) {
+        pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value
+        sig <- pval <= alpha
+      }
+    }
+  } else {
+    nexp <- as.numeric(dim(exp)[dat_dim])
+    nobs <- as.numeric(dim(obs)[dat_dim])
+    bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+    pval <- array(dim = c(nexp = nexp, nobs = nobs))
+    sig <- array(dim = c(nexp = nexp, nobs = nobs))
+    for (i in 1:nexp) {
+      for (j in 1:nobs) {
+        bias[, i, j] <- exp[, i] - obs[, j]
+        if (!is.null(alpha)) {
+          if (!absolute) {
+            pval[i,j] <- t.test(x = obs[,j], y = exp[,i],
+                                alternative = "two.sided")$p.value
+            sig[i,j] <- pval <= alpha
+          }
+        }
+      }
+    }
+    
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+    
+    if (isTRUE(time_mean)) {
+      bias <- MeanDims(bias, time_dim, na.rm = na.rm)
+    }
+  }
+  if (!is.null(alpha) && !absolute) {
+    return(list(bias = bias, sig = sig))
+  } else {
+    return(bias)
+  }
+}
diff --git a/modules/Skill/R/tmp/SprErr.R b/modules/Skill/R/tmp/SprErr.R
new file mode 100644
index 00000000..15bcba31
--- /dev/null
+++ b/modules/Skill/R/tmp/SprErr.R
@@ -0,0 +1,226 @@
+#'Compute the ratio between the ensemble spread and RMSE
+#'
+#'Compute the ratio between the spread of the members around the
+#'ensemble mean in experimental data and the RMSE between the ensemble mean of
+#'experimental and observational data. The p-value and/or the statistical
+#'significance is provided by a one-sided Fisher's test.
+#'
+#'@param exp A named numeric array of experimental data with at least two
+#' dimensions 'memb_dim' and 'time_dim'.
+#'@param obs A named numeric array of observational data with at least two
+#' dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as
+#' parameter 'exp' except along 'dat_dim' and 'memb_dim'.
+#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
+#' dimension. The default value is NULL (no dataset).
+#'@param memb_dim A character string indicating the name of the member
+#' dimension. It must be one dimension in 'exp' and 'obs'. The default value
+#' is 'member'.
+#'@param time_dim A character string indicating the name of dimension along
+#' which the ratio is computed. The default value is 'sdate'.
+#'@param pval A logical value indicating whether to compute the p-value
+#' of the test Ho: SD/RMSE = 1. The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#' significance of the test Ho: SD/RMSE = 1 based on 'alpha'. The default value is
+#' FALSE.
+#'@param alpha A numeric indicating the significance level for the statistical
+#' significance test. The default value is 0.05.
+#'@param na.rm A logical value indicating whether to remove NA values. The default
+#' value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return A list of two arrays with dimensions c(nexp, nobs, the rest of
+#' dimensions of 'exp' and 'obs' except memb_dim and time_dim), where nexp is
+#' the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'.
+#' If dat_dim is NULL, nexp and nobs are omitted. \cr
+#'\item{$ratio}{
+#' The ratio of the ensemble spread and RMSE.
+#'}
+#'\item{$p_val}{
+#' The p-value of the one-sided Fisher's test with Ho: SD/RMSE = 1. Only present
+#' if \code{pval = TRUE}.
+#'}
+#'
+#'@examples
+#'# Load sample data as in Load() example:
+#'example(Load)
+#'rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs, dat_dim = 'dataset')
+#'# Reorder the data in order to plot it with PlotVsLTime
+#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3]))
+#'rsdrms_plot[, , 2, ] <- rsdrms$ratio
+#'rsdrms_plot[, , 4, ] <- rsdrms$p.val
+#'\dontrun{
+#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "",
+#' monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'),
+#' listobs = c('ERSST'), biglab = FALSE, siglev = TRUE)
+#'}
+#'
+#'@import multiApply
+#'@export
+SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member',
+                   time_dim = 'sdate', pval = TRUE, sign = FALSE,
+                   alpha = 0.05, na.rm = FALSE, ncores = NULL) {
+  
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be numeric arrays.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must be arrays with at least two ",
+                "dimensions memb_dim and time_dim."))
+  }
+  if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
+  }
+  if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+    stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. ", "Set it as NULL if there is no member dimension.")
+  }
+  # Add [member = 1]
+  if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+    dim(obs) <- c(dim(obs), 1)
+    names(dim(obs))[length(dim(obs))] <- memb_dim
+  }
+  if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) {
+    dim(exp) <- c(dim(exp), 1)
+    names(dim(exp))[length(dim(exp))] <- memb_dim
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim)]
+    name_obs <- name_obs[-which(name_obs == dat_dim)]
+  }
+  name_exp <- name_exp[-which(name_exp == memb_dim)]
+  name_obs <- name_obs[-which(name_obs == memb_dim)]
+  if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
+                "all the dimensions except 'dat_dim' and 'memb_dim'."))
+  }
+  ## pval
+  if (!is.logical(pval) | length(pval) > 1) {
+    stop("Parameter 'pval' must be one logical value.")
+  }
+  ## sign
+  if (!is.logical(sign) | length(sign) > 1) {
+    stop("Parameter 'sign' must be one logical value.")
+  }
+  # alpha
+  if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) {
+    stop("Parameter 'alpha' must be a numeric number between 0 and 1.")
+  }
+  # na.rm
+  if (!na.rm %in% c(TRUE, FALSE)) {
+    stop("Parameter 'na.rm' must be TRUE or FALSE")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+  
+  
+  ###############################
+  # Calculate the spread-to-error ratio
+  
+  # If dat_dim = NULL, insert dat dim
+  remove_dat_dim <- FALSE
+  if (is.null(dat_dim)) {
+    dat_dim <- 'dataset'
+    exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset')
+    obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset')
+    remove_dat_dim <- TRUE
+  }
+  
+  res <- Apply(list(exp, obs),
+               target_dims = list(c(dat_dim, memb_dim, time_dim),
+                                  c(dat_dim, memb_dim, time_dim)),
+               pval = pval,
+               sign = sign,
+               alpha = alpha,
+               na.rm = na.rm,
+               fun = .SprErr,
+               ncores = ncores)
+  
+  if (remove_dat_dim) {
+    if (length(dim(res[[1]])) > 2) {
+      res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected')
+    } else {
+      res <- lapply(res, as.numeric)
+    }
+  }
+  
+  return(res)
+}
+
+.SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE) {
+  
+  # exp: [dat_exp, member, sdate]
+  # obs: [dat_obs, member, sdate]
+  nexp <- dim(exp)[1]
+  nobs <- dim(obs)[1]
+  
+  # ensemble mean
+  ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate]
+  ens_obs <- MeanDims(obs, 2, na.rm = na.rm)
+  
+  # Create empty arrays
+  ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs]
+  p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs]
+  
+  for (jexp in 1:nexp) {
+    for (jobs in 1:nobs) {
+      
+      # spread and error
+      spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm))
+      error <- sqrt(mean((ens_obs - ens_exp[jexp,])^2, na.rm = na.rm))
+      ratio[jexp, jobs] <- spread/error
+      
+      # effective sample size
+      enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3]))
+      enodif <- 
.Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass) + if (pval) { + F <- (enospr[jexp] * spread[jexp]^2 / (enospr[jexp] - 1)) / (enodif * error^2 / (enodif - 1)) + if (!is.na(F) & !is.na(enospr[jexp]) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + p.val[jexp, jobs] <- pf(F, enospr[jexp] - 1, enodif - 1) + p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs]) + } else { + ratio[jexp, jobs] <- NA + } + } + } + } + + res <- list(ratio = ratio) + if (pval) {res$p.val <- p.val} + if (sign) {res$sign <- p.val <= alpha} + + return(res) +} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index d59f29d7..2a803dc0 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -19,9 +19,11 @@ source("modules/Skill/R/tmp/GetProbs.R") ## Temporary source("modules/Skill/R/tmp/RPS.R") source("modules/Skill/R/tmp/CRPS.R") +source("modules/Skill/R/tmp/SprErr.R") # https://earth.bsc.es/gitlab/es/s2dv/-/issues/115 +source("modules/Skill/R/tmp/Bias.R") # https://earth.bsc.es/gitlab/es/s2dv/-/issues/118 Skill <- function(recipe, data, agg = 'global') { - + # data$hcst: s2dv_cube containing the hindcast # obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml @@ -225,15 +227,19 @@ Skill <- function(recipe, data, agg = 'global') { skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, time_dim = time_dim, memb_dim = memb_dim, + alpha = 0.05, ncores = ncores) } else { skill <- Bias(data$hcst$data, data$obs$data, time_dim = time_dim, memb_dim = memb_dim, + alpha = 0.05, ncores = ncores) } - skill <- .drop_dims(skill) - skill_metrics[[ metric ]] <- skill + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$bias + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sig # Mean bias skill score } else if (metric == 'mean_bias_ss') { if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && @@ -310,21 +316,19 @@ Skill <- function(recipe, data, agg = 'global') { skill_metrics[[ metric ]] <- skill$msss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'enssprerr') { - # Remove ensemble dim from obs to avoid veriApply warning - obs_noensdim <- ClimProjDiags::Subset(data$obs$data, "ensemble", 1, - drop = "selected") - capture.output( - skill <- easyVerification::veriApply(verifun = 'EnsSprErr', - fcst = data$hcst$data, - obs = obs_noensdim, - tdim = which(names(dim(data$hcst$data))==time_dim), - ensdim = which(names(dim(data$hcst$data))==memb_dim), - na.rm = na.rm, - ncpus = ncores) - ) - remove(obs_noensdim) - skill <- .drop_dims(skill) - skill_metrics[[ metric ]] <- skill + skill <- SprErr(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + dat_dim = 'dat', + pval = TRUE, + sign = TRUE, + alpha = 0.05, + na.rm = na.rm, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$ratio + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # SpecsVerification metrics } else if (grepl("specs", metric, fixed = TRUE)) { # Compute SpecsVerification version of the metrics -- GitLab From c5a6f9c7d8dd813a8d85a7d7484fb7f082a36e46 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 3 Jun 2024 12:58:38 +0200 Subject: [PATCH 08/53] updating SprErr function --- modules/Skill/R/tmp/SprErr.R | 25 +++++++++---------------- modules/Skill/Skill.R | 3 ++- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/modules/Skill/R/tmp/SprErr.R 
b/modules/Skill/R/tmp/SprErr.R index 15bcba31..a22bdf0d 100644 --- a/modules/Skill/R/tmp/SprErr.R +++ b/modules/Skill/R/tmp/SprErr.R @@ -3,7 +3,7 @@ #'Compute the ratio between the spread of the members around the #'ensemble mean in experimental data and the RMSE between the ensemble mean of #'experimental and observational data. The p-value and/or the statistical -#'significance is provided by a one-sided Fisher's test. +#'significance is provided by a two-sided Fisher's test. #' #'@param exp A named numeric array of experimental data with at least two #' dimensions 'memb_dim' and 'time_dim'. @@ -37,23 +37,16 @@ #' The ratio of the ensemble spread and RMSE. #'} #'\item{$p_val}{ -#' The p-value of the one-sided Fisher's test with Ho: SD/RMSE = 1. Only present +#' The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only present #' if \code{pval = TRUE}. #'} #' #'@examples -#'# Load sample data as in Load() example: -#'example(Load) -#'rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs, dat_dim = 'dataset') -#'# Reorder the data in order to plot it with PlotVsLTime -#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3])) -#'rsdrms_plot[, , 2, ] <- rsdrms$ratio -#'rsdrms_plot[, , 4, ] <- rsdrms$p.val -#'\dontrun{ -#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", -#' monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), -#' listobs = c('ERSST'), biglab = FALSE, siglev = TRUE) -#'} +#'exp <- array(rnorm(30), dim = c(lat = 2, sdate = 3, member = 5)) +#'obs <- array(rnorm(30), dim = c(lat = 2, sdate = 3)) +#'sprerr1 <- SprErr(exp, obs) +#'sprerr2 <- SprErr(exp, obs, pval=F, sign=T) +#'sprerr3 <- SprErr(exp, obs, pval=T, sign=T) #' #'@import multiApply #'@export @@ -206,13 +199,13 @@ SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', # effective sample size enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3])) enodif <- .Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass) - if (pval) { + if (pval | sign) { F <- (enospr[jexp] * spread[jexp]^2 / (enospr[jexp] - 1)) / (enodif * error^2 / (enodif - 1)) if (!is.na(F) & !is.na(enospr[jexp]) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { p.val[jexp, jobs] <- pf(F, enospr[jexp] - 1, enodif - 1) p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs]) } else { - ratio[jexp, jobs] <- NA + p.val[jexp, jobs] <- NA } } } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 2a803dc0..47832bde 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -316,11 +316,12 @@ Skill <- function(recipe, data, agg = 'global') { skill_metrics[[ metric ]] <- skill$msss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'enssprerr') { + .Eno <- s2dv:::.Eno skill <- SprErr(data$hcst$data, data$obs$data, time_dim = time_dim, memb_dim = memb_dim, dat_dim = 'dat', - pval = TRUE, + pval = FALSE, sign = TRUE, alpha = 0.05, na.rm = na.rm, -- GitLab From 3ecba8a61a94acac9dca848bbb8479f27ad8b4bc Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 3 Jun 2024 14:33:24 +0200 Subject: [PATCH 09/53] Updated unit tests to include mean_bias_significance metric --- tests/testthat/test-seasonal_NAO.R | 3 ++- tests/testthat/test-seasonal_downscaling.R | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-seasonal_NAO.R b/tests/testthat/test-seasonal_NAO.R index f70eefc7..c2e13546 100644 --- a/tests/testthat/test-seasonal_NAO.R +++ 
b/tests/testthat/test-seasonal_NAO.R @@ -213,7 +213,7 @@ TRUE ) expect_equal( names(skill_metrics), -c("mean_bias", "enscorr", +c("mean_bias", "mean_bias_significance" , "enscorr", "enscorr_significance", "rps", "rpss", "rpss_significance", "crps", "crpss", "crpss_significance", "enssprerr") ) @@ -254,6 +254,7 @@ c(paste0("Indices/ECMWF-SEAS5/nao/", paste0("nao_", 1993:2000, "0301.nc")), "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_significance_1993-2000_s03.nc", "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enssprerr_1993-2000_s03.nc", "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_significance_1993-2000_s03.nc", "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rps_1993-2000_s03.nc", "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rpss_1993-2000_s03.nc", "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rpss_significance_1993-2000_s03.nc" diff --git a/tests/testthat/test-seasonal_downscaling.R b/tests/testthat/test-seasonal_downscaling.R index 8a52657a..1ba0e3df 100644 --- a/tests/testthat/test-seasonal_downscaling.R +++ b/tests/testthat/test-seasonal_downscaling.R @@ -214,7 +214,7 @@ TRUE ) expect_equal( names(skill_metrics), -c("bss10", "bss10_significance", "crpss", "crpss_significance","rpss", "rpss_significance", "mean_bias") +c("bss10", "bss10_significance", "crpss", "crpss_significance","rpss", "rpss_significance", "mean_bias", "mean_bias_significance") ) expect_equal( class(skill_metrics$rpss), -- GitLab From b0331406cc015d67ae679715c6a4d247064b3127 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 3 Jun 2024 16:05:36 +0200 Subject: [PATCH 10/53] Updated unit test to include ensprerr_significance metric --- tests/testthat/test-seasonal_NAO.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-seasonal_NAO.R b/tests/testthat/test-seasonal_NAO.R index c2e13546..f5a77af2 100644 --- a/tests/testthat/test-seasonal_NAO.R +++ b/tests/testthat/test-seasonal_NAO.R @@ -215,7 +215,7 @@ expect_equal( names(skill_metrics), c("mean_bias", "mean_bias_significance" , "enscorr", "enscorr_significance", "rps", "rpss", "rpss_significance", - "crps", "crpss", "crpss_significance", "enssprerr") + "crps", "crpss", "crpss_significance", "enssprerr", "enssprerr_significance") ) expect_equal( class(skill_metrics$rpss), @@ -253,6 +253,7 @@ c(paste0("Indices/ECMWF-SEAS5/nao/", paste0("nao_", 1993:2000, "0301.nc")), "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_1993-2000_s03.nc", "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_significance_1993-2000_s03.nc", "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enssprerr_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enssprerr_significance_1993-2000_s03.nc", "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_1993-2000_s03.nc", "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_significance_1993-2000_s03.nc", "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rps_1993-2000_s03.nc", @@ -263,7 +264,7 @@ c(paste0("Indices/ECMWF-SEAS5/nao/", paste0("nao_", 1993:2000, "0301.nc")), expect_equal( length(list.files(outputs, recursive = T)), -26 +28 ) }) -- GitLab From e873564b5bdef9b3816088b02efeecee62cfb422 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 25 
Jun 2024 11:14:46 +0200 Subject: [PATCH 11/53] Fix format of recipe elements --- modules/Scorecards/execute_scorecards.R | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R index f5cf0452..401e6b45 100644 --- a/modules/Scorecards/execute_scorecards.R +++ b/modules/Scorecards/execute_scorecards.R @@ -18,23 +18,19 @@ for (variable in 1:length(recipe$Analysis$Variables)) { scorecard_recipe <- recipe # Collect all system names scorecard_recipe$Analysis$Datasets$System <- - list(name = as.vector(unlist(recipe$Analysis$Datasets$System))) + as.vector(unlist(recipe$Analysis$Datasets$System)) # Include multimodel in systems if (!isFALSE(recipe$Analysis$Datasets$Multimodel$execute)) { - scorecard_recipe$Analysis$Datasets$System$name <- - c(scorecard_recipe$Analysis$Datasets$System$name, 'Multimodel') + scorecard_recipe$Analysis$Datasets$System <- + c(scorecard_recipe$Analysis$Datasets$System, 'Multimodel') } # Collect all reference names scorecard_recipe$Analysis$Datasets$Reference <- - list(name = as.vector(unlist(recipe$Analysis$Datasets$Reference))) + as.vector(unlist(recipe$Analysis$Datasets$Reference)) # Assign variables scorecard_recipe$Analysis$Variables <- recipe$Analysis$Variables[[variable]] -# scorecard_recipe$Analysis$Datasets$Reference <- -# as.vector(unlist(recipe$Analysis$Datasets$Reference)) - scorecard_recipe$Analysis$Variables <- - recipe$Analysis$Variables[[variable]] - ## Plot Scorecards + ## Plot Scorecards Scorecards_plotting(scorecard_recipe) } -- GitLab From 126479e5eeb8c92a1e4d0d22e0b386b8738d62a5 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 25 Jun 2024 11:15:54 +0200 Subject: [PATCH 12/53] Update Bias with latest changes --- modules/Skill/R/tmp/Bias.R | 53 ++++++++++++++++++++++++++------------ modules/Skill/Skill.R | 2 +- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/modules/Skill/R/tmp/Bias.R b/modules/Skill/R/tmp/Bias.R index 098a678e..d4887189 100644 --- a/modules/Skill/R/tmp/Bias.R +++ b/modules/Skill/R/tmp/Bias.R @@ -27,7 +27,8 @@ #' bias. The default value is FALSE. #'@param time_mean A logical value indicating whether to compute the temporal #' mean of the bias. The default value is TRUE. -#'@param alpha A numeric or NULL (default) to indicate the significance level using Weltch test. Only available when absolute is FALSE. +#'@param alpha A numeric or NULL (default) to indicate the significance level +#' using Welch's t-test. Only available when absolute is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -35,7 +36,10 @@ #'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of #''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number #'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation -#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If alpha is specified, and absolute is FALSE, the result is a list with two elements, the bias as describe above and the significance as logical array with the same dimensions. +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If +#'alpha is specified, and absolute is FALSE, the result is a list with two +#'elements: the bias as described above and the significance as a logical array +#'with the same dimensions. 
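+#'
+#'A minimal usage sketch of the list return (illustrative only; the 'exp' and
+#''obs' arrays are built as in the examples below):
+#'\preformatted{
+#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50))
+#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50))
+#'res <- Bias(exp = exp, obs = obs, memb_dim = 'member', alpha = 0.05)
+#'names(res)  # "bias" and "sign": the mean bias and its significance
+#'}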
#' #'@references #'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 @@ -44,12 +48,15 @@ #'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) #'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) #'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') +#'bias2 <- Bias(exp = exp, obs = obs, memb_dim = 'member', alpha = 0.01) +#'abs_bias <- Bias(exp = exp, obs = obs, memb_dim = 'member', absolute = TRUE, alpha = NULL) #' #'@import multiApply #'@importFrom ClimProjDiags Subset #'@export -Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE, - absolute = FALSE, time_mean = TRUE, alpha = NULL, ncores = NULL) { +Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, + na.rm = FALSE, absolute = FALSE, time_mean = TRUE, + alpha = 0.05, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -123,9 +130,14 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, } ## alpha if (!is.null(alpha)) { - if (!is.numeric(alpha) | length(alpha) > 1) { + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { stop("Parameter 'alpha' must be null or a numeric value.") } + if (absolute) { + alpha <- NULL + .warning("Parameter 'absolute' is TRUE, so 'alpha' has been set to", + "false and significance will not be returned.") + } } ## ncores if (!is.null(ncores)) { @@ -134,9 +146,9 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, stop("Parameter 'ncores' must be either NULL or a positive integer.") } } - + ############################### - + ## Ensemble mean if (!is.null(memb_dim)) { exp <- MeanDims(exp, memb_dim, na.rm = na.rm) @@ -153,7 +165,7 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, time_mean = time_mean, alpha = alpha, ncores = ncores) - + if (is.null(alpha)) { bias <- bias$output1 } @@ -177,8 +189,12 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, if (!is.null(alpha)) { if (!absolute) { - pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value - sig <- pval <= alpha + if (all(is.na(bias))) { + sign <- NA + } else { + pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value + sign <- pval <= alpha + } } } } else { @@ -186,30 +202,33 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, nobs <- as.numeric(dim(obs)[dat_dim]) bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) pval <- array(dim = c(nexp = nexp, nobs = nobs)) - sig <- array(dim = c(nexp = nexp, nobs = nobs)) + sign <- array(dim = c(nexp = nexp, nobs = nobs)) for (i in 1:nexp) { for (j in 1:nobs) { bias[, i, j] <- exp[, i] - obs[, j] if (!is.null(alpha)) { if (!absolute) { - pval[i,j] <- t.test(x = obs[,j], y = exp[,i], - alternative = "two.sided")$p.value - sig[i,j] <- pval <= alpha + pval[i, j] <- t.test(x = obs[, j], y = exp[, i], + alternative = "two.sided")$p.value + sign[i, j] <- pval[i, j] <= alpha } } } } - + if (isTRUE(absolute)) { bias <- abs(bias) } - + if (isTRUE(time_mean)) { bias <- MeanDims(bias, time_dim, na.rm = na.rm) + if (!is.null(sign)) { + sign[which(is.na(bias))] <- NA + } } } if (!is.null(alpha) && !absolute) { - return(list(bias = bias, sig = sig)) + return(list(bias = bias, sign = sign)) } else { return(bias) } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 4f1d8f31..7c829c25 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ 
-238,7 +238,7 @@ Skill <- function(recipe, data, agg = 'global') {
       skill <- lapply(skill, function(x) {
                       .drop_dims(x)})
       skill_metrics[[ metric ]] <- skill$bias
-      skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sig
+      skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
       # Mean bias skill score
     } else if (metric == 'mean_bias_ss') {
       if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) &&
-- 
GitLab


From 36b44ed4e523938f48e2d1cbb30560a8ad6c3f60 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Tue, 25 Jun 2024 14:31:09 +0200
Subject: [PATCH 13/53] Formatting and consistency improvements

---
 modules/Scorecards/Scorecards_calculations.R | 178 ++++++++-----------
 modules/Scorecards/Scorecards_plotting.R     |  38 ++--
 modules/Scorecards/execute_scorecards.R      |   7 +-
 3 files changed, 97 insertions(+), 126 deletions(-)

diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R
index ca39dd01..7b91e984 100644
--- a/modules/Scorecards/Scorecards_calculations.R
+++ b/modules/Scorecards/Scorecards_calculations.R
@@ -9,13 +9,39 @@
 ## the atomic recipes.
 
 ## Define function
-Scorecards_calculations <- function(data, skill_metrics,
-                                    statistics = NULL, recipe) {
+Scorecards_calculations <- function(recipe, data, skill_metrics,
+                                    statistics = NULL) {
+  # Check that skill_metrics is not NULL
+  if (is.null(skill_metrics)) {
+    stop("Scorecards calculations requested but 'skill_metrics' not provided.")
+  } else if (!is.list(skill_metrics)) {
+    stop("'skill_metrics' should be a named list with metric arrays")
+  }
+  # Assign parameters
+  ncores <- recipe$Analysis$ncores
+  if (is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)) {
+    alpha <- 0.05
+  } else {
+    alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha
+  }
+  
+  if (is.null(recipe$Analysis$remove_NAs)) {
+    na.rm <- FALSE
+  } else {
+    na.rm <- recipe$Analysis$remove_NAs
+  }
+  
+  if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){
+    inf.to.na <- FALSE
+  } else {
+    inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na
+  }
+  
   ## Parameters for saving output data files
   output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/")
   dir.create(output.path, recursive = T, showWarnings = F)
-  
+
   system <- recipe$Analysis$Datasets$System$name
   reference <- recipe$Analysis$Datasets$Reference$name
   var <- recipe$Analysis$Variables$name
@@ -32,25 +58,6 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci
   metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation
   metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric),
                              ", | |,"))
-  ncores <- recipe$Analysis$ncores
-  
-  if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){
-    alpha <- 0.05
-  } else {
-    alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha
-  }
-  
-  if(is.null(recipe$Analysis$remove_NAs)){
-    na.rm <- FALSE
-  } else {
-    na.rm <- recipe$Analysis$remove_NAs
-  }
-  
-  if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){
-    inf.to.na <- FALSE
-  } else {
-    inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na
-  }
 
   ## Define array to be filled with data
   aggr_significance <- array(data = NA,
@@ -58,22 +65,16 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci
                              region = length(regions),
                              metric = length(metrics)))
-  
   ## For data that is already aggregated by region
   if ("region" %in% names(dim(skill_metrics[[1]]))) {
-    
-    aggr_metrics <- NULL
-    
-    for(met in metrics){
+    aggr_metrics <- NULL
+    for (met in metrics) {
       skill_metrics[[met]] <- Reorder(skill_metrics[[met]],
                                       c("time", "region", "var"))
       aggr_metrics <- abind(aggr_metrics, skill_metrics[[met]], along=3)
-    }
-    
+    }
     names(dim(aggr_metrics)) <- c("time", "region", "metric")
-    
-  } else {
-    
+  } else {
     ## Define arrays to be filled with data
     aggr_metrics <- array(data = NA,
                           dim = c(time = length(forecast.months),
@@ -88,41 +89,34 @@
Scorecards_calculations <- function(data, skill_metrics, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - std_hcst <- sapply(X = 1:length(regions), FUN = function(X) { WeightedMean(data = statistics$std_hcst, @@ -250,7 +235,6 @@ Scorecards_calculations <- function(data, skill_metrics, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - std_obs <- sapply(X = 1:length(regions), FUN = function(X) { WeightedMean(data = statistics$std_obs, @@ -259,7 +243,6 @@ Scorecards_calculations <- function(data, skill_metrics, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - n_eff <- sapply(X = 1:length(regions), FUN = function(X) { WeightedMean(data = statistics$n_eff, @@ -292,15 +275,14 @@ Scorecards_calculations <- function(data, skill_metrics, dim = c(time = length(forecast.months), region = length(regions))) - for (time in 1:dim(sign_corr)[['time']]){ - for (reg in 1:dim(sign_corr)[['region']]){ - - if (anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE - && t[time, reg] >= t_alpha2_n2[time, reg]){ - sign_corr[time, reg] <- TRUE - } else { - sign_corr[time, reg] <- FALSE - } + for (time in 1:dim(sign_corr)[['time']]) { + for (reg in 1:dim(sign_corr)[['region']]) { + if ((anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE) && + (t[time, reg] >= t_alpha2_n2[time, reg])) { + sign_corr[time, reg] <- TRUE + } else { + sign_corr[time, reg] <- FALSE + } } } @@ -310,10 +292,7 @@ Scorecards_calculations <- function(data, skill_metrics, aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr, order = c('time', 'region')) - } ## close if on enscorr - - if(met == 'mean_bias'){ - + } else if (met == 'mean_bias') { ## Calculate ensemble mean hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') @@ -372,15 +351,12 @@ Scorecards_calculations <- function(data, skill_metrics, mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected') ## Save metric result in array - aggr_metrics[,,which(metrics == met)] <- Reorder(data = mean_bias, + aggr_metrics[ , , which(metrics == met)] <- Reorder(data = mean_bias, order = c('time', 'region')) - aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_mean_bias, + aggr_significance[ , , which(metrics == met)] <- Reorder(data = sign_mean_bias, order = c('time', 'region')) - } ## close on mean_bias - - if(met == 'enssprerr'){ - + } else if (met == 'enssprerr') { ## Calculate metric spread <- sapply(X = 1:length(regions), FUN = function(X) { @@ -435,17 +411,13 @@ Scorecards_calculations <- function(data, skill_metrics, # sign_enssprerr <- pval_enssprerr <= alpha ## Save metric result in array - aggr_metrics[,,which(metrics == met)] <- Reorder(data = enssprerr, - order = c('time', 'region')) + aggr_metrics[ , , which(metrics == met)] <- Reorder(data = enssprerr, + order = c('time', 'region')) # aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr, # order = c('time', 'region')) - - } ## close on enssprerr - + } } ## close loop on metric - } ## close if on score - } ## close if on region ## Set NAs to False diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index fd59b36f..ff1ba186 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -29,8 +29,8 @@ Scorecards_plotting <- function(recipe) { output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) - 
system <- as.vector(unlist(recipe$Analysis$Datasets$System)) - reference <- as.vector(unlist(recipe$Analysis$Datasets$Reference)) + system <- as.vector(unlist(recipe$Analysis$Datasets$System$name)) + reference <- as.vector(unlist(recipe$Analysis$Datasets$Reference$name)) var <- recipe$Analysis$Variables$name start.year <- as.numeric(recipe$Analysis$Time$hcst_start) @@ -42,7 +42,7 @@ Scorecards_plotting <- function(recipe) { if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { - start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) } else { start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, split = ", | |,")[[1]]) @@ -65,57 +65,57 @@ Scorecards_plotting <- function(recipe) { legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width - if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)) { plot.legend <- TRUE } else { plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend } - if(is.null(recipe$Analysis$Workflow$Scorecards$columns_width)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$columns_width)) { columns.width <- 1.2 } else { columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width } - if(is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)) { legend.white.space <- 6 } else { legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space } - if(is.null(recipe$Analysis$Workflow$Scorecards$legend_height)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$legend_height)) { legend.height <- 50 } else { legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height } - if(is.null(recipe$Analysis$Workflow$Scorecards$label_scale)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$label_scale)) { label.scale <- 1.4 } else { label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale } - if(is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)) { round.decimal <- 2 } else { round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal } - if(is.null(recipe$Analysis$Workflow$Scorecards$font_size)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$font_size)) { font.size <- 1.1 } else { font.size <- recipe$Analysis$Workflow$Scorecards$font_size } ## Define if difference scorecard is to be plotted - if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)) { calculate.diff <- FALSE } else { calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff } - if ('name' %in% names(recipe$Analysis$Variables)){ + if ('name' %in% names(recipe$Analysis$Variables)) { if (recipe$Analysis$Variables$name == var) { var.units <- recipe$Analysis$Variables$units } @@ -186,7 +186,7 @@ Scorecards_plotting <- function(recipe) { ## Create multi system/reference scorecard tables ## (multiple systems with one reference or one system with multiple references) ## Metrics input must be in the same order as function SC_spatial_aggregation - if(length(system) > 1 || length(reference) > 1){ + if (length(system) > 1 || length(reference) > 1) { scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, sign = 
aggregated_significance, system = system, @@ -214,9 +214,8 @@ Scorecards_plotting <- function(recipe) { output.path = output.path) } ## close if - - if(calculate.diff == TRUE){ - if(length(system) == 2 || length(reference) == 2){ + if (calculate.diff == TRUE) { + if (length(system) == 2 || length(reference) == 2) { scorecard_diff <- ScorecardsSystemDiff(data = aggregated_metrics, system = system, reference = reference, @@ -234,9 +233,10 @@ Scorecards_plotting <- function(recipe) { col1.width = col1.width, col2.width = col2.width, output.path = output.path) - } else {stop ( - "Difference scorecard can only be computed with two systems or two references.")} + } else { + stop("Difference scorecard can only be computed with two systems or ", + "two references.")} } ## close if on calculate.diff - + } diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R index 401e6b45..2fa27549 100644 --- a/modules/Scorecards/execute_scorecards.R +++ b/modules/Scorecards/execute_scorecards.R @@ -5,7 +5,6 @@ args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] output_dir <- args[2] -## TODO: Replace with function # Read recipe and set outdir recipe <- read_yaml(recipe_file) recipe$Run$output_dir <- output_dir @@ -18,7 +17,7 @@ for (variable in 1:length(recipe$Analysis$Variables)) { scorecard_recipe <- recipe # Collect all system names scorecard_recipe$Analysis$Datasets$System <- - as.vector(unlist(recipe$Analysis$Datasets$System)) + list(name = as.vector(unlist(recipe$Analysis$Datasets$System))) # Include multimodel in systems if (!isFALSE(recipe$Analysis$Datasets$Multimodel$execute)) { scorecard_recipe$Analysis$Datasets$System <- @@ -26,11 +25,11 @@ for (variable in 1:length(recipe$Analysis$Variables)) { } # Collect all reference names scorecard_recipe$Analysis$Datasets$Reference <- - as.vector(unlist(recipe$Analysis$Datasets$Reference)) + list(name = as.vector(unlist(recipe$Analysis$Datasets$Reference))) # Assign variables scorecard_recipe$Analysis$Variables <- recipe$Analysis$Variables[[variable]] - ## Plot Scorecards + # Plot Scorecards Scorecards_plotting(scorecard_recipe) } -- GitLab From f220eb1ee966f9970b023062775e93c959d871ce Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 26 Jun 2024 10:53:31 +0200 Subject: [PATCH 14/53] Update Scorecards use case (WIP); fix bug in saving directory for scorecards metrics --- modules/Scorecards/Scorecards_calculations.R | 5 +---- .../ex1_2_autosubmit_scorecards/ex1_2-handson.md | 8 +++++++- .../ex1_2_autosubmit_scorecards/ex1_2-recipe.yml | 12 ++++++------ use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R | 5 +++++ 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index 7b91e984..1ea84c68 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -440,11 +440,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, aggr_significance = aggr_significance) ## Save metric data arrays - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Scorecards/") - save_metrics(recipe = recipe, metrics = aggr_scorecards, data_cube = data$hcst, agg = 'region', - module = "scorecard") + module = "scorecards") } diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md index aab5b875..323d8430 100644 --- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md +++ 
b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md @@ -82,6 +82,7 @@ source("modules/Loading/Loading.R") source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") source("modules/Statistics/Statistics.R") +source("modules/Scorecards/Scorecards_calculations.R") source("modules/Saving/Saving.R") # Read recipe @@ -91,7 +92,7 @@ recipe_file <- args[1] recipe <- read_atomic_recipe(recipe_file) ``` -The rest of the user-defined script can be written in the same way as any other SUNSET script. We load the data, calculate the anomalies, then compute the skill scores and save the result as netCDF files for Scorecards. +The rest of the user-defined script can be written in the same way as any other SUNSET script. We load the data, calculate the anomalies, then compute the skill metrics and statistics, and we call `Scorecards_calculations()` to perform the region-aggregated metric and significance calculations and save the results as netCDF files for the Scorecards. ```R # Load data data <- Loading(recipe) @@ -102,6 +103,11 @@ data <- Anomalies(recipe, data) skill_metrics <- Skill(recipe, data) # Compute statistics statistics <- Statistics(recipe, data) +# Pre-computations required for the Scorecards +Scorecards_calculations(recipe, data = data, + skill_metrics = skill_metrics, + statistics = statistics) + ``` Check the example script at [ex1_2-script.R](use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R). You can execute it as-is or copy it and modify it according to your needs. diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml index 68a8dc62..e63747d9 100644 --- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml +++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml @@ -80,20 +80,20 @@ Run: Loglevel: INFO Terminal: yes filesystem: esarchive - output_dir: /esarchive/scratch/aho/auto-s2s-outputs/ - code_dir: /esarchive/scratch/aho/git/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/git/auto-s2s/ autosubmit: yes # fill only if using autosubmit auto_conf: - script: /esarchive/scratch/aho/git/auto-s2s/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R # replace with the path to your script - expid: a6pc # replace with your EXPID - hpc_user: bsc032734 # replace with your hpc username + script: use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username wallclock: 03:00 # hh:mm processors_per_job: 8 platform: nord3v2 custom_directives: ['#SBATCH --exclusive'] email_notifications: yes # enable/disable email notifications. Change it if you want to.
- email_address: an.ho@bsc.es # replace with your email address + email_address: victoria.agudetse@bsc.es # replace with your email address notify_completed: yes # notify me by email when a job finishes notify_failed: yes # notify me by email when a job fails diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R index a39c1c39..a8ee7eff 100644 --- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R +++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R @@ -11,6 +11,7 @@ source("modules/Loading/Loading.R") source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") source("modules/Statistics/Statistics.R") +source("modules/Scorecards/Scorecards_calculations.R") source("modules/Saving/Saving.R") # Read recipe @@ -26,3 +27,7 @@ data <- Anomalies(recipe, data) skill_metrics <- Skill(recipe, data) # Compute statistics statistics <- Statistics(recipe, data) +# Pre-computations required for the scorecards +Scorecards_calculations(recipe, data = data, + skill_metrics = skill_metrics, + statistics = statistics) -- GitLab From 03f82c36e8e5d42a1cad117147bfcf78ce9bc1d3 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 26 Jun 2024 15:54:41 +0200 Subject: [PATCH 15/53] Change series of ifs to if/ifelse --- modules/Statistics/Statistics.R | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 6fa5d03c..e3c27e97 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -26,8 +26,7 @@ Statistics <- function(recipe, data, agg = 'global') { method = "pearson")})$output1 covariance <- .drop_dims(covariance) statistics[[ stat ]] <- covariance - } ## close if on covariance - if (stat %in% c('std', 'standard_deviation')) { + } else if (stat %in% c('std', 'standard_deviation')) { ## Calculate standard deviation std_hcst <- Apply(data = hcst_ensmean, target_dims = c(time_dim), @@ -39,8 +38,7 @@ Statistics <- function(recipe, data, agg = 'global') { std_obs <- .drop_dims(std_obs) statistics[['std_hcst']] <- std_hcst statistics[['std_obs']] <- std_obs - } ## close if on std - if (stat %in% c('var', 'variance')) { + } else if (stat %in% c('var', 'variance')) { ## Calculate variance var_hcst <- (Apply(data = hcst_ensmean, target_dims = c(time_dim), @@ -52,8 +50,7 @@ Statistics <- function(recipe, data, agg = 'global') { var_obs <- .drop_dims(var_obs) statistics[['var_hcst']] <- var_hcst statistics[['var_obs']] <- var_obs - } ## close if on variance - if (stat == 'n_eff') { + } else if (stat == 'n_eff') { ## Calculate degrees of freedom n_eff <- s2dv::Eno(data = obs_ensmean, time_dim = time_dim, @@ -61,8 +58,7 @@ Statistics <- function(recipe, data, agg = 'global') { ncores = ncores) n_eff <- .drop_dims(n_eff) statistics[['n_eff']] <- n_eff - } ## close on n_eff - if (stat == 'spread') { + } else if (stat == 'spread') { C_cov <- stats:::C_cov spread <- sqrt(Apply(Apply(data = data$hcst$data, target_dims = c(memb_dim), @@ -70,7 +66,7 @@ Statistics <- function(recipe, data, agg = 'global') { fun = 'mean', target_dims = time_dim)$output1) spread <- .drop_dims(spread) statistics[['spread']] <- spread - } ## close on spread + } } ## close on stat info(recipe$Run$logger, "##### STATISTICS COMPUTATION COMPLETE #####") -- GitLab From 5cb281b4188c7161f60ac00b436d76131d1580e5 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 26 Jun 2024 15:55:00 +0200 Subject: [PATCH 16/53] Add checks for score-aggregated sprerr --- 
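With the 'score' aggregation, the spread-to-error ratio is rebuilt from
regionally aggregated statistics, so the 'spread' statistic and the 'rms'
skill metric must be requested upstream. A minimal standalone sketch of the
dependency enforced by this patch (the three vectors stand in for those
built inside check_recipe()):

    sc_metrics <- c("rpss", "enssprerr")           # Scorecards metrics in the recipe
    statistics <- c("cov", "std", "n_eff", "spread")  # requested statistics
    requested_metrics <- c("rpss")                 # skill metrics collected so far
    if ("enssprerr" %in% tolower(sc_metrics)) {
      if (!("spread" %in% statistics)) {
        stop("'enssprerr' with 'score' aggregation needs the 'spread' statistic.")
      } else if (!("rms" %in% requested_metrics)) {
        requested_metrics <- c(requested_metrics, "rms")  # added automatically
      }
    }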
tools/check_recipe.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 78048e4a..290b20a6 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -772,6 +772,18 @@ check_recipe <- function(recipe) { "'n_eff' are required.")) error_status <- TRUE } + if ('ensprerr' %in% tolower(sc_metrics)) { + if (!('spread' %in% statistics)) { + error(recipe$Run$logger, + paste("For 'enssprerr' to be plotted in the Scorecards with", + "the 'score' aggregation, the Statistics module must", + "be called and the statistics 'spread' and 'rms'", + "are required.")) + error_status <- TRUE + } else if (!('rms' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'rms') + } + } recipe$Analysis$Workflow$Skill$metric <- paste0(requested_metrics, collapse = " ") } -- GitLab From 84ce2448fe55baf2b297677d743292299d19499c Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 26 Jun 2024 15:55:08 +0200 Subject: [PATCH 17/53] add missing metrics --- use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml index e63747d9..3cd547ed 100644 --- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml +++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml @@ -50,7 +50,7 @@ Analysis: cross_validation: yes save: 'all' Statistics: - metric: cov std n_eff + metric: cov std n_eff spread save: 'all' Probabilities: percentiles: [[1/3, 2/3]] @@ -81,7 +81,7 @@ Run: Terminal: yes filesystem: esarchive output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ - code_dir: /esarchive/scratch/vagudets/git/auto-s2s/ + code_dir: /home/Earth/vagudets/git/auto-s2s/ autosubmit: yes # fill only if using autosubmit auto_conf: -- GitLab From 4073ed666c18a37c6e92d08caddaaec62ee6ec56 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 27 Jun 2024 16:00:49 +0200 Subject: [PATCH 18/53] Fix bug in check_recipe() and add correct output directory to Scorecards_calculations() --- modules/Scorecards/Scorecards_calculations.R | 4 +++- tools/check_recipe.R | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index 1ea84c68..a48aa99b 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -438,8 +438,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, aggr_scorecards <- list(aggr_metrics = aggr_metrics, aggr_significance = aggr_significance) - + ## Save metric data arrays + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, + "outputs", "Scorecards") save_metrics(recipe = recipe, metrics = aggr_scorecards, data_cube = data$hcst, agg = 'region', module = "scorecards") diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 290b20a6..e588683d 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -772,7 +772,7 @@ check_recipe <- function(recipe) { "'n_eff' are required.")) error_status <- TRUE } - if ('ensprerr' %in% tolower(sc_metrics)) { + if ('enssprerr' %in% tolower(sc_metrics)) { if (!('spread' %in% statistics)) { error(recipe$Run$logger, paste("For 'enssprerr' to be plotted in the Scorecards with", @@ -933,7 +933,7 @@ check_recipe <- function(recipe) { warn(recipe$Run$logger, paste("No 'custom_directives_multimodel' specified, the", "single-model verification custom directives will be 
used.")) - recipe$Run$auto_conf$custom_directives_multimodel <- + recipe$Run$auto_conf$custom_directives_multimodel <- recipe$Run$auto_conf$custom_directives } if (is.null(recipe$Run$auto_conf$processors_multimodel) && -- GitLab From 0ba530c0ad0d15913e4c693bb3d87b6523fc2a4a Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 1 Jul 2024 09:06:20 +0200 Subject: [PATCH 19/53] Bugfix: fix Scorecards directory path --- modules/Scorecards/Scorecards_calculations.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index a48aa99b..ed785ae7 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -424,7 +424,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, aggr_significance[is.na(aggr_significance)] <- FALSE ## Include 'var' dimension to be able to save array - if(!'var' %in% names(dim(aggr_metrics))){ + if (!'var' %in% names(dim(aggr_metrics))) { aggr_metrics <- InsertDim(aggr_metrics, 1, 1, name = 'var') aggr_significance <- InsertDim(aggr_significance, 1, 1, name = 'var') } @@ -441,9 +441,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Save metric data arrays recipe$Run$output_dir <- file.path(recipe$Run$output_dir, - "outputs", "Scorecards") + "outputs", "Scorecards/") save_metrics(recipe = recipe, metrics = aggr_scorecards, data_cube = data$hcst, agg = 'region', module = "scorecards") - } - +} -- GitLab From fc83b456db6d62348243706b69c94168c451dbe8 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 19 Jul 2024 11:40:47 +0200 Subject: [PATCH 20/53] corrected correlation aggregation for scorecards calculations --- modules/Scorecards/Scorecards_calculations.R | 87 +++++++++++--------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index ed785ae7..e24ed79e 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -25,23 +25,23 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } else { alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha } - + if (is.null(recipe$Analysis$remove_NAs)) { na.rm <- FALSE } else { na.rm <- recipe$Analysis$remove_NAs } - + if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){ inf.to.na <- FALSE } else { inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na } - + ## Parameters for saving output data files output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) - + system <- recipe$Analysis$Datasets$System$name reference <- recipe$Analysis$Datasets$Reference$name var <- recipe$Analysis$Variables$name @@ -50,7 +50,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, forecast.months <- recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max start.months <- substr(recipe$Analysis$Time$sdate, 1,2) period <- paste0(start.year, "-", end.year) - + ## Parameters for data aggregation regions <- recipe$Analysis$Workflow$Scorecards$regions for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} @@ -80,7 +80,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, dim = c(time = length(forecast.months), region = length(regions), metric = length(metrics))) - + lon_dim <- 'longitude' lat_dim <- 'latitude' time_dim <- 'syear' @@ -93,13 +93,13 @@ Scorecards_calculations 
<- function(recipe, data, skill_metrics, ## Calculate weighted mean of spatial aggregation for (met in metrics) { result <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics[[met]], - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') + FUN = function(X) { + WeightedMean(data = skill_metrics[[met]], + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') names(dim(result))[length(dim(result))] <- 'region' result <-Subset(result, 'var', 1, drop = 'selected') @@ -227,46 +227,53 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - std_hcst <- sapply(X = 1:length(regions), + n_eff <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$n_eff, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Calculate variance from standard deviation + var_hcst <- (statistics$std_hcst)^2 + var_obs <- (statistics$std_obs)^2 + + var_hcst <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$std_hcst, + WeightedMean(data = var_hcst, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - std_obs <- sapply(X = 1:length(regions), + var_obs <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$std_obs, + WeightedMean(data = var_obs, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - n_eff <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = statistics$n_eff, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') ## Include name of region dimension names(dim(cov))[length(dim(cov))] <- 'region' - names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' - names(dim(std_obs))[length(dim(std_obs))] <- 'region' + names(dim(var_hcst))[length(dim(var_hcst))] <- 'region' + names(dim(var_obs))[length(dim(var_obs))] <- 'region' names(dim(n_eff))[length(dim(n_eff))] <- 'region' - ## Remove 'var' dimension - cov <- Subset(cov, 'var', 1, drop = 'selected') - std_hcst <- Subset(std_hcst, 'var', 1, drop = 'selected') - std_obs <- Subset(std_obs, 'var', 1, drop = 'selected') - n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') + ## Convert aggregated variance back to standard deviation + std_hcst <- sqrt(var_hcst) + std_obs <- sqrt(var_obs) ## Calculate correlation enscorr <- cov / (std_hcst * std_obs) + ## Remove 'var' dimension + enscorr <- Subset(enscorr, 'var', 1, drop = 'selected') + n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') + ## Calculate significance of corr t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) @@ -306,7 +313,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - + obs_data_aggr <- sapply(X = 1:length(regions), FUN = function(X) { WeightedMean(data = obs_data_ens, @@ -330,7 +337,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), target_dims = c('syear'), ncores = ncores, fun = 
function(x,y) - {t.test(as.vector(x),as.vector(y))})$p.value + {t.test(as.vector(x),as.vector(y))})$p.value sign_mean_bias <- pval_mean_bias <= alpha @@ -352,9 +359,9 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Save metric result in array aggr_metrics[ , , which(metrics == met)] <- Reorder(data = mean_bias, - order = c('time', 'region')) + order = c('time', 'region')) aggr_significance[ , , which(metrics == met)] <- Reorder(data = sign_mean_bias, - order = c('time', 'region')) + order = c('time', 'region')) } else if (met == 'enssprerr') { ## Calculate metric @@ -385,7 +392,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, error <- Subset(error, 'var', 1, drop = 'selected') enssprerr <- spread / error - + # ## Significance calculation # # # Effective sample size @@ -409,7 +416,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, # pval_enssprerr <- 2 * min(pval_enssprerr, 1 - pval_enssprerr) # # sign_enssprerr <- pval_enssprerr <= alpha - + ## Save metric result in array aggr_metrics[ , , which(metrics == met)] <- Reorder(data = enssprerr, order = c('time', 'region')) @@ -435,10 +442,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, attributes(aggr_metrics)$regions <- regions attributes(aggr_metrics)$system.name <- system attributes(aggr_metrics)$reference.name <- reference - + aggr_scorecards <- list(aggr_metrics = aggr_metrics, aggr_significance = aggr_significance) - + ## Save metric data arrays recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", "Scorecards/") -- GitLab From 244602f9afd798863efd76f9335c9001da2471b9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 29 Jul 2024 14:09:00 +0200 Subject: [PATCH 21/53] Replace SprErr() with final version --- modules/Skill/R/tmp/SprErr.R | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/modules/Skill/R/tmp/SprErr.R b/modules/Skill/R/tmp/SprErr.R index a22bdf0d..f89d37a1 100644 --- a/modules/Skill/R/tmp/SprErr.R +++ b/modules/Skill/R/tmp/SprErr.R @@ -17,15 +17,15 @@ #' is 'member'. #'@param time_dim A character string indicating the name of dimension along #' which the ratio is computed. The default value is 'sdate'. -#'@param pval A logical value indicating whether to compute or not the p-value +#'@param pval A logical value indicating whether to compute the p-value #' of the test Ho : SD/RMSE = 1 or not. The default value is TRUE. #'@param sign A logical value indicating whether to retrieve the statistical #' significance of the test Ho: ACC = 0 based on 'alpha'. The default value is #' FALSE. #'@param alpha A numeric indicating the significance level for the statistical #' significance test. The default value is 0.05. -#'@param na.rm A logical value indicating whether to remove NA values. The default -#' value is TRUE. +#'@param na.rm A logical value indicating whether to remove NA values. The +#' default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -37,16 +37,16 @@ #' The ratio of the ensemble spread and RMSE. #'} #'\item{$p_val}{ -#' The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only present -#' if \code{pval = TRUE}. +#' The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only +#' present if \code{pval = TRUE}. 
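+#' The two-sided p-value is obtained from an F test on the ratio of the
+#' spread and error variances: internally it is computed as
+#' \code{p <- pf(f_statistic, enospr - 1, enodif - 1)} and then folded with
+#' \code{2 * min(p, 1 - p)}, where \code{enospr} and \code{enodif} are the
+#' effective sample sizes of the spread and of the squared error.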
#'} #' #'@examples #'exp <- array(rnorm(30), dim = c(lat = 2, sdate = 3, member = 5)) #'obs <- array(rnorm(30), dim = c(lat = 2, sdate = 3)) #'sprerr1 <- SprErr(exp, obs) -#'sprerr2 <- SprErr(exp, obs, pval=F, sign=T) -#'sprerr3 <- SprErr(exp, obs, pval=T, sign=T) +#'sprerr2 <- SprErr(exp, obs, pval = FALSE, sign = TRUE) +#'sprerr3 <- SprErr(exp, obs, pval = TRUE, sign = TRUE) #' #'@import multiApply #'@export @@ -83,16 +83,16 @@ SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', if (!is.character(memb_dim) | length(memb_dim) > 1) { stop("Parameter 'memb_dim' must be a character string.") } - if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { - stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. ", - "Set it as NULL if there is no member dimension.") + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimensions. ", + "'exp' must have the member dimension to compute the spread.") } # Add [member = 1] if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { dim(obs) <- c(dim(obs), 1) names(dim(obs))[length(dim(obs))] <- memb_dim } - if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { ## check no longer needed? dim(exp) <- c(dim(exp), 1) names(dim(exp))[length(dim(exp))] <- memb_dim } @@ -158,6 +158,7 @@ SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', c(dat_dim, memb_dim, time_dim)), pval = pval, sign = sign, + alpha = alpha, na.rm = na.rm, fun = .SprErr, ncores = ncores) @@ -166,7 +167,7 @@ SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', if (length(dim(res[[1]])) > 2) { res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected') } else { - res <- lapply(res, as.numeric) + res <- lapply(res, as.vector) } } @@ -200,9 +201,9 @@ SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3])) enodif <- .Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass) if (pval | sign) { - F <- (enospr[jexp] * spread[jexp]^2 / (enospr[jexp] - 1)) / (enodif * error^2 / (enodif - 1)) - if (!is.na(F) & !is.na(enospr[jexp]) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { - p.val[jexp, jobs] <- pf(F, enospr[jexp] - 1, enodif - 1) + f_statistic <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1)) + if (!is.na(f_statistic) & !is.na(enospr) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + p.val[jexp, jobs] <- pf(f_statistic, enospr - 1, enodif - 1) p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs]) } else { p.val[jexp, jobs] <- NA -- GitLab From 64587f27ff4f69b8de168605c4593855299caf73 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Tue, 20 Aug 2024 16:00:46 +0200 Subject: [PATCH 22/53] including gpfs paths in config file --- conf/archive.yml | 117 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/conf/archive.yml b/conf/archive.yml index e49e9b01..cc518535 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -1,3 +1,120 @@ +gpfs: + src: "/gpfs/projects/bsc32/esarchive_cache/" + System: + ECMWF-SEAS5.1: + name: "ECMWF SEAS5 (v5.1)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ecmwf/system51c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + 
"psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/monthly_mean/tas_f6h/tas_20180501.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" + CMCC-SPS3.5: + name: "CMCC-SPS3.5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/cmcc/system35c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 50 + hcst: 40 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system35c3s.txt" + Meteo-France-System8: + name: "Meteo-France System 8" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/meteofrance/system8c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind": "monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system7c3s.txt" + UK-MetOffice-Glosea601: + name: "UK MetOffice GloSea 6 (v6.01)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system601-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo600.txt" + NCEP-CFSv2: + name: "NCEP CFSv2" + institution: "NOAA NCEP" #? 
+ src: "exp/ncep/cfs-v2/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 124 + hcst: 24 + calendar: "gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + DWD-GCFS2.1: + name: "DWD GCFS 2.1" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/dwd/system21_m1/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 50 + hcst: 30 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system21_m1.txt" + ECCC-CanCM4i: + name: "ECCC CanCM4i (v3)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/eccc/eccc3/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 10 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc1.txt" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5/" + monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} + calendar: "standard" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + +######################################################################### + esarchive: src: "/esarchive/" System: -- GitLab From e95c778903e05cdefc41d2d2596e76f7e6db6505 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 22 Aug 2024 11:24:01 +0200 Subject: [PATCH 23/53] included development from dev-vis-mask branch and adjusted plotting parameters --- modules/Visualization/R/plot_metrics.R | 286 +++++++++------------ modules/Visualization/R/tmp/PlotRobinson.R | 4 +- 2 files changed, 126 insertions(+), 164 deletions(-) diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 1c219314..657b28a2 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -1,15 +1,15 @@ library(stringr) -library(lubridate) plot_metrics <- function(recipe, data_cube, metrics, - outdir, significance = F, output_conf) { + outdir, significance = F, output_conf) { # recipe: Auto-S2S recipe # archive: Auto-S2S archive # data_cube: s2dv_cube object with the corresponding hindcast data # metrics: list of named metric arrays with named dimensions # outdir: output directory # significance: T/F, whether to display the significance dots in the plots - # Abort if frequency is daily + + # Abort if frequency is daily if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { error(recipe$Run$logger, "Visualization functions not yet implemented for daily data.") @@ -20,8 +20,8 @@ plot_metrics <- function(recipe, data_cube, metrics, stop("The element 'metrics' must be a list of named arrays.") } - latitude <- data_cube$coords$latitude - longitude <- data_cube$coords$longitude + latitude <- data_cube$coords$lat + longitude <- data_cube$coords$lon 
archive <- get_archive(recipe) if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ system_name <- paste0('Multimodel-', @@ -31,38 +31,25 @@ plot_metrics <- function(recipe, data_cube, metrics, } hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - start_date <- recipe$Analysis$Time$sdate if (tolower(recipe$Analysis$Horizon) == "seasonal") { init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, start = 1, stop = 2)) - months <- lubridate::month(Subset(data_cube$attrs$Dates, + } else { + ## TODO: Sort out decadal initial month (is it always January?) + init_month <- 1 + } + month_label <- tolower(month.name[init_month]) + month_abbreviation <- month.abb[init_month] + # Get months + months <- lubridate::month(Subset(data_cube$attrs$Dates, "syear", indices = 1), label = T, abb = F,locale = "en_GB") - month_label <- tolower(month.name[init_month]) - month_abbreviation <- month.abb[init_month] - } else if (tolower(recipe$Analysis$Horizon) == "subseasonal") { - init_week <- as.character(recipe$Analysis$Time$sdate) - wday_ini <- wday(ymd(init_week), week_start = 1) - ftime_min <- recipe$Analysis$Time$ftime_min - ftime_max <- recipe$Analysis$Time$ftime_max - week_valid_ini <- ymd(init_week) + (1 - wday(ymd(init_week), week_start = 1)) + - (ftime_min:ftime_max)*7 - week_valid_end <- week_valid_ini + 6 - # Forecast times: - weeks <- paste0(0, ftime_min:ftime_max) - # This week_label appears on the name of the file. It's just the start date. - week_label <- recipe$Analysis$Time$sdate - } else { ## ? - init_month <- 1 - init_week <- 1 - } - if (!is.null(recipe$Analysis$Workflow$Visualization$projection)) { projection <- tolower(recipe$Analysis$Workflow$Visualization$projection) } else { projection <- "cylindrical_equidistant" } - + # Define color palette and number of breaks according to output format if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { diverging_palette <- "purpleorange" @@ -77,14 +64,14 @@ plot_metrics <- function(recipe, data_cube, metrics, "enscorr_specs", "rmsss", "msss") scores <- c("rps", "frps", "crps", "frps_specs", "mse") statistics <- c("cov", "std_hcst", "std_obs", "var_hcst", "var_obs", "n_eff") - + # Loop over variables and assign colorbar and plot parameters to each metric for (var in 1:data_cube$dims[['var']]) { var_name <- data_cube$attrs$Variable$varName[[var]] ## For statistics var_metric <- lapply(metrics, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) for (name in c(skill_scores, scores, statistics, "mean_bias", "enssprerr")) { if (name %in% names(metrics)) { units <- NULL @@ -194,86 +181,75 @@ plot_metrics <- function(recipe, data_cube, metrics, if ((significance_name %in% names(metrics))) { metric_significance <- var_metric[[significance_name]] metric_significance <- Reorder(metric_significance, c("time", - "longitude", - "latitude")) + "longitude", + "latitude")) # Split significance into list of lists, along the time dimension # This allows for plotting the significance dots correctly. 
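+        # Illustration of the structure produced below (a standalone sketch
+        # using the same ClimProjDiags::ArrayToList() call as this module):
+        # splitting a logical [time, longitude, latitude] array along 'time'
+        # gives one sublist per time step, each holding its
+        # [longitude, latitude] mask under the name 'dots', the special
+        # argument understood by the plotting functions.
+        # library(ClimProjDiags)
+        # x <- array(runif(24) > 0.5,
+        #            dim = c(time = 2, longitude = 3, latitude = 4))
+        # sig <- ArrayToList(x, dim = "time", level = "sublist", names = "dots")
+        # sig[[1]]$dots  # logical [3, 4] mask for the first time step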
metric_significance <- ClimProjDiags::ArrayToList(metric_significance, - dim = "time", - level = "sublist", - names = "dots") - } else { - metric_significance <- NULL + dim = "time", + level = "sublist", + names = "dots") + } else { + metric_significance <- NULL } } else { metric_significance <- NULL } - # Define output file name and titles if (tolower(recipe$Analysis$Horizon) == "seasonal") { outfile <- paste0(outdir[var], name, "-", month_label) - } else if (tolower(recipe$Analysis$Horizon) == "subseasonal") { - outfile <- paste0(outdir[var], name, "-", week_label) } else { outfile <- paste0(outdir[var], name) } - # Get variable name and long name var_name <- data_cube$attrs$Variable$varName[[var]] var_long_name <- data_cube$attrs$Variable$metadata[[var_name]]$long_name - # Multi-panel or single-panel plots if (recipe$Analysis$Workflow$Visualization$multi_panel) { # Define titles - if (tolower(recipe$Analysis$Horizon) == "seasonal") { - toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", display_name, " / ", hcst_period) - ## time_bounds in data_cube to know if Time_aggregation was applied - if (!is.null(attributes(data_cube$attrs$time_bounds))) { - info(recipe$Run$logger, "Using plotting attrs from time_bounds.") - if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) { - titles <- unlist( - lapply(1:length(attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime), - function(x) { - paste("Forecast time", - attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime[x], - "to", - attributes(data_cube$attrs$time_bounds)$plotting_attr$end_ftime[x])})) - } else { - titles <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]] - } + ## time_bounds in data_cube to know if Time_aggregation was applied + if (!is.null(attributes(data_cube$attrs$time_bounds))) { + info(recipe$Run$logger, "Using plotting attrs from time_bounds.") + if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) { + titles <- unlist( + lapply(1:length(attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime), + function(x) { + paste("Forecast time", + attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime[x], + "to", + attributes(data_cube$attrs$time_bounds)$plotting_attr$end_ftime[x])})) } else { - titles <- as.vector(months) + titles <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]] } - } else if (tolower(recipe$Analysis$Horizon) == "subseasonal") { - toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), - "\n", display_name, " / ", "Issued on ", - format(ymd(start_date), "%d-%m-%Y"), - " / ", hcst_period) - titles <- paste("Valid from", format(week_valid_ini, "%d-%m"), - "to", format(week_valid_end, "%d-%m")) - } else { - toptitle <- "Unknown" - titles <- "Unknown" + } else { + titles <- as.vector(months) } ## TODO: Combine PlotLayout with PlotRobinson? 
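+      # The multi-panel branch below hands PlotLayout() one map per forecast
+      # time by splitting the metric array along its first dimension; asplit()
+      # (base R >= 3.6) is the idiom. A minimal standalone illustration:
+      # m <- array(seq_len(24), dim = c(time = 2, longitude = 3, latitude = 4))
+      # panels <- asplit(m, MARGIN = 1)  # list of 2 [longitude, latitude] slices
+      # length(panels)                   # 2: one panel per time step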
- output_configuration <- output_conf$Multipanel$plot_metrics - base_args <- list(fun = "PlotEquiMap", - plot_dims = c('longitude', 'latitude'), - var = asplit(metric, MARGIN = 1), - lon = longitude, lat = latitude, - special_args = metric_significance, - dot_symbol = 20, toptitle = toptitle, - title_scale = 0.6, titles = titles, - filled.continents = FALSE, brks = brks, - cols = cols, col_inf = col_inf, col_sup = col_sup, - fileout = paste0(outfile, ".pdf"), - bar_label_digits = 3, - bar_extra_margin = rep(0.9, 4), - extra_margin = rep(1, 4), bar_label_scale = 1.5, - axes_label_scale = 1.3, width = 11, height = 11) - base_args[names(output_configuration)] <- output_configuration - do.call(PlotLayout, base_args) + suppressWarnings( + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + asplit(metric, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = metric_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents = F, + brks = brks, + cols = cols, + col_inf = col_inf, + col_sup = col_sup, + fileout = paste0(outfile, ".pdf"), + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + extra_margin = rep(1, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3, + width = 11,#default i + height = 11) + ) } else { # Define function and parameters depending on projection if (projection == 'cylindrical_equidistant') { @@ -282,13 +258,13 @@ plot_metrics <- function(recipe, data_cube, metrics, base_args <- list(var = NULL, dots = NULL, lon = longitude, lat = latitude, dot_symbol = 20, dot_size = 1, - title_scale = 0.6, margin_scale = c(1, 5, 5, 5), + title_scale = 0.6, filled.continents = F, brks = brks, cols = cols, col_inf = col_inf, col_sup = col_sup, units = units, font.main = 2, bar_label_digits = 3, bar_label_scale = 1.5, axes_label_scale = 1, width = 8, height = 5) - base_args[names(output_configuration)] <- output_configuration + base_args[names(output_configuration)] <- output_configuration } else { fun <- PlotRobinson common_projections <- c("robinson", "stereographic", "lambert_europe") @@ -305,78 +281,62 @@ plot_metrics <- function(recipe, data_cube, metrics, style = 'point', dots = NULL, brks = brks, cols = cols, col_inf = col_inf, col_sup = col_sup, - units = units) + units = units) } # Loop over forecast times for (i in 1:dim(metric)[['time']]) { # Get forecast time label - if (recipe$Analysis$Horizon == "Seasonal") { - # Case without time aggregation: - if (is.null(attributes(data_cube$attrs$time_bounds))) { - forecast_time <- match(months[i], month.name) - init_month + 1 - if (forecast_time < 1) { - forecast_time <- forecast_time + 12 - } - forecast_time <- sprintf("%02d", forecast_time) - # Define plot title + # Case without time aggregation: + if (is.null(attributes(data_cube$attrs$time_bounds))) { + forecast_time <- match(months[i], month.name) - init_month + 1 + + if (forecast_time < 1) { + forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + # Define plot title + toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", months[i], "/", + hcst_period) + } else { + if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) { + forecast_time_ini <-attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i] + forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end) + # title names: + 
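+          # Worked example of the month fold below: a 3-month lead from a
+          # November start gives 11 + 3 - 1 = 13, which must fold back to 1
+          # ("January") before indexing month.name; compactly,
+          # ((init_month + lead - 2) %% 12) + 1. Note the folded value must be
+          # assigned back to forecast_time_ini / forecast_time_end for the
+          # month.name indexing to stay within 1..12.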
forecast_time_ini <- init_month + forecast_time_ini - 1 + forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) + forecast_time_ini <- month.name[forecast_time_ini] + forecast_time_end <- init_month + forecast_time_end - 1 + forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) + forecast_time_end <- month.name[forecast_time_end] toptitle <- paste(system_name, "/", str_to_title(var_long_name), - "\n", display_name, "/", months[i], "/", + "\n", display_name, "/", forecast_time_ini, "to", + forecast_time_end, "/", hcst_period) } else { - if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) { - forecast_time_ini <-attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i] - forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i] - # labels for file name: - forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end) - # title names: - forecast_time_ini <- init_month + forecast_time_ini - 1 - forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) - forecast_time_ini <- month.name[forecast_time_ini] - forecast_time_end <- init_month + forecast_time_end - 1 - forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) - forecast_time_end <- month.name[forecast_time_end] - toptitle <- paste(system_name, "/", - str_to_title(var_long_name), - "\n", display_name, "/", forecast_time_ini, "to", - forecast_time_end, "/", - hcst_period) - } else { - forecast_time <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]][i] - toptitle <- paste(system_name, "/", - str_to_title(var_long_name), - "\n", display_name, "/", - forecast_time, "/", - hcst_period) - } + forecast_time <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]][i] + toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", + forecast_time, "/", + hcst_period) } - } else if (tolower(recipe$Analysis$Horizon == "subseasonal")) { - # Get forecast time label - forecast_time <- weeks[i] - toptitle <- paste(system_name, "/", - str_to_title(var_long_name), - "\n", display_name, " / ", "Issued on ", - format(ymd(start_date), "%d-%m-%Y"), - " / ", hcst_period, "\n", - paste("Valid from", format(week_valid_ini[i], "%d-%m"), - "to", format(week_valid_end[i], "%d-%m"), "of", - year(ymd(start_date)))) - # "/ valid week", format(weeks[i], - # "%Y-%m-%d"), "/", hcst_period) - } else { - warning("Plotting decadal?") } # Modify base arguments base_args[[1]] <- metric[i, , ] if (!is.null(metric_significance)) { - sign_file_label <- NULL - if (is.logical(significance)) { - if (significance) { + sign_file_label <- NULL + if (is.logical(significance)) { + if (significance) { base_args[[2]] <- metric_significance[[i]][[1]] - sign_file_lable <- '_mask' - } - } else { - if (significance == 'dots') { + sign_file_lable <- '_mask' + } + } else { + if (significance == 'dots') { if (projection != 'cylindrical_equidistant') { base_args[[10]] <- metric_significance[[i]][[1]] } else { @@ -384,32 +344,34 @@ plot_metrics <- function(recipe, data_cube, metrics, # so PlotEquiMap plots dots when requested base_args[[2]] <- metric_significance[[i]][[1]] } - sign_file_label <- '_dots' - } else if (significance == 'mask') { - base_args[[2]] <- metric_significance[[i]][[1]] - sign_file_label <- '_mask' + sign_file_label <- '_dots' + } else if (significance == 'mask') { + base_args[[2]] <- metric_significance[[i]][[1]] + 
sign_file_label <- '_mask' } - } - significance_caption <- "alpha = 0.05" + } + significance_caption <- "Alpha: 0.05" } else { significance_caption <- NULL - sign_file_label <- NULL + sign_file_label <- NULL } if (identical(fun, PlotRobinson)) { ## TODO: Customize alpha and other technical details depending on the metric - base_args[['caption']] <- - paste0("Nominal start date: ", ymd(start_date), "\n", - "Forecast week: ", sprintf("%02d", i), "\n", ## This is specific for subseasonal, would need a loop to specify time horizon - "Reference: ", recipe$Analysis$Datasets$Reference, "\n", - "Units: ", data_cube$attrs$Variable$metadata[[var_name]]$units, "\n", - significance_caption) + base_args[['caption']] <- + paste0(" Nominal start date: ", + "1st of ", str_to_title(month_label), "\n", + " Forecast month: ", forecast_time, "\n", + " Reference: ", recipe$Analysis$Datasets$Reference, "\n", + " Interpolation: ", recipe$Analysis$Regrid$type, "\n", + " Cross-validation: ", tolower(recipe$Analysis$Workflow$Skill$cross_validation), "\n", + paste0(" ",significance_caption)) } fileout <- paste0(outfile, "_ft", forecast_time, sign_file_label, ".pdf") # Plot info(recipe$Run$logger, paste("Plotting", display_name)) - + do.call(fun, args = c(base_args, list(toptitle = toptitle, @@ -419,6 +381,6 @@ plot_metrics <- function(recipe, data_cube, metrics, } } } - info(recipe$Run$logger, + info(recipe$Run$logger, "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/R/tmp/PlotRobinson.R b/modules/Visualization/R/tmp/PlotRobinson.R index 67ca034d..addaf93a 100644 --- a/modules/Visualization/R/tmp/PlotRobinson.R +++ b/modules/Visualization/R/tmp/PlotRobinson.R @@ -120,7 +120,7 @@ PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL, triangle_ends = NULL, col_inf = NULL, col_sup = NULL, colNA = NULL, color_fun = clim.palette(), bar_extra_margin = c(3.5, 0, 3.5, 0), vertical = TRUE, toptitle = NULL, caption = NULL, units = NULL, crop_coastlines = NULL, - point_size = "auto", title_size = 10, dots_size = 0.2, + point_size = 0.2, title_size = 12, dots_size = 0.3, # point_size = "auto", dots_shape = 47, coastlines_width = 0.3, fileout = NULL, width = 8, height = 5, size_units = "in", res = 300) { @@ -281,7 +281,7 @@ PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL, title = units, title_scale = 1, # units_scale label_scale = 1, tick_scale = 1, #bar_tick_scale extra_margin = bar_extra_margin, label_digits = 4) - brks <- colorbar$brks + brks <- round(colorbar$brks, 2) cols <- colorbar$cols col_inf <- colorbar$col_inf col_sup <- colorbar$col_sup -- GitLab From 11e1a1190ec837c6f9d53b6a38b72bd6ab2e6d08 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 28 Aug 2024 12:52:29 +0200 Subject: [PATCH 24/53] cleaned var dimension removal in code, and bug fix for 1 forecast time --- modules/Scorecards/Scorecards_calculations.R | 158 +++++++++---------- 1 file changed, 77 insertions(+), 81 deletions(-) diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index e24ed79e..75168908 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -61,7 +61,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Define array to filled with data aggr_significance <- array(data = NA, - dim = c(time = length(forecast.months), + dim = c(var = length(var), + time = length(forecast.months), region = length(regions), metric = 
length(metrics))) @@ -77,9 +78,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } else { ## Define arrays to filled with data aggr_metrics <- array(data = NA, - dim = c(time = length(forecast.months), + dim = c(var = length(var), + time = length(forecast.months), region = length(regions), - metric = length(metrics))) + metric = length(metrics))) lon_dim <- 'longitude' lat_dim <- 'latitude' @@ -102,13 +104,12 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, }, simplify = 'array') names(dim(result))[length(dim(result))] <- 'region' - result <-Subset(result, 'var', 1, drop = 'selected') - + if (met =='crpss' && inf.to.na == TRUE) { result[result == -Inf] <- NA } - aggr_metrics[,,which(metrics == met)] <- Reorder(data = result, - order = c('time', 'region')) + aggr_metrics[,,,which(metrics == met)] <- Reorder(data = result, + order = c('var','time', 'region')) } ## close on met } ## close if on skill @@ -138,11 +139,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Include name of region dimension names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' - - ## Remove 'var' dimension - rps_syear <- Subset(rps_syear, 'var', 1, drop = 'selected') - rps_clim_syear <- Subset(rps_clim_syear, 'var', 1, drop = 'selected') - + ## Calculate significance sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, time_dim = time_dim, @@ -163,10 +160,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, rpss <- 1 - rps_syear / rps_clim_syear ## Save metric result in arrays - aggr_metrics[,,which(metrics == met)] <- Reorder(data = rpss, - order = c('time', 'region')) - aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_rpss, - order = c('time', 'region')) + aggr_metrics[,,,which(metrics == met)] <- Reorder(data = rpss, + order = c('var','time','region')) + aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_rpss, + order = c('var','time','region')) } else if (met == 'crpss') { crps_syear <- sapply(X = 1:length(regions), @@ -189,11 +186,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Include name of region dimension names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' - - ## Remove 'var' dimension - crps_syear <- Subset(crps_syear, 'var', 1, drop = 'selected') - crps_clim_syear <- Subset(crps_clim_syear, 'var', 1, drop = 'selected') - + ## Calculate significance sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, time_dim = time_dim, @@ -214,10 +207,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, crpss <- 1 - crps_syear / crps_clim_syear ## Save metric result in arrays - aggr_metrics[ , , which(metrics == met)] <- Reorder(data = crpss, - order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- Reorder(data = sign_crpss, - order = c('time', 'region')) + aggr_metrics[,,,which(metrics == met)] <- Reorder(data = crpss, + order = c('var','time','region')) + aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_crpss, + order = c('var','time','region')) } else if (met == 'enscorr') { cov <- sapply(X = 1:length(regions), FUN = function(X) { @@ -257,11 +250,18 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, na.rm = na.rm) }, simplify = 'array') - ## Include name of region dimension - names(dim(cov))[length(dim(cov))] <- 'region' - 
names(dim(var_hcst))[length(dim(var_hcst))] <- 'region' - names(dim(var_obs))[length(dim(var_obs))] <- 'region' - names(dim(n_eff))[length(dim(n_eff))] <- 'region' + if (is.null(dim(cov))){ + cov <- array(data = cov, dim = c(var = 1, time = 1, region = 1)) + n_eff <- array(data = n_eff, dim = c(var = 1, time = 1, region = 1)) + var_hcst <- array(data = var_hcst, dim = c(var = 1, time = 1, region = 1)) + var_obs <- array(data = var_obs, dim = c(var = 1, time = 1, region = 1)) + } else { + ## Include name of region dimension + names(dim(cov))[length(dim(cov))] <- 'region' + names(dim(var_hcst))[length(dim(var_hcst))] <- 'region' + names(dim(var_obs))[length(dim(var_obs))] <- 'region' + names(dim(n_eff))[length(dim(n_eff))] <- 'region' + } ## Convert aggregated variance back to standard deviation std_hcst <- sqrt(var_hcst) @@ -269,35 +269,37 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Calculate correlation enscorr <- cov / (std_hcst * std_obs) - - ## Remove 'var' dimension - enscorr <- Subset(enscorr, 'var', 1, drop = 'selected') - n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') - + ## Calculate significance of corr t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + if (is.null(dim(t_alpha2_n2))){ + t_alpha2_n2 <- array(data = t_alpha2_n2, dim = c(var = 1, time = 1, region = 1)) + } t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) sign_corr<- array(data = NA, - dim = c(time = length(forecast.months), + dim = c(var = length(var), + time = length(forecast.months), region = length(regions))) - for (time in 1:dim(sign_corr)[['time']]) { - for (reg in 1:dim(sign_corr)[['region']]) { - if ((anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE) && - (t[time, reg] >= t_alpha2_n2[time, reg])) { - sign_corr[time, reg] <- TRUE - } else { - sign_corr[time, reg] <- FALSE + for (var in 1:dim(sign_corr)[['var']]) { + for (time in 1:dim(sign_corr)[['time']]) { + for (reg in 1:dim(sign_corr)[['region']]) { + if ((anyNA(c(t[var, time, reg], t_alpha2_n2[var, time, reg])) == FALSE) + && (t[var, time, reg] >= t_alpha2_n2[var, time, reg])) { + sign_corr[var, time, reg] <- TRUE + } else { + sign_corr[var, time, reg] <- FALSE + } } - } + } } ## Save metric result in arrays - aggr_metrics[,,which(metrics == met)] <- Reorder(data = enscorr, - order = c('time', 'region')) - aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr, - order = c('time', 'region')) + aggr_metrics[,,,which(metrics == met)] <- Reorder(data = enscorr, + order = c('var','time','region')) + aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_corr, + order = c('var','time','region')) } else if (met == 'mean_bias') { ## Calculate ensemble mean @@ -328,10 +330,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' ## Remove unnecessary dimension - hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), - list(1,1,1,1) , drop = 'selected') - obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), - list(1,1,1,1) , drop = 'selected') + hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','sday','sweek'), + list(1,1,1) , drop = 'selected') + obs_data_aggr <- Subset(obs_data_aggr, c('dat','sday','sweek'), + list(1,1,1) , drop = 'selected') ## Calculate significance pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), @@ -351,17 +353,17 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, na.rm = na.rm) }, simplify = 
'array') - ## Include name of region dimension - names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' - - ## Remove 'var' dimension - mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected') - + if (is.null(dim(mean_bias))){ + mean_bias <- array(data = mean_bias, dim = c(var = 1, time = 1, region = 1)) + } else { + names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' + } + ## Save metric result in array - aggr_metrics[ , , which(metrics == met)] <- Reorder(data = mean_bias, - order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- Reorder(data = sign_mean_bias, - order = c('time', 'region')) + aggr_metrics[,,,which(metrics == met)] <- Reorder(data = mean_bias, + order = c('var', 'time', 'region')) + aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_mean_bias, + order = c('var', 'time', 'region')) } else if (met == 'enssprerr') { ## Calculate metric @@ -383,17 +385,17 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, na.rm = na.rm) }, simplify = 'array') - ## Include name of region dimension - names(dim(spread))[length(dim(spread))] <- 'region' - names(dim(error))[length(dim(error))] <- 'region' - - ## Remove 'var' dimension - spread <- Subset(spread, 'var', 1, drop = 'selected') - error <- Subset(error, 'var', 1, drop = 'selected') - + if (is.null(dim(spread))){ + spread <- array(data = spread, dim = c(var = 1, time = 1, region = 1)) + error <- array(data = error, dim = c(var = 1, time = 1, region = 1)) + } else { + names(dim(spread))[length(dim(spread))] <- 'region' + names(dim(error))[length(dim(error))] <- 'region' + } + enssprerr <- spread / error - # ## Significance calculation + # ## Significance calculation (in progress) # # # Effective sample size # # exp <- data$hcst$data @@ -418,10 +420,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, # sign_enssprerr <- pval_enssprerr <= alpha ## Save metric result in array - aggr_metrics[ , , which(metrics == met)] <- Reorder(data = enssprerr, - order = c('time', 'region')) + aggr_metrics[,,,which(metrics == met)] <- Reorder(data = enssprerr, + order = c('var','time','region')) # aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr, - # order = c('time', 'region')) + # order = c('var', time', 'region')) } } ## close loop on metric } ## close if on score @@ -429,13 +431,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Set NAs to False aggr_significance[is.na(aggr_significance)] <- FALSE - - ## Include 'var' dimension to be able to save array - if (!'var' %in% names(dim(aggr_metrics))) { - aggr_metrics <- InsertDim(aggr_metrics, 1, 1, name = 'var') - aggr_significance <- InsertDim(aggr_significance, 1, 1, name = 'var') - } - + ## Include attributes attributes(aggr_metrics)$metrics <- metrics attributes(aggr_metrics)$forecast.months <- forecast.months -- GitLab From 17ff211db3710f8bb1973a917d207c09d0f46657 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 29 Aug 2024 12:38:15 +0200 Subject: [PATCH 25/53] bug fix in LoadMetrics function for loading multiple systems --- modules/Scorecards/R/tmp/LoadMetrics.R | 50 ++++++++++++------------ modules/Scorecards/Scorecards_plotting.R | 1 + 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index 2a70eff9..e49132f6 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -91,34 +91,34 @@ LoadMetrics <- function(input_path, system, 
reference, var, period, data_type, reference <- gsub('.','', reference, fixed = T) ## Load data for each system + all_metrics <- NULL - for (sys in 1:length(system)) { - ## Load data for each reference - by_reference <- NULL - for (ref in 1:length(reference)) { - ## Call function to load metrics data - result <- .loadmetrics(input_path = input_path, - system = system[sys], - reference = reference[ref], - var = var, - period = period, - start_months = start_months, - calib_method = calib_method, - data_type = data_type) - - result_attr <- attributes(result) - by_reference <- abind::abind(by_reference, result, - along = length(dim(result)) + 1) - dim(by_reference) <- c(dim(result), reference = length(reference)) - } ## close loop on reference - all_metrics <- abind::abind(all_metrics, by_reference, - along = length(dim(by_reference)) + 1) - dim(all_metrics) <- c(dim(by_reference), system = length(system)) - } ## close loop on system - + for (sys in 1:length(system)){ + for (ref in 1:length(reference)){ + + result <- .loadmetrics(input_path = input_path, + system = system[sys], + reference = reference[ref], + var = var, + period = period, + start_months = start_months, + calib_method = calib_method, + data_type = data_type) + + if (sys == 1 && ref == 1){ + all_metrics <- array(data = NA, + dim = c('system'= length(system), + 'reference'=length(reference), + attributes(result)$dim)) + } + all_metrics[sys,ref,,,,] <- result + + } + } + attributes(all_metrics)$start_months <- start_months - + return(all_metrics) } ## close function diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index ff1ba186..efeaad26 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -191,6 +191,7 @@ Scorecards_plotting <- function(recipe) { sign = aggregated_significance, system = system, reference = reference, + var = var, var.units = var.units, start.year = start.year, end.year = end.year, -- GitLab From f8525b6d9a3ba7fd14041cbc00f457a529d48440 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 29 Aug 2024 16:20:32 +0200 Subject: [PATCH 26/53] Including missing systems in archive --- conf/archive.yml | 254 ++++++++++--------- conf/grid_description/griddes_system8c3s.txt | 17 ++ conf/grid_description/griddes_ukmo601.txt | 17 ++ modules/Visualization/R/plot_metrics.R | 2 +- 4 files changed, 175 insertions(+), 115 deletions(-) create mode 100644 conf/grid_description/griddes_system8c3s.txt create mode 100644 conf/grid_description/griddes_ukmo601.txt diff --git a/conf/archive.yml b/conf/archive.yml index cc518535..1e0ad0cd 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -1,117 +1,117 @@ gpfs: - src: "/gpfs/projects/bsc32/esarchive_cache/" - System: - ECMWF-SEAS5.1: - name: "ECMWF SEAS5 (v5.1)" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/ecmwf/system51c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 51 - hcst: 25 - calendar: "proleptic_gregorian" - time_stamp_lag: "0" - reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/monthly_mean/tas_f6h/tas_20180501.nc" - land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" - CMCC-SPS3.5: - name: "CMCC-SPS3.5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/cmcc/system35c3s/" - monthly_mean: 
{"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 50 - hcst: 40 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_system35c3s.txt" - Meteo-France-System8: - name: "Meteo-France System 8" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/meteofrance/system8c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_s0-24h/", - "sfcWind": "monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 51 - hcst: 25 - time_stamp_lag: "+1" - calendar: "proleptic_gregorian" - reference_grid: "conf/grid_description/griddes_system7c3s.txt" - UK-MetOffice-Glosea601: - name: "UK MetOffice GloSea 6 (v6.01)" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/ukmo/glosea6_system601-c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 62 - hcst: 28 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_ukmo600.txt" - NCEP-CFSv2: - name: "NCEP CFSv2" - institution: "NOAA NCEP" #? - src: "exp/ncep/cfs-v2/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 124 - hcst: 24 - calendar: "gregorian" - time_stamp_lag: "0" - reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" - DWD-GCFS2.1: - name: "DWD GCFS 2.1" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/dwd/system21_m1/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 50 - hcst: 30 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_system21_m1.txt" - ECCC-CanCM4i: - name: "ECCC CanCM4i (v3)" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/eccc/eccc3/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_s0-24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 10 - hcst: 10 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_eccc1.txt" - Reference: - ERA5: - name: "ERA5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/ecmwf/era5/" - monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", - "psl":"monthly_mean/psl_f1h-r1440x721cds/", - "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", - "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} - calendar: "standard" - reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" - land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + src: "/gpfs/projects/bsc32/esarchive_cache/" + System: + ECMWF-SEAS5.1: + name: "ECMWF SEAS5 (v5.1)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ecmwf/system51c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: 
"proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/monthly_mean/tas_f6h/tas_20180501.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" + CMCC-SPS3.5: + name: "CMCC-SPS3.5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/cmcc/system35c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 50 + hcst: 40 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system35c3s.txt" + Meteo-France-System8: + name: "Meteo-France System 8" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/meteofrance/system8c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind": "monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system7c3s.txt" + UK-MetOffice-Glosea601: + name: "UK MetOffice GloSea 6 (v6.01)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system601-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo600.txt" + NCEP-CFSv2: + name: "NCEP CFSv2" + institution: "NOAA NCEP" #? + src: "exp/ncep/cfs-v2/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 124 + hcst: 24 + calendar: "gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + DWD-GCFS2.1: + name: "DWD GCFS 2.1" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/dwd/system21_m1/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 50 + hcst: 30 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system21_m1.txt" + ECCC-CanCM4i: + name: "ECCC CanCM4i (v3)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/eccc/eccc3/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 10 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc1.txt" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5/" + monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} + calendar: "standard" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" 
######################################################################### @@ -179,6 +179,20 @@ esarchive: time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" + Meteo-France-System8: + name: "Meteo-France System 8" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/meteofrance/system8c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "g500":"monthly_mean/g500_f12h/", + "prlr":"monthly_mean/prlr_f24h/", "sfcWind": "monthly_mean/sfcWind_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", + "tos":"monthly_mean/tos_f6h/"} + nmember: + fcst: 51 + hcst: 25 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system8c3s.txt" DWD-GCFS2.1: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" @@ -220,7 +234,7 @@ esarchive: ECCC-CanCM4i: name: "ECCC CanCM4i" institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/eccc/eccc1/" + src: "exp/eccc/eccc3/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} nmember: @@ -241,6 +255,18 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo600.txt" + UK-MetOffice-Glosea601: + name: "UK MetOffice GloSea 6 (v6.01)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system601-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_s0-24h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo601.txt" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? 
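The two seasonal systems added above, Meteo-France-System8 and UK-MetOffice-Glosea601, are resolved downstream through this same configuration file; the grid description files referenced by their reference_grid fields are added in the diffs that follow. A minimal sketch of the lookup, assuming only the yaml package and the conf/archive.yml layout used throughout this series (an illustrative interactive check, not part of the patch):

    library(yaml)
    # Read the esarchive section of the archive configuration and
    # resolve one of the newly added seasonal forecast systems
    archive <- read_yaml("conf/archive.yml")$esarchive
    sys_entry <- archive$System[["Meteo-France-System8"]]
    sys_entry$reference_grid  # "conf/grid_description/griddes_system8c3s.txt"
    sys_entry$nmember$hcst    # 25 hindcast members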
diff --git a/conf/grid_description/griddes_system8c3s.txt b/conf/grid_description/griddes_system8c3s.txt new file mode 100644 index 00000000..942238c6 --- /dev/null +++ b/conf/grid_description/griddes_system8c3s.txt @@ -0,0 +1,17 @@ +# +# gridID 30 +# +gridtype = lonlat +gridsize = 64800 +xname = lon +xlongname = longitude +xunits = degrees_east +yname = lat +ylongname = latitude +yunits = degrees_north +xsize = 360 +ysize = 180 +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_ukmo601.txt b/conf/grid_description/griddes_ukmo601.txt new file mode 100644 index 00000000..942238c6 --- /dev/null +++ b/conf/grid_description/griddes_ukmo601.txt @@ -0,0 +1,17 @@ +# +# gridID 30 +# +gridtype = lonlat +gridsize = 64800 +xname = lon +xlongname = longitude +xunits = degrees_east +yname = lat +ylongname = latitude +yunits = degrees_north +xsize = 360 +ysize = 180 +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 657b28a2..45dd79e4 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -363,7 +363,7 @@ plot_metrics <- function(recipe, data_cube, metrics, " Forecast month: ", forecast_time, "\n", " Reference: ", recipe$Analysis$Datasets$Reference, "\n", " Interpolation: ", recipe$Analysis$Regrid$type, "\n", - " Cross-validation: ", tolower(recipe$Analysis$Workflow$Skill$cross_validation), "\n", + " Cross-validation: metrics", "\n", # tolower(recipe$Analysis$Workflow$Skill$cross_validation), "\n", paste0(" ",significance_caption)) } fileout <- paste0(outfile, "_ft", forecast_time, -- GitLab From ffdace49bd635ad5d78eb922c6c20ae3c1debfff Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Tue, 3 Sep 2024 15:39:04 +0200 Subject: [PATCH 27/53] corrected path --- modules/Loading/Loading.R | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 725ebf33..2a465c95 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -3,7 +3,6 @@ source("tools/libs.R") Loading <- function(recipe) { ## TODO: remove with new release of startR path <- "modules/Loading/tmp/startR/R/" - path <- "/esarchive/scratch/vagudets/repos/startR/R/" ff <- lapply(list.files(path), function(x) paste0(path, x)) invisible(lapply(ff, source)) # Source correct function depending on filesystem and time horizon -- GitLab From 12170efc50cbd6bf75f04b5a283a5de1954ccc51 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 4 Sep 2024 17:22:55 +0200 Subject: [PATCH 28/53] Including orography correction for temperature data --- conf/archive.yml | 8 +++ modules/Loading/R/orography_correction.R | 79 ++++++++++++++++++++++++ modules/Visualization/R/plot_metrics.R | 4 +- 3 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 modules/Loading/R/orography_correction.R diff --git a/conf/archive.yml b/conf/archive.yml index 1e0ad0cd..178aa8dd 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -146,6 +146,7 @@ esarchive: time_stamp_lag: "0" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" land_sea_mask: "/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc" + orography: "/esarchive/exp/ecmwf/system5c3s/constant/orography.nc" ECMWF-SEAS5.1: name: "ECMWF SEAS5 (v5.1)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -193,6 +194,7 @@ esarchive: time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: 
"conf/grid_description/griddes_system8c3s.txt" + orography: "/esarchive/exp/meteofrance/system8c3s/constant/orography.nc" DWD-GCFS2.1: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" @@ -206,6 +208,7 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system21_m1.txt" + orography: "/esarchive/exp/dwd/system21_m1/constant/orography.nc" CMCC-SPS3.5: name: "CMCC-SPS3.5" institution: "European Centre for Medium-Range Weather Forecasts" @@ -219,6 +222,7 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system35c3s.txt" + orography: "/esarchive/exp/cmcc/system35c3s/constant/orography.nc" JMA-CPS2: name: "JMA System 2" institution: "European Centre for Medium-Range Weather Forecasts" @@ -243,6 +247,7 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" + orography: "/esarchive/exp/eccc/eccc2/constant/orography.nc" UK-MetOffice-Glosea600: name: "UK MetOffice GloSea 6 (v6.0)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -267,6 +272,7 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo601.txt" + orography: "/esarchive/exp/ukmo/glosea6_system601-c3s/constant/orography.nc" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? @@ -279,6 +285,7 @@ esarchive: calendar: "gregorian" time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + orography: "/esarchive/exp/ncep/cfs-v2/constant/orography.nc" Reference: ERA5: name: "ERA5" @@ -315,6 +322,7 @@ esarchive: calendar: "standard" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + orography: "/esarchive/recon/ecmwf/era5/constant/orography.nc" ERA5-Land: name: "ERA5-Land" institution: "European Centre for Medium-Range Weather Forecasts" diff --git a/modules/Loading/R/orography_correction.R b/modules/Loading/R/orography_correction.R new file mode 100644 index 00000000..407ec1bf --- /dev/null +++ b/modules/Loading/R/orography_correction.R @@ -0,0 +1,79 @@ +## TODO: remove paths to personal scratchs +source("modules/Loading/R/get_regrid_params.R") +source("modules/Loading/R/check_latlon.R") + +orography_correction <- function(recipe, data) { + + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + + archive <- read_yaml("conf/archive.yml")$esarchive + + # Define regrid parameters: + regrid_params <- get_regrid_params(recipe, archive) + + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + ## Load exp orography + orography_exp <- Start(dat = archive$System[[exp.name]]$orography, + var = 'orography', + lon = values(list(lons.min, lons.max)), + lon_reorder = circularsort, + lat = values(list(lats.min, lats.max)), + lat_reorder = Sort(), + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('lat','lon'), + return_vars = list(lat = NULL, lon = NULL), + synonims = list(lon = c('lon','longitude'), + lat = c('lat','latitude')), + 
num_procs = 1, retrieve = TRUE) + + ## Set negative values to zero + orography_exp <- pmax(orography_exp, 0) + + ## load obs orography + orography_obs <- Start(dat = archive$Reference[[ref.name]]$orography, + var = 'orography', + lon = values(list(lons.min, lons.max)), + lon_reorder = circularsort, + lat = values(list(lats.min, lats.max)), + lat_reorder = Sort(), + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('lat','lon'), + return_vars = list(lat = NULL, lon = NULL), + synonims = list(lon = c('lon','longitude'), + lat = c('lat','latitude')), + num_procs = 1, retrieve = TRUE) + + ## Set negative values to zero + orography_obs <- pmax(orography_obs, 0) + + ## Calculate difference between orographies + oro_diff <- orography_exp - orography_obs + + ## Apply lapse rate factor to correct temperature = -0.0065 K/m (-6.5 K/km) + oro_obs_corr <- oro_diff * -0.0065 + oro_obs_corr <- Reorder(data = drop(oro_obs_corr), order = c('lat', 'lon')) + + ## Apply correction to obs temperature data + for(ens in 1:dim(data$obs$data)['ensemble']){ + for(time in 1:dim(data$obs$data)['time']){ + for(syear in 1:dim(data$obs$data)['syear']){ + data$obs$data[,,,,syear,time,,,ens] <- data$obs$data[,,,,syear,time,,,ens] + oro_obs_corr + + } + } + } + + return(data) + +} + diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 45dd79e4..57f735a8 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -363,11 +363,11 @@ plot_metrics <- function(recipe, data_cube, metrics, " Forecast month: ", forecast_time, "\n", " Reference: ", recipe$Analysis$Datasets$Reference, "\n", " Interpolation: ", recipe$Analysis$Regrid$type, "\n", - " Cross-validation: metrics", "\n", # tolower(recipe$Analysis$Workflow$Skill$cross_validation), "\n", + " Cross-validation: none", "\n", # tolower(recipe$Analysis$Workflow$Skill$cross_validation), "\n", paste0(" ",significance_caption)) } fileout <- paste0(outfile, "_ft", forecast_time, - sign_file_label, ".pdf") + sign_file_label, ".png") # Plot info(recipe$Run$logger, paste("Plotting", display_name)) -- GitLab From abf1b12ef6f194386739b060fcc41abef81f4648 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 12 Sep 2024 18:14:47 +0200 Subject: [PATCH 29/53] including logo in scorecards --- MODULES | 10 + bsc_logo_small.png | Bin 0 -> 22294 bytes conf/archive.yml | 6 +- modules/Crossval/Crossval_Calibration.R | 99 + modules/Crossval/Crossval_NAO.R | 375 ++++ modules/Crossval/Crossval_anomalies.R | 357 ++++ modules/Crossval/Crossval_metrics.R | 260 +++ modules/Crossval/Crossval_multimodel_NAO.R | 139 ++ .../Crossval/Crossval_multimodel_anomalies.R | 369 ++++ .../Crossval/Crossval_multimodel_metrics.R | 273 +++ modules/Crossval/R/CRPS_clim.R | 35 + modules/Crossval/R/RPS_clim.R | 39 + modules/Crossval/R/tmp/Bias.R | 216 ++ modules/Crossval/R/tmp/Corr.R | 484 +++++ modules/Crossval/R/tmp/EOF.R | 293 +++ modules/Crossval/R/tmp/Eno.R | 103 + modules/Crossval/R/tmp/GetProbs.R | 351 +++ modules/Crossval/R/tmp/NAO.R | 574 +++++ modules/Crossval/R/tmp/ProjectField.R | 272 +++ modules/Crossval/R/tmp/RPS.R | 408 ++++ modules/Crossval/R/tmp/RPSS.R | 638 ++++++ modules/Crossval/R/tmp/RandomWalkTest.R | 184 ++ modules/Crossval/R/tmp/SprErr.R | 227 ++ modules/Crossval/R/tmp/Utils.R | 1885 +++++++++++++++++ modules/Crossval/recipe_crossval_ecvs.yml | 184 ++ .../Crossval/recipe_crossval_ecvs_global.yml | 
184 ++
 modules/Scorecards/execute_scorecards.R          |    4 +
 modules/Visualization/R/plot_metrics.R           |   23 +-
 rsz_rsz_bsc_logo.png                             |  Bin 0 -> 48040 bytes
 tools/add_logo.R                                 |   19 +-
 30 files changed, 8005 insertions(+), 6 deletions(-)
 create mode 100644 bsc_logo_small.png
 create mode 100644 modules/Crossval/Crossval_Calibration.R
 create mode 100644 modules/Crossval/Crossval_NAO.R
 create mode 100644 modules/Crossval/Crossval_anomalies.R
 create mode 100644 modules/Crossval/Crossval_metrics.R
 create mode 100644 modules/Crossval/Crossval_multimodel_NAO.R
 create mode 100644 modules/Crossval/Crossval_multimodel_anomalies.R
 create mode 100644 modules/Crossval/Crossval_multimodel_metrics.R
 create mode 100644 modules/Crossval/R/CRPS_clim.R
 create mode 100644 modules/Crossval/R/RPS_clim.R
 create mode 100644 modules/Crossval/R/tmp/Bias.R
 create mode 100644 modules/Crossval/R/tmp/Corr.R
 create mode 100644 modules/Crossval/R/tmp/EOF.R
 create mode 100644 modules/Crossval/R/tmp/Eno.R
 create mode 100644 modules/Crossval/R/tmp/GetProbs.R
 create mode 100644 modules/Crossval/R/tmp/NAO.R
 create mode 100644 modules/Crossval/R/tmp/ProjectField.R
 create mode 100644 modules/Crossval/R/tmp/RPS.R
 create mode 100644 modules/Crossval/R/tmp/RPSS.R
 create mode 100644 modules/Crossval/R/tmp/RandomWalkTest.R
 create mode 100644 modules/Crossval/R/tmp/SprErr.R
 create mode 100644 modules/Crossval/R/tmp/Utils.R
 create mode 100644 modules/Crossval/recipe_crossval_ecvs.yml
 create mode 100644 modules/Crossval/recipe_crossval_ecvs_global.yml
 create mode 100644 rsz_rsz_bsc_logo.png

diff --git a/MODULES b/MODULES
index 6654c092..4f58e594 100644
--- a/MODULES
+++ b/MODULES
@@ -26,6 +26,16 @@ elif [[ $HOSTNAME == "bsceshub02.bsc.es" ]]; then
   module load GDAL/3.5.2-foss-2021b-Python-3.9.6
   module load PROJ/9.1.0-foss-2021b
   module load Phantomjs/2.1.1
+
+elif [[ $BSC_MACHINE == "amd" ]]; then
+
+  module purge
+  module load CDO/1.9.10-foss-2019b
+  module load R/4.1.2-foss-2019b
+  module load GEOS/3.7.2-foss-2019b-Python-3.7.4
+  module load GDAL/3.5.0-foss-2019b-Python-3.7.4
+  module load PROJ/9.0.0-GCCcore-8.3.0
+  module load Phantomjs/2.1.1
 
 else
 
diff --git a/bsc_logo_small.png b/bsc_logo_small.png
new file mode 100644
index 0000000000000000000000000000000000000000..56c7dcf3b559caf94defd7544446e3fbfd1e1a8d
GIT binary patch
literal 22294
[binary image data omitted]
+  # if 1 percentile -> bss
+  # if more than 1 -> rpss
+  exe_rps <- unlist(lapply(categories, function(x) {
+    if (length(x) > 1) {
+      x <- x[1] * 100
+    }
+    return(x)}))
+  if (is.null(alpha)) {
+    alpha <- 0.05
+  }
+  ## START SKILL ASSESSMENT:
+  skill_metrics <- list()
+  requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric,
+                                ", | |,")[[1]]
+  # The recipe allows requesting more than only terciles:
+  for (ps in 1:length(exe_rps)) {
+    if ('rps' %in% requested_metrics) {
+      rps <- RPS(exp = data_crossval$probs$hcst_ev[[ps]],
+                 obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL,
+                 cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear',
+                 Fair = fair, nmemb = nmemb,
+                 ncores = ncores)
+      rps_clim <- Apply(list(data_crossval$probs$obs_ev[[1]]),
+                        target_dims = c('cat', 'syear'),
+                        RPS_clim, bin_dim_abs = 'cat', Fair = fair,
+                        cross.val = FALSE, ncores = ncores)$output1
+      skill_metrics$rps <- rps
+      skill_metrics$rps_clim <- rps_clim
+      # names based on the categories:
+      # To use it when visualization works for more rps
+      #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps
+      #skill_metrics[[paste0('rps_clim',
+      #                      exe_rps[ps])]] <- rps_clim
+    }
+    if ('rpss' %in% requested_metrics) {
+      rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[1]],
+                   obs = data_crossval$probs$obs_ev[[1]],
+                   ref = NULL, # ref is 1/3 by default if terciles
+                   time_dim = 'syear', memb_dim = NULL,
+                   cat_dim = 'cat', nmemb = nmemb,
+                   dat_dim = NULL,
+                   prob_thresholds = categories[[ps]],
+                   indices_for_clim = NULL,
+                   Fair = fair, weights_exp = NULL, weights_ref = NULL,
+                   cross.val = FALSE, na.rm = na.rm,
+                   sig_method.type = 'two.sided.approx', alpha = alpha,
+                   ncores = ncores)
+      skill_metrics$rpss <- rpss$rpss
+      skill_metrics$rpss_significance <- rpss$sign
+      # TO USE IT when visualization works for more rpss
+      #skill_metrics[[paste0('rpss', exe_rps[ps])]] <- rpss$rpss
+      #skill_metrics[[paste0('rpss',
+      #                      exe_rps[ps],
+      #                      "_significance")]] <- rpss$sign
+    }
+  }
+  if ('crps' %in% requested_metrics) {
+    crps <- CRPS(exp = data_crossval$hcst$data,
+                 obs = data_crossval$obs$data,
+                 time_dim = 'syear', memb_dim = 'ensemble',
+                 Fair = fair,
+                 ncores = ncores)
+    skill_metrics$crps <- crps
+    crps_clim <- CRPS(exp = data_crossval$ref_obs_tr,
+                      obs = data_crossval$obs$data,
+                      time_dim = 'syear', memb_dim = 'ensemble',
+                      Fair = fair, ncores = ncores)
+    skill_metrics$crps_clim <- crps_clim
+  }
+  if ('crpss' %in% requested_metrics) {
+    crpss <- CRPSS(exp = data_crossval$hcst$data,
+                   obs = data_crossval$obs$data,
+                   ref = data_crossval$ref_obs_tr,
+                   memb_dim = 'ensemble', Fair = fair,
+                   time_dim = 'syear', clim.cross.val = FALSE,
+                   ncores = ncores)
+    skill_metrics$crpss <- crpss$crpss
+    skill_metrics$crpss_significance <- crpss$sign
+  }
+
+  if ('enscorr' %in% requested_metrics) {
+    enscorr <- Corr(exp = data_crossval$hcst$data,
+                    obs = data_crossval$obs$data,
+                    dat_dim = NULL,
+                    time_dim = 'syear',
+                    method = 'pearson',
+                    memb_dim = 'ensemble',
+                    memb = F,
+                    conf = F,
+                    pval = F,
+                    sign = T,
+                    alpha = alpha,
+                    ncores = ncores)
+    skill_metrics$enscorr <- enscorr$corr
+    skill_metrics$enscorr_significance <- enscorr$sign
+  }
+  if ('mean_bias' %in% requested_metrics) {
+    if (!is.null(data_crossval$hcst.full_val$data)) {
+      mean_bias <- Bias(exp = data_crossval$hcst.full_val$data,
+                        obs = data_crossval$obs.full_val$data,
+                        time_dim = 'syear',
+                        memb_dim = 'ensemble',
+                        alpha = alpha,
+                        ncores = ncores)
+      skill_metrics$mean_bias <- mean_bias$bias
+      skill_metrics$mean_bias_significance <- mean_bias$sig
+    } else {
+      info(recipe$Run$logger,
+           "Full values not available")
+    }
+  }
+  if ('enssprerr' %in% requested_metrics) {
+    enssprerr <- SprErr(exp = data_crossval$hcst$data,
+                        obs = data_crossval$obs$data,
+                        memb_dim = 'ensemble', dat_dim = NULL,
+                        time_dim = 'syear', pval = TRUE,
+                        ncores = ncores)
+    skill_metrics$SprErr <- enssprerr$ratio
+    skill_metrics$SprErr_significance <- enssprerr$p.val <= alpha
+  }
+  if ('rms' %in% requested_metrics) {
+    rms <- RMS(exp = data_crossval$hcst$data,
+               obs = data_crossval$obs$data,
+               memb_dim = 'ensemble', dat_dim = NULL,
+               time_dim = 'syear', alpha = alpha,
+               ncores = ncores)
+    skill_metrics$rms <- rms$rms
+  }
+  if ('rmss' %in% requested_metrics) {
+    rmss <- RMSSS(exp = data_crossval$hcst$data,
+                  obs = data_crossval$obs$data,
+                  ref = data_crossval$ref_obs_tr,
+                  memb_dim = 'ensemble', dat_dim = NULL,
+                  time_dim = 'syear', alpha = alpha, sign = TRUE,
+                  ncores = ncores)
+    skill_metrics$rmss <- rmss$rmss
+    skill_metrics$rmss_significance <- rmss$sign
+  }
+  if (is.null(data_crossval$hcst_EM)) {
+    data_crossval$hcst_EM <- MeanDims(data_crossval$hcst$data,
+                                      dims = 'ensemble',
+                                      drop = TRUE)
+  }
+  if (any(c('std', 'standard_deviation') %in% requested_metrics)) {
+    std_hcst <- Apply(data = data_crossval$hcst_EM,
+                      target_dims = 'syear',
+                      fun = 'sd')$output1
+
+    std_obs <- Apply(data = data_crossval$obs$data,
+                     target_dims = 'syear',
+                     fun = 'sd')$output1
+
+    skill_metrics[['std_hcst']] <- std_hcst
+    skill_metrics[['std_obs']] <- std_obs
+  }
+  if (any(c('var', 'variance') %in% requested_metrics)) {
+    ## Calculate variance
+    var_hcst <- Apply(data = data_crossval$hcst_EM,
+                      target_dims = 'syear',
+                      fun = 'sd')$output1 ^ 2
+
+    var_obs <- Apply(data = data_crossval$obs$data,
+                     target_dims = 'syear',
+                     fun = 'sd')$output1 ^ 2
+
+    skill_metrics[['var_hcst']] <- var_hcst
+    skill_metrics[['var_obs']] <- var_obs
+  } ## close if on variance
+  if ('n_eff' %in% requested_metrics) {
+    ## Calculate degrees of freedom
+    n_eff <- s2dv::Eno(data = data_crossval$obs$data,
+                       time_dim = 'syear',
+                       na.action = na.pass,
+                       ncores = ncores)
+    skill_metrics[['n_eff']] <- n_eff
+  } ## close on n_eff
+
+  if (any(c('cov', 'covariance') %in% requested_metrics)) {
+    covariance <- Apply(data = list(x = data_crossval$obs$data,
+                                    y = data_crossval$hcst_EM),
+                        target_dims = 'syear',
+                        fun = function(x, y) {
+                          cov(as.vector(x), as.vector(y),
+                              use = "everything",
+                              method = "pearson")})$output1
+    skill_metrics$covariance <- covariance
+  }
+  original <- recipe$Run$output_dir
+  recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+  skill_metrics <- lapply(skill_metrics, function(x) {
+    if (is.logical(x)) {
+      dims <- dim(x)
+      res <- as.numeric(x)
+      dim(res) <- dims
+    } else {
+      res <- x
+    }
+    return(res)
+  })
+  # Save metrics
+  save_metrics(recipe = recipe,
+               metrics = skill_metrics,
+               data_cube = data_crossval$hcst, agg = 'global',
+               outdir = recipe$Run$output_dir)
+
+  recipe$Run$output_dir <- original
+  # reduce dimension to work with Visualization module:
+  skill_metrics <- lapply(skill_metrics, function(x) {drop(x)})
+  skill_metrics <- lapply(skill_metrics, function(x){
+    InsertDim(x, pos = 1, len = 1, name = 'var')})
+  return(skill_metrics)
+}
+
diff --git a/modules/Crossval/Crossval_multimodel_NAO.R b/modules/Crossval/Crossval_multimodel_NAO.R
new file mode 100644
index 00000000..b82aea16
--- /dev/null
+++ b/modules/Crossval/Crossval_multimodel_NAO.R
@@ -0,0 +1,139 @@
+source("modules/Crossval/R/tmp/GetProbs.R")
+
+Crossval_multimodel_NAO <- function(recipe, data) {
+  cross.method <- recipe$Analysis$cross.method
+  # TODO move check
+  obsproj <- recipe$Analysis$Workflow$Indices$NAO$obsproj
+  if (is.null(obsproj)) {
+    obsproj <- TRUE
+  }
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+    sapply(x, function(y) {
+      eval(parse(text = y))})})
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  ## data dimensions
+  sdate_dim <- dim(data$hcst[[1]]$data)['syear']
+  orig_dims <- names(dim(data$hcst[[1]]$data))
+  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                            amt.points = sdate_dim,
+                                            amt.points_cor = NULL) # k = ?
+
+  # Define output dims and names:
+  nao_hcst_tr_aux <- list()
+  nao_hcst_ev_aux <- list()
+
+  nao_obs_tr_res <- list()
+  nao_hcst_ev_res <- list()
+  # Cross-val loop starts
+  for (t in 1:length(cross)) {
+    info(recipe$Run$logger, paste("crossval:", t))
+    nao_hcst_tr_res <- list()
+    nao_hcst_ev_res <- list()
+
+    # Observations
+    obs_tr <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$train.dexes)
+    obs_ev <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$eval.dexes)
+    clim_obs_tr <- MeanDims(obs_tr, 'syear')
+    ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr,
+                            ncores = ncores)
+    ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr,
+                            ncores = ncores)
+    # NAO for individual models
+    for (sys in 1:length(data$hcst)) {
+      hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear',
+                        indices = cross[[t]]$train.dexes)
+      ## evaluation indices
+      hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear',
+                        indices = cross[[t]]$eval.dexes)
+      # compute climatology:
+      clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble'))
+      ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr,
+                               ncores = ncores)
+      ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr,
+                               ncores = ncores)
+      # compute NAO:
+      nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev,
+                 ftime_avg = NULL, time_dim = 'syear',
+                 memb_dim = 'ensemble',
+                 space_dim = c('latitude', 'longitude'),
+                 ftime_dim = 'time', obsproj = obsproj,
+                 lat = data$obs$coords$latitude,
+                 lon = data$obs$coords$longitude,
+                 ncores = recipe$Analysis$ncores)
+
+      nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr,
+                        exp_cor = ano_obs_ev,
+                        ftime_avg = NULL, time_dim = 'syear',
+                        memb_dim = 'ensemble',
+                        space_dim = c('latitude', 'longitude'),
+                        ftime_dim = 'time', obsproj = obsproj,
+                        lat = data$obs$coords$latitude,
+                        lon = data$obs$coords$longitude,
+                        ncores = recipe$Analysis$ncores)$exp_cor
+      # Standardisation:
+      # Need the nao_hcst (for the train.dexes) to standardise the eval.dexes?
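+      # The training-fold NAO (nao$exp, nao$obs) supplies the mean and
+      # standard deviation; the evaluation-fold indices computed below are
+      # rescaled with those training moments, so train and eval values
+      # are expressed in the same standardised units in every fold.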
+ nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev), + target_dims = c('syear','ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear', + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, 1, function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores, + output_dims = 'syear')$output1 + nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + # store results: + nao_hcst_tr_res <- append(nao_hcst_tr_res, list(nao_hcst_tr)) + names(nao_hcst_tr_res)[length(nao_hcst_tr_res)] <- names(data$hcst)[[sys]] + nao_hcst_ev_res <- append(nao_hcst_ev_res, list(nao_hcst_ev)) + names(nao_hcst_ev_res)[length(nao_hcst_ev_res)] <- names(data$hcst)[[sys]] + } + if (t == 1) { + nao_hcst_ev_aux <- nao_hcst_ev_res + } else { + for (sys in 1:length(data$hcst)) { + nao_hcst_ev_aux[[sys]] <- abind(nao_hcst_ev_aux[[sys]], + nao_hcst_ev_res[[sys]], + along = length(dim(nao_hcst_ev_res[[sys]])) + 1) + nao_hcst_tr_aux[[sys]] <- abind(nao_hcst_tr_aux[[sys]], + nao_hcst_tr_res[[sys]], + along = length(dim(nao_hcst_tr_res[[sys]])) + 1) + } + } + } + nao_hcst_ev_aux <- lapply(1:length(nao_hcst_ev_aux), function(x) { + names(dim(nao_hcst_ev_aux[[x]])) <- c(names(dim(nao_hcst_ev_res[[x]])), 'sample') + return(nao_hcst_ev_aux[[x]])}) + nao_hcst_tr_aux <- lapply(1:length(nao_hcst_tr_aux), function(x) { + names(dim(nao_hcst_tr_aux[[x]])) <- c(names(dim(nao_hcst_tr_res[[x]])), 'sample') + return(nao_hcst_tr_aux[[x]])}) + + # Observed NAO should be the same for all models. 
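+  # A single copy of the observed NAO is therefore stored below, keyed by
+  # the first system's name.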
+ nao_obs_ev_res <- append(nao_obs_ev_res, list(nao_obs_ev)) + names(nao_obs_ev_res)[length(nao_obs_ev_res)] <- names(data$hcst)[[1]] + + diff --git a/modules/Crossval/Crossval_multimodel_anomalies.R b/modules/Crossval/Crossval_multimodel_anomalies.R new file mode 100644 index 00000000..89afff40 --- /dev/null +++ b/modules/Crossval/Crossval_multimodel_anomalies.R @@ -0,0 +1,369 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_multimodel_anomalies <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst[[1]]$data)['syear'] + orig_dims <- names(dim(data$hcst[[1]]$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst[[1]]$data)['latitude'] + nlons <- dim(data$hcst[[1]]$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst[[1]]$data))) { + agg = 'region' + nregions <- dim(data$hcst[[1]]$data)['region'] + } + # output_dims from loop base on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')], + names(sdate_dim)) + orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded' + orig_dims[orig_dims %in% 'syear'] <- 'ensemble' + tr_dim_names <-c(orig_dims, + names(sdate_dim)) + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? 
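+  # 'cross' is a list with one element per cross-validation fold; each
+  # element carries the training start dates ($train.dexes) and the
+  # held-out evaluation start dates ($eval.dexes) along 'syear'.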
+ ## output objects + ano_obs_ev_res <- NULL + ano_hcst_ev_res <- lapply(data$hcst, function(x) {NULL}) + ano_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + hcst_res <- list() + ano_hcst_tr <- ano_hcst_ev <- ano_fcst <- list() + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # Observations + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + clim_obs_tr <- MeanDims(obs_tr, 'syear') + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # Store cross validation loops: + ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, + along = length(dim(ano_obs_ev)) + 1) + ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, + along = length(dim(ano_obs_tr)) + 1) + + # Anomalies of individual models + for (sys in 1:length(data$hcst)) { + hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + # compute climatology: + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_hcst_tr <- append(ano_hcst_tr, + list(s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores))) + ano_hcst_ev <- append(ano_hcst_ev, + list(s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores))) + ano_hcst_ev_res[[sys]] <- abind(ano_hcst_ev_res[[sys]], + ano_hcst_ev[[sys]], + along = length(dim(ano_hcst_ev[[sys]])) + 1) + } + # compute category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, + target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + lapply(prob_lims, function(ps) { + quantile(as.vector(res), + ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_ano_obs_tr <- Apply(ano_obs_tr, + target_dims = c('syear'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + + #store results + for(ps in 1:length(categories)) { + lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], + along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) + lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], + along = length(dim(lims_ano_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + ano_hcst_ev_res <- lapply(ano_hcst_ev_res, function(x) { + names(dim(x)) <- ev_dim_names + return(x)}) + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names + # To make crps_clim to work the reference forecast need to have same dims as obs: + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', + 
orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + # Forecast anomalies: + ano_fcst <- lims_fcst <- NULL + if (!is.null(data$fcst)) { + ano_fcst <- Apply(ano_hcst_ev_res, + target_dims = c('syear', 'ensemble'), + function(...) { + res <- abind(..., along = 2) + clim <- mean(as.vector(res), na.rm = na.rm) + res <- res - clim}, + ncores = ncores, + output_dims = c('syear', 'ensemble'))$output1 + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + lapply(prob_lims, function(ps) { + quantile(as.vector(res), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + # create a list of unknown length of systems and limits: + target_dims_list <- append(list(lims = 'cat'), + lapply(ano_hcst_ev_res, function(x) { + c('syear', 'ensemble')})) + hcst_probs_ev[[ps]] <- Apply(append(list(lims = lims_ano_hcst_tr[[ps]]), + ano_hcst_ev_res), + target_dims = target_dims_list, + function(lims, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims)}, + ncores = ncores)$output1 + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = ncores) +fcst_probs <- NULL + if (!is.null(data$fcst)) { +# fcst_probs[[ps]] <- Apply(append(list(lims = lims_fcst[[ps]]), +# ano_fcst), +# target_dims = list('cat', +# c('syear', 'ensemble')), +# function(lims, fcst) { + fcst_probs[[ps]] <- GetProbs(ano_fcst, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], +#}, + ncores = ncores) + } + } + return(list(hcst = ano_hcst_ev_res, obs = ano_obs_ev_res, fcst = ano_fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + #hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res, + fcst = lims_fcst), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev, fcst_probs), + ref_obs_tr = ano_obs_tr_res)) +} +##### TO SAVE edit the lines below: + # Convert to s2dv_cubes the resulting anomalies +# ano_hcst <- data$hcst +# ano_hcst$data <- ano_hcst_ev_res +# ano_obs <- data$obs +# ano_obs$data <- ano_obs_ev_res + +# info(recipe$Run$logger, +# "#### Anomalies and Probabilities Done #####") +# if (recipe$Analysis$Workflow$Anomalies$save != 'none') { +# info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") +# recipe$Run$output_dir <- paste0(recipe$Run$output_dir, +# "/outputs/Anomalies/") + # Save forecast +# if ((recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { +# save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') +# } + # Save hindcast +# if (recipe$Analysis$Workflow$Anomalies$save %in% +# 
c('all', 'exp_only')) { +# save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') +# } +# # Save observation +# if (recipe$Analysis$Workflow$Anomalies$save == 'all') { +# save_observations(recipe = recipe, data_cube = ano_obs) +# } +# } + # Save probability bins +# probs_hcst <- list() +# probs_fcst <- list() +# probs_obs <- list() +# all_names <- NULL + # Make categories rounded number to use as names: +# categories <- recipe$Analysis$Workflow$Probabilities$percentiles +# categories <- lapply(categories, function (x) { +# sapply(x, function(y) { +# round(eval(parse(text = y)),2)})}) + + # for (ps in 1:length(categories)) { + # for (perc in 1:(length(categories[[ps]]) + 1)) { + # if (perc == 1) { + # name_elem <- paste0("below_", categories[[ps]][perc]) + # } else if (perc == length(categories[[ps]]) + 1) { + # name_elem <- paste0("above_", categories[[ps]][perc-1]) + # } else { + # name_elem <- paste0("from_", categories[[ps]][perc-1], + # "_to_", categories[[ps]][perc]) + # } + # probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_hcst) + # probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_obs) + # if (!is.null(data$fcst)) { + # probs_fcst <- append(list(Subset(fcst_probs[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_fcst) + # } + # all_names <- c(all_names, name_elem) + # } + # } + # names(probs_hcst) <- all_names + # if (!('var' %in% names(dim(probs_hcst[[1]])))) { + # probs_hcst <- lapply(probs_hcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # names(probs_obs) <- all_names + # if (!('var' %in% names(dim(probs_obs[[1]])))) { + # probs_obs <- lapply(probs_obs, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + + # if (!is.null(data$fcst)) { + # names(probs_fcst) <- all_names + # if (!('var' %in% names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(syear = 1, dim(x)) + # return(x)}) + # } + # } + # if (recipe$Analysis$Workflow$Probabilities$save %in% + # c('all', 'bins_only')) { + # save_probabilities(recipe = recipe, probs = probs_hcst, + # data_cube = data$hcst, agg = agg, + # type = "hcst") + # save_probabilities(recipe = recipe, probs = probs_obs, + # data_cube = data$hcst, agg = agg, + # type = "obs") + # TODO Forecast + # if (!is.null(probs_fcst)) { + # save_probabilities(recipe = recipe, probs = probs_fcst, + # data_cube = data$fcst, agg = agg, + # type = "fcst") + # } + # } + # Save ensemble mean for multimodel option: + # hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(hcst_EM = + # Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$hcst, agg = agg, + # module = "statistics") + #fcst_EM <- NULL + #if (!is.null(data$fcst)) { + # fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(fcst_EM = + # Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$fcst, agg = agg, + # module = "statistics") + #} + #return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + # hcst.full_val = data$hcst, obs.full_val = data$obs, + # hcst_EM = hcst_EM, fcst_EM = fcst_EM, + # cat_lims = list(hcst_tr = 
lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), + # probs = list(hcst_ev = hcst_probs_ev, + # obs_ev = obs_probs_ev), + # ref_obs_tr = ano_obs_tr_res)) +#} + + +## The result contains the inputs for Skill_full_crossval. +## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices + ## in this case cross validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case cross anomalies with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validate observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + diff --git a/modules/Crossval/Crossval_multimodel_metrics.R b/modules/Crossval/Crossval_multimodel_metrics.R new file mode 100644 index 00000000..7f6a7292 --- /dev/null +++ b/modules/Crossval/Crossval_multimodel_metrics.R @@ -0,0 +1,273 @@ +# One function to compute all the possible metrics or statistics +# datos is a list returned by load_multimodel or load_multimodel_mean + ## this reduce memory and it can be used to compute several metrics + ## in the mean the ensemble dim is length 1 for each model +# probs is a list returned by load_multimodel_probs + ## this is needed to compute rps/rpss +source("modules/Saving/Saving.R") +source("modules/Crossval/R/tmp/RPS.R") +source("modules/Crossval/R/RPS_clim.R") +source("modules/Crossval/R/CRPS_clim.R") +source("modules/Crossval/R/tmp/RPSS.R") +source("modules/Crossval/R/tmp/RandomWalkTest.R") +source("modules/Crossval/R/tmp/Corr.R") +source("modules/Crossval/R/tmp/Bias.R") +source("modules/Crossval/R/tmp/SprErr.R") +source("modules/Crossval/R/tmp/Eno.R") +source("modules/Crossval/R/tmp/GetProbs.R") +source("modules/Visualization/Visualization.R") # to load .warning from utils +Crossval_multimodel_metrics <- function(recipe, + data = NULL, + Fair = FALSE) { + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + cross.method <- recipe$Analysis$cross.method + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + # Prepare indices for crossval + sdate_dim <- dim(data$hcst[[1]])['syear'] + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) + tmp <- array(1:length(cross), c(syear = length(cross))) + alpha <- recipe$Analysis$alpha + if (is.null(alpha)) { + alpha <- 0.05 + } + + requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, + ", | |,")[[1]] + skill_metrics <- list() + if (!is.null(data)) { + # conver $data elements to list to use multiApply: + datos <- append(list(obs = data$obs), data$hcst) + ## CRPS metrics only make sense in pool method: + if (any(c('crps', 'crpss') %in% requested_metrics)) { + crps <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + mean(s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, + time_dim = 'syear', + memb_dim = 'ensemble'))}, + ncores = ncores)$output1 + skill_metrics$crps <- crps + # Build the reference forecast: + ref_clim <- Apply(list(datos$obs, tmp), + target_dims = list(c('ensemble', 'syear'), NULL), + function(x, y) { + Subset(x, along = 'syear', + indices = cross[[y]]$train.dexes, drop = T)}, + ncores = ncores, output_dims = 'ensemble')$output1 + datos <- append(list(ref = ref_clim), datos) + crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), + fun = function(ref, obs) { + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + mean(s2dv:::.CRPS(exp = ref, obs = obs, + dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble'))}, + ncores = ncores)$output1 + skill_metrics$crps_clim <- crps_clim + + + crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(ref, obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + CRPSS(exp = res, obs = obs, ref = ref, + memb_dim = 'ensemble', Fair = FALSE, + time_dim = 'syear', clim.cross.val = FALSE)}, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + datos <- datos[-1] + } + ## Deterministic metrics using ensemble mean: + if ('enscorr' %in% requested_metrics) { + enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Corr(exp = res, + obs = obs, + dat_dim = NULL, + time_dim = 'syear', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = alpha)}, ncores = ncores) + skill_metrics$enscorr <- enscorr$corr + skill_metrics$enscorr_significance <- enscorr$sign + } + if ('mean_bias' %in% requested_metrics) { + ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) + mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Bias(exp = res, + obs = obs, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = alpha)}, + ncores = ncores) + skill_metrics$mean_bias <- mean_bias$bias + skill_metrics$mean_bias_significance <- mean_bias$sig + } + if ('rms' %in% requested_metrics) { + rms <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMS(exp = res, + obs = obs, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha)}, + ncores = ncores) + skill_metrics$rms <- rms$rms + } + ####### Do we need reference fcst to compute rmss?? + if ('rmss' %in% requested_metrics) { + # if (is.null(res$ref_obs_tr)) { + # } + rmss <- Apply(datos, target_dims = c('syear','ensemble'), + function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMSSS(exp = res, + obs = obs, + #ref = res$ref_obs_tr, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha, sign = TRUE)}, + ncores = ncores) + skill_metrics$rmss <- rmss$rmss + skill_metrics$rmss_significance <- rmss$sign + } + if (any(c('std', 'standard_deviation') %in% requested_metrics)) { + # This are used to compute correlation for the scorecards + # It's the temporal standard deviation of the ensmean + std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + sd(as.vector(res))}, + ncores = ncores)$output1 + + std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = 'sd')$output1 + skill_metrics[['std_hcst']] <- std_hcst + skill_metrics[['std_obs']] <- std_obs + } + + if (any(c('var', 'variance') %in% requested_metrics)) { + ## Calculate variance + var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + fun = function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + variance <- var(as.vector(res))}, + ncores = ncores)$output1 + + var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = function(x) {var(as.vector(x))}, + ncores = ncores)$output1 + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs + } + if ('n_eff' %in% requested_metrics) { + ## Calculate degrees of freedom + n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } + if (any(c('cov', 'covariance') %in% requested_metrics)) { + # The covariance of the ensemble mean + covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + cov(as.vector(obs), as.vector(res), + use = "everything", + method = "pearson")}, + ncores = ncores)$output1 + skill_metrics$covariance <- covariance + } + } + # Probabilistic metrics for categories + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + # TODO: distinguish between rpss and bss + # if 1 percentile -> bss + # if more than 1 -> rpss + exe_rps <- unlist(lapply(categories, function(x) { + if (length(x) > 1) { + x <- x[1] *100 + } + return(x)})) + if (!is.null(datos)) { + if (Fair) { + nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) + } else { + nmemb <- NULL + } + } else { + nmemb <- NULL + } + + # Compute rps + for (ps in 1:length(exe_rps)) { + if ('rps' %in% requested_metrics) { + rps <- RPS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], memb_dim = NULL, + cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', + Fair = Fair, nmemb = nmemb, + ncores = ncores) + rps_clim <- Apply(list(data$probs$obs[[ps]]), + target_dims = c('cat', 'syear'), + RPS_clim, bin_dim_abs = 'cat', Fair = Fair, + cross.val = FALSE, ncores = ncores)$output1 + skill_metrics$rps <- rps + skill_metrics$rps_clim <- rps_clim + # names based on the categories: + # To use it when visualization works for more rps + #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps + #skill_metrics[[paste0('rps_clim', + # exe_rps[ps])]] <- rps_clim + } + if ('rpss' %in% requested_metrics) { + rpss <- RPSS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], + ref = NULL, # ref is 1/3 by default if terciles + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'cat', nmemb = nmemb, + dat_dim = NULL, + prob_thresholds = categories[[ps]], # un use param when providing probs + indices_for_clim = NULL, + Fair = Fair, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = na.rm, + sig_method.type = 'two.sided.approx', alpha = alpha, + ncores = ncores) + skill_metrics$rpss <- rpss$rpss + skill_metrics$rpss_significance <- rpss$sign + } + } + skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) + skill_metrics <- lapply(skill_metrics, function(x){ + InsertDim(x, pos = 1, len = 1, name = 'var')}) + return(skill_metrics) +} diff --git a/modules/Crossval/R/CRPS_clim.R b/modules/Crossval/R/CRPS_clim.R new file mode 100644 index 00000000..0e6bef65 --- /dev/null +++ b/modules/Crossval/R/CRPS_clim.R @@ -0,0 +1,35 @@ +# CRPS version for climatology +CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross.val= TRUE){ + time_dim <- names(dim(obs)) + obs_time_len <- dim(obs)[time_dim] + + if (isFALSE(clim.cross.val)) { + # Without cross-validation + ref <- array(data = rep(obs, each = obs_time_len), + dim = c(obs_time_len, obs_time_len)) + } else if (isTRUE(clim.cross.val)) { + # With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = NA, + dim = c(obs_time_len, obs_time_len - 1)) + for (i in 1:obs_time_len) { + ref[i, ] <- obs[-i] + } + } + + names(dim(ref)) <- c(time_dim, memb_dim) + # ref: [sdate, memb] + # obs: [sdate] + crps_ref <- s2dv:::.CRPS(exp = ref, obs = obs, + time_dim = time_dim, + memb_dim = memb_dim, + dat_dim = NULL, + Fair = FALSE) + + # crps_ref should be [sdate] + if (return_mean == TRUE) { + return(mean(crps_ref)) + } else { + return(crps_ref) + } +} + diff 
--git a/modules/Crossval/R/RPS_clim.R b/modules/Crossval/R/RPS_clim.R new file mode 100644 index 00000000..6deab3ec --- /dev/null +++ b/modules/Crossval/R/RPS_clim.R @@ -0,0 +1,39 @@ +# RPS version for climatology +RPS_clim <- function(obs, indices_for_clim = NULL, + prob_thresholds = c(1/3, 2/3), cross.val = TRUE, + Fair = FALSE, bin_dim_abs = NULL, return_mean = TRUE) { + if (is.null(indices_for_clim)){ + indices_for_clim <- 1:length(obs) + } + if (is.null(bin_dim_abs)) { + obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) + } else { + obs_probs <- obs + } + # clim_probs: [bin, sdate] + clim_probs <- c(prob_thresholds[1], + diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) + if (Fair) { # FairRPS + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + ## See explanation in https://freva.met.fu-berlin.de/about/problems/ + R <- dim(obs)[2] #years + adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) + adjustment <- colSums(adjustment) + rps_ref <- rps_ref + adjustment + } + + if (return_mean == TRUE) { + return(mean(rps_ref)) + } else { + return(rps_ref) + } +} diff --git a/modules/Crossval/R/tmp/Bias.R b/modules/Crossval/R/tmp/Bias.R new file mode 100644 index 00000000..b9292cae --- /dev/null +++ b/modules/Crossval/R/tmp/Bias.R @@ -0,0 +1,216 @@ +#'Compute the Mean Bias +#' +#'The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference +#'between the ensemble mean forecast and the observations. It is a deterministic +#'metric. Positive values indicate that the forecasts are on average too high +#'and negative values indicate that the forecasts are on average too low. +#'It also allows to compute the Absolute Mean Bias or bias without temporal +#'mean. If there is more than one dataset, the result will be computed for each +#'pair of exp and obs data. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +#' 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter +#' 'exp' is already the ensemble mean. The default value is NULL. +#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or +#' kept (FALSE) for computation. The default value is FALSE. +#'@param absolute A logical value indicating whether to compute the absolute +#' bias. The default value is FALSE. +#'@param time_mean A logical value indicating whether to compute the temporal +#' mean of the bias. The default value is TRUE. 
+#'@param alpha A numeric or NULL (default) to indicate the significance level using Weltch test. Only available when absolute is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number +#'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If alpha is specified, and absolute is FALSE, the result is a list with two elements, the bias as describe above and the significance as logical array with the same dimensions. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) +#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE, + absolute = FALSE, time_mean = TRUE, alpha = NULL, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop("Parameter 'exp' must be a numeric array.") + if (!is.array(obs) | !is.numeric(obs)) + stop("Parameter 'obs' must be a numeric array.") + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop("Parameter 'time_dim' must be a character string.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } 
+ ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## absolute + if (!is.logical(absolute) | length(absolute) > 1) { + stop("Parameter 'absolute' must be one logical value.") + } + ## time_mean + if (!is.logical(time_mean) | length(time_mean) > 1) { + stop("Parameter 'time_mean' must be one logical value.") + } + ## alpha + if (!is.null(alpha)) { + if (!is.numeric(alpha) | length(alpha) > 1) { + stop("Parameter 'alpha' must be null or a numeric value.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = na.rm) + } + + ## (Mean) Bias + bias <- Apply(data = list(exp, obs), + target_dims = c(time_dim, dat_dim), + fun = .Bias, + time_dim = time_dim, + dat_dim = dat_dim, + na.rm = na.rm, + absolute = absolute, + time_mean = time_mean, + alpha = alpha, + ncores = ncores) + + if (is.null(alpha)) { + bias <- bias$output1 + } + return(bias) +} + + +.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE, + absolute = FALSE, time_mean = TRUE, alpha = NULL) { + # exp and obs: [sdate, (dat)] + if (is.null(dat_dim)) { + bias <- exp - obs + + if (isTRUE(absolute)) { + bias <- abs(bias) + } + + if (isTRUE(time_mean)) { + bias <- mean(bias, na.rm = na.rm) + } + + if (!is.null(alpha)) { + if (!absolute) { + pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value + sig <- pval <= alpha + } + } + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + pval <- array(dim = c(nexp = nexp, nobs = nobs)) + sig <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + bias[, i, j] <- exp[, i] - obs[, j] + if (!is.null(alpha)) { + if (!absolute) { + pval[i,j] <- t.test(x = obs[,j], y = exp[,i], + alternative = "two.sided")$p.value + sig[i,j] <- pval <= alpha + } + } + } + } + + if (isTRUE(absolute)) { + bias <- abs(bias) + } + + if (isTRUE(time_mean)) { + bias <- MeanDims(bias, time_dim, na.rm = na.rm) + } + } + if (!is.null(alpha) && !absolute) { + return(list(bias = bias, sig = sig)) + } else { + return(bias) + } +} diff --git a/modules/Crossval/R/tmp/Corr.R b/modules/Crossval/R/tmp/Corr.R new file mode 100644 index 00000000..744ff109 --- /dev/null +++ b/modules/Crossval/R/tmp/Corr.R @@ -0,0 +1,484 @@ +#'Compute the correlation coefficient between an array of forecast and their corresponding observation +#' +#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for +#'an array of forecast and an array of observation. The correlations are +#'computed along 'time_dim' that usually refers to the start date dimension. If +#''comp_dim' is given, the correlations are computed only if obs along comp_dim +#'dimension are complete between limits[1] and limits[2], i.e., there is no NA +#'between limits[1] and limits[2]. 
This option can be activated if the user +#'wants to account only for the forecasts which the corresponding observations +#'are available at all leadtimes.\cr +#'The confidence interval is computed by the Fisher transformation and the +#'significance level relies on an one-sided student-T distribution.\cr +#'The function can calculate ensemble mean before correlation by 'memb_dim' +#'specified and 'memb = F'. If ensemble mean is not calculated, correlation will +#'be calculated for each member. +#'If there is only one dataset for exp and obs, you can simply use cor() to +#'compute the correlation. +#' +#'@param exp A named numeric array of experimental data, with at least dimension +#' 'time_dim'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. +#'@param time_dim A character string indicating the name of dimension along +#' which the correlations are computed. The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is NULL (no dataset). +#'@param comp_dim A character string indicating the name of dimension along which +#' obs is taken into account only if it is complete. The default value +#' is NULL. +#'@param limits A vector of two integers indicating the range along comp_dim to +#' be completed. The default is c(1, length(comp_dim dimension)). +#'@param method A character string indicating the type of correlation: +#' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no +#' member dimension, set NULL. The default value is NULL. +#'@param memb A logical value indicating whether to remain 'memb_dim' dimension +#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only functional when +#' 'memb_dim' is not NULL. The default value is TRUE. +#'@param pval A logical value indicating whether to return or not the p-value +#' of the test Ho: Corr = 0. The default value is TRUE. +#'@param conf A logical value indicating whether to return or not the confidence +#' intervals. The default value is TRUE. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: Corr = 0 based on 'alpha'. The default value is +#' FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except +#' time_dim and memb_dim).\cr +#'nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is the +#'number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and +#'nobs are omitted. exp_memb is the number of member in experiment (i.e., +#''memb_dim' in exp) and obs_memb is the number of member in observation (i.e., +#''memb_dim' in obs). If memb = F, exp_memb and obs_memb are omitted.\cr\cr +#'\item{$corr}{ +#' The correlation coefficient. +#'} +#'\item{$p.val}{ +#' The p-value. Only present if \code{pval = TRUE}. +#'} +#'\item{$conf.lower}{ +#' The lower confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' The upper confidence interval. 
Only present if \code{conf = TRUE}. +#'} +#'\item{$sign}{ +#' The statistical significance. Only present if \code{sign = TRUE}. +#'} +#' +#'@examples +#'# Case 1: Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'runmean_months <- 12 +#' +#'# Smooth along lead-times +#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months) +#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months) +#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times +#'leadtimes_per_startdate <- 60 +#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'), +#' MeanDims(smooth_ano_obs, 'member'), +#' comp_dim = 'ftime', dat_dim = 'dataset', +#' limits = c(ceiling((runmean_months + 1) / 2), +#' leadtimes_per_startdate - floor(runmean_months / 2))) +#' +#'# Case 2: Keep member dimension +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', dat_dim = 'dataset') +#'# ensemble mean +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE, +#' dat_dim = 'dataset') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom stats cor pt qnorm +#'@export +Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, + comp_dim = NULL, limits = NULL, method = 'pearson', + memb_dim = NULL, memb = TRUE, + pval = TRUE, conf = TRUE, sign = FALSE, + alpha = 0.05, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## comp_dim + if (!is.null(comp_dim)) { + if (!is.character(comp_dim) | length(comp_dim) > 1) { + stop("Parameter 'comp_dim' must be a character string.") + } + if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) { + stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## limits + if (!is.null(limits)) { + if (is.null(comp_dim)) { + stop("Paramter 'comp_dim' cannot be NULL if 'limits' is assigned.") + } + if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | + length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { + stop(paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.")) + } + } + ## method + if (!(method %in% c("kendall", 
"spearman", "pearson"))) { + stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. Set it as NULL if there is no member dimension.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'dat_dim' and 'memb_dim'.")) + } + if (dim(exp)[time_dim] < 3) { + stop("The length of time_dim must be at least 3 to compute correlation.") + } + + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- Reorder(obs, order_obs) + + + ############################### + # Calculate Corr + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + pos <- which(names(dim(obs)) == comp_dim) + if (is.null(limits)) { + obs_sub <- obs + } else { + obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) + } + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA + rm(obs_sub, outrows) + } + if (!is.null(memb_dim)) { + if (!memb) { #ensemble mean + exp <- MeanDims(exp, memb_dim, na.rm = TRUE) + obs <- MeanDims(obs, memb_dim, na.rm = TRUE) +# name_exp <- names(dim(exp)) +# margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)] +# exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here +# obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE) + memb_dim <- NULL + } + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + fun = .Corr, + dat_dim = dat_dim, memb_dim 
= memb_dim, + time_dim = time_dim, method = method, + pval = pval, conf = conf, sign = sign, alpha = alpha, + ncores = ncores) + + return(res) +} + +.Corr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', + time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + if (is.null(memb_dim)) { + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + if (any(!is.na(exp)) && sum(!is.na(obs)) > 2) { + CORR[, ] <- cor(exp, obs, use = "pairwise.complete.obs", method = method) + } + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + for (j in 1:nobs) { + for (y in 1:nexp) { + if (any(!is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { + CORR[y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + } + } +#---------------------------------------- +# Same as above calculation. +#TODO: Compare which is faster. +# CORR <- sapply(1:nobs, function(i) { +# sapply(1:nexp, function (x) { +# if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { +# cor(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method) +# } else { +# NA +# } +# }) +# }) +#----------------------------------------- + } + + } else { # memb_dim != NULL + exp_memb <- as.numeric(dim(exp)[memb_dim]) # memb_dim + obs_memb <- as.numeric(dim(obs)[memb_dim]) + + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + if (is.null(dat_dim)) { + # exp: [sdate, memb_exp] + # obs: [sdate, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + + if (any(!is.na(exp[,y])) && sum(!is.na(obs[, j])) > 2) { + CORR[, , y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + + } + } + } else { + # exp: [sdate, dat_exp, memb_exp] + # obs: [sdate, dat_obs, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + CORR[, , y, j] <- sapply(1:nobs, function(i) { + sapply(1:nexp, function (x) { + if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { + cor(exp[, x, y], obs[, i, j], + use = "pairwise.complete.obs", + method = method) + } else { + NA + } + }) + }) + + } + } + } + + } + + +# if (pval) { +# for (i in 1:nobs) { +# p.val[, i] <- try(sapply(1:nexp, +# function(x) {(cor.test(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method)$p.value)/2}), silent = TRUE) +# if (class(p.val[, i]) == 'character') { +# p.val[, i] <- NA +# } +# } +# } + + if (pval || conf || sign) { + if (method == "kendall" | method == "spearman") { + if (!is.null(dat_dim) | !is.null(memb_dim)) { + tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) + names(dim(tmp))[1] <- time_dim + eno <- Eno(tmp, time_dim) + } else { + tmp <- rank(obs) + tmp <- array(tmp) + names(dim(tmp)) <- time_dim + eno <- Eno(tmp, time_dim) + } + } else if (method == "pearson") { + eno <- Eno(obs, time_dim) + } + + if (is.null(memb_dim)) { + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + eno_expand[i, ] <- eno + } + } else { #member + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + for (i in 1:nexp) { + for (j in 1:exp_memb) { + eno_expand[i, , j, ] <- eno + } + } + } + + } + +#############old################# +#This doesn't return error but it's diff from 
cor.test() when method is spearman and kendall + if (pval || sign) { + t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) + p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) + if (sign) signif <- !is.na(p.val) & p.val <= alpha + } +################################### + if (conf) { + conf.lower <- alpha / 2 + conf.upper <- 1 - conf.lower + suppressWarnings({ + conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) + confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) + }) + } + +################################### + # Remove nexp and nobs if dat_dim = NULL + if (is.null(dat_dim)) { +# if (is.null(dat_dim) & !is.null(memb_dim)) { + + if (length(dim(CORR)) == 2) { + dim(CORR) <- NULL + if (pval) { + dim(p.val) <- NULL + } + if (conf) { + dim(conflow) <- NULL + dim(confhigh) <- NULL + } + if (sign) { + dim(signif) <- NULL + } + } else { + dim(CORR) <- dim(CORR)[3:length(dim(CORR))] + if (pval) { + dim(p.val) <- dim(p.val)[3:length(dim(p.val))] + } + if (conf) { + dim(conflow) <- dim(conflow)[3:length(dim(conflow))] + dim(confhigh) <- dim(confhigh)[3:length(dim(confhigh))] + } + if (sign) { + dim(signif) <- dim(signif)[3:length(dim(signif))] + } + } + } + +################################### + + res <- list(corr = CORR) + if (pval) { + res <- c(res, list(p.val = p.val)) + } + if (conf) { + res <- c(res, list(conf.lower = conflow, conf.upper = confhigh)) + } + if (sign) { + res <- c(res, list(sign = signif)) + } + + return(res) + +} diff --git a/modules/Crossval/R/tmp/EOF.R b/modules/Crossval/R/tmp/EOF.R new file mode 100644 index 00000000..87795b66 --- /dev/null +++ b/modules/Crossval/R/tmp/EOF.R @@ -0,0 +1,293 @@ +#'Area-weighted empirical orthogonal function analysis using SVD +#' +#'Perform an area-weighted EOF analysis using single value decomposition (SVD) +#'based on a covariance matrix or a correlation matrix if parameter 'corr' is +#'set to TRUE. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs +#' could exist but it should be consistent along time_dim. That is, if one grid +#' point has NAs, all the time steps at this point should be NAs. +#'@param lat A vector of the latitudes of 'ano'. +#'@param lon A vector of the longitudes of 'ano'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param neofs A positive integer of the modes to be kept. The default value is +#' 15. If time length or the product of the length of space_dim is smaller than +#' neofs, neofs will be changed to the minimum of the three values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{EOFs}{ +#' An array of EOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). +#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed +#' field. 
+#'} +#'\item{PCs}{ +#' An array of principal components with the units of the original field to +#' the power of 2, with dimensions (time_dim, number of modes, rest of the +#' dimensions of 'ano' except 'space_dim'). +#' 'PCs' contains already the percentage of explained variance so, +#' to reconstruct the original field it's only needed to multiply 'EOFs' +#' by 'PCs'. +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode (number of modes). The dimensions are (number of +#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{mask}{ +#' An array of the mask with dimensions (space_dim, rest of the dimensions of +#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that +#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to +#' replace NAs with 0s for EOF calculation and mask the result with NAs again +#' after the calculation. +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by cosine of 'lat' and used to compute the fraction of variance explained by +#' each EOFs. +#'} +#'\item{tot_var}{ +#' A number or a numeric array of the total variance explained by all the modes. +#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. +#'} +#' +#'@seealso ProjectField, NAO, PlotBoxWhisker +#'@examples +#'# This example computes the EOFs along forecast horizons and plots the one +#'# that explains the greatest amount of variability. The example data has low +#'# resolution so the result may not be explanatory, but it displays how to +#'# use this function. +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) +#'ano <- tmp[1, , ,] +#'names(dim(ano)) <- names(dim(tmp))[-2] +#'eof <- EOF(ano, sampleData$lat, sampleData$lon) +#'\dontrun{ +#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) +#'} +#' +#'@import multiApply +#'@importFrom stats sd +#'@export +EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), + neofs = 15, corr = FALSE, ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + } + if (any(lat > 90 | lat < -90)) { + 
stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + ## lon + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + } + if (any(lon > 360 | lon < -360)) { + .warning("Some 'lon' is out of the range [-360, 360].") + } + ## neofs + if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { + stop("Parameter 'neofs' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate EOF + +# # Replace mask of NAs with 0s for EOF analysis. +# ano[!is.finite(ano)] <- 0 + + # Area weighting. Weights for EOF; needed to compute the + # fraction of variance explained by each EOFs + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anomaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. + wght <- sqrt(wght) + + # neofs is bounded + if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) { + neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs) + .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ", + "the length of time_dim, the production of the length of space_dim, ", + "and neofs.")) + } + + res <- Apply(ano, + target_dims = c(time_dim, space_dim), + output_dims = list(EOFs = c('mode', space_dim), + PCs = c(time_dim, 'mode'), + var = 'mode', + tot_var = NULL, + mask = space_dim), + fun = .EOF, + corr = corr, neofs = neofs, + wght = wght, + ncores = ncores) + + return(c(res, wght = list(wght))) + +} + +.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) { + # ano: [time, lat, lon] + + # Dimensions + nt <- dim(ano)[1] + ny <- dim(ano)[2] + nx <- dim(ano)[3] + + # Check if all the time steps at one grid point are NA-consistent. + # The grid point should have all NAs or no NA along time dim. + if (anyNA(ano)) { + ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon] + na_ind <- which(is.na(ano_latlon), arr.ind = T) + if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) { + stop("Detect certain grid points have NAs but not consistent across time ", + "dimension. If the grid point is NA, it should have NA at all time step.") + } + } + + # Build the mask + mask <- ano[1, , ] + mask[!is.finite(mask)] <- NA + mask[is.finite(mask)] <- 1 + dim(mask) <- c(ny, nx) + + # Replace mask of NAs with 0s for EOF analysis. + ano[!is.finite(ano)] <- 0 + + ano <- ano * InsertDim(wght, 1, nt) + + # The use of the correlation matrix is done under the option corr. + if (corr) { + stdv <- apply(ano, c(2, 3), sd, na.rm = T) + ano <- ano / InsertDim(stdv, 1, nt) + } + + # Time/space matrix for SVD + dim(ano) <- c(nt, ny * nx) + dim.dat <- dim(ano) + + # 'transpose' means the array needs to be transposed before + # calling La.svd for computational efficiency because the + # spatial dimension is larger than the time dimension. This + # goes with transposing the outputs of LA.svd also. 
+ if (dim.dat[2] > dim.dat[1]) { + transpose <- TRUE + } else { + transpose <- FALSE + } + if (transpose) { + pca <- La.svd(t(ano)) + } else { + pca <- La.svd(ano) + } + + # La.svd conventions: decomposition X = U D t(V) La.svd$u + # returns U La.svd$d returns diagonal values of D La.svd$v + # returns t(V) !! The usual convention is PC=U and EOF=V. + # If La.svd is called for ano (transpose=FALSE case): EOFs: + # $v PCs: $u If La.svd is called for t(ano) (transposed=TRUE + # case): EOFs: t($u) PCs: t($v) + + if (transpose) { + pca.EOFs <- t(pca$u) + pca.PCs <- t(pca$v) + } else { + pca.EOFs <- pca$v + pca.PCs <- pca$u + } + + # The numbers of transposition is limited to neofs + PC <- pca.PCs[, 1:neofs] + EOF <- pca.EOFs[1:neofs, ] + dim(EOF) <- c(neofs, ny, nx) + + # To sort out crash when neofs=1. + if (neofs == 1) { + PC <- InsertDim(PC, 2, 1, name = 'new') + } + + # Computation of the % of variance associated with each mode + W <- pca$d[1:neofs] + tot.var <- sum(pca$d^2) + var.eof <- 100 * pca$d[1:neofs]^2 / tot.var + + for (e in 1:neofs) { + # Set all masked grid points to NA in the EOFs + # Divide patterns by area weights so that EOF * PC gives unweigthed (original) data + EOF[e, , ] <- EOF[e, , ] * mask / wght + # PC is multiplied by the explained variance, + # so that the reconstruction is only EOF * PC + PC[, e] <- PC[, e] * W[e] + } + + if (neofs == 1) { + var.eof <- as.array(var.eof) + } + + return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask))) +} + diff --git a/modules/Crossval/R/tmp/Eno.R b/modules/Crossval/R/tmp/Eno.R new file mode 100644 index 00000000..cb927602 --- /dev/null +++ b/modules/Crossval/R/tmp/Eno.R @@ -0,0 +1,103 @@ +#'Compute effective sample size with classical method +#' +#'Compute the number of effective samples along one dimension of an array. This +#'effective number of independent observations can be used in +#'statistical/inference tests.\cr +#'The calculation is based on eno function from Caio Coelho from rclim.txt. +#' +#'@param data A numeric array with named dimensions. +#'@param time_dim A function indicating the dimension along which to compute +#' the effective sample size. The default value is 'sdate'. +#'@param na.action A function. It can be na.pass (missing values are allowed) +#' or na.fail (no missing values are allowed). See details in stats::acf(). +#' The default value is na.pass. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return An array with the same dimension as parameter 'data' except the +#' time_dim dimension, which is removed after the computation. The array +#' indicates the number of effective sample along time_dim. 
+#' +#'@examples +#'set.seed(1) +#'data <- array(rnorm(800), dim = c(dataset = 1, member = 2, sdate = 4, +#' ftime = 4, lat = 10, lon = 10)) +#'na <- floor(runif(40, min = 1, max = 800)) +#'data[na] <- NA +#'res <- Eno(data) +#' +#'@importFrom stats acf na.pass na.fail +#'@import multiApply +#'@export +Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(dim(data))) { #is vector + dim(data) <- c(length(data)) + names(dim(data)) <- time_dim + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + ## na.action + if (as.character(substitute(na.action)) != "na.pass" & + as.character(substitute(na.action)) != "na.fail") { + stop("Parameter 'na.action' must be a function either na.pass or na.fail.") + } + if (as.character(substitute(na.action)) == "na.fail" && anyNA(data)) { + stop("Calculation fails because NA is found in paratemter 'data', ", + "which is not accepted when ", + "parameter 'na.action' = na.fail.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate Eno + + eno <- Apply(data = list(data), + target_dims = time_dim, + output_dims = NULL, + fun = .Eno, + na.action = na.action, + ncores = ncores)$output1 + + return(eno) +} + +.Eno <- function(x, na.action) { + n <- length(sort(x)) + if (n > 1) { + a <- acf(x, lag.max = n - 1, plot = FALSE, + na.action = na.action)$acf[2:n, 1, 1] + s <- 0 + for (k in 1:(n - 1)) { + s <- s + (((n - k) / n) * a[k]) + } + eno <- min(n / (1 + (2 * s)), n) + } else { + eno <- NA + } + + return(eno) +} + diff --git a/modules/Crossval/R/tmp/GetProbs.R b/modules/Crossval/R/tmp/GetProbs.R new file mode 100644 index 00000000..fb2cda0c --- /dev/null +++ b/modules/Crossval/R/tmp/GetProbs.R @@ -0,0 +1,351 @@ +#'Compute probabilistic forecasts or the corresponding observations +#' +#'Compute probabilistic forecasts from an ensemble based on the relative +#'thresholds, or the probabilistic observations (i.e., which probabilistic +#'category was observed). A reference period can be specified to calculate the +#'absolute thresholds between each probabilistic category. The absolute +#'thresholds can be computed in cross-validation mode. If data is an ensemble, +#'the probabilities are calculated as the percentage of members that fall into +#'each category. For observations (or forecast without member dimension), 1 +#'means that the event happened, while 0 indicates that the event did not +#'happen. Weighted probabilities can be computed if the weights are provided for +#'each ensemble member and time step. The absolute thresholds can also be +#'provided directly for probabilities calculation. +#' +#'@param data A named numerical array of the forecasts or observations with, at +#' least, time dimension. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. 
+#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast, or NULL if there is no member +#' dimension (e.g., for observations, or for forecast with only one ensemble +#' member). The default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param abs_thresholds A numeric array or vector of the absolute thresholds in +#' the same units as \code{data}. If an array is provided, it should have at +#' least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different +#' thresholds for different locations, i.e. lon and lat dimensions), they +#' should match the dimensions of \code{data}, except the member dimension +#' which should not be included. The default value is NULL and, in this case, +#' 'prob_thresholds' is used for calculating the probabilities. +#'@param bin_dim_abs A character string of the dimension name of +#' 'abs_thresholds' array in which category limits are stored. It will also be +#' the probabilistic category dimension name in the output. The default value +#' is 'bin'. +#'@param indices_for_quantiles A vector of the indices to be taken along +#' 'time_dim' for computing the absolute thresholds between the probabilistic +#' categories. If NULL (default), the whole period is used. It is only used +#' when 'prob_thresholds' is provided. +#'@param weights A named numerical array of the weights for 'data' with +#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value +#' is NULL. The ensemble should have at least 70 members or span at least 10 +#' time steps and have more than 45 members if consistency between the weighted +#' and unweighted methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation mode. The default value +#' is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest +#'dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of +#'probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. 
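+#'
+#'@details
+#' An illustrative sketch of the ensemble case: with 25 members and the default
+#' terciles, if at one time step 10 members fall below the lower threshold, 10
+#' fall between the two thresholds, and 5 fall above the upper one, the
+#' returned probabilities for that time step are c(10, 10, 5) / 25, i.e.,
+#' c(0.4, 0.4, 0.2). For data without member dimension (e.g., observations),
+#' the output is 1 for the category that occurred and 0 for the others.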
+#' +#'@examples +#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) +#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' indices_for_quantiles = 4:17) +#' +#'# abs_thresholds is provided +#'abs_thr1 <- c(-0.2, 0.3) +#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr1) +#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', + indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimensions.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimensions. If no member ", + "dimension exists, set it as NULL.") + } + } + ## bin_dim_abs + if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { + stop('Parameter "bin_dim_abs" must be a character string.') + } + ## prob_thresholds, abs_thresholds + if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { + .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. 
", + "Only the first one is used.")) + abs_thresholds <- NULL + } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { + stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") + } + if (!is.null(prob_thresholds)) { + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- seq_len(dim(data)[time_dim]) + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + + } else { # abs_thresholds + + if (is.null(dim(abs_thresholds))) { # a vector + dim(abs_thresholds) <- length(abs_thresholds) + names(dim(abs_thresholds)) <- bin_dim_abs + } + # bin_dim_abs + if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { + stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") + } + if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { + stop("Parameter abs_thresholds' cannot have member dimension.") + } + dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] + if (!all(dim_name_abs %in% names(dim(data)))) { + stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") + } else { + if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { + stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") + } + } + if (!is.null(indices_for_quantiles)) { + warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") + } + abs_target_dims <- bin_dim_abs + if (time_dim %in% names(dim(abs_thresholds))) { + abs_target_dims <- c(bin_dim_abs, time_dim) + } + + } + + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop("Parameter 'weights' must be a named numeric array.") + +# if (is.null(dat_dim)) { + if (!is.null(memb_dim)) { + lendim_weights <- 2 + namesdim_weights <- c(time_dim, memb_dim) + } else { + lendim_weights <- 1 + namesdim_weights <- c(time_dim) + } + if (length(dim(weights)) != lendim_weights | + !all(names(dim(weights)) %in% namesdim_weights)) { + stop("Parameter 'weights' must have dimension ", + paste0(namesdim_weights, collapse = ' and '), ".") + } + if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { + stop("Parameter 'weights' must have the same dimension length as ", + paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.") + } + weights <- Reorder(weights, namesdim_weights) + +# } else { +# if (length(dim(weights)) != 3 | +# any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) +# stop("Parameter 'weights' must have three dimensions with the names of ", +# "'memb_dim', 'time_dim' and 'dat_dim'.") +# if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | +# dim(weights)[time_dim] != dim(exp)[time_dim] | +# dim(weights)[dat_dim] != dim(exp)[dat_dim]) { +# stop(paste0("Parameter 'weights' must have the same dimension lengths ", +# "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) +# } +# weights <- 
Reorder(weights, c(time_dim, memb_dim, dat_dim)) +# } + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + ############################### + if (is.null(abs_thresholds)) { + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + } else { + res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), + target_dims = list(c(time_dim, memb_dim), abs_target_dims), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = NULL, + indices_for_quantiles = NULL, + weights = NULL, cross.val = FALSE, ncores = ncores)$output1 + } + + return(res) +} + +.GetProbs <- function(data, indices_for_quantiles, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + weights = NULL, cross.val = FALSE) { + # .GetProbs() is used in RPS, RPSS, ROCSS + # data + ## if data is exp: [sdate, memb] + ## if data is obs: [sdate, (memb)] + # weights: [sdate, (memb)], same as data + # if abs_thresholds is not NULL: [bin, (sdate)] + + # Add dim [memb = 1] to data if it doesn't have memb_dim + if (length(dim(data)) == 1) { + dim(data) <- c(dim(data), 1) + if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) + } + + # Calculate absolute thresholds + if (is.null(abs_thresholds)) { + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in seq_len(dim(data)[1])) { + if (is.null(weights)) { + tmp <- which(indices_for_quantiles != i_time) + quantiles[, i_time] <- + quantile(x = as.vector(data[indices_for_quantiles[tmp], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + tmp <- which(indices_for_quantiles != i_time) + sorted_arrays <- + .sorted_distributions(data[indices_for_quantiles[tmp], ], + weights[indices_for_quantiles[tmp], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, + prob_thresholds, "linear")$y + } + } + + } else { + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]), + dim = c(bin = length(quantiles), dim(data)[1])) + } + + } else { # abs_thresholds provided + quantiles <- abs_thresholds + if (length(dim(quantiles)) == 1) { + quantiles <- InsertDim(quantiles, lendim = dim(data)[1], + posdim = 2, name = names(dim(data))[1]) + } + } + # quantiles: [bin-1, sdate] + # Probabilities + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] + for (i_time in seq_len(dim(data)[1])) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = 
dim(quantiles)[1] + 1) + } else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[, i_time])) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in seq_len(dim(quantiles)[1])) { + # for thresholds falling under the distribution + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 + } else { + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, + quantiles[i_quant, i_time], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs[, i_time], 1) - + append(0, integrated_probs[, i_time]) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop("Probability in i_time = ", i_time, " is out of [0, 1].") + } + } + } + } + + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / + cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list(data = data_vector[sorter], cumulative_weights = cumulative_weights)) +} + diff --git a/modules/Crossval/R/tmp/NAO.R b/modules/Crossval/R/tmp/NAO.R new file mode 100644 index 00000000..255e2a9f --- /dev/null +++ b/modules/Crossval/R/tmp/NAO.R @@ -0,0 +1,574 @@ +#'Compute the North Atlantic Oscillation (NAO) Index +#' +#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF +#'of the sea level pressure (SLP) anomalies over the north Atlantic region +#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and +#'observed anomalies onto the observed EOF pattern or the forecast +#'anomalies onto the EOF pattern of the other years of the forecast. +#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month +#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns +#'cross-validated PCs of the NAO index for hindcast (exp) and observations +#'(obs) based on the leading EOF pattern, or, if forecast (exp_cor) is provided, +#'the NAO index for forecast and the corresponding data (exp and obs). +#' +#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' hindcast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of observational data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of experimental data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. 
+#'@param exp_cor A named numeric array of the North Atlantic SLP (20N-80N, 80W-40E)
+#'  forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#'  dimension 'time_dim' of length 1 (as in the case of an operational
+#'  forecast), 'memb_dim', 'ftime_dim', and 'space_dim' at least.
+#'  If only NAO of reference period needs to be computed, this parameter can
+#'  be left to NULL. The default value is NULL.
+#'@param lat A vector of the latitudes of 'exp' and 'obs'.
+#'@param lon A vector of the longitudes of 'exp' and 'obs'.
+#'@param time_dim A character string indicating the name of the time dimension
+#'  of 'exp' and 'obs'. The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length
+#'  must be 1. The default value is 'member'.
+#'@param space_dim A vector of two character strings. The first is the dimension
+#'  name of latitude of 'ano' and the second is the dimension name of longitude
+#'  of 'ano'. The default value is c('lat', 'lon').
+#'@param ftime_dim A character string indicating the name of the forecast time
+#'  dimension of 'exp' and 'obs'. The default value is 'ftime'.
+#'@param ftime_avg A numeric vector of the forecast time steps to average
+#'  across the target period. If average is not needed, set NULL. The default
+#'  value is 2:4, i.e., from 2nd to 4th forecast time steps.
+#'@param obsproj A logical value indicating whether to compute the NAO index by
+#'  projecting the forecast anomalies onto the leading EOF of observational
+#'  reference (TRUE, default) or compute the NAO by first computing the leading
+#'  EOF of the forecast anomalies (in cross-validation mode, i.e. leave the
+#'  evaluated year out), then projecting forecast anomalies onto this EOF
+#'  (FALSE). If 'exp_cor' is provided, 'obs' will be used when obsproj is TRUE
+#'  and 'exp' will be used when obsproj is FALSE, and no cross-validation is
+#'  applied.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A list which contains some of the following items depending on the data inputs:
+#'\item{exp}{
+#'  A numeric array of hindcast NAO index in verification format with the same
+#'  dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is NULL,
+#'  ftime_dim remains.
+#'  }
+#'\item{obs}{
+#'  A numeric array of observation NAO index in verification format with the same
+#'  dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL,
+#'  ftime_dim remains.
+#'}
+#'\item{exp_cor}{
+#'  A numeric array of forecast NAO index in verification format with the same
+#'  dimensions as 'exp_cor' except space_dim and ftime_dim. If ftime_avg is NULL,
+#'  ftime_dim remains.
+#'  }
+#'
+#'@references
+#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of
+#'  multi-model seasonal forecasts of the wintertime North Atlantic
+#'  Oscillation. Climate Dynamics, 21, 501-514.
+#' DOI: 10.1007/s00382-003-0350-4 +#' +#'@examples +#'# Make up synthetic data +#'set.seed(1) +#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'set.seed(2) +#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'lat <- seq(20, 80, length.out = 6) +#'lon <- seq(-80, 40, length.out = 9) +#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) +#' +#'exp_cor <- array(rnorm(540), dim = c(member = 2, sdate = 1, ftime = 5, lat = 6, lon = 9)) +#'nao <- NAO(exp = exp, obs = obs, exp_cor = exp_cor, lat = lat, lon = lon, obsproj = TRUE) +#'# plot the NAO index +#' \dontrun{ +#'nao$exp <- Reorder(nao$exp, c(2, 1)) +#'nao$obs <- Reorder(nao$obs, c(2, 1)) +#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", +#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X") +#' } +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, lon, time_dim = 'sdate', + memb_dim = 'member', space_dim = c('lat', 'lon'), + ftime_dim = 'ftime', ftime_avg = 2:4, + obsproj = TRUE, ncores = NULL) { + # Check inputs + ## exp, obs, and exp_cor (1) + if (is.null(obs) & is.null(exp)) { + stop("Parameter 'exp' and 'obs' cannot both be NULL.") + } + if (!is.null(exp)) { + if (!is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (is.null(dim(exp))) { + stop("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { + stop("Parameter 'exp' must have dimension names.") + } + } + if (!is.null(obs)) { + if (!is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (is.null(dim(obs))) { + stop("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'obs' must have dimension names.") + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(exp_cor)) { + stop("Parameter 'exp_cor' must be a numeric array.") + } + if (is.null(dim(exp_cor))) { + stop(paste0("Parameter 'exp_cor' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.")) + } + if (any(is.null(names(dim(exp_cor)))) | any(nchar(names(dim(exp_cor))) == 0)) { + stop("Parameter 'exp_cor' must have dimension names.") + } + if (is.null(exp) || is.null(obs)) { + stop("Parameters 'exp' and 'obs' are required when 'exp_cor' is not provided.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' dimension.") + } + } + if (!is.null(obs)) { + if (!time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!time_dim %in% names(dim(exp_cor))) { + stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.") + } + if (dim(exp_cor)[time_dim] > 1) { + stop("Parameter 'exp_cor' is expected to have length 1 in ", + time_dim, "dimension.") + } + } + + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + 
add_member_back <- FALSE + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } + } + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (!all(space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!all(space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (any(!space_dim %in% names(dim(exp_cor)))) { + stop("Parameter 'space_dim' is not found in 'exp_cor' dimensions.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!ftime_dim %in% names(dim(exp_cor))) { + stop("Parameter 'ftime_dim' is not found in 'exp_cor' dimensions.") + } + } + ## exp and obs (2) + #TODO: Add checks for exp_cor + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'memb_dim'.") + } + } + ## ftime_avg + if (!is.null(ftime_avg)) { + if (!is.vector(ftime_avg) | !is.numeric(ftime_avg)) { + stop("Parameter 'ftime_avg' must be an integer vector.") + } + if (!is.null(exp)) { + if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(obs)) { + if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(exp_cor)) { + if (max(ftime_avg) > dim(exp_cor)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + } + ## sdate >= 2 + if (!is.null(exp)) { + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } else { + if (dim(obs)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } + ## lat and lon + if (!is.null(exp)) { + if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as 
the longitude dimension of 'exp' and 'obs'.") + } + } + if (!is.null(obs)) { + if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(lat) | length(lat) != dim(exp_cor)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp_cor'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp_cor)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp_cor'.") + } + } + stop_needed <- FALSE + if (max(lat) > 80 | min(lat) < 20) { + stop_needed <- TRUE + } + #NOTE: different from s2dverification + # lon is not used in the calculation actually. EOF only uses lat to do the + # weight. So we just need to ensure the data is in this region, regardless + # the order. + if (any(lon < 0)) { #[-180, 180] + if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { + stop_needed <- TRUE + } + } else { #[0, 360] + if (any(lon >= 50 & lon <= 270)) { + stop_needed <- TRUE + } else { + lon_E <- lon[which(lon < 50)] + lon_W <- lon[-which(lon < 50)] + if (max(lon_E) < 30 | min(lon_W) > 290) { + stop_needed <- TRUE + } + } + } + if (stop_needed) { + stop("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + } + ## obsproj + if (!is.logical(obsproj) | length(obsproj) > 1) { + stop("Parameter 'obsproj' must be either TRUE or FALSE.") + } + if (obsproj) { + if (is.null(obs)) { + stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") + } + if (is.null(exp) & is.null(exp_cor)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' nor 'exp_cor' provided.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores == 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Average ftime + if (!is.null(ftime_avg)) { + if (!is.null(exp)) { + exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) + exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) + ## Cross-validated PCs. Fabian. This should be extended to + ## nmod and nlt by simple loops. 
Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + if (!is.null(exp_cor)) { + exp_cor_sub <- ClimProjDiags::Subset(exp_cor, ftime_dim, ftime_avg, drop = FALSE) + exp_cor <- MeanDims(exp_cor_sub, ftime_dim, na.rm = TRUE) + } + } + + # wght + wght <- array(sqrt(cos(lat * pi / 180)), dim = c(length(lat), length(lon))) + if (is.null(exp_cor)) { + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + } else { # exp_cor provided + res <- Apply(list(exp = exp, obs = obs, exp_cor = exp_cor), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim), + exp_cor = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, wght, obsproj = TRUE, + add_member_back = FALSE) { + # exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # exp_cor: [memb, sdate = 1, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + if (!is.null(exp_cor)) { + ntime_exp_cor <- dim(exp_cor)[2] # should be 1 + nmemb_exp_cor <- dim(exp_cor)[1] + } + + if (!is.null(obs)) nao_obs <- array(NA, dim = ntime) + if (!is.null(exp)) nao_exp <- array(NA, dim = c(ntime, nmemb_exp)) + if (!is.null(exp_cor)) { + nao_exp_cor <- array(NA, dim = c(ntime_exp_cor, nmemb_exp_cor)) + #NOTE: The dimensions are flipped to fill in data correctly. Need to flip it back later. + } + + if (is.null(exp_cor)) { + + for (tt in 1:ntime) { # cross-validation + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[(1:ntime)[-tt], , , drop = FALSE] + EOF_obs <- .EOF(obs_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode = 1, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. 
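+        ## Note that .ProjectField() returns one PC value per start date;
+        ## keeping only PF[tt], the year excluded from the EOF fit above,
+        ## makes the index cross-validated.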
+ nao_obs[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, (1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + EOF_exp <- .EOF(exp_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + + ## Correct polarity of pattern + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < EOF_exp[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_exp[1, , ], wght = wght) # [sdate, memb] + nao_exp[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + nao_exp[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + } else { # exp_cor provided + + ## Calculate observation EOF. Without cross-validation + EOF_obs <- .EOF(obs, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies + PF <- .ProjectField(obs, eof_mode = EOF_obs, wght = wght) # [mode = 1, sdate] + nao_obs[] <- PF[1, ] + + if (!obsproj) { + # Calculate EOF_exp + tmp <- array(exp, dim = c(nmemb_exp * ntime, nlat, nlon)) + EOF_exp <- .EOF(tmp, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + eof_mode_input <- EOF_exp[1, , ] + } else { + eof_mode_input <- EOF_obs[1, , ] + } + + # Calculate NAO_exp + for (imemb in 1:dim(exp)[1]) { + exp_sub <- ClimProjDiags::Subset(exp, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp[ , imemb] <- PF + } + + # Calculate NAO_exp_cor + for (imemb in 1:dim(exp_cor)[1]) { + exp_sub <- ClimProjDiags::Subset(exp_cor, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp_cor[, imemb] <- PF + } + + } + # add_member_back + if (add_member_back) { + memb_dim_name <- ifelse(!is.null(names(dim(exp))[1]), names(dim(exp))[1], 'member') + nao_obs <- InsertDim(nao_obs, 2, 1, name = memb_dim_name) + } + + # Return results + if (is.null(exp_cor)) { + res <- NULL + if (!is.null(exp)) { + res <- c(res, list(exp = nao_exp)) + } + if (!is.null(obs)) { + res <- c(res, list(obs = nao_obs)) + } + return(res) + + } else { + return(list(exp = nao_exp, obs = nao_obs, exp_cor = nao_exp_cor)) + } +} + diff --git a/modules/Crossval/R/tmp/ProjectField.R b/modules/Crossval/R/tmp/ProjectField.R new file mode 100644 index 00000000..efa35dc3 --- /dev/null +++ b/modules/Crossval/R/tmp/ProjectField.R @@ -0,0 +1,272 @@ +#'Project anomalies onto modes of variability +#' +#'Project anomalies onto modes of variability to get the temporal evolution of +#'the EOF mode selected. It returns principal components (PCs) by area-weighted +#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +#'\code{REOF()} or \code{EuroAtlanticTC()}). 
The calculation removes NA and +#'returns NA if the whole spatial pattern is NA. +#' +#'@param ano A numerical array of anomalies with named dimensions. The +#' dimensions must have at least 'time_dim' and 'space_dim'. It can be +#' generated by Ano(). +#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated +#' by EOF() or REOF(). +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param mode An integer of the variability mode number in the EOF to be +#' projected on. The default value is NULL, which means all the modes of 'eof' +#' is calculated. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numerical array of the principal components in the verification +#' format. The dimensions are the same as 'ano' except 'space_dim'. +#' +#'@seealso EOF, NAO, PlotBoxWhisker +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) +#' +#'\dontrun{ +#' # Plot the forecast and the observation of the first mode for the last year +#' # of forecast +#' sdate_dim_length <- dim(mode1_obs)['sdate'] +#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), +#' lwd = 2) +#' for (i in 1:dim(mode1_exp)['member']) { +#' par(new = TRUE) +#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], +#' ylim = c(-15000, 15000)) +#' } +#'} +#' +#'@import multiApply +#'@export +ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), + mode = NULL, ncores = NULL) { + + # Check inputs + ## ano (1) + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## eof (1) + if (is.null(eof)) { + stop("Parameter 'eof' cannot be NULL.") + } + if (!is.list(eof)) { + stop("Parameter 'eof' must be a list generated by EOF() or REOF().") + } + if ('EOFs' %in% names(eof)) { + EOFs <- "EOFs" + } else if ('REOFs' %in% names(eof)) { + EOFs <- "REOFs" + } else if ('patterns' %in% names(eof)) { + EOFs <- "patterns" + } else { + stop("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().") + } + if (!'wght' %in% names(eof)) { + stop("Parameter 'eof' must be a list that contains 'wght'. 
", + "It can be generated by EOF() or REOF().") + } + if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") + } + if (!is.numeric(eof$wght) || !is.array(eof$wght)) { + stop("The component 'wght' of parameter 'eof' must be a numeric array.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## ano (2) + if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { + stop("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.") + } + ## eof (2) + if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | + !'mode' %in% names(dim(eof[[EOFs]]))) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.") + } + if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { + stop("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.") + } + ## mode + if (!is.null(mode)) { + if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { + stop("Parameter 'mode' must be NULL or a positive integer.") + } + if (mode > dim(eof[[EOFs]])['mode']) { + stop("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + +#------------------------------------------------------- + + # Keep the chosen mode + if (!is.null(mode)) { + eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') + } else { + eof_mode <- eof[[EOFs]] + } + + if ('mode' %in% names(dim(eof_mode))) { + dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] + } else { + dimnames_without_mode <- names(dim(eof_mode)) + } + + if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + + } else { + + if (!all(dimnames_without_mode %in% names(dim(ano)))) { + stop("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. Check if 'ano' and 'eof' are compatible.") + } + + common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] + if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != + dim(eof_mode)[dimnames_without_mode])) { + stop("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", + "with different length. Check if 'ano' and 'eof' are compatible.") + } + + # Enlarge eof/ano is needed. 
The margin_dims of Apply() must be consistent + # between ano and eof. + additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] + additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] + if (length(additional_dims) != 0) { + for (i in seq_along(additional_dims)) { + eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), + lendim = additional_dims[i], name = names(additional_dims)[i]) + } + } + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + } + + return(res) +} + + +.ProjectField <- function(ano, eof_mode, wght) { + # ano: [sdate, lat, lon] + # eof_mode: [lat, lon] or [mode, lat, lon] + # wght: [lat, lon] + + ntime <- dim(ano)[1] + if (length(dim(eof_mode)) == 2) { # mode != NULL + # Initialization of pc.ver. + pc.ver <- array(NA, dim = ntime) #[sdate] + + # Weight + e.1 <- eof_mode * wght + ano <- ano * InsertDim(wght, 1, ntime) + #ano <- aaply(ano, 1, '*', wght) # much heavier + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + #na <- apply(ano, 1, mean, na.rm = TRUE) # much heavier + tmp <- ano * InsertDim(e.1, 1, ntime) # [sdate, lat, lon] + rm(ano) + #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE) # much heavier + pc.ver <- rowSums(tmp, na.rm = TRUE) + pc.ver[which(is.na(na))] <- NA + + } else { # mode = NULL + # Weight + e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1]) + dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3])) # [mode, lat*lon] + ano <- ano * InsertDim(wght, 1, ntime) + dim(ano) <- c(ntime, prod(dim(ano)[2:3])) # [sdate, lat*lon] + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1)) + + # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate] + # Result: [mode, sdate] + pc.ver <- e.1 %*% t(ano) + pc.ver[which(is.na(na))] <- NA + +# # Change back dimensions to feet original input +# dim(projection) <- c(moredims, mode = unname(neofs)) +# return(projection) + } + + return(pc.ver) +} + + diff --git a/modules/Crossval/R/tmp/RPS.R b/modules/Crossval/R/tmp/RPS.R new file mode 100644 index 00000000..0ed599ac --- /dev/null +++ b/modules/Crossval/R/tmp/RPS.R @@ -0,0 +1,408 @@ +#'Compute the Ranked Probability Score +#' +#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the +#'squared differences between the cumulative forecast probabilities (computed +#'from the ensemble members) and the observations (defined as 0% if the category +#'did not happen and 100% if it happened). It can be used to evaluate the skill +#'of multi-categorical probabilistic forecasts. The RPS ranges between 0 +#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of +#'categories. In the case of a forecast divided into two categories (the lowest +#'number of categories that a probabilistic forecast can have), the RPS +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 +#'and 1.\cr +#'The function first calculates the probabilities for forecasts and observations, +#'then use them to calculate RPS. Or, the probabilities of exp and obs can be +#'provided directly to compute the score. 
If there is more than one dataset, RPS
+#'will be computed for each pair of exp and obs data. The fraction of acceptable
+#'NAs can be adjusted.
+#'
+#'@param exp A named numerical array of either the forecasts with at least time
+#'  and member dimensions, or the probabilities with at least time and category
+#'  dimensions. The probabilities can be generated by \code{s2dv::GetProbs}.
+#'@param obs A named numerical array of either the observation with at least
+#'  time dimension, or the probabilities with at least time and category
+#'  dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The
+#'  dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension
+#'  to compute the probabilities of the forecast. The default value is 'member'.
+#'  If the data are probabilities, set memb_dim as NULL.
+#'@param cat_dim A character string indicating the name of the category
+#'  dimension that is needed when the exp and obs are probabilities. The default
+#'  value is NULL, which means that the data are not probabilities.
+#'@param dat_dim A character string indicating the name of dataset dimension.
+#'  The length of this dimension can be different between 'exp' and 'obs'.
+#'  The default value is NULL.
+#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to
+#'  1) between the categories. The default value is c(1/3, 2/3), which
+#'  corresponds to tercile equiprobable categories.
+#'@param indices_for_clim A vector of the indices to be taken along 'time_dim'
+#'  for computing the thresholds between the probabilistic categories. If NULL,
+#'  the whole period is used. The default value is NULL.
+#'@param Fair A logical indicating whether to compute the FairRPS (the
+#'  potential RPS that the forecast would have with an infinite ensemble size).
+#'  The default value is FALSE.
+#'@param nmemb A numeric value indicating the number of members used to compute
+#'  the probabilities. This parameter is necessary when calculating FairRPS from
+#'  probabilities. The default value is NULL.
+#'@param weights A named numerical array of the weights for 'exp' probability
+#'  calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim'
+#'  and 'time_dim'. Else, the dimension should also include 'dat_dim'. The
+#'  default value is NULL. The ensemble should have at least 70 members or span
+#'  at least 10 time steps and have more than 45 members if consistency between
+#'  the weighted and unweighted methodologies is desired.
+#'@param cross.val A logical indicating whether to compute the thresholds
+#'  between probabilistic categories in cross-validation. The default value is
+#'  FALSE.
+#'@param return_mean A logical indicating whether to return the temporal mean
+#'  of the RPS or not. If TRUE, the temporal mean is calculated along time_dim,
+#'  if FALSE the time dimension is not aggregated. The default is TRUE.
+#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it
+#'  means the lower limit for the fraction of the non-NA values. 1 is equal to
+#'  FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable).
+#'  The function returns NA if the fraction of non-NA values in the data is less
+#'  than na.rm. Otherwise, RPS will be calculated. The default value is FALSE.
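+#'  For example, na.rm = 0.9 means that at least 90% of the time steps of each
+#'  exp/obs pair must have valid values for the RPS to be computed; otherwise
+#'  NA is returned for that pair.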
+#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of RPS with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of +#'experiment (i.e., dat_dim in exp), and nobs is the number of observation +#'(i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'# Use synthetic data +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- RPS(exp = exp, obs = obs) +#'# Use probabilities as inputs +#'exp_probs <- GetProbs(exp, time_dim = 'sdate', memb_dim = 'member') +#'obs_probs <- GetProbs(obs, time_dim = 'sdate', memb_dim = NULL) +#'res2 <- RPS(exp = exp_probs, obs = obs_probs, memb_dim = NULL, cat_dim = 'bin') +#' +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, weights = NULL, + cross.val = FALSE, return_mean = TRUE, + na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs))) { + stop("Parameter 'cat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- 
name_obs[-which(name_obs == dat_dim)]
+  }
+  if (!identical(length(name_exp), length(name_obs)) |
+      !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop("Parameter 'exp' and 'obs' must have the same length of ",
+         "all dimensions except 'memb_dim' and 'dat_dim'.")
+  }
+  ## prob_thresholds
+  if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) |
+      any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) {
+    stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.")
+  }
+  ## indices_for_clim
+  if (is.null(indices_for_clim)) {
+    indices_for_clim <- seq_len(dim(obs)[time_dim])
+  } else {
+    if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) {
+      stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.")
+    } else if (length(indices_for_clim) > dim(obs)[time_dim] |
+               max(indices_for_clim) > dim(obs)[time_dim] |
+               any(indices_for_clim < 1)) {
+      stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.")
+    }
+  }
+  ## Fair
+  if (!is.logical(Fair) | length(Fair) > 1) {
+    stop("Parameter 'Fair' must be either TRUE or FALSE.")
+  }
+  if (Fair) {
+    if (!is.null(cat_dim)) {
+      if (cat_dim %in% names(dim(exp))) {
+        if (is.null(nmemb)) {
+          stop("Parameter 'nmemb' is necessary to compute the Fair ",
+               "score from probabilities.")
+        }
+      }
+    }
+  }
+  ## return_mean
+  if (!is.logical(return_mean) | length(return_mean) > 1) {
+    stop("Parameter 'return_mean' must be either TRUE or FALSE.")
+  }
+  ## cross.val
+  if (!is.logical(cross.val) | length(cross.val) > 1) {
+    stop("Parameter 'cross.val' must be either TRUE or FALSE.")
+  }
+  ## weights
+  if (!is.null(weights) & is.null(cat_dim)) {
+    if (!is.array(weights) | !is.numeric(weights))
+      stop("Parameter 'weights' must be a named numeric array.")
+    if (is.null(dat_dim)) {
+      if (length(dim(weights)) != 2 | !all(names(dim(weights)) %in% c(memb_dim, time_dim)))
+        stop("Parameter 'weights' must have two dimensions with the names of ",
+             "'memb_dim' and 'time_dim'.")
+      if (dim(weights)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights)[time_dim] != dim(exp)[time_dim]) {
+        stop("Parameter 'weights' must have the same dimension lengths ",
+             "as 'memb_dim' and 'time_dim' in 'exp'.")
+      }
+      weights <- Reorder(weights, c(time_dim, memb_dim))
+
+    } else {
+      if (length(dim(weights)) != 3 | !all(names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim)))
+        stop("Parameter 'weights' must have three dimensions with the names of ",
+             "'memb_dim', 'time_dim' and 'dat_dim'.")
+      if (dim(weights)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights)[time_dim] != dim(exp)[time_dim] |
+          dim(weights)[dat_dim] != dim(exp)[dat_dim]) {
+        stop("Parameter 'weights' must have the same dimension lengths ",
+             "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")
+      }
+      weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim))
+
+    }
+  } else if (!is.null(weights) & !is.null(cat_dim)) {
+    .warning(paste0("Parameter 'exp' and 'obs' are probabilities already, so parameter ",
+                    "'weights' is not used. 
Change 'weights' to NULL.")) + weights <- NULL + } + ## na.rm + if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { + stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1') + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPS + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) + } + + rps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs), + fun = .RPS, + dat_dim = dat_dim, time_dim = time_dim, + memb_dim = memb_dim, cat_dim = cat_dim, + prob_thresholds = prob_thresholds, nmemb = nmemb, + indices_for_clim = indices_for_clim, Fair = Fair, + weights = weights, cross.val = cross.val, + na.rm = na.rm, ncores = ncores)$output1 + + if (return_mean) { + rps <- MeanDims(rps, time_dim, na.rm = TRUE) + } else { + rps <- rps + } + + return(rps) +} + + +.RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, weights = NULL, + cross.val = FALSE, na.rm = FALSE) { + #--- if memb_dim: + # exp: [sdate, memb, (dat)] + # obs: [sdate, (memb), (dat)] + # weights: NULL or same as exp + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + + # Adjust dimensions to be [sdate, memb, dat] for both exp and obs + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp), nexp = nexp) + dim(obs) <- c(dim(obs), nobs = nobs) + if (!is.null(weights)) dim(weights) <- c(dim(weights), nexp = nexp) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + rps <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + exp_data <- exp[, , i] + obs_data <- obs[, , j] + + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) + + # Find the fraction of NAs + ## If any member/bin is NA at this time step, it is not good value. 
+ exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + + if (f_NAs <= sum(good_values) / length(obs_mean)) { + + exp_data <- exp_data[good_values, , drop = F] + obs_data <- obs_data[good_values, , drop = F] + + # If the data inputs are forecast/observation, calculate probabilities + if (is.null(cat_dim)) { + if (!is.null(weights)) { + weights_data <- weights[which(good_values), , i] + if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) + } else { + weights_data <- weights #NULL + } + + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = weights_data, + cross.val = cross.val) + # exp_probs: [bin, sdate] + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) + # obs_probs: [bin, sdate] + + } else { # inputs are probabilities already + if (all(names(dim(exp_data)) == c(time_dim, memb_dim)) || + all(names(dim(exp_data)) == c(time_dim, cat_dim))) { + exp_probs <- t(exp_data) + obs_probs <- t(obs_data) + } + } + + probs_exp_cumsum <- apply(exp_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + + # rps: [sdate, nexp, nobs] + rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2) + if (Fair) { # FairRPS + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + R <- dim(exp)[2] #memb + } + } else { + R <- nmemb + } + warning("Applying fair correction.") + adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) + adjustment <- colSums(adjustment) + rps[, i, j] <- rps[, i, j] + adjustment + } + + } else { ## not enough values different from NA + + rps[, i, j] <- NA_real_ + + } + + } + } + + if (is.null(dat_dim)) { + dim(rps) <- dim(exp)[time_dim] + } + + return(rps) +} + diff --git a/modules/Crossval/R/tmp/RPSS.R b/modules/Crossval/R/tmp/RPSS.R new file mode 100644 index 00000000..fc9931ad --- /dev/null +++ b/modules/Crossval/R/tmp/RPSS.R @@ -0,0 +1,638 @@ +#'Compute the Ranked Probability Skill Score +#' +#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score +#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to +#'assess whether a forecast presents an improvement or worsening with respect to +#'a reference forecast. The RPSS ranges between minus infinite and 1. If the +#'RPSS is positive, it indicates that the forecast has higher skill than the +#'reference forecast, while a negative value means that it has a lower skill.\cr +#'Examples of reference forecasts are the climatological forecast (same +#'probabilities for all categories for all time steps), persistence, a previous +#'model version, and another model. It is computed as +#'\code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained +#'based on a Random Walk test at the specified confidence level (DelSole and +#'Tippett, 2016).\cr +#'The function accepts either the ensemble members or the probabilities of +#'each data as inputs. 
If there is more than one dataset, RPSS will be +#'computed for each pair of exp and obs data. The NA ratio of data will be +#'examined before the calculation. If the ratio is higher than the threshold +#'(assigned by parameter \code{na.rm}), NA will be returned directly. NAs are +#'counted by per-pair method, which means that only the time steps that all the +#'datasets have values count as non-NA values. +#' +#'@param exp A named numerical array of either the forecast with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param ref A named numerical array of either the reference forecast with at +#' least time and member dimensions, or the probabilities with at least time and +#' category dimensions. The probabilities can be generated by +#' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +#' not have dataset dimension. If there is corresponding reference for each +#' experiment, the dataset dimension must have the same length as in 'exp'. If +#' 'ref' is NULL, the climatological forecast is used as reference forecast. +#' The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. If the data are probabilities, set memb_dim as +#' NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when exp, obs, and ref are probabilities. The +#' default value is NULL, which means that the data are not probabilities. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPSS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param weights_exp A named numerical array of the forecast ensemble weights +#' for probability calculation. The dimension should include 'memb_dim', +#' 'time_dim' and 'dat_dim' if there are multiple datasets. All dimension +#' lengths must be equal to 'exp' dimension lengths. The default value is NULL, +#' which means no weighting is applied. The ensemble should have at least 70 +#' members or span at least 10 time steps and have more than 45 members if +#' consistency between the weighted and unweighted methodologies is desired. 
+#'@param weights_ref Same as 'weights_exp' but for the reference forecast. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistics categories in cross-validation. The default value is +#' FALSE. +#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it +#' means the lower limit for the fraction of the non-NA values. 1 is equal to +#' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). +# The function returns NA if the fraction of non-NA values in the data is less +#' than na.rm. Otherwise, RPS will be calculated. The default value is FALSE. +#'@param sig_method.type A character string indicating the test type of the +#' significance method. Check \code{RandomWalkTest()} parameter +#' \code{test.type} for details. The default is 'two.sided.approx', which is +#' the default of \code{RandomWalkTest()}. +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test. The default value is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'\item{$rpss}{ +#' A numerical array of RPSS with dimensions c(nexp, nobs, the rest dimensions +#' of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of +#' experiment (i.e., dat_dim in exp), and nobs is the number of observation +#' i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance of the RPSS with the same +#' dimensions as $rpss. +#'} +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#'set.seed(1) +#'exp <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'set.seed(2) +#'obs <- array(rnorm(300), dim = c(lat = 3, lon = 2, sdate = 50)) +#'set.seed(3) +#'ref <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'weights <- sapply(1:dim(exp)['sdate'], function(i) { +#' n <- abs(rnorm(10)) +#' n/sum(n) +#' }) +#'dim(weights) <- c(member = 10, sdate = 50) +#'# Use data as input +#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast +#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#'res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights) +#'res <- RPSS(exp = exp, obs = obs, alpha = 0.01, sig_method.type = 'two.sided') +#' +#'# Use probs as input +#'exp_probs <- GetProbs(exp, memb_dim = 'member') +#'obs_probs <- GetProbs(obs, memb_dim = NULL) +#'ref_probs <- GetProbs(ref, memb_dim = 'member') +#'res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, +#' cat_dim = 'bin') +#' +#'@import multiApply +#'@export +RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, + weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = FALSE, + sig_method.type = 'two.sided.approx', alpha = 0.05, ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (!is.array(obs) | !is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (any(is.null(names(dim(exp)))) | 
any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop("Parameter 'ref' must be a numeric array.") + if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + } + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) | + (!is.null(ref) & !cat_dim %in% names(dim(ref)))) { + stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp, obs, and ref (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim)) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop("If parameter 'ref' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.") + } + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) 
|
+      any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) {
+    stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.")
+  }
+  ## indices_for_clim
+  if (is.null(indices_for_clim)) {
+    indices_for_clim <- seq_len(dim(obs)[time_dim])
+  } else {
+    if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) {
+      stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.")
+    } else if (length(indices_for_clim) > dim(obs)[time_dim] |
+               max(indices_for_clim) > dim(obs)[time_dim] |
+               any(indices_for_clim < 1)) {
+      stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.")
+    }
+  }
+  ## Fair
+  if (!is.logical(Fair) | length(Fair) > 1) {
+    stop("Parameter 'Fair' must be either TRUE or FALSE.")
+  }
+  ## cross.val
+  if (!is.logical(cross.val) | length(cross.val) > 1) {
+    stop("Parameter 'cross.val' must be either TRUE or FALSE.")
+  }
+  ## weights_exp
+  if (!is.null(weights_exp) & is.null(cat_dim)) {
+    if (!is.array(weights_exp) | !is.numeric(weights_exp))
+      stop("Parameter 'weights_exp' must be a named numeric array.")
+
+    if (is.null(dat_dim)) {
+      if (length(dim(weights_exp)) != 2 |
+          !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim))) {
+        stop("Parameter 'weights_exp' must have two dimensions with the names of ",
+             "'memb_dim' and 'time_dim'.")
+      }
+      if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights_exp)[time_dim] != dim(exp)[time_dim]) {
+        stop("Parameter 'weights_exp' must have the same dimension lengths as ",
+             "'memb_dim' and 'time_dim' in 'exp'.")
+      }
+      weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim))
+
+    } else {
+      if (length(dim(weights_exp)) != 3 |
+          !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim, dat_dim))) {
+        stop("Parameter 'weights_exp' must have three dimensions with the names of ",
+             "'memb_dim', 'time_dim' and 'dat_dim'.")
+      }
+      if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights_exp)[time_dim] != dim(exp)[time_dim] |
+          dim(weights_exp)[dat_dim] != dim(exp)[dat_dim]) {
+        stop("Parameter 'weights_exp' must have the same dimension lengths ",
+             "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")
+      }
+      weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim, dat_dim))
+    }
+  } else if (!is.null(weights_exp) & !is.null(cat_dim)) {
+    .warning(paste0("Parameter 'exp' contains probabilities already, so parameter ",
+                    "'weights_exp' is not used. 
Change 'weights_exp' to NULL.")) + weights_exp <- NULL + } + ## weights_ref + if (!is.null(weights_ref) & is.null(cat_dim)) { + if (!is.array(weights_ref) | !is.numeric(weights_ref)) + stop("Parameter 'weights_ref' must be a named numeric array.") + + if (is.null(dat_dim) | ((!is.null(dat_dim)) && (!dat_dim %in% names(dim(ref))))) { + if (length(dim(weights_ref)) != 2 | + !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim))) { + stop("Parameter 'weights_ref' must have two dimensions with the names of ", + "'memb_dim' and 'time_dim'.") + } + if (dim(weights_ref)[memb_dim] != dim(exp)[memb_dim] | + dim(weights_ref)[time_dim] != dim(exp)[time_dim]) { + stop("Parameter 'weights_ref' must have the same dimension lengths as ", + "'memb_dim' and 'time_dim' in 'ref'.") + } + weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim)) + + } else { + if (length(dim(weights_ref)) != 3 | + !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim, dat_dim))) { + stop("Parameter 'weights_ref' must have three dimensions with the names of ", + "'memb_dim', 'time_dim' and 'dat_dim'.") + } + if (dim(weights_ref)[memb_dim] != dim(ref)[memb_dim] | + dim(weights_ref)[time_dim] != dim(ref)[time_dim] | + dim(weights_ref)[dat_dim] != dim(ref)[dat_dim]) { + stop("Parameter 'weights_ref' must have the same dimension lengths ", + "as 'memb_dim', 'time_dim' and 'dat_dim' in 'ref'.") + } + weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim, dat_dim)) + } + } else if (!is.null(weights_ref) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'ref' is probability already, so parameter ", + "'weights_ref' is not used. Change 'weights_ref' to NULL.")) + weights_ref <- NULL + } + ## na.rm + if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { + stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1') + } + ## alpha + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { + stop("Parameter 'alpha' must be a number between 0 and 1.") + } + ## sig_method.type + #NOTE: These are the types of RandomWalkTest() + if (!sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) { + stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', ", + "'greater', or 'less'.") + } + if (sig_method.type == 'two.sided.approx' && alpha != 0.05) { + .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", + "= 0.05 only. 
Returning the significance at the 0.05 significance level.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPSS + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) + } + + if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(memb_dim)) { + if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim, memb_dim) + } + } else { + target_dims_ref <- c(time_dim, cat_dim, dat_dim) + } + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs, + ref = target_dims_ref) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs) + } + + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, nmemb_ref = nmemb_ref, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + + return(output) + +} + +.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, + weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, + na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05) { + #--- if memb_dim: + # exp: [sdate, memb, (dat)] + # obs: [sdate, (memb), (dat)] + # ref: [sdate, memb, (dat)] or NULL + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + # ref: [sdate, bin, (dat)] or NULL + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + # Calculate RPS + + if (!is.null(ref)) { + + # Adjust dimensions to be [sdate, memb, dat] for both exp, obs, and ref + ## Insert memb_dim in obs + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + ## Insert dat_dim + if (is.null(dat_dim)) { + dim(obs) <- c(dim(obs), dat = nobs) + dim(exp) <- c(dim(exp), dat = nexp) + if (!is.null(weights_exp)) dim(weights_exp) <- c(dim(weights_exp), dat = nexp) + } + if (is.null(dat_dim) || (!is.null(dat_dim) && !dat_dim %in% names(dim(ref)))) { + nref <- 1 + dim(ref) <- c(dim(ref), dat = nref) + if (!is.null(weights_ref)) dim(weights_ref) <- c(dim(weights_ref), dat = nref) + } else { + nref <- as.numeric(dim(ref)[dat_dim]) # should be the same as nexp + } + + # Find good values then calculate RPS + rps_exp <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + rps_ref <- array(NA, dim = c(dim(exp)[time_dim], nexp 
= nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + for (k in 1:nref) { + if (nref != 1 & k != i) { # if nref is 1 or equal to nexp, calculate rps + next + } + exp_data <- exp[, , i, drop = F] + obs_data <- obs[, , j, drop = F] + ref_data <- ref[, , k, drop = F] + exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + ref_mean <- rowMeans(ref_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) & !is.na(ref_mean) + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + if (f_NAs <= sum(good_values) / length(good_values)) { + rps_exp[good_values, i, j] <- .RPS(exp = exp[good_values, , i], + obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, nmemb = nmemb, + weights = weights_exp[good_values, , i], + cross.val = cross.val, na.rm = na.rm) + rps_ref[good_values, i, j] <- .RPS(exp = ref[good_values, , k], + obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, nmemb = nmemb_ref, + weights = weights_ref[good_values, , k], + na.rm = na.rm, cross.val = cross.val) + } + } + } + } + + } else { # ref is NULL + rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, weights = weights_exp, + cross.val = cross.val, na.rm = na.rm) + + # RPS of the reference forecast + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + + rps_ref <- array(NA, dim = c(dim(obs)[time_dim], nexp = nexp, nobs = nobs)) + + if (is.null(dat_dim)) { + dim(obs) <- c(dim(obs), nobs = nobs) + dim(exp) <- c(dim(exp), nexp = nexp) + dim(rps_exp) <- dim(rps_ref) + } + + for (i in 1:nexp) { + for (j in 1:nobs) { + # Use good values only + good_values <- !is.na(rps_exp[, i, j]) + if (f_NAs <= sum(good_values) / length(good_values)) { + obs_data <- obs[good_values, , j] + if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data), 1) + + if (is.null(cat_dim)) { # calculate probs + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + obs_probs <- .GetProbs(data = obs_data, + indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, + weights = NULL, cross.val = cross.val) + } else { + obs_probs <- t(obs_data) + } + # obs_probs: [bin, sdate] + + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), + 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + # clim_probs: [bin, sdate] + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2) + } + if (Fair) { # FairRPS + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + R <- dim(obs)[1] #number of years + } + } else { + R <- nmemb_ref + } + adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - 
probs_clim_cumsum) + adjustment <- colSums(adjustment) + rps_ref[, i, j] <- rps_ref[, i, j] + adjustment + } + } + } + } + + if (is.null(dat_dim)) { + dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim] + } + +#---------------------------------------------- + # Calculate RPSS + + if (!is.null(dat_dim)) { + # rps_exp and rps_ref: [sdate, nexp, nobs] + rps_exp_mean <- colMeans(rps_exp, na.rm = TRUE) + rps_ref_mean <- colMeans(rps_ref, na.rm = TRUE) + rpss <- array(dim = c(nexp = nexp, nobs = nobs)) + sign <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (!all(is.na(rps_exp_mean))) { + for (i in 1:nexp) { + for (j in 1:nobs) { + rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j] + ind_nonNA <- !is.na(rps_exp[, i, j]) + if (!any(ind_nonNA)) { + sign[i, j] <- NA + } else { + sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], + skill_B = rps_ref[ind_nonNA, i, j], + test.type = sig_method.type, alpha = alpha, + sign = T, pval = F)$sign + } + } + } + } + + # Turn NaN into NA + if (any(is.nan(rpss))) rpss[which(is.nan(rpss))] <- NA + + } else { # dat_dim is NULL + + ind_nonNA <- !is.na(rps_exp) + if (!any(ind_nonNA)) { + rpss <- NA + sign <- NA + } else { + # rps_exp and rps_ref: [sdate] + rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE) + sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], + skill_B = rps_ref[ind_nonNA], + test.type = sig_method.type, alpha = alpha, + sign = T, pval = F)$sign + } + } + + return(list(rpss = rpss, sign = sign)) +} diff --git a/modules/Crossval/R/tmp/RandomWalkTest.R b/modules/Crossval/R/tmp/RandomWalkTest.R new file mode 100644 index 00000000..16d89f6d --- /dev/null +++ b/modules/Crossval/R/tmp/RandomWalkTest.R @@ -0,0 +1,184 @@ +#'Random Walk test for skill differences +#' +#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a +#'common observational reference) based on Random Walks (DelSole and Tippett, +#'2016). +#' +#'@param skill_A A numerical array of the time series of the scores obtained +#' with the forecaster A. +#'@param skill_B A numerical array of the time series of the scores obtained +#' with the forecaster B. The dimensions should be identical as parameter +#' 'skill_A'. +#'@param time_dim A character string indicating the name of the dimension along +#' which the tests are computed. The default value is 'sdate'. +#'@param test.type A character string indicating the type of significance test. +#' It can be "two.sided.approx" (to assess whether forecaster A and forecaster +#' B are significantly different in terms of skill with a two-sided test using +#' the approximation of DelSole and Tippett, 2016), "two.sided" (to assess +#' whether forecaster A and forecaster B are significantly different in terms +#' of skill with an exact two-sided test), "greater" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for negatively oriented scores), or "less" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for positively oriented scores). The default value is +#' "two.sided.approx". +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test (output "sign"). The default value is 0.05. +#'@param pval A logical value indicating whether to return the p-value of the +#' significance test. The default value is TRUE. 
+#'@param sign A logical value indicating whether to return the statistical +#' significance of the test based on 'alpha'. The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list with: +#'\item{$score}{ +#' A numerical array with the same dimensions as the input arrays except +#' 'time_dim'. The number of times that forecaster A has been better than +#' forecaster B minus the number of times that forecaster B has been better +#' than forecaster A (for skill negatively oriented, i.e., the lower the +#' better). If $score is positive, forecaster A has been better more times +#' than forecaster B. If $score is negative, forecaster B has been better more +#' times than forecaster A. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance with the same dimensions +#' as the input arrays except "time_dim". Returned only if "sign" is TRUE. +#'} +#'\item{$p.val}{ +#' A numeric array of the p-values with the same dimensions as the input arrays +#' except "time_dim". Returned only if "pval" is TRUE. +#'} +#' +#'@details +#' Null and alternative hypothesis for "two-sided" test (regardless of the +#' orientation of the scores):\cr +#' H0: forecaster A and forecaster B are not different in terms of skill\cr +#' H1: forecaster A and forecaster B are different in terms of skill +#' +#' Null and alternative hypothesis for one-sided "greater" (for negatively +#' oriented scores, i.e., the lower the better) and "less" (for positively +#' oriented scores, i.e., the higher the better) tests:\cr +#' H0: forecaster A is not better than forecaster B\cr +#' H1: forecaster A is better than forecaster B +#' +#' Examples of negatively oriented scores are the RPS, RMSE and the Error, while +#' the ROC score is a positively oriented score. +#' +#' DelSole and Tippett (2016) approximation for two-sided test at 95% confidence +#' level: significant if the difference between the number of times that +#' forecaster A has been better than forecaster B and forecaster B has been +#' better than forecaster A is above 2sqrt(N) or below -2sqrt(N). 
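+#'
+#' As a worked illustration of this approximation (the numbers below are
+#' chosen for illustration, not taken from the reference): with N = 25 time
+#' steps the threshold is \code{2 * sqrt(25) = 10}, so one forecaster must
+#' win at least 11 more times than the other for the difference to be
+#' flagged as significant at the 5% level.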
+#'
+#'@references
+#'DelSole and Tippett (2016): https://doi.org/10.1175/MWR-D-15-0218.1
+#'
+#'@examples
+' fcst_A <- array(data = 11:50, dim = c(sdate = 10, lat = 2, lon = 2))
+#' fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2))
+#' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2))
+#' scores_A <- abs(fcst_A - reference)
+#' scores_B <- abs(fcst_B - reference)
+#' res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = FALSE, sign = TRUE)
+#' res2 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, test.type = 'greater')
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@export
+RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate',
+                           test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE,
+                           sign = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## skill_A and skill_B
+  if (is.null(skill_A) | is.null(skill_B)) {
+    stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.")
+  }
+  if (!is.numeric(skill_A) | !is.numeric(skill_B)) {
+    stop("Parameters 'skill_A' and 'skill_B' must be numerical arrays.")
+  }
+  if (!identical(dim(skill_A), dim(skill_B))) {
+    stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))) {
+    stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.")
+  }
+  ## alpha
+  if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) {
+    stop("Parameter 'alpha' must be a number between 0 and 1.")
+  }
+  ## test.type
+  if (!test.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) {
+    stop("Parameter 'test.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.")
+  }
+  if (test.type == 'two.sided.approx') {
+    if (alpha != 0.05) {
+      .warning("DelSole and Tippett (2016) approximation is valid for alpha ",
+               "= 0.05 only. Returning the significance at the 0.05 significance level.")
+    }
+    if (pval) {
+      .warning("p-value cannot be returned with the DelSole and Tippett (2016) ",
+               "approximation. 
Returning the significance at the 0.05 significance level.") + } + sign <- TRUE + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ## Compute the Random Walk Test + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B), + target_dims = list(skill_A = time_dim, + skill_B = time_dim), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + ncores = ncores) + + return(res) +} + +.RandomWalkTest <- function(skill_A, skill_B, test.type = 'two.sided.approx', + alpha = 0.05, pval = TRUE, sign = FALSE) { + #skill_A and skill_B: [sdate] + + N.eff <- length(skill_A) + + A_better <- sum(skill_B > skill_A) + B_better <- sum(skill_B < skill_A) + + output <- NULL + output$score <- A_better - B_better + + if (test.type == 'two.sided.approx') { + output$sign <- abs(output$score) > (2 * sqrt(N.eff)) + + } else { + + if (!is.na(output$score)) { + p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha, + alternative = test.type)$p.value + + } else { + p.val <- NA + } + + if (pval) { + output$p.val <- p.val + } + if (sign) { + output$sign <- !is.na(p.val) & p.val <= alpha + } + + } + + return(output) +} diff --git a/modules/Crossval/R/tmp/SprErr.R b/modules/Crossval/R/tmp/SprErr.R new file mode 100644 index 00000000..33642eab --- /dev/null +++ b/modules/Crossval/R/tmp/SprErr.R @@ -0,0 +1,227 @@ +#'Compute the ratio between the ensemble spread and RMSE +#' +#'Compute the ratio between the spread of the members around the +#'ensemble mean in experimental data and the RMSE between the ensemble mean of +#'experimental and observational data. The p-value and/or the statistical +#'significance is provided by a one-sided Fisher's test. +#' +#'@param exp A named numeric array of experimental data with at least two +#' dimensions 'memb_dim' and 'time_dim'. +#'@param obs A named numeric array of observational data with at least two +#' dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is NULL (no dataset). +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. The default value +#' is 'member'. +#'@param time_dim A character string indicating the name of dimension along +#' which the ratio is computed. The default value is 'sdate'. +#'@param pval A logical value indicating whether to compute or not the p-value +#' of the test Ho : SD/RMSE = 1 or not. The default value is TRUE. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: ACC = 0 based on 'alpha'. The default value is +#' FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param na.rm A logical value indicating whether to remove NA values. The default +#' value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of two arrays with dimensions c(nexp, nobs, the rest of +#' dimensions of 'exp' and 'obs' except memb_dim and time_dim), which nexp is +#' the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. 
+#' If dat_dim is NULL, nexp and nobs are omitted. \cr +#'\item{$ratio}{ +#' The ratio of the ensemble spread and RMSE. +#'} +#'\item{$p_val}{ +#' The p-value of the one-sided Fisher's test with Ho: SD/RMSE = 1. Only present +#' if \code{pval = TRUE}. +#'} +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs, dat_dim = 'dataset') +#'# Reorder the data in order to plot it with PlotVsLTime +#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3])) +#'rsdrms_plot[, , 2, ] <- rsdrms$ratio +#'rsdrms_plot[, , 4, ] <- rsdrms$p.val +#'\dontrun{ +#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", +#' monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), +#' listobs = c('ERSST'), biglab = FALSE, siglev = TRUE) +#'} +#' +#'@import multiApply +#'@export +SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', + time_dim = 'sdate', pval = TRUE, sign = FALSE, + alpha = 0.05, na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions memb_dim and time_dim.")) + } + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. 
", + "Set it as NULL if there is no member dimension.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all the dimensions except 'dat_dim' and 'memb_dim'.")) + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + # alpha + if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + # na.rm + if (!na.rm %in% c(TRUE, FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + ############################### + # Calculate RatioSDRMS + + # If dat_dim = NULL, insert dat dim + remove_dat_dim <- FALSE + if (is.null(dat_dim)) { + dat_dim <- 'dataset' + exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset') + remove_dat_dim <- TRUE + } + + res <- Apply(list(exp, obs), + target_dims = list(c(dat_dim, memb_dim, time_dim), + c(dat_dim, memb_dim, time_dim)), + pval = pval, + sign = sign, + na.rm = na.rm, + fun = .SprErr, + ncores = ncores) + + if (remove_dat_dim) { + if (length(dim(res[[1]])) > 2) { + res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected') + } else { + res <- lapply(res, as.numeric) + } + } + + return(res) +} + +.SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE) { + + # exp: [dat_exp, member, sdate] + # obs: [dat_obs, member, sdate] + nexp <- dim(exp)[1] + nobs <- dim(obs)[1] + + # ensemble mean + ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate] + ens_obs <- MeanDims(obs, 2, na.rm = na.rm) + + # Create empty arrays + ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + + # spread and error + spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm)) + error <- sqrt(mean((ens_obs - ens_exp[jexp,])^2, na.rm = na.rm)) + ratio[jexp, jobs] <- spread/error + + # effective sample size + enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3])) + enodif <- 
.Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass) + if (pval) { + F <- (enospr[jexp] * spread[jexp]^2 / (enospr[jexp] - 1)) / (enodif * error^2 / (enodif - 1)) + if (!is.na(F) & !is.na(enospr[jexp]) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + p.val[jexp, jobs] <- pf(F, enospr[jexp] - 1, enodif - 1) + p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs]) + } else { + ratio[jexp, jobs] <- NA + } + } + } + } + + res <- list(ratio = ratio) + if (pval) {res$p.val <- p.val} + if (sign) {res$sign <- p.val <= alpha} + + return(res) +} + diff --git a/modules/Crossval/R/tmp/Utils.R b/modules/Crossval/R/tmp/Utils.R new file mode 100644 index 00000000..cd7a1e10 --- /dev/null +++ b/modules/Crossval/R/tmp/Utils.R @@ -0,0 +1,1885 @@ +#'@importFrom abind abind +#'@import plyr ncdf4 +#'@importFrom grDevices png jpeg pdf svg bmp tiff +#'@importFrom easyVerification convert2prob + +## Function to tell if a regexpr() match is a complete match to a specified name +.IsFullMatch <- function(x, name) { + x > 0 && attributes(x)$match.length == nchar(name) +} + +.ConfigReplaceVariablesInString <- function(string, replace_values, + allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. + if (length(strsplit(string, "\\$")[[1]]) > 1) { + parts <- strsplit(string, "\\$")[[1]] + output <- "" + i <- 0 + for (part in parts) { + if (i %% 2 == 0) { + output <- paste0(output, part) + } else { + if (part %in% names(replace_values)) { + output <- paste0(output, + .ConfigReplaceVariablesInString(replace_values[[part]], + replace_values, + allow_undefined_key_vars)) + } else if (allow_undefined_key_vars) { + output <- paste0(output, "$", part, "$") + } else { + stop('Error: The variable $', part, + '$ was not defined in the configuration file.', sep = '') + } + } + i <- i + 1 + } + output + } else { + string + } +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') + return(known_lon_names) +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') + return(known_lat_names) +} + +.t2nlatlon <- function(t) { + ## As seen in cdo's griddes.c: ntr2nlat() + nlats <- (t * 3 + 1) / 2 + if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) { + nlats <- ceiling(nlats) + } else { + nlats <- round(nlats) + } + if (nlats %% 2 > 0) { + nlats <- nlats + 1 + } + ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF + nlons <- 2 * nlats + keep_going <- TRUE + while (keep_going) { + n <- nlons + if (n %% 8 == 0) n <- trunc(n / 8) + while (n %% 6 == 0) n <- trunc(n / 6) + while (n %% 5 == 0) n <- trunc(n / 5) + while (n %% 4 == 0) n <- trunc(n / 4) + while (n %% 3 == 0) n <- trunc(n / 3) + if (n %% 2 == 0) n <- trunc(n / 2) + if (n <= 8) { + keep_going <- FALSE + } else { + nlons <- nlons + 2 + if (nlons > 9999) { + stop("Error: pick another gaussian grid truncation. 
", + "It doesn't fulfill the standards to apply FFT.") + } + } + } + c(nlats, nlons) +} + +.nlat2t <- function(nlats) { + trunc((nlats * 2 - 1) / 3) +} + +.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) { + # The purpose, working modes, inputs and outputs of this function are + # explained in ?LoadDataFile + #suppressPackageStartupMessages({library(ncdf4)}) + #suppressPackageStartupMessages({library(bigmemory)}) + #suppressPackageStartupMessages({library(plyr)}) + # Auxiliar function to convert array indices to lineal indices + arrayIndex2VectorIndex <- function(indices, dims) { + if (length(indices) > length(dims)) { + stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.") + } + position <- 1 + dims <- rev(dims) + indices <- rev(indices) + for (i in seq_along(indices)) { + position <- position + (indices[i] - 1) * prod(dims[-(1:i)]) + } + position + } + + found_file <- NULL + dims <- NULL + grid_name <- units <- var_long_name <- NULL + is_2d_var <- array_across_gw <- NULL + data_across_gw <- NULL + + filename <- work_piece[['filename']] + namevar <- work_piece[['namevar']] + output <- work_piece[['output']] + # The names of all data files in the directory of the repository that match + # the pattern are obtained. + if (any(grep("^http", filename))) { + is_url <- TRUE + files <- filename + ## TODO: Check that the user is not using shell globbing exps. + } else { + is_url <- FALSE + files <- Sys.glob(filename) + } + + # If we don't find any, we leave the flag 'found_file' with a NULL value. + if (length(files) > 0) { + # The first file that matches the pattern is chosen and read. + filename <- head(files, 1) + filein <- filename + found_file <- filename + mask <- work_piece[['mask']] + + if (!silent && explore_dims) { + .message(paste("Exploring dimensions...", filename)) + ##} else { + ## cat(paste("* Reading & processing data...", filename, '\n')) + ##} + } + + # We will fill in 'expected_dims' with the names of the expected dimensions of + # the data array we'll retrieve from the file. + expected_dims <- NULL + remap_needed <- FALSE + # But first we open the file and work out whether the requested variable is 2d + fnc <- nc_open(filein) + if (!(namevar %in% names(fnc$var))) { + stop("Error: The variable", namevar, "is not defined in the file", filename) + } + var_long_name <- fnc$var[[namevar]]$longname + units <- fnc$var[[namevar]]$units + file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of + # 'longitude'/'latitude'. 
+ if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) && + (work_piece[['dimnames']][['lon']] == 'longitude') && + ('lon' %in% file_dimnames)) { + work_piece[['dimnames']][['lon']] <- 'lon' + } + if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) && + (work_piece[['dimnames']][['lat']] == 'latitude') && + ('lat' %in% file_dimnames)) { + work_piece[['dimnames']][['lat']] <- 'lat' + } + if (is.null(work_piece[['is_2d_var']])) { + is_2d_var <- all(c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) %in% + unlist(lapply(fnc$var[[namevar]][['dim']], + '[[', 'name'))) + } else { + is_2d_var <- work_piece[['is_2d_var']] + } + if ((is_2d_var || work_piece[['is_file_per_dataset']])) { + if (Sys.which("cdo")[[1]] == "") { + stop("Error: CDO libraries not available") + } + + cdo_version <- + strsplit(suppressWarnings( + system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] + + cdo_version <- + as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) + + } + # If the variable to load is 2-d, we need to determine whether: + # - interpolation is needed + # - subsetting is requested + if (is_2d_var) { + ## We read the longitudes and latitudes from the file. + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + first_lon_in_original_file <- lon[1] + # If a common grid is requested or we are exploring the file dimensions + # we need to read the grid type and size of the file to finally work out the + # CDO grid name. + if (!is.null(work_piece[['grid']]) || explore_dims) { + # Here we read the grid type and its number of longitudes and latitudes + file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) + grids_positions <- grep('# gridID', file_info) + if (length(grids_positions) < 1) { + stop("The grid should be defined in the files.") + } + grids_first_lines <- grids_positions + 2 + grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) + grids_info <- as.list(seq_along(grids_positions)) + grids_info <- lapply(grids_info, + function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) + grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) + grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) + grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) + grids_matches <- unlist(lapply(grids_info, function (x) { + nlons <- if (any(grep('xsize', x))) { + as.numeric(x[grep('xsize', x) + 1]) + } else { + NA + } + nlats <- if (any(grep('ysize', x))) { + as.numeric(x[grep('ysize', x) + 1]) + } else { + NA + } + result <- FALSE + if (!anyNA(c(nlons, nlats))) { + if ((nlons == length(lon)) && + (nlats == length(lat))) { + result <- TRUE + } + } + result + })) + grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] + if (length(grids_matches) == 0) { + stop("Error: Only 'gaussian' and 'lonlat' grids supported. 
See e.g: cdo sinfo ", filename) + } + if (sum(grids_matches) > 1) { + if ((all(grids_types[which(grids_matches)] == 'gaussian') || + all(grids_types[which(grids_matches)] == 'lonlat')) && + all(unlist(lapply(grids_info[which(grids_matches)], identical, + grids_info[which(grids_matches)][[1]])))) { + grid_type <- grids_types[which(grids_matches)][1] + } else { + stop("Error: Load() can't disambiguate: ", + "More than one lonlat/gaussian grids with the same size as ", + "the requested variable defined in ", filename) + } + } else if (sum(grids_matches) == 1) { + grid_type <- grids_types[which(grids_matches)] + } else { + stop("Unexpected error.") + } + grid_lons <- length(lon) + grid_lats <- length(lat) + # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() + if (grid_type == 'lonlat') { + grid_name <- paste0('r', grid_lons, 'x', grid_lats) + } else { + grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') + } + if (is.null(work_piece[['grid']])) { + .warning(paste0("Detect the grid type to be '", grid_name, "'. ", + "If it is not expected, assign parameter 'grid' to avoid wrong result.")) + } + } + # If a common grid is requested, we will also calculate its size which we will use + # later on. + if (!is.null(work_piece[['grid']])) { + # Now we calculate the common grid type and its lons and lats + if (any(grep('^t\\d{1,+}grid$', work_piece[['grid']]))) { + common_grid_type <- 'gaussian' + common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + nlonlat <- .t2nlatlon(common_grid_res) + common_grid_lats <- nlonlat[1] + common_grid_lons <- nlonlat[2] + } else if (any(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']]))) { + common_grid_type <- 'lonlat' + common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) + } else { + stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") + } + } else { + ## If no 'grid' is specified, there is no common grid. + ## But these variables are filled in for consistency in the code. 
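+        ## In other words, the file's own grid size is taken as the 'common'
+        ## grid, so the longitude arithmetic below (first/last_common_grid_lon)
+        ## still applies.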
+ common_grid_lons <- length(lon) + common_grid_lats <- length(lat) + } + first_common_grid_lon <- 0 + last_common_grid_lon <- 360 - 360 / common_grid_lons + ## This is not true for gaussian grids or for some regular grids, but + ## is a safe estimation + first_common_grid_lat <- -90 + last_common_grid_lat <- 90 + # And finally determine whether interpolation is needed or not + remove_shift <- FALSE + if (!is.null(work_piece[['grid']])) { + if ((grid_lons != common_grid_lons) || + (grid_lats != common_grid_lats) || + (grid_type != common_grid_type) || + (lon[1] != first_common_grid_lon)) { + if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { + remove_shift <- TRUE + } + remap_needed <- TRUE + common_grid_name <- work_piece[['grid']] + } + } else if ((lon[1] != first_common_grid_lon) && explore_dims && + !work_piece[['single_dataset']]) { + remap_needed <- TRUE + common_grid_name <- grid_name + remove_shift <- TRUE + } + if (remap_needed && (work_piece[['remap']] == 'con') && + (cdo_version >= as.numeric_version('1.7.0'))) { + work_piece[['remap']] <- 'ycon' + } + if (remove_shift && !explore_dims) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + .warning(paste0("The dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' doesn't start at longitude 0 and will be re-interpolated in order ", + "to align its longitudes with the standard CDO grids definable with ", + "the names 'tgrid' or 'rx', which are by definition ", + "starting at the longitude 0.\n")) + if (!is.null(mask)) { + .warning(paste0("A mask was provided for the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "'. This dataset has been re-interpolated to align its longitudes to ", + "start at 0. You must re-interpolate the corresponding mask to align ", + "its longitudes to start at 0 as well, if you haven't done so yet. ", + "Running cdo remapcon,", common_grid_name, + " original_mask_file.nc new_mask_file.nc will fix it.\n")) + } + } + if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + if (!explore_dims) { + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is originally on ", + "a grid coarser than the common grid and it has been ", + "extrapolated. Check the results carefully. 
It is ", + "recommended to specify as common grid the coarsest grid ", + "among all requested datasets via the parameter 'grid'.\n")) + } + } + # Now calculate if the user requests for a lonlat subset or for the + # entire field + lonmin <- work_piece[['lon_limits']][1] + lonmax <- work_piece[['lon_limits']][2] + latmin <- work_piece[['lat_limits']][1] + latmax <- work_piece[['lat_limits']][2] + lon_subsetting_requested <- FALSE + lonlat_subsetting_requested <- FALSE + if (lonmin <= lonmax) { + if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { + lon_subsetting_requested <- TRUE + } + } else { + if ((lonmin - lonmax) > 360 / common_grid_lons) { + lon_subsetting_requested <- TRUE + } else { + gap_width <- floor(lonmin / (360 / common_grid_lons)) - + floor(lonmax / (360 / common_grid_lons)) + if (gap_width > 0) { + if (!(gap_width == 1 && (lonmin %% (360 / common_grid_lons) == 0) && + (lonmax %% (360 / common_grid_lons) == 0))) { + lon_subsetting_requested <- TRUE + } + } + } + } + if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) + || (lon_subsetting_requested)) { + lonlat_subsetting_requested <- TRUE + } + # Now that we know if subsetting was requested, we can say if final data + # will go across greenwich + if (lonmax < lonmin) { + data_across_gw <- TRUE + } else { + data_across_gw <- !lon_subsetting_requested + } + + # When remap is needed but no subsetting, the file is copied locally + # so that cdo works faster, and then interpolated. + # Otherwise the file is kept as is and the subset will have to be + # interpolated still. + if (!lonlat_subsetting_requested && remap_needed) { + nc_close(fnc) + filecopy <- tempfile(pattern = "load", fileext = ".nc") + file.copy(filein, filecopy) + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the + # underlying hdf5 library. + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " -selname,", namevar, " ", filecopy, " ", filein, + " 2>/dev/null")) + file.remove(filecopy) + work_piece[['dimnames']][['lon']] <- 'lon' + work_piece[['dimnames']][['lat']] <- 'lat' + fnc <- nc_open(filein) + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + } + + # Read and check also the mask + if (!is.null(mask)) { + ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + if (is.list(mask)) { + if (!file.exists(mask[['path']])) { + stop("Error: Couldn't find the mask file", mask[['path']]) + } + mask_file <- mask[['path']] + ###file.copy(work_piece[['mask']][['path']], mask_file) + fnc_mask <- nc_open(mask_file) + vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') + if ('nc_var_name' %in% names(mask)) { + if (!(mask[['nc_var_name']] %in% + vars_in_mask)) { + stop("Error: couldn't find variable", mask[['nc_var_name']], + "in the mask file", mask[['path']]) + } + } else { + if (length(vars_in_mask) != 1) { + stop("Error: one and only one non-coordinate variable should be ", + "defined in the mask file", + mask[['path']], + "if the component 'nc_var_name' is not specified. 
", + "Currently found: ", + toString(vars_in_mask), ".") + } else { + mask[['nc_var_name']] <- vars_in_mask + } + } + if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { + stop("Error: the variable '", + mask[['nc_var_name']], + "' must be defined only over the dimensions '", + work_piece[['dimnames']][['lon']], "' and '", + work_piece[['dimnames']][['lat']], + "' in the mask file ", + mask[['path']]) + } + mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) + nc_close(fnc_mask) + ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) + ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) + ###} else { + ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) + ### nc_close(fnc_mask) + ### fnc_mask <- nc_open(mask_file) + ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') + ### mask_lon <- lon + ### mask_lat <- lat + ###} + ###} + ### Now ready to check that the mask is right + ##if (!(lonlat_subsetting_requested && remap_needed)) { + ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { + ### stop(paste("Error: the mask of the dataset with index ", + ### tail(work_piece[['indices']], 1), " in '", + ### work_piece[['dataset_type']], "' is wrong. ", + ### "It must be on the common grid if the selected output type is 'lonlat', ", + ### "'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on ", + ### "the grid of the corresponding dataset if the selected output type is ", + ### "'areave' and no 'grid' has been specified. For more information ", + ### "check ?Load and see help on parameters 'grid', 'maskmod' and ", + ### "'maskobs'.", sep = "")) + ### } + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be ", + ### "identical to the ones in the corresponding data files if output = 'areave' ", + ### " or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in ", + ### "the mask file must start by 0 and the latitudes must be ordered from ", + ### "highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} + } + } + + lon_indices <- seq_along(lon) + if (!(lonlat_subsetting_requested && remap_needed)) { + lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 + } + if (lonmax >= lonmin) { + lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] + } else if (!remap_needed) { + lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] + } + lat_indices <- which(lat >= latmin & lat <= latmax) + ## In most of the cases the latitudes are ordered from -90 to 90. + ## We will reorder them to be in the order from 90 to -90, so mostly + ## always the latitudes are reordered. + ## TODO: This could be avoided in future. 
+ if (lat[1] < lat[length(lat)]) { + lat_indices <- lat_indices[rev(seq_along(lat_indices))] + } + if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { + if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { + stop("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is wrong. It must be on the ", + "common grid if the selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must be on the grid of ", + "the corresponding dataset if the selected output type is 'areave' and ", + "no 'grid' has been specified. For more information check ?Load and see ", + "help on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask <- mask[lon_indices, lat_indices] + } + ## If the user requests subsetting, we must extend the lon and lat limits if possible + ## so that the interpolation after is done properly + maximum_extra_points <- work_piece[['remapcells']] + if (lonlat_subsetting_requested && remap_needed) { + if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || + (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { + ## if the requested number of points goes beyond the left or right + ## sides of the map, we need to take the entire map so that the + ## interpolation works properly + lon_indices <- seq_along(lon) + } else { + extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) + if (extra_points > 0) { + lon_indices <- + c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + } + extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) + if (extra_points > 0) { + lon_indices <- c(lon_indices, + (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + } + } + min_lat_ind <- min(lat_indices) + max_lat_ind <- max(lat_indices) + extra_points <- min(maximum_extra_points, min_lat_ind - 1) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) + } else { + lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) + } + } + extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) + } else { + lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) + } + } + } + lon <- lon[lon_indices] + lat <- lat[lat_indices] + expected_dims <- c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) + } else { + lon <- 0 + lat <- 0 + } + # We keep on filling the expected dimensions + var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + nmemb <- nltime <- NULL + ## Sometimes CDO renames 'members' dimension to 'lev' + old_members_dimname <- NULL + if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { + old_members_dimname <- work_piece[['dimnames']][['member']] + work_piece[['dimnames']][['member']] <- 'lev' + } + if (work_piece[['dimnames']][['member']] %in% var_dimnames) { + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], + var_dimnames)]]$len + expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) + } else { + nmemb <- 1 + } + if (length(expected_dims) > 0) { + dim_matches <- match(expected_dims, var_dimnames) + if (anyNA(dim_matches)) { + if (!is.null(old_members_dimname)) { + 
expected_dims[which(expected_dims == 'lev')] <- old_members_dimname
+ }
+ stop("Error: the expected dimension(s) ",
+ toString(expected_dims[which(is.na(dim_matches))]),
+ " were not found in ", filename)
+ }
+ time_dimname <- var_dimnames[-dim_matches]
+ } else {
+ time_dimname <- var_dimnames
+ }
+ if (length(time_dimname) > 0) {
+ if (length(time_dimname) == 1) {
+ nltime <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len
+ expected_dims <- c(expected_dims, time_dimname)
+ dim_matches <- match(expected_dims, var_dimnames)
+ } else {
+ if (!is.null(old_members_dimname)) {
+ expected_dims[which(expected_dims == 'lev')] <- old_members_dimname
+ }
+ stop("Error: the variable ", namevar,
+ " is defined over more dimensions than the expected (",
+ toString(c(expected_dims, 'time')),
+ "). It could also be that the members, longitude or latitude ",
+ "dimensions are named incorrectly. In that case, either rename ",
+ "the dimensions in the file or adjust Load() to recognize the actual ",
+ "name with the parameter 'dimnames'. See file ", filename)
+ }
+ } else {
+ nltime <- 1
+ }
+
+ # Now we must retrieve the data from the file, but only the asked indices.
+ # So we build up the indices to retrieve.
+ # Longitudes or latitudes have been retrieved already.
+ if (explore_dims) {
+ # If we're exploring the file we only want one time step from one member,
+ # to regrid it and work out the number of longitudes and latitudes.
+ # We don't need more.
+ members <- 1
+ ltimes_list <- list(1)
+ } else {
+ # The data is arranged in the array 'tmp' with the dimensions in a
+ # common order:
+ # 1) Longitudes
+ # 2) Latitudes
+ # 3) Members (even if it is not a file-per-member experiment)
+ # 4) Lead-times
+ if (work_piece[['is_file_per_dataset']]) {
+ time_indices <- 1:nltime
+ mons <- strsplit(system(paste('cdo showmon ', filein,
+ ' 2>/dev/null'), intern = TRUE), split = ' ')
+ years <- strsplit(system(paste('cdo showyear ', filein,
+ ' 2>/dev/null'), intern = TRUE), split = ' ')
+ mons <- as.numeric(mons[[1]][which(mons[[1]] != "")])
+ years <- as.numeric(years[[1]][which(years[[1]] != "")])
+ time_indices <- ts(time_indices, start = c(years[1], mons[1]),
+ end = c(years[length(years)], mons[length(mons)]),
+ frequency = 12)
+ ltimes_list <- list()
+ for (sdate in work_piece[['startdates']]) {
+ selected_time_indices <- window(time_indices, start = c(as.numeric(
+ substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))),
+ end = c(3000, 12), frequency = 12, extend = TRUE)
+ selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]]
+ ltimes_list <- c(ltimes_list, list(selected_time_indices))
+ }
+ } else {
+ ltimes <- work_piece[['leadtimes']]
+ #if (work_piece[['dataset_type']] == 'exp') {
+ ltimes_list <- list(ltimes[which(ltimes <= nltime)])
+ #}
+ }
+ ## TODO: Emit this kind of warning when reading matrices, e.g.:
+ # if (nmember < nmemb) {
+ # cat("Warning:
+ members <- 1:work_piece[['nmember']]
+ members <- members[which(members <= nmemb)]
+ }
+
+ # Now, for each list of leadtimes to load (usually only one list with all leadtimes),
+ # we'll join the indices and retrieve data
+ found_disordered_dims <- FALSE
+ for (ltimes in ltimes_list) {
+ if (is_2d_var) {
+ start <- c(min(lon_indices), min(lat_indices))
+ end <- c(max(lon_indices), max(lat_indices))
+ if (lonlat_subsetting_requested && remap_needed) {
+ subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1,
+ lat_indices - min(lat_indices) + 1)
+ dim_longitudes <- 
ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ncdf_dims <- list(dim_longitudes, dim_latitudes) + } else { + subset_indices <- list(lon_indices - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + ncdf_dims <- list() + } + final_dims <- c(length(subset_indices[[1]]), length(subset_indices[[2]]), 1, 1) + } else { + start <- end <- NULL + subset_indices <- list() + ncdf_dims <- list() + final_dims <- c(1, 1, 1, 1) + } + + if (work_piece[['dimnames']][['member']] %in% expected_dims) { + start <- c(start, head(members, 1)) + end <- c(end, tail(members, 1)) + subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) + dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) + ncdf_dims <- c(ncdf_dims, list(dim_members)) + final_dims[3] <- length(members) + } + if (time_dimname %in% expected_dims) { + if (!all(is.na(ltimes))) { + start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) + end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) + subset_indices <- c(subset_indices, + list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + } else { + start <- c(start, NA) + end <- c(end, NA) + subset_indices <- c(subset_indices, list(ltimes)) + } + dim_time <- ncdim_def(time_dimname, "", seq_along(ltimes), unlim = TRUE) + ncdf_dims <- c(ncdf_dims, list(dim_time)) + final_dims[4] <- length(ltimes) + } + count <- end - start + 1 + start <- start[dim_matches] + count <- count[dim_matches] + subset_indices <- subset_indices[dim_matches] + # Now that we have the indices to retrieve, we retrieve the data + if (prod(final_dims) > 0) { + tmp <- take(ncvar_get(fnc, namevar, start, count, + collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + # The data is regridded if it corresponds to an atmospheric variable. When + # the chosen output type is 'areave' the data is not regridded to not + # waste computing time unless the user specified a common grid. 
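+ # Editor's note (illustrative, with hypothetical file names): the chained
+ # CDO call built below is equivalent to running, for conservative
+ # remapping onto a regular 256x128 grid over a European domain:
+ # cdo -L -s -sellonlatbox,-20,40,30,70 -remapcon,r256x128 in.nc out.nc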
+ if (is_2d_var) { + ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### start[dim_matches[1:2]], count[dim_matches[1:2]], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + if (lonlat_subsetting_requested && remap_needed) { + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") + ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], + fnc$var[[namevar]]$missval, + prec = if (fnc$var[[namevar]]$prec == 'int') { + 'integer' + } else { + fnc$var[[namevar]]$prec + }) + scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) + add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) + if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { + tmp <- (tmp - add_offset) / scale_factor + } + #nc_close(fnc) + fnc2 <- nc_create(filein2, list(ncdf_var)) + ncvar_put(fnc2, ncdf_var, tmp) + if (add_offset != 0) { + ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) + } + if (scale_factor != 1) { + ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) + } + nc_close(fnc2) + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { + "0,360," + } else { + paste0(lonmin, ",", lonmax, ",") + }, latmin, ",", latmax, + " -remap", work_piece[['remap']], ",", common_grid_name, + " ", filein2, " ", filein, " 2>/dev/null")) + file.remove(filein2) + fnc2 <- nc_open(filein) + sub_lon <- ncvar_get(fnc2, 'lon') + sub_lat <- ncvar_get(fnc2, 'lat') + ## We read the longitudes and latitudes from the file. + ## In principle cdo should put in order the longitudes + ## and slice them properly unless data is across greenwich + sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 + sub_lon_indices <- seq_along(sub_lon) + if (lonmax < lonmin) { + sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] + } + sub_lat_indices <- seq_along(sub_lat) + ## In principle cdo should put in order the latitudes + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + sub_lat_indices <- rev(seq_along(sub_lat)) + } + final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) + subset_indices[[dim_matches[1]]] <- sub_lon_indices + subset_indices[[dim_matches[2]]] <- sub_lat_indices + + tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + + if (!is.null(mask)) { + ## We create a very simple 2d netcdf file that is then interpolated to the common + ## grid to know what are the lons and lats of our slice of data + mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], + "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], + "degrees_north", c(-90, 90)) + ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) + nc_close(fnc_mask) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null")) + fnc_mask <- nc_open(mask_file_remap) + mask_lons <- ncvar_get(fnc_mask, 'lon') + mask_lats <- ncvar_get(fnc_mask, 'lat') + nc_close(fnc_mask) 
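+ # Editor's note (illustrative, not part of the original code): remapping
+ # the trivial 2x2 global field above is a cheap way to recover the axes of
+ # the common grid; e.g. for a grid named 'r360x180', the remapped file
+ # holds lon centres 0.5, 1.5, ..., 359.5 and lat centres -89.5, ..., 89.5.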
+ file.remove(mask_file, mask_file_remap)
+ if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) {
+ stop("Error: the mask of the dataset with index ",
+ tail(work_piece[['indices']], 1), " in '",
+ work_piece[['dataset_type']],
+ "' is wrong. It must be on the common grid if the ",
+ "selected output type is 'lonlat', 'lon' or 'lat', ",
+ "or 'areave' and 'grid' has been specified. It must ",
+ "be on the grid of the corresponding dataset if the ",
+ "selected output type is 'areave' and no 'grid' has been ",
+ "specified. For more information check ?Load and see help ",
+ "on parameters 'grid', 'maskmod' and 'maskobs'.")
+ }
+ mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360
+ if (lonmax >= lonmin) {
+ mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax))
+ } else {
+ mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax))
+ }
+ mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax))
+ if (sub_lat[1] < sub_lat[length(sub_lat)]) {
+ mask_lat_indices <- mask_lat_indices[rev(seq_along(mask_lat_indices))]
+ }
+ mask <- mask[mask_lon_indices, mask_lat_indices]
+ }
+ sub_lon <- sub_lon[sub_lon_indices]
+ sub_lat <- sub_lat[sub_lat_indices]
+ ### nc_close(fnc_mask)
+ ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) {
+ ### "0,360,"
+ ### } else {
+ ### paste0(lonmin, ",", lonmax, ",")
+ ### }, latmin, ",", latmax,
+ ### " -remap", work_piece[['remap']], ",", common_grid_name,
+ ###This is wrong: same files
+ ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = ""))
+ ### fnc_mask <- nc_open(mask_file)
+ ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']],
+ ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]])
+ ###}
+ }
+ }
+ if (is.unsorted(dim_matches)) {
+ if (!found_disordered_dims &&
+ rev(work_piece[['indices']])[2] == 1 &&
+ rev(work_piece[['indices']])[3] == 1) {
+ found_disordered_dims <- TRUE
+ .warning(paste0("The dimensions for the variable ", namevar,
+ " in the files of the experiment with index ",
+ tail(work_piece[['indices']], 1),
+ " are not in the optimal order for loading with Load(). ",
+ "The optimal order would be '",
+ toString(expected_dims),
+ "'. One of the files of the dataset is stored in ", filename))
+ }
+ tmp <- aperm(tmp, dim_matches)
+ }
+ dim(tmp) <- final_dims
+ # If we are exploring the file we don't need to process and arrange
+ # the retrieved data. We only need to keep the dimension sizes.
+ if (is_2d_var && lonlat_subsetting_requested && remap_needed) {
+ final_lons <- sub_lon
+ final_lats <- sub_lat
+ } else {
+ final_lons <- lon
+ final_lats <- lat
+ }
+ if (explore_dims) {
+ if (work_piece[['is_file_per_member']]) {
+ ## TODO: When the exp_full_path contains asterisks and is file_per_member,
+ ## members from different datasets may be counted together.
+ ## Also, if one member file is missing, the accounting will be wrong.
+ ## The file names should be parsed to extract the number of members.
+ if (is_url) {
+ nmemb <- NULL
+ } else {
+ nmemb <- length(files)
+ }
+ }
+ dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats)
+ } else {
+ # If we are not exploring, then we have to process the retrieved data
+ if (is_2d_var) {
+ tmp <- apply(tmp, c(3, 4), function(x) {
+ # Discard values outside the requested variable limits.
+ if (!is.na(work_piece[['var_limits']][2])) {
+ x[which(x > work_piece[['var_limits']][2])] <- NA
+ }
+ if (!is.na(work_piece[['var_limits']][1])) {
+ x[which(x < work_piece[['var_limits']][1])] <- NA
+ }
+ if (!is.null(mask)) {
+ x[which(mask < 0.5)] <- NA
+ }
+
+ if (output == 'areave' || output == 'lon') {
+ weights <- InsertDim(cos(final_lats * pi / 180), 1,
+ length(final_lons), name = 'lon')
+ weights[which(is.na(x))] <- NA
+ if (output == 'areave') {
+ weights <- weights / mean(weights, na.rm = TRUE)
+ mean(x * weights, na.rm = TRUE)
+ } else {
+ weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2,
+ length(final_lats), name = 'lat')
+ MeanDims(x * weights, 2, na.rm = TRUE)
+ }
+ } else if (output == 'lat') {
+ MeanDims(x, 1, na.rm = TRUE)
+ } else if (output == 'lonlat') {
+ signif(x, 5)
+ }
+ })
+ if (output == 'areave') {
+ dim(tmp) <- c(1, 1, final_dims[3:4])
+ } else if (output == 'lon') {
+ dim(tmp) <- c(final_dims[1], 1, final_dims[3:4])
+ } else if (output == 'lat') {
+ dim(tmp) <- c(1, final_dims[c(2, 3, 4)])
+ } else if (output == 'lonlat') {
+ dim(tmp) <- final_dims
+ }
+ }
+ var_data <- attach.big.matrix(work_piece[['out_pointer']])
+ if (work_piece[['dims']][['member']] > 1 && nmemb > 1 &&
+ work_piece[['dims']][['ftime']] > 1 &&
+ nltime < work_piece[['dims']][['ftime']]) {
+ work_piece[['indices']][2] <- work_piece[['indices']][2] - 1
+ for (jmemb in members) {
+ work_piece[['indices']][2] <- work_piece[['indices']][2] + 1
+ out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']])
+ out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1)
+ var_data[out_indices] <- as.vector(tmp[, , jmemb, ])
+ }
+ work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1
+ } else {
+ out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']])
+ out_indices <- out_position:(out_position + length(tmp) - 1)
+ a <- aperm(tmp, c(1, 2, 4, 3))
+ var_data[out_indices] <- as.vector(a)
+ }
+ work_piece[['indices']][3] <- work_piece[['indices']][3] + 1
+ }
+ }
+ }
+ nc_close(fnc)
+ if (is_2d_var) {
+ if (remap_needed) {
+ array_across_gw <- FALSE
+ file.remove(filein)
+ ###if (!is.null(mask) && lonlat_subsetting_requested) {
+ ### file.remove(mask_file)
+ ###}
+ } else {
+ if (first_lon_in_original_file < 0) {
+ array_across_gw <- data_across_gw
+ } else {
+ array_across_gw <- FALSE
+ }
+ }
+ }
+ }
+ if (explore_dims) {
+ list(dims = dims, is_2d_var = is_2d_var, grid = grid_name,
+ units = units, var_long_name = var_long_name,
+ data_across_gw = data_across_gw, array_across_gw = array_across_gw)
+ } else {
+ ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) {
+ ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection)
+ ###}
+ if (!silent && !is.null(work_piece[['progress_amount']])) {
+ message(work_piece[['progress_amount']], appendLF = FALSE)
+ }
+ found_file
+ }
+}
+
+.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates,
+ nmember = NULL, nmemberobs = NULL,
+ nleadtime = NULL, leadtimemin = 1,
+ leadtimemax = NULL, storefreq = 'monthly',
+ sampleperiod = 1, lonmin = 0, lonmax = 360,
+ latmin = -90, latmax = 90, output = 'areave',
+ method = 'conservative', grid = NULL,
+ maskmod = vector("list", 15),
+ maskobs = vector("list", 15),
+ configfile = NULL, suffixexp = NULL,
+ suffixobs = NULL, varmin = NULL, varmax = NULL,
+ silent = FALSE, nprocs = NULL) {
+ ## This 
function loads and selects sample data stored in sampleMap and + ## sampleTimeSeries and is used in the examples instead of Load() so as + ## to avoid nco and cdo system calls and computation time in the stage + ## of running examples in the CHECK process on CRAN. + selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', + '20001101', '20051101')) + start_dates_position <- 3 + lead_times_position <- 4 + + if (output == 'lonlat') { + sampleData <- s2dv::sampleMap + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] + } else if (output == 'areave') { + sampleData <- s2dv::sampleTimeSeries + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] + } + + dims_out <- dim(sampleData$mod) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$mod) <- dims_out + + dims_out <- dim(sampleData$obs) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$obs) <- dims_out + + invisible(list(mod = dataOut$mod, obs = dataOut$obs, + lat = dataOut$lat, lon = dataOut$lon)) +} + +.ConfigGetDatasetInfo <- function(matching_entries, table_name) { + # This function obtains the information of a dataset and variable pair, + # applying all the entries that match in the configuration file. + if (table_name == 'experiments') { + id <- 'EXP' + } else { + id <- 'OBS' + } + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), + paste0('$DEFAULT_', id, '_FILE_PATH$'), + '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', + '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + info <- NULL + + for (entry in matching_entries) { + if (is.null(info)) { + info <- entry[-1:-2] + info[which(info == '*')] <- defaults[which(info == '*')] + } else { + info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] + } + } + + info <- as.list(info) + names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') + info +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. 
The path with expressions and tags, e.g.:
+ # /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc
+ # and a complete known path to one of the matching files, e.g.:
+ # /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc
+ # and it returns the path pattern but without shell globbing expressions:
+ # /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc
+ #
+ # To do that, it also needs the list of replace values (the association of
+ # each tag with its value) as input.
+ #
+ # All the tags not present in the parameter tags_to_keep will be replaced.
+ #
+ # Not all cases can be resolved with the implemented algorithm. In an
+ # unsolvable case a warning is given and one possible guess is returned.
+ #
+ # In some cases it is interesting to replace only the expressions in the
+ # path to the file, but not the ones in the file name itself. To keep the
+ # expressions in the file name, the parameter permissive can be set to
+ # TRUE. To replace all the expressions it can be set to FALSE.
+ clean <- function(x) {
+ if (nchar(x) > 0) {
+ x <- gsub('\\\\', '', x)
+ x <- gsub('\\^', '', x)
+ x <- gsub('\\$', '', x)
+ x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y)))
+ do.call(paste0, as.list(x))
+ } else {
+ x
+ }
+ }
+
+ strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "")
+
+ if (permissive) {
+ actual_path_chunks <- strsplit(actual_path, '/')[[1]]
+ actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/')
+ file_name <- tail(actual_path_chunks, 1)
+ if (length(actual_path_chunks) > 1) {
+ file_name <- paste0('/', file_name)
+ }
+ path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]]
+ path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)],
+ collapse = '/')
+ path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values)
+ file_name_with_globs <- tail(path_with_globs_chunks, 1)
+ if (length(path_with_globs_chunks) > 1) {
+ file_name_with_globs <- paste0('/', file_name_with_globs)
+ }
+ right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1)
+ right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values)
+ path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags))
+ match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE),
+ paste0(actual_path, file_name))
+ if (match != 1) {
+ stop("Incorrect parameters to replace glob expressions. 
", + "The path with expressions does not match the actual path.") + } + if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') + file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + } + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- NULL + tags_to_replace_starts <- NULL + tags_to_replace_ends <- NULL + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in seq_along(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, + matches[i] - 1), replace_values) + left_known <- + strReverse(head(strsplit(strReverse(left), + strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, + matches[i] + lengths[i], + nchar(path_with_globs_rx)), + replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- + c(left_match + match_len - 1 - nchar(clean(right_known)) - + nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- + c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path, head(tags_to_replace_ends, 1) + 1, + nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - + (head(tags_to_replace_ends, 1) - + head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset ", + "or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path, file_name_with_globs) + } else { + actual_path + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + tag <- paste0('\\$', tag, '\\$') + path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') + parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] + parts <- as.list(grep(tag, parts, value = TRUE)) + longest_couples <- NULL + pos_longest_couples <- NULL + found_value <- NULL + for (i in seq_along(parts)) { + parts[[i]] <- strsplit(parts[[i]], tag)[[1]] + if (length(parts[[i]]) == 1) { + parts[[i]] <- c(parts[[i]], '') + } + len_parts <- sapply(parts[[i]], nchar) + len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] + pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) + longest_couples <- c(longest_couples, max(len_couples)) + } + chosen_part <- which.max(longest_couples) + parts[[chosen_part]] <- + parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + actual_path <- + substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + found_value <- substr(actual_path, 0, match_right - 1) + } + } + } else { + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + actual_path <- substr(actual_path, 0, match_right - 1) + } + } + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + found_value <- + substr(actual_path, match_left + attr(match_left, 'match.length'), + nchar(actual_path)) + } + } + } + found_value 
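+ # Editor's note, illustrative usage with hypothetical paths:
+ # .FindTagValue("/data/$VAR$_*.nc", "/data/tas_day.nc", "VAR") # "tas"
+ # NULL is returned when the tag value cannot be determined unambiguously.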
+}
+
+.FilterUserGraphicArgs <- function(excludedArgs, ...) {
+ # This function filters the extra graphical parameters passed by the user in
+ # a plot function, excluding the ones that the plot function uses by default.
+ # Each plot function has a different set of arguments that are not allowed to
+ # be modified.
+ args <- list(...)
+ userArgs <- list()
+ for (name in names(args)) {
+ if ((name != "") & !is.element(name, excludedArgs)) {
+ # If the argument has a name and it is not in the list of excluded
+ # arguments, then it is added to the list that will be used
+ userArgs[[name]] <- args[[name]]
+ } else {
+ .warning(paste0("the argument '", name, "' cannot be modified ",
+ "and the new value will be ignored"))
+ }
+ }
+ userArgs
+}
+
+.SelectDevice <- function(fileout, width, height, units, res) {
+ # This function is used in the plot functions to check the extension of the
+ # files where the graphics will be stored and select the right R device to
+ # save them.
+ # If the vector of filenames ('fileout') has files with different
+ # extensions, then it will only accept the first one, changing all the rest
+ # of the filenames to use that extension.
+
+ # We extract the extension of the filenames: '.png', '.pdf', ...
+ ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout))
+
+ if (length(ext) != 0) {
+ # If there is an extension specified, select the correct device
+ ## units of width and height set to accept inches
+ if (ext[1] == ".png") {
+ saveToFile <- function(fileout) {
+ png(filename = fileout, width = width, height = height, res = res, units = units)
+ }
+ } else if (ext[1] == ".jpeg") {
+ saveToFile <- function(fileout) {
+ jpeg(filename = fileout, width = width, height = height, res = res, units = units)
+ }
+ } else if (ext[1] %in% c(".eps", ".ps")) {
+ saveToFile <- function(fileout) {
+ postscript(file = fileout, width = width, height = height)
+ }
+ } else if (ext[1] == ".pdf") {
+ saveToFile <- function(fileout) {
+ pdf(file = fileout, width = width, height = height)
+ }
+ } else if (ext[1] == ".svg") {
+ saveToFile <- function(fileout) {
+ svg(filename = fileout, width = width, height = height)
+ }
+ } else if (ext[1] == ".bmp") {
+ saveToFile <- function(fileout) {
+ bmp(filename = fileout, width = width, height = height, res = res, units = units)
+ }
+ } else if (ext[1] == ".tiff") {
+ saveToFile <- function(fileout) {
+ tiff(filename = fileout, width = width, height = height, res = res, units = units)
+ }
+ } else {
+ .warning("file extension not supported; '.eps' will be used by default.")
+ ## In case there is only one filename
+ fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1])
+ ext[1] <- ".eps"
+ saveToFile <- function(fileout) {
+ postscript(file = fileout, width = width, height = height)
+ }
+ }
+ # Change filenames when necessary
+ if (any(ext != ext[1])) {
+ .warning(paste0("some extensions of the filenames provided in 'fileout' ",
+ "are not ", ext[1],
+ ". The extensions are being converted to ", ext[1], "."))
+ fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout)
+ }
+ } else {
+ # Default filenames when there is no specification
+ .warning("there are no extensions specified in the filenames; defaulting to '.eps'.")
+ fileout <- paste0(fileout, ".eps")
+ saveToFile <- postscript
+ }
+
+ # return the correct function with the graphical device, and the correct
+ # filenames
+ list(fun = saveToFile, files = fileout)
+}
+
+.message <- function(...) 
{ + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + message(tmp, appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + warning(tmp, call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.IsColor <- function(x) { + res <- try(col2rgb(x), silent = TRUE) + return(!is(res, "try-error")) +} + +# This function switches to a specified figure at position (row, col) in a layout. +# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is +# activated, i.e., all drawn elements will be superimposed. 
Additionally, after
+# using this function, the automatic pointing to the next figure in the layout
+# will be broken: once the last figure in the layout is drawn, the pointer won't
+# move to the first figure in the layout.
+# Only figures with numbers other than 0 (when creating the layout) will be
+# accessible.
+# Inputs: either row and col, or n and mat
+.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) {
+ if (!is.null(n) && !is.null(mat)) {
+ if (!is.numeric(n) || length(n) != 1) {
+ stop("Parameter 'n' must be a single numeric value.")
+ }
+ n <- round(n)
+ if (!is.array(mat)) {
+ stop("Parameter 'mat' must be an array.")
+ }
+ target <- which(mat == n, arr.ind = TRUE)[1, ]
+ row <- target[1]
+ col <- target[2]
+ } else if (!is.null(row) && !is.null(col)) {
+ if (!is.numeric(row) || length(row) != 1) {
+ stop("Parameter 'row' must be a single numeric value.")
+ }
+ row <- round(row)
+ if (!is.numeric(col) || length(col) != 1) {
+ stop("Parameter 'col' must be a single numeric value.")
+ }
+ col <- round(col)
+ } else {
+ stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.")
+ }
+ next_attempt <- c(row, col)
+ par(mfg = next_attempt)
+ i <- 1
+ layout_size <- par('mfrow')
+ layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2],
+ byrow = TRUE)
+ while (any((par('mfg')[1:2] != c(row, col)))) {
+ next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ]
+ par(mfg = next_attempt)
+ i <- i + 1
+ if (i > prod(layout_size)) {
+ stop("Figure not accessible.")
+ }
+ }
+ plot(0, type = 'n', axes = FALSE, ann = FALSE)
+ par(mfg = next_attempt)
+}
+
+# Function to permute arrays of non-atomic elements (e.g. POSIXct)
+.aperm2 <- function(x, new_order) {
+ old_dims <- dim(x)
+ attr_bk <- attributes(x)
+ if ('dim' %in% names(attr_bk)) {
+ attr_bk[['dim']] <- NULL
+ }
+ if (is.numeric(x)) {
+ x <- aperm(x, new_order)
+ } else {
+ y <- array(seq_along(x), dim = dim(x))
+ y <- aperm(y, new_order)
+ x <- x[as.vector(y)]
+ }
+ dim(x) <- old_dims[new_order]
+ attributes(x) <- c(attributes(x), attr_bk)
+ x
+}
+
+# This function is a helper for the function .MergeArrays.
+# It expects as inputs two named numeric vectors, and it extends them
+# with dimensions of length 1 until an ordered common dimension
+# format is reached.
+# The first output is dims1 extended with 1s.
+# The second output is dims2 extended with 1s.
+# The third output is a merged dimension vector. If dimensions with
+# the same name are found in the two inputs, and they have a different
+# length, the maximum is taken.
+.MergeArrayDims <- function(dims1, dims2) {
+ new_dims1 <- NULL
+ new_dims2 <- NULL
+ while (length(dims1) > 0) {
+ if (names(dims1)[1] %in% names(dims2)) {
+ pos <- which(names(dims2) == names(dims1)[1])
+ dims_to_add <- rep(1, pos - 1)
+ if (length(dims_to_add) > 0) {
+ names(dims_to_add) <- names(dims2[1:(pos - 1)])
+ }
+ new_dims1 <- c(new_dims1, dims_to_add, dims1[1])
+ new_dims2 <- c(new_dims2, dims2[1:pos])
+ dims1 <- dims1[-1]
+ dims2 <- dims2[-(1:pos)]
+ } else {
+ new_dims1 <- c(new_dims1, dims1[1])
+ new_dims2 <- c(new_dims2, 1)
+ names(new_dims2)[length(new_dims2)] <- names(dims1)[1]
+ dims1 <- dims1[-1]
+ }
+ }
+ if (length(dims2) > 0) {
+ dims_to_add <- rep(1, length(dims2))
+ names(dims_to_add) <- names(dims2)
+ new_dims1 <- c(new_dims1, dims_to_add)
+ new_dims2 <- c(new_dims2, dims2)
+ }
+ list(new_dims1, new_dims2, pmax(new_dims1, new_dims2))
+}
+
+# This function takes two named arrays and merges them, filling with
+# NA where needed. 
+# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in seq_along(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# only can be used in Trend(). Needs generalization or be replaced by other function. +.reorder <- function(output, time_dim, dim_names) { + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { + names(dim(output)) <- time_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { #regular case + names(dim(output))[1] <- time_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' + return(output) +} + +# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { + + if (type == 'dcpp') { + + fyear_dim <- 'fyear' + data <- Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data)) == fmonth_dim)] <- fyear_dim + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + + anom <- data + + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) + + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- + seq(from = indices_for_clim[length(indices_for_clim)], + 
by = -1, length.out = n_fyears) + } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i, first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i, ] <- data[i, ] - clim + } + } + + } else if (type %in% c('obs', 'hist')) { + + data <- multiApply::Apply(data = data, target_dims = month_dim, + fun = mean, na.rm = na.rm)$output1 + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { + ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, + na.rm = na.rm)$output1 + } else { + ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, + indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } + + anom <- data - clim + + } else { + stop('type must be dcpp, hist or obs') + } + + return(anom) +} + +#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. +# The function is temporarily here because PlotLayout() needs to draw the colorbars of +# PlotMostLikelyQuantileMap(). +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, + label_scale = 1, extra_margin = rep(0, 4), + ...) { + # bar_limits + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(seq_along(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in seq_along(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } 
else {
+ bar_titles <- paste0("Cat. ", 1:nmap, " (%)")
+ }
+ }
+
+ if (plot) {
+ for (k in 1:nmap) {
+ s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg,
+# bar_limits = bar_limits, var_limits = var_limits,
+ triangle_ends = triangle_ends, plot = TRUE,
+ draw_separators = draw_separators,
+ title = bar_titles[[k]], title_scale = title_scale,
+ label_scale = label_scale, extra_margin = extra_margin)
+ }
+ } else {
+ #TODO: col_inf and col_sup
+ return(list(brks = brks, cols = cols))
+ }
+
+}
+
+
diff --git a/modules/Crossval/recipe_crossval_ecvs.yml b/modules/Crossval/recipe_crossval_ecvs.yml
new file mode 100644
index 00000000..e23495ec
--- /dev/null
+++ b/modules/Crossval/recipe_crossval_ecvs.yml
@@ -0,0 +1,184 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+ Author: N. Pérez-Zanón
+ Info: This recipe can be used to test Crossval_anomlaies.R and Crossval_skill.R for single and multimodel.
+Analysis:
+ Horizon: seasonal # Mandatory, str: 'seasonal' or 'decadal'. Subseasonal is in development
+ Variables:
+ # name: variable name(s) in the /esarchive (Mandatory, str)
+ # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+ # units: desired data units for each variable. Only available for temperature,
+ # precipitation, and pressure variables.
+ - {name: 'tas', freq: 'monthly_mean', units: 'K'}
+ # To request more variables to be divided in atomic recipes, add them this way:
+ # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'}
+ # To request multiple variables *in the same* atomic recipe, add them this way:
+ # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}}
+ Datasets:
+ System:
+ # name: System name (Mandatory, str)
+ # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+ - {name: 'ECMWF-SEAS5.1', member: 'all'}
+ - {name: 'Meteo-France-System7'}
+ # To request more Systems to be divided in atomic recipes, add them this way:
+ # - {name: 'Meteo-France-System7'}
+ Multimodel:
+ execute: no # Either yes/true or no/false (Mandatory, bool)
+ approach: pooled # Multimodel computation approach. 'pooled' is currently the only option (str)
+ createFrom: Anomalies # Module from which the multimodel should be created (str)
+ Reference:
+ - {name: 'ERA5'} # Reference name (Mandatory, str)
+ # To request more References to be divided into atomic recipes, add them this way:
+ # - {name: 'ERA5Land'}
+ Time:
+ sdate:
+ - '0101'
+ - '0201'
+ - '0301'
+ - '0401'
+ - '0501'
+ - '0601'
+ - '0701'
+ - '0801'
+ - '0901'
+ - '1001'
+ - '1101'
+ - '1201' # Start date, 'mmdd' (Mandatory, int)
+ # To request more startdates to be divided into atomic recipes, add them this way:
+ # - '0101'
+ # - '0201'
+ # ...
+ fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int)
+ hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+ hcst_end: '1996' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+ ftime_min: 1 # First forecast time step in months. Starts at "1". (Mandatory, int)
+ ftime_max: 6 # Last forecast time step in months. Starts at "1". (Mandatory, int)
+ Region:
+ # latmin: minimum latitude (Mandatory, int)
+ # latmax: maximum latitude (Mandatory, int)
+ # lonmin: minimum longitude (Mandatory, int)
+ # lonmax: maximum longitude (Mandatory, int)
+ - {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+ # To request more regions to be divided in atomic recipes, add them this way:
+ # {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
+ Regrid:
+ method: conservative # Interpolation method (Mandatory, str)
+ type: to_system # Interpolate to: 'to_system', 'to_reference', 'none',
+ # or CDO-accepted grid. (Mandatory, str)
+ Workflow:
+ # This is the section of the recipe where the parameters for each module are specified
+ Calibration:
+ method: raw # Calibration method. (Mandatory, str)
+ save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+ Anomalies:
+ compute: yes # Either yes/true or no/false (Mandatory, bool)
+ cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool)
+ save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+ Downscaling:
+ # Assumption 1: leave-one-out cross-validation is always applied
+ # Assumption 2: for analogs, we select the best analog (minimum distance)
+ type: intbc # Mandatory. Options: 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.
+ int_method: conservative # Regridding method accepted by CDO. (Mandatory, str)
+ bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'.
+ lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn'
+ log_reg_method: # If type=logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+ target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO
+ nanalogs: # If type=analogs. Number of analogs to be searched
+ save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str)
+ Time_aggregation:
+ execute: no # Either yes/true or no/false. Defaults to false. (Mandatory, bool)
+ method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, string)
+ # ini and end: lists of paired initial and final time steps to aggregate.
+ # In this example, aggregate from 1 to 2, from 2 to 3 and from 1 to 3
+ ini: [1, 2, 1]
+ end: [2, 3, 3]
+ # user_def: list of lists of custom user-defined forecast times to aggregate.
+ # Elements should be named; the names can be chosen by the user.
+ # An R expression can be entered using '!expr'; it will be evaluated by the code.
+ # If both ini/end and user_def are defined, ini/end takes preference.
+ user_def:
+ DJF_Y1: [1, 3] # aggregate from forecast time 1 to 3
+ DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,...
+ Indices:
+ ## Indices available: - NAO (for psl and/or z500);
+ # - Nino1+2, Nino3, Nino3.4, Nino4 (for tos)
+ ## Each index can only be computed if its area is within the selected region.
+ # obsproj: NAO computation method (see s2dv::NAO()). Default is yes/true. (Optional, bool)
+ # save: What to save. Options: 'all'/'none'. Default is 'all'.
+ # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool)
+ # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool)
+ # alpha: Significance threshold. Default value is 0.05 (Optional, numeric)
+      #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      # Also available if variable is psl and/or z500:
+      # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes}
+    Skill:
+      metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      alpha: 0.05
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Probabilities:
+      percentiles: [[1/3, 2/3]] # Thresholds
+      # for quantiles and probability categories. Each set of thresholds should be
+      # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional)
+      save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str)
+    Visualization:
+      plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str)
+      multi_panel: no # Multi-panel plot or single-panel plots. Default is no/false. (Optional, bool)
+      projection: 'robinson' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical_equidistant. (Optional, str)
+      mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      dots_terciles: yes # Whether to dot the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF.
+    Scorecards:
+      execute: yes # yes/no
+      regions:
+        # Mandatory: Define regions for which the spatial aggregation will be performed.
+        # The regions must be included within the area defined in the 'Analysis:Region' section.
+        Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        Extra-tropical SH: {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+      start_months: 1, 2, 3 # Mandatory, int: start months to visualise in the scorecard table. Options: 'all' or a sequence of numbers.
+      metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in the scorecard table
+      metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill'
+      inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False
+      table_label: NULL # Optional, str: extra information to add to the scorecard table title
+      fileout_label: NULL # Optional, str: extra information to add to the scorecard output filename
+      col1_width: NULL # Optional, int: to adjust the width of the first column of the scorecards table
+      col2_width: NULL # Optional, int: to adjust the width of the second column of the scorecards table
+      calculate_diff: False # Mandatory, bool: True/False
+  cross.method: 'leave-one-out' # Default 'leave-one-out'
+  ncores: 16 # Number of cores to be used in parallel computation.
+  # If left empty, defaults to 1. (Optional, int)
+  remove_NAs: yes # Whether to remove NAs.
+  # If left empty, defaults to no/false. (Optional, bool)
+  Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'.
+Run:
+  filesystem: esarchive # Name of the filesystem as defined in the archive configuration file
+  Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'.
+  # Default value is 'INFO'. (Optional, str)
+  Terminal: yes # Optional, bool: Whether to display log messages in the terminal.
+  # Default is yes/true.
+  output_dir: /esarchive/scratch/nperez/git4/ # Output directory. Must have write permissions. (Mandatory, str)
+  code_dir: /esarchive/scratch/nperez/git4/sunset/ # Directory where the code is stored. Used when launching jobs (not running interactively)
+  autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively)
+  # fill only if using autosubmit
+  auto_conf:
+    script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script
+    expid: a6wq # replace with your EXPID
+    hpc_user: bsc032339 # replace with your hpc username
+    wallclock: 01:00 # wallclock for single-model jobs, hh:mm
+    wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used.
+    processors_per_job: 4 # processors to request for each single-model job.
+    processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used.
+    custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs.
+    custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used.
+    platform: nord3v2 # platform (for now, only nord3v2 is available)
+    email_notifications: yes # enable/disable email notifications. Change it if you want to.
+    email_address: nuria.perez@bsc.es # replace with your email address
+    notify_completed: yes # notify me by email when a job finishes
+    notify_failed: yes # notify me by email when a job fails
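Not part of the patch: as a side note on the '!expr' entries used under Time_aggregation in the recipe above, the yaml R package can evaluate such tags when the recipe is read. A minimal sketch follows; whether SUNSET parses recipes in exactly this way is an assumption.

# Illustrative sketch: evaluating a '!expr' recipe entry with the yaml package.
library(yaml)
recipe_text <- "
user_def:
  DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14)))
"
conf <- yaml::read_yaml(text = recipe_text, eval.expr = TRUE)
str(conf$user_def$DJF)  # numeric vector of forecast time steps to aggregate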
diff --git a/modules/Crossval/recipe_crossval_ecvs_global.yml b/modules/Crossval/recipe_crossval_ecvs_global.yml
new file mode 100644
index 00000000..830225af
--- /dev/null
+++ b/modules/Crossval/recipe_crossval_ecvs_global.yml
@@ -0,0 +1,184 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N. Pérez-Zanón
+  Info: This recipe can be used to test Crossval_anomalies.R and Crossval_skill.R for single and multimodel.
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'seasonal' or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
+    - {name: 'tas', freq: 'monthly_mean', units: 'K'}
+    # To request more variables to be divided in atomic recipes, add them this way:
+    # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}}
+  Datasets:
+    System:
+      # name: System name (Mandatory, str)
+      # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+      - {name: 'ECMWF-SEAS5.1', member: 'all'}
+      - {name: 'Meteo-France-System7'}
+      # To request more Systems to be divided in atomic recipes, add them this way:
+      # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' is currently the only option (str)
+      createFrom: Anomalies # Which module should the multimodel be created from (str)
+    Reference:
+      - {name: 'ERA5'} # Reference name (Mandatory, str)
+      # To request more References to be divided into atomic recipes, add them this way:
+      # - {name: 'ERA5Land'}
+  Time:
+    sdate:
+      - '0101'
+      - '0201'
+      - '0301'
+      - '0401'
+      - '0501'
+      - '0601'
+      - '0701'
+      - '0801'
+      - '0901'
+      - '1001'
+      - '1101'
+      - '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step in months. Starts at "1". (Mandatory, int)
+    ftime_max: 6 # Last forecast time step in months. Starts at "1". (Mandatory, int)
+  Region:
+    # latmin: minimum latitude (Mandatory, int)
+    # latmax: maximum latitude (Mandatory, int)
+    # lonmin: minimum longitude (Mandatory, int)
+    # lonmax: maximum longitude (Mandatory, int)
+    - {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+    # To request more regions to be divided in atomic recipes, add them this way:
+    # {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
+  Regrid:
+    method: conservative # Interpolation method (Mandatory, str)
+    type: to_system # Interpolate to: 'to_system', 'to_reference', 'none',
+    # or CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Calibration:
+      method: raw # Calibration method. (Mandatory, str)
+      save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Anomalies:
+      compute: yes # Either yes/true or no/false (Mandatory, bool)
+      cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool)
+      save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Downscaling:
+      # Assumption 1: leave-one-out cross-validation is always applied
+      # Assumption 2: for analogs, we select the best analog (minimum distance)
+      type: intbc # Mandatory, str: 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.
+      int_method: conservative # Regridding method accepted by CDO. (Mandatory, str)
+      bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'.
+      lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn'
+      log_reg_method: # If type=logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+      target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO
+      nanalogs: # If type=analogs. Number of analogs to be searched
+      save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str)
+    Time_aggregation:
+      execute: no # Either yes/true or no/false. Defaults to false. (Mandatory, bool)
+      method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, str)
+      # ini and end: list, pairs of initial and final time steps to aggregate.
+      # In this example, aggregate from 1 to 2, from 2 to 3, and from 1 to 3:
+      ini: [1, 2, 1]
+      end: [2, 3, 3]
+      # user_def: list of lists, custom user-defined forecast times to aggregate.
+      # Elements should be named; the names can be chosen by the user.
+      # An R expression can be entered using '!expr'; it will be evaluated by the code.
+      # If both ini/end and user_def are defined, ini/end takes preference.
+      user_def:
+        DJF_Y1: [1, 3] # aggregate from forecast time 1 to 3
+        DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,...
+    Indices:
+      ## Indices available: - NAO (for psl and/or z500);
+      #                     - Nino1+2, Nino3, Nino3.4, Nino4 (for tos)
+      ## Each index can only be computed if its area is within the selected region.
+      # obsproj: NAO computation method (see s2dv::NAO()). Default is yes/true. (Optional, bool)
+      # save: What to save. Options: 'all'/'none'. Default is 'all'.
+      # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool)
+      # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool)
+      # alpha: Significance threshold. Default value is 0.05. (Optional, numeric)
+      #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      # Also available if variable is psl and/or z500:
+      # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes}
+    Skill:
+      metric: mean_bias enscorr rpss crpss enssprerr rps crps rms rmss # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      alpha: 0.05
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Probabilities:
+      percentiles: [[1/3, 2/3]] # Thresholds
+      # for quantiles and probability categories. Each set of thresholds should be
+      # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional)
+      save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str)
+    Visualization:
+      plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str)
+      multi_panel: no # Multi-panel plot or single-panel plots. Default is no/false. (Optional, bool)
+      projection: 'robinson' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical_equidistant. (Optional, str)
+      mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      dots_terciles: yes # Whether to dot the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF.
+    Scorecards:
+      execute: yes # yes/no
+      regions:
+        # Mandatory: Define regions for which the spatial aggregation will be performed.
+        # The regions must be included within the area defined in the 'Analysis:Region' section.
+        Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        Extra-tropical SH: {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+      start_months: 1, 2, 3 # Mandatory, int: start months to visualise in the scorecard table. Options: 'all' or a sequence of numbers.
+      metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in the scorecard table
+      metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill'
+      inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False
+      table_label: NULL # Optional, str: extra information to add to the scorecard table title
+      fileout_label: NULL # Optional, str: extra information to add to the scorecard output filename
+      col1_width: NULL # Optional, int: to adjust the width of the first column of the scorecards table
+      col2_width: NULL # Optional, int: to adjust the width of the second column of the scorecards table
+      calculate_diff: False # Mandatory, bool: True/False
+  cross.method: 'leave-one-out' # Default 'leave-one-out'
+  ncores: 16 # Number of cores to be used in parallel computation.
+  # If left empty, defaults to 1. (Optional, int)
+  remove_NAs: yes # Whether to remove NAs.
+  # If left empty, defaults to no/false. (Optional, bool)
+  Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'.
+Run:
+  filesystem: esarchive # Name of the filesystem as defined in the archive configuration file
+  Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'.
+  # Default value is 'INFO'. (Optional, str)
+  Terminal: yes # Optional, bool: Whether to display log messages in the terminal.
+  # Default is yes/true.
+  output_dir: /esarchive/scratch/nperez/git4/ # Output directory. Must have write permissions. (Mandatory, str)
+  code_dir: /esarchive/scratch/nperez/git4/sunset/ # Directory where the code is stored. Used when launching jobs (not running interactively)
+  autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively)
+  # fill only if using autosubmit
+  auto_conf:
+    script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script
+    expid: a6wq # replace with your EXPID
+    hpc_user: bsc032339 # replace with your hpc username
+    wallclock: 01:00 # wallclock for single-model jobs, hh:mm
+    wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used.
+    processors_per_job: 4 # processors to request for each single-model job.
+    processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used.
+    custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs.
+    custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used.
+    platform: nord3v2 # platform (for now, only nord3v2 is available)
+    email_notifications: yes # enable/disable email notifications. Change it if you want to.
+    email_address: nuria.perez@bsc.es # replace with your email address
+    notify_completed: yes # notify me by email when a job finishes
+    notify_failed: yes # notify me by email when a job fails
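Not part of the patch: as an illustration of the 'rpss' entry listed under Skill in both recipes above, the ranked probability skill score can be computed with s2dv::RPSS(). The toy arrays below are invented for the example, and the cross.val flag mirrors the recipes' cross-validation options.

# Illustrative only: RPSS of a toy hindcast against random observations.
set.seed(1)
exp <- array(rnorm(10 * 20 * 6), dim = c(member = 10, syear = 20, time = 6))
obs <- array(rnorm(20 * 6), dim = c(syear = 20, time = 6))
res <- s2dv::RPSS(exp, obs, time_dim = 'syear', memb_dim = 'member',
                  prob_thresholds = c(1/3, 2/3), cross.val = TRUE)
str(res$rpss)  # skill score for each forecast time
str(res$sign)  # significance of the score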
diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R
index 2fa27549..2b92ed4b 100644
--- a/modules/Scorecards/execute_scorecards.R
+++ b/modules/Scorecards/execute_scorecards.R
@@ -33,4 +33,8 @@ for (variable in 1:length(recipe$Analysis$Variables)) {
   Scorecards_plotting(scorecard_recipe)
 }
 
+# Add the BSC logo to the scorecards
+source("tools/add_logo.R")
+add_logo_scorecards(recipe, "bsc_logo_small.png")
+
 print("##### SCORECARDS SAVED TO THE OUTPUT DIRECTORY #####")
diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R
index 57f735a8..b5f7355b 100644
--- a/modules/Visualization/R/plot_metrics.R
+++ b/modules/Visualization/R/plot_metrics.R
@@ -204,6 +204,10 @@ plot_metrics <- function(recipe, data_cube, metrics,
   # Get variable name and long name
   var_name <- data_cube$attrs$Variable$varName[[var]]
   var_long_name <- data_cube$attrs$Variable$metadata[[var_name]]$long_name
+  ## Remove "anomaly" from the variable long name
+  if (grepl("anomaly", var_long_name)) {
+    var_long_name <- str_remove(var_long_name, 'anomaly')
+  }
   # Multi-panel or single-panel plots
   if (recipe$Analysis$Workflow$Visualization$multi_panel) {
     # Define titles
@@ -355,6 +359,23 @@ plot_metrics <- function(recipe, data_cube, metrics,
     significance_caption <- NULL
     sign_file_label <- NULL
   }
+
+  if (recipe$Analysis$Workflow$Anomalies$cross_validation == FALSE &&
+      recipe$Analysis$Workflow$Skill$cross_validation == FALSE) {
+    cross_val <- 'none'
+  } else if (recipe$Analysis$Workflow$Anomalies$cross_validation == TRUE &&
+             recipe$Analysis$Workflow$Skill$cross_validation == FALSE) {
+    cross_val <- 'anomalies'
+  } else if (recipe$Analysis$Workflow$Anomalies$cross_validation == FALSE &&
+             recipe$Analysis$Workflow$Skill$cross_validation == TRUE) {
+    cross_val <- 'metrics'
+  } else if (recipe$Analysis$Workflow$Anomalies$cross_validation == TRUE &&
+             recipe$Analysis$Workflow$Skill$cross_validation == TRUE) {
+    cross_val <- 'full'
+  } else {
+    cross_val <- ''
+  }
+
   if (identical(fun, PlotRobinson)) {
     ## TODO: Customize alpha and other technical details depending on the metric
     base_args[['caption']] <-
@@ -363,7 +384,7 @@ plot_metrics <- function(recipe, data_cube, metrics,
         " Forecast month: ", forecast_time, "\n",
         " Reference: ", recipe$Analysis$Datasets$Reference, "\n",
         " Interpolation: ", recipe$Analysis$Regrid$type, "\n",
-        " Cross-validation: none", "\n", # tolower(recipe$Analysis$Workflow$Skill$cross_validation), "\n",
+        " Cross-validation: ", cross_val, "\n",
         paste0(" ", significance_caption))
   }
   fileout <- paste0(outfile, "_ft", forecast_time,
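The add_logo_scorecards() call added above lives in tools/add_logo.R, which is not included in this excerpt. As a rough sketch of what such a helper might do (the function name, output path layout and the use of the magick package are assumptions, not taken from the patch):

# Hypothetical sketch of a logo-stamping helper; not the actual tools/add_logo.R.
add_logo_scorecards_sketch <- function(recipe, logo_file) {
  # Assumed location of the scorecard PNGs inside the recipe output directory
  plots_dir <- file.path(recipe$Run$output_dir, "plots", "Scorecards")
  scorecards <- list.files(plots_dir, pattern = "\\.png$", full.names = TRUE)
  logo <- magick::image_read(logo_file)
  for (file in scorecards) {
    img <- magick::image_read(file)
    # Overlay the logo in the bottom-left corner and overwrite the file
    stamped <- magick::image_composite(img, logo, gravity = "southwest")
    magick::image_write(stamped, path = file)
  }
}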
diff --git a/rsz_rsz_bsc_logo.png b/rsz_rsz_bsc_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..59406d7a5f242aa87eb276bea39d6544a802b986
GIT binary patch
literal 48040
[base85-encoded binary image data omitted]

Date: Fri, 20 Sep 2024 17:11:20 +0200
Subject: [PATCH 30/53] including syear metrics in crossval_metrics

---
 conf/archive.yml                         | 44 +++++++++++++++++++-----
 modules/Crossval/Crossval_metrics.R      | 44 ++++++++++++++++++++++---
 modules/Loading/R/orography_correction.R |  4 +--
 tools/add_logo.R                         |  7 ++--
 tools/prepare_outputs.R                  |  2 ++
 5 files changed, 87 insertions(+), 14 deletions(-)
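The hunks below extend conf/archive.yml with an 'orography' entry per system, used by modules/Loading/R/orography_correction.R (whose diff is not shown in this excerpt). A sketch of how a module might read it; the exact nesting of the configuration list is an assumption based on the YAML below.

# Illustrative sketch: look up the new 'orography' path for a given system.
library(yaml)
archive <- yaml::read_yaml("conf/archive.yml")$gpfs
orography_file <- archive$System[["ECMWF-SEAS5"]]$orography
# e.g. "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system5c3s/constant/orography.nc"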
modules/Loading/R/orography_correction.R | 4 +-- tools/add_logo.R | 7 ++-- tools/prepare_outputs.R | 2 ++ 5 files changed, 87 insertions(+), 14 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 71bdfae2..706e0260 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -16,6 +16,22 @@ gpfs: time_stamp_lag: "0" reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/monthly_mean/tas_f6h/tas_20180501.nc" land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" + ECMWF-SEAS5: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ecmwf/system5c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system5c3s/constant/lsm/lsm.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system5c3s/constant/orography.nc" CMCC-SPS3.5: name: "CMCC-SPS3.5" institution: "European Centre for Medium-Range Weather Forecasts" @@ -30,6 +46,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system35c3s.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/cmcc/system35c3s/constant/orography.nc" Meteo-France-System8: name: "Meteo-France System 8" institution: "European Centre for Medium-Range Weather Forecasts" @@ -43,7 +60,9 @@ gpfs: hcst: 25 time_stamp_lag: "+1" calendar: "proleptic_gregorian" - reference_grid: "conf/grid_description/griddes_system7c3s.txt" + reference_grid: "conf/grid_description/griddes_system8c3s.txt" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/meteofrance/system8c3s/constant/lsm/lsm.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/meteofrance/system8c3s/constant/orography.nc" UK-MetOffice-Glosea601: name: "UK MetOffice GloSea 6 (v6.01)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -57,7 +76,8 @@ gpfs: hcst: 28 calendar: "proleptic_gregorian" time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_ukmo600.txt" + reference_grid: "conf/grid_description/griddes_ukmo601.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ukmo/glosea6_system601-c3s/constant/orography.nc" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? 
@@ -72,6 +92,7 @@ gpfs: calendar: "gregorian" time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ncep/cfs-v2/constant/orography.nc" DWD-GCFS2.1: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" @@ -86,6 +107,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system21_m1.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/dwd/system21_m1/constant/orography.nc" ECCC-CanCM4i: name: "ECCC CanCM4i (v3)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -112,6 +134,7 @@ gpfs: calendar: "standard" reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/orography.nc" ######################################################################### @@ -187,7 +210,7 @@ esarchive: monthly_mean: {"tas":"monthly_mean/tas_f6h/", "g500":"monthly_mean/g500_f12h/", "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind": "monthly_mean/sfcWind_f6h/", "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", - "tos":"monthly_mean/tos_f6h/"} + "tos":"monthly_mean/tos_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 51 hcst: 25 @@ -201,7 +224,8 @@ esarchive: src: "exp/dwd/system21_m1/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", "g500":"monthly_mean/g500_f12h/", "sfcWind":"monthly_mean/sfcWind_f6h/", - "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/"} + "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", + "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 50 hcst: 30 @@ -215,7 +239,8 @@ esarchive: src: "exp/cmcc/system35c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", "g500":"monthly_mean/g500_f12h/", "sfcWind":"monthly_mean/sfcWind_f6h/", - "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/"} + "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", + "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 50 hcst: 40 @@ -240,7 +265,8 @@ esarchive: institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/eccc/eccc3/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_s0-24h/", - "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 10 hcst: 10 @@ -265,7 +291,8 @@ esarchive: institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ukmo/glosea6_system601-c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tasmin":"monthly_mean/tasmin_f24h/", - "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f24h/"} + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 62 hcst: 28 @@ -278,7 +305,8 @@ esarchive: institution: "NOAA NCEP" #? 
src: "exp/ncep/cfs-v2/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", - "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/", + "sfcWind":"monthly_mean/sfcWind_f6h/"} nmember: fcst: 20 hcst: 20 diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index 09d4f1ac..43ebca41 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -9,6 +9,7 @@ source("modules/Crossval/R/tmp/Corr.R") source("modules/Crossval/R/tmp/Bias.R") source("modules/Crossval/R/tmp/SprErr.R") source("modules/Crossval/R/tmp/Eno.R") +source("modules/Skill/R/tmp/CRPS.R") ## data_crossval is the result from function full_crossval_anomalies or similar. ## this is a list with the required elements: @@ -77,6 +78,24 @@ Crossval_metrics <- function(recipe, data_crossval, #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps #skill_metrics[[paste0('rps_clim', # exe_rps[ps])]] <- rps_clim + } + if ('rps_syear' %in% requested_metrics) { + rps_syear <- RPS(exp = data_crossval$probs$hcst_ev[[ps]], + obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL, + cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', + Fair = fair, nmemb = nmemb, return_mean = FALSE, + ncores = ncores) + rps_syear <- .drop_dims(rps_syear) + skill_metrics$rps_syear <- rps_syear + } + if ('rps_clim_syear' %in% requested_metrics) { + rps_clim_syear <- Apply(list(data_crossval$probs$obs_ev[[1]]), + target_dims = c('cat', 'syear'), + RPS_clim, bin_dim_abs = 'cat', Fair = fair, + cross.val = FALSE, return_mean = FALSE, + output_dims = 'syear', ncores = ncores)$output1 + rps_clim_syear <- .drop_dims(rps_clim_syear) + skill_metrics$rps_clim_syear <- rps_clim_syear } if ('rpss' %in% requested_metrics) { rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[1]], @@ -91,6 +110,8 @@ Crossval_metrics <- function(recipe, data_crossval, cross.val = FALSE, na.rm = na.rm, sig_method.type = 'two.sided.approx', alpha = alpha, ncores = ncores) + rpss <- lapply(rpss, function(x) { + .drop_dims(x)}) skill_metrics$rpss <- rpss$rpss skill_metrics$rpss_significance <- rpss$sign # TO USE IT when visualization works for more rpsss @@ -113,6 +134,23 @@ Crossval_metrics <- function(recipe, data_crossval, Fair = fair, ncores = ncores) skill_metrics$crps_clim <- crps_clim } + if ('crps_syear' %in% requested_metrics) { + crps_syear <- CRPS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, + time_dim = 'syear', memb_dim = 'ensemble', + Fair = fair, return_mean = FALSE, + ncores = ncores) + crps_syear <- .drop_dims(crps_syear) + skill_metrics$crps_syear <- crps_syear + } + if ('crps_clim_syear' %in% requested_metrics) { + crps_clim_syear <- CRPS(exp = data_crossval$ref_obs_tr, + obs = data_crossval$obs$data, + time_dim = 'syear', memb_dim = 'ensemble', + Fair = fair, return_mean = FALSE, ncores = ncores) + crps_clim_syear <- .drop_dims(crps_clim_syear) + skill_metrics$crps_clim_syear <- crps_clim_syear + } if ('crpss' %in% requested_metrics) { crpss <- CRPSS(exp = data_crossval$hcst$data, obs = data_crossval$obs$data, @@ -120,6 +158,8 @@ Crossval_metrics <- function(recipe, data_crossval, memb_dim = 'ensemble', Fair = fair, time_dim = 'syear', clim.cross.val = FALSE, ncores = ncores) + crpss <- lapply(crpss, function(x) { + .drop_dims(x)}) skill_metrics$crpss <- crpss$crpss skill_metrics$crpss_significance <- crpss$sign } @@ -161,8 +201,8 @@ Crossval_metrics <- function(recipe, 
data_crossval, memb_dim = 'ensemble', dat_dim = NULL, time_dim = 'syear', pval = TRUE, ncores = ncores) - skill_metrics$SprErr <- enssprerr$ratio - skill_metrics$SprErr_significance <- enssprerr$p.val <= alpha + skill_metrics$enssprerr <- enssprerr$ratio + skill_metrics$enssprerr_significance <- enssprerr$p.val <= alpha } if ('rms' %in% requested_metrics) { rms <- RMS(exp = data_crossval$hcst$data, diff --git a/modules/Loading/R/orography_correction.R b/modules/Loading/R/orography_correction.R index 407ec1bf..76802622 100644 --- a/modules/Loading/R/orography_correction.R +++ b/modules/Loading/R/orography_correction.R @@ -11,8 +11,8 @@ orography_correction <- function(recipe, data) { lons.min <- recipe$Analysis$Region$lonmin lons.max <- recipe$Analysis$Region$lonmax - archive <- read_yaml("conf/archive.yml")$esarchive - + archive <- read_yaml("conf/archive.yml")[[recipe$Run$filesystem]] + # Define regrid parameters: regrid_params <- get_regrid_params(recipe, archive) diff --git a/tools/add_logo.R b/tools/add_logo.R index 13fe2866..96d4c930 100644 --- a/tools/add_logo.R +++ b/tools/add_logo.R @@ -24,7 +24,10 @@ add_logo_scorecards <- function(recipe, logo) { scorecards <- paste0(recipe$Run$output_dir, "/plots/Scorecards/", scorecards_f)[[1]] dim(scorecards) <- c(file = length(scorecards)) Apply(list(scorecards), target_dims = NULL, function(x) { - system(paste("composite -gravity northeast -geometry +5+5", - logo, x, x))}, ncores = recipe$Analysis$ncores) + system(paste('convert ',x, ' -gravity north -background white -splice 0x15 ', x)) + + system(paste("composite -gravity northeast -geometry +15+15", + logo, x, x))}, ncores = recipe$Analysis$ncores) + } diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index eab9c4fe..376876b7 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -113,12 +113,14 @@ prepare_outputs <- function(recipe_file, system <- recipe$Analysis$Datasets$System reference <- recipe$Analysis$Datasets$Reference region <- recipe$Analysis$Region + variable <- recipe$Analysis$Variables recipe$Analysis <- rapply(recipe$Analysis, tolower, how = "replace", classes = "character") recipe$Analysis$Datasets$System <- system recipe$Analysis$Datasets$Reference <- reference recipe$Analysis$Region <- region + recipe$Analysis$Variables <- variable # Run recipe checker if (disable_checks) { -- GitLab From 411ec1c7eb12f95da847adae95ba4f74fd69cb52 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 9 Oct 2024 16:48:51 +0200 Subject: [PATCH 31/53] including ukmo system 602 --- conf/archive.yml | 52 ++++++++++++++++++----- conf/grid_description/griddes_ukmo602.txt | 17 ++++++++ 2 files changed, 58 insertions(+), 11 deletions(-) create mode 100644 conf/grid_description/griddes_ukmo602.txt diff --git a/conf/archive.yml b/conf/archive.yml index 706e0260..8b0f2ac3 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -6,7 +6,7 @@ gpfs: institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ecmwf/system51c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", + "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: @@ -14,7 +14,7 @@ gpfs: hcst: 25 calendar: "proleptic_gregorian" time_stamp_lag: "0" - reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/monthly_mean/tas_f6h/tas_20180501.nc" + reference_grid: "conf/grid_description/griddes_system51c3s.txt" land_sea_mask: 
"/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" ECMWF-SEAS5: name: "ECMWF SEAS5" @@ -78,6 +78,21 @@ gpfs: time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo601.txt" orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ukmo/glosea6_system601-c3s/constant/orography.nc" + UK-MetOffice-Glosea602: + name: "UK MetOffice GloSea 602" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system602-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo602.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ukmo/glosea6_system602-c3s/constant/orography.nc" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? @@ -87,8 +102,8 @@ gpfs: "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: - fcst: 124 - hcst: 24 + fcst: 20 + hcst: 20 calendar: "gregorian" time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" @@ -96,9 +111,9 @@ gpfs: DWD-GCFS2.1: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/dwd/system21_m1/" + src: "exp/dwd/system21c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", + "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: @@ -107,7 +122,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system21_m1.txt" - orography: "/gpfs/projects/bsc32/esarchive_cache/exp/dwd/system21_m1/constant/orography.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/dwd/system21c3s/constant/orography.nc" ECCC-CanCM4i: name: "ECCC CanCM4i (v3)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -122,6 +137,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/eccc/eccc3/constant/orography.nc" Reference: ERA5: name: "ERA5" @@ -221,10 +237,10 @@ esarchive: DWD-GCFS2.1: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/dwd/system21_m1/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", + src: "exp/dwd/system21c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_s0-24h/", "g500":"monthly_mean/g500_f12h/", "sfcWind":"monthly_mean/sfcWind_f6h/", - "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", + "tasmin":"monthly_mean/tasmin_s0-24h/", "tasmax":"monthly_mean/tasmax_s0-24h/", "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 50 @@ -273,7 +289,7 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" - orography: "/esarchive/exp/eccc/eccc2/constant/orography.nc" + orography: "/esarchive/exp/eccc/eccc3/constant/orography.nc" UK-MetOffice-Glosea600: name: "UK MetOffice GloSea 6 (v6.0)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -300,6 +316,20 @@ esarchive: time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo601.txt" orography: "/esarchive/exp/ukmo/glosea6_system601-c3s/constant/orography.nc" + UK-MetOffice-Glosea602: + 
name: "UK MetOffice GloSea 602" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system602-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo602.txt" + orography: "/esarchive/exp/ukmo/glosea6_system602-c3s/constant/orography.nc" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? diff --git a/conf/grid_description/griddes_ukmo602.txt b/conf/grid_description/griddes_ukmo602.txt new file mode 100644 index 00000000..05a49ea6 --- /dev/null +++ b/conf/grid_description/griddes_ukmo602.txt @@ -0,0 +1,17 @@ +# Grid description file for UKMO602 (CDS) +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 -- GitLab From c0b4e4b4ebbb6abae6f82f4e68b53f43fc0c8556 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 17 Oct 2024 16:36:55 +0200 Subject: [PATCH 32/53] bug fixes --- conf/archive.yml | 12 ++- modules/Loading/R/load_tas_tos.R | 25 ++++-- modules/Loading/R/mask_tas_tos.R | 5 +- modules/Scorecards/R/tmp/ScorecardsMulti.R | 4 +- modules/Scorecards/Scorecards_calculations.R | 85 ++++++++++---------- 5 files changed, 77 insertions(+), 54 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 8b0f2ac3..ef4c31f7 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -16,6 +16,7 @@ gpfs: time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_system51c3s.txt" land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/orography.nc" ECMWF-SEAS5: name: "ECMWF SEAS5" institution: "European Centre for Medium-Range Weather Forecasts" @@ -205,6 +206,8 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_system51c3s.txt" + land_sea_mask: "/esarchive/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" + orography: "/esarchive/exp/ecmwf/system51c3s/constant/orography.nc" Meteo-France-System7: name: "Meteo-France System 7" institution: "European Centre for Medium-Range Weather Forecasts" @@ -225,7 +228,7 @@ esarchive: src: "exp/meteofrance/system8c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "g500":"monthly_mean/g500_f12h/", "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind": "monthly_mean/sfcWind_f6h/", - "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", + "tasmax":"monthly_mean/tasmax_s0-24h/", "tasmin": "monthly_mean/tasmin_s0-24h/", "tos":"monthly_mean/tos_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 51 @@ -440,6 +443,13 @@ esarchive: monthly_mean: {"tas":"monthly_mean/tas/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/obs/berkeleyearth/berkeleyearth/monthly_mean/tas/tas_201805.nc" + MERRA2: + name: "MERRA-2" + institution: "NASA" + src: "recon/nasa/merra_v2/" + monthly_mean: {"tas":"monthly_mean/tas_f1h/", "prlr":"monthly_mean/prlr_f1h/" } + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/nasa/merra_v2/monthly_mean/tas_f1h/tas_201205.nc" mars: src: 
"/esarchive/scratch/aho/tmp/GRIB/" #"/mars/" diff --git a/modules/Loading/R/load_tas_tos.R b/modules/Loading/R/load_tas_tos.R index 18c000ad..6430932a 100644 --- a/modules/Loading/R/load_tas_tos.R +++ b/modules/Loading/R/load_tas_tos.R @@ -19,12 +19,13 @@ load_tas_tos <- function(recipe) { lons.max <- recipe$Analysis$Region$lonmax ref.name <- recipe$Analysis$Datasets$Reference$name exp.name <- recipe$Analysis$Datasets$System$name + ncores <- recipe$Analysis$ncores variable <- c("tas", "tos", "sic") store.freq <- recipe$Analysis$Variables$freq if(is.null(recipe$Analysis$Variables$sic_threshold)){ - sic.threshold = 0.15 + sic.threshold <- 0.15 } else { sic.threshold <- recipe$Analysis$Variables$sic_threshold } @@ -159,10 +160,15 @@ load_tas_tos <- function(recipe) { # Combine hcst tas and tos data #------------------------------------------------------------------- - hcst <- mask_tas_tos(input_data = hcst, region = c(lons.min, lons.max,lats.min, lats.max), - mask_path = archive$System[[exp.name]]$land_sea_mask, lsm_var_name = 'lsm', - lon = hcst$coords$longitude, lat = hcst$coords$latitude, - lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) + hcst <- mask_tas_tos(input_data = hcst, + region = c(lons.min, lons.max,lats.min, lats.max), + mask_path = archive$System[[exp.name]]$land_sea_mask, + lsm_var_name = 'lsm', + lon = as.vector(hcst$coords$longitude), + lat = as.vector(hcst$coords$latitude), + lon_dim = 'longitude', + lat_dim = 'latitude', + ncores = ncores) hcst$dims[['var']] <- dim(hcst$data)[['var']] hcst$attrs$Variable$varName <- 'tas-tos' @@ -359,9 +365,12 @@ load_tas_tos <- function(recipe) { ## TODO: Ask about this list if(!recipe$Analysis$Datasets$Reference$name %in% c('HadCRUT4','HadCRUT5','BEST','GISTEMPv4')){ - obs <- mask_tas_tos(input_data = obs, region = c(lons.min, lons.max,lats.min, lats.max), - mask_path = archive$Reference[[ref.name]]$land_sea_mask, lsm_var_name = 'sftof', - lon = obs$coords$longitude, lat = obs$coords$latitude, + obs <- mask_tas_tos(input_data = obs, + region = c(lons.min, lons.max,lats.min, lats.max), + mask_path = archive$Reference[[ref.name]]$land_sea_mask, + lsm_var_name = 'sftof', + lon = as.vector(obs$coords$longitude), + lat = as.vector(obs$coords$latitude), lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) obs$dims[['var']] <- dim(obs$data)[['var']] diff --git a/modules/Loading/R/mask_tas_tos.R b/modules/Loading/R/mask_tas_tos.R index cc693795..c82bbb42 100644 --- a/modules/Loading/R/mask_tas_tos.R +++ b/modules/Loading/R/mask_tas_tos.R @@ -6,7 +6,6 @@ mask_tas_tos <- function(input_data, mask_path, lsm_var_name = lsm_var_name, lon lon_dim = 'lon', lat_dim = 'lat', ncores = NULL){ - mask <- .load_mask(mask_path = mask_path, lsm_var_name = lsm_var_name, lon_dim = lon_dim, lat_dim = lat_dim, sea_value = 1, land_value = 0, region = region) @@ -40,6 +39,8 @@ mask_tas_tos <- function(input_data, mask_path, lsm_var_name = lsm_var_name, lon lats.min <- region[3] lats.max <- region[4] + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + data <- startR::Start(dat = mask_path, var = lsm_var_name, lon = values(list(lons.min, lons.max)), @@ -48,7 +49,7 @@ mask_tas_tos <- function(input_data, mask_path, lsm_var_name = lsm_var_name, lon synonims = list(lon = c('lon','longitude'), lat = c('lat','latitude')), lat_reorder = Sort(decreasing = TRUE), - lon_reorder = CircularSort(0,360), + lon_reorder = circularsort, num_procs = 1, retrieve = TRUE) mask <- list(mask = drop(data), diff --git 
a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 1633eb86..c17025fa 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -148,7 +148,7 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, system.name1 <- sys_dict$System[[system[sys]]]$name system.name <- c(system.name, system.name1) } - for(ref in 1:length(length)){ + for(ref in 1:length(reference)){ reference.name1 <- sys_dict$Reference[[reference[ref]]]$name reference.name <- c(reference.name, reference.name1) } @@ -244,7 +244,7 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, sign_sc_9 <- NULL } } - + VizScorecard(data = data_sc_9, sign = sign_sc_9, row_dim = model, diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index 75168908..ebe36bbc 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -302,46 +302,49 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, order = c('var','time','region')) } else if (met == 'mean_bias') { + ## Temporarily removing mean_bias significance since degrees of freedom not included + ## and causing issues with SPEI data + # ## Calculate ensemble mean - hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') - obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') - - ## Aggregate data over regions - hcst_data_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = hcst_data_ens, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - obs_data_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = obs_data_ens, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(hcst_data_aggr))[length(dim(hcst_data_aggr))] <- 'region' - names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' - - ## Remove unnecessary dimension - hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','sday','sweek'), - list(1,1,1) , drop = 'selected') - obs_data_aggr <- Subset(obs_data_aggr, c('dat','sday','sweek'), - list(1,1,1) , drop = 'selected') - - ## Calculate significance - pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), - target_dims = c('syear'), ncores = ncores, - fun = function(x,y) - {t.test(as.vector(x),as.vector(y))})$p.value - - sign_mean_bias <- pval_mean_bias <= alpha + # hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') + # obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') + # + # ## Aggregate data over regions + # hcst_data_aggr <- sapply(X = 1:length(regions), + # FUN = function(X) { + # WeightedMean(data = hcst_data_ens, + # region = regions[[X]], + # lon = lon, londim = lon_dim, + # lat = lat, latdim = lat_dim, + # na.rm = na.rm) + # }, simplify = 'array') + # + # obs_data_aggr <- sapply(X = 1:length(regions), + # FUN = function(X) { + # WeightedMean(data = obs_data_ens, + # region = regions[[X]], + # lon = lon, londim = lon_dim, + # lat = lat, latdim = lat_dim, + # na.rm = na.rm) + # }, simplify = 'array') + # + # ## Include name of region dimension + # names(dim(hcst_data_aggr))[length(dim(hcst_data_aggr))] <- 'region' + # names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' + # + # ## Remove unnecessary 
dimension + # hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','sday','sweek'), + # list(1,1,1) , drop = 'selected') + # obs_data_aggr <- Subset(obs_data_aggr, c('dat','sday','sweek'), + # list(1,1,1) , drop = 'selected') + # + # ## Calculate significance + # pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), + # target_dims = c('syear'), ncores = ncores, + # fun = function(x,y) + # {t.test(as.vector(x),as.vector(y))})$p.value + # + # sign_mean_bias <- pval_mean_bias <= alpha ## Calculate aggregated mean bias metric mean_bias <- sapply(X = 1:length(regions), @@ -362,8 +365,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Save metric result in array aggr_metrics[,,,which(metrics == met)] <- Reorder(data = mean_bias, order = c('var', 'time', 'region')) - aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_mean_bias, - order = c('var', 'time', 'region')) + # aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_mean_bias, + # order = c('var', 'time', 'region')) } else if (met == 'enssprerr') { ## Calculate metric -- GitLab From b505be48f4d345b82823f928dd9865420b45f113 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 28 Oct 2024 12:33:52 +0100 Subject: [PATCH 33/53] including indicators modules --- conf/archive.yml | 4 +- conf/variable-dictionary.yml | 11 + launch_SUNSET.sh | 3 + modules/Indicators/Indicators.R | 118 ++ .../Indicators/R/data_format_csindicators.R | 36 + modules/Indicators/R/data_transformation.R | 15 + modules/Indicators/R/malaria_or_ticks.R | 83 + modules/Indicators/R/spei_spi.R | 193 ++ modules/Indicators/R/threshold.R | 61 + .../CSIndicators_CST_PeriodStandardization.R | 647 ++++++ modules/Indices/R/tmp/EOF.R | 293 +++ modules/Indices/R/tmp/GetProbs.R | 346 ++++ modules/Indices/R/tmp/NAO.R | 574 ++++++ modules/Indices/R/tmp/ProjectField.R | 272 +++ modules/Indices/R/tmp/Utils.R | 1779 +++++++++++++++++ modules/Skill/Skill.R | 12 + modules/Statistics/Statistics.R | 12 + modules/Visualization/output_size.yml | 42 +- tools/check_recipe.R | 194 ++ tools/divide_recipe.R | 14 +- tools/libs.R | 1 + 21 files changed, 4704 insertions(+), 6 deletions(-) create mode 100644 modules/Indicators/Indicators.R create mode 100644 modules/Indicators/R/data_format_csindicators.R create mode 100644 modules/Indicators/R/data_transformation.R create mode 100644 modules/Indicators/R/malaria_or_ticks.R create mode 100644 modules/Indicators/R/spei_spi.R create mode 100644 modules/Indicators/R/threshold.R create mode 100644 modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R create mode 100644 modules/Indices/R/tmp/EOF.R create mode 100644 modules/Indices/R/tmp/GetProbs.R create mode 100644 modules/Indices/R/tmp/NAO.R create mode 100644 modules/Indices/R/tmp/ProjectField.R create mode 100644 modules/Indices/R/tmp/Utils.R diff --git a/conf/archive.yml b/conf/archive.yml index ef4c31f7..3250a87f 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -12,7 +12,7 @@ gpfs: nmember: fcst: 51 hcst: 25 - calendar: "proleptic_gregorian" + calendar: "gregorian" time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_system51c3s.txt" land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" @@ -203,7 +203,7 @@ esarchive: nmember: fcst: 51 hcst: 25 - calendar: "proleptic_gregorian" + calendar: "gregorian" time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_system51c3s.txt" land_sea_mask: "/esarchive/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" diff --git 
a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index 78b19b1a..466653b2 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -219,6 +219,17 @@ vars: long_name: "NAO index" standard_name: "nao_index" accum: no + spei: + units: "adim" + long_name: "Standardized Precipitation-Evapotranspiration Index" + standard_name: "SPEI" + accum: no + spi: + units: "adim" + long_name: "Standardized Precipitation Index" + standard_name: "SPI" + accum: no + # Coordinates coords: diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index 6149a963..9722010e 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -128,6 +128,9 @@ if [[ $run_method == "sbatch" ]]; then mkdir -p $logdir echo "Slurm job logs will be stored in $logdir" + # Get corrected recipe + recipe=${outdir}/logs/recipes/$(basename $recipe) + # Launch one job per atomic recipe cd $codedir job_number=0 diff --git a/modules/Indicators/Indicators.R b/modules/Indicators/Indicators.R new file mode 100644 index 00000000..24b25884 --- /dev/null +++ b/modules/Indicators/Indicators.R @@ -0,0 +1,118 @@ +# Load functions +source("modules/Indicators/R/data_format_csindicators.R") +source("modules/Indicators/R/spei_spi.R") +source("modules/Indicators/R/threshold.R") +source("modules/Indicators/R/data_transformation.R") +source("modules/Indicators/R/malaria_or_ticks.R") + +source("modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R") #### tmp!!! + +Indicators <- function(recipe, data) { + var.list <- strsplit(recipe$Analysis$Variables$name, ", ")[[1]] + ncores <- recipe$Analysis$ncores + + # SPEI + if (!is.null(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) { + if (recipe$Analysis$Workflow$Indicators$SPEI$return_spei) { + # read recipe parameters + pet_method <- recipe$Analysis$Workflow$Indicators$SPEI$PET_method + if (pet_method == "none") { + pet_method <- NULL + } + spei_accum <- recipe$Analysis$Workflow$Indicators$SPEI$Nmonths_accum + spei_standardization <- recipe$Analysis$Workflow$Indicators$SPEI$standardization + spei_stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPEI$standardization_ref_period + if (is.null(spei_stand_refperiod)) { + spei_stand_refperiod <- c(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$hcst_end) + } + spei_stand_refperiod <- as.numeric(spei_stand_refperiod) + spei_stand_handleinf <- recipe$Analysis$Workflow$Indicators$SPEI$standardization_handle_infinity + + # call spei_spi function from modules/Indicators/R + result_spei <- spei_spi( + data = data, + indicator = "spei", + var.list = var.list, + pet_method = pet_method, + accum = spei_accum, + standardization = spei_standardization, + stand_refperiod = spei_stand_refperiod, + stand_handleinf = spei_stand_handleinf, + ncores = ncores + ) + } + } + + # SPI + if (!is.null(recipe$Analysis$Workflow$Indicators$SPI$return_spi)) { + if (recipe$Analysis$Workflow$Indicators$SPI$return_spi) { + # read recipe parameters + spi_accum <- recipe$Analysis$Workflow$Indicators$SPI$Nmonths_accum + spi_standardization <- recipe$Analysis$Workflow$Indicators$SPI$standardization + spi_stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPI$standardization_ref_period + if (is.null(spi_stand_refperiod)) { + spi_stand_refperiod <- c(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$hcst_end) + } + spi_stand_refperiod <- as.numeric(spi_stand_refperiod) + spi_stand_handleinf <- recipe$Analysis$Workflow$Indicators$SPI$standardization_handle_infinity + + # call spei_spi function from modules/Indicators/R + result_spi <- 
spei_spi( + data = data, + indicator = "spi", + var.list = var.list, + pet_method = NULL, + accum = spi_accum, + standardization = spi_standardization, + stand_refperiod = spi_stand_refperiod, + stand_handleinf = spi_stand_handleinf, + ncores = ncores + ) + } + } + + # Threshold based: + if (!is.null(recipe$Analysis$Workflow$Indicators$SelectedThreshold$return_thresholdbased)) { + if (recipe$Analysis$Workflow$Indicators$SelectedThreshold$return_thresholdbased) { + thrs <- recipe$Analysis$Workflow$Indicators$SelectedThreshold$threshold + return.values <- recipe$Analysis$Workflow$Indicators$SelectedThreshold$returnValues + result_threshold <- threshold(data, thrs, var.list, return.values) + } + } + + # Malaria indicator: + if (!is.null(recipe$Analysis$Workflow$Indicators$Malaria$return_climate_suitability)) { + if (recipe$Analysis$Workflow$Indicators$Malaria$return_climate_suitability) { + result_malaria <- list() + for (ssp in recipe$Analysis$Workflow$Indicators$Malaria$ssp) { + result_malaria[[ssp]] <- malaria_or_ticks(data, var.list, ssp) + } + } + } + # Tick-borne disease indicator: + if (!is.null(recipe$Analysis$Workflow$Indicators$Ticks$return_climate_suitability)) { + if (recipe$Analysis$Workflow$Indicators$Ticks$return_climate_suitability) { + result_ticks <- list() + for (ssp in recipe$Analysis$Workflow$Indicators$Ticks$ssp) { + result_ticks[[ssp]] <- malaria_or_ticks(data, var.list, ssp) + } + } + } + + indicators.list <- c( + "SPEI" = "result_spei", + "SPI" = "result_spi", + "ThresholdBased" = "result_threshold", + "Malaria" = "result_malaria", + "Ticks" = "result_ticks" + ) + + result <- list() + for (ind in indicators.list) { + if (exists(ind)) { + result[[names(indicators.list)[which(indicators.list == ind)]]] <- eval(parse(text = ind)) + } + } + + return(result) +} diff --git a/modules/Indicators/R/data_format_csindicators.R b/modules/Indicators/R/data_format_csindicators.R new file mode 100644 index 00000000..15fe06a4 --- /dev/null +++ b/modules/Indicators/R/data_format_csindicators.R @@ -0,0 +1,36 @@ +# data format for input in CSIndicators SPEI functions +data_format_csindicators <- function(data, vars, var.list) { + dict <- c('tas' = 'tmean', + 'tasmax' = 'tmax', + 'tasmin' = 'tmin', + 'prlr' = 'pr', + 'pet' = 'pet', + 'hurs' = 'hurs', + 'tdps' = 'tdps') + dim.names <- names(dim(data$data)) + + result <- list() + for (var in vars) { + if (var %in% var.list) { + data.var <- CST_Subset(x = data, + var_dim = "var", + dat_dim = "dat", + along = "var", + indices = list(which(var.list == var)), + drop = FALSE) + + # read original units + var.metadata.pos <- which(names(data$attrs$Variable$metadata) == var) + units <- data$attrs$Variable$metadata[[var.metadata.pos]]$units + + ## TODO: Is this needed in any way? + data.var$attrs$Units <- units + } else { + data.var <- NULL + } + # append all variables in a list of s2dv_cubes + result <- append(result, list(data.var)) + } + names(result) <- dict[vars] + return(result) +} diff --git a/modules/Indicators/R/data_transformation.R b/modules/Indicators/R/data_transformation.R new file mode 100644 index 00000000..cc9397b1 --- /dev/null +++ b/modules/Indicators/R/data_transformation.R @@ -0,0 +1,15 @@ +data_transform_hurs <- function(tas, tdps) { + # obtain relative humidity (in %; tas and tdps need to be in C) + # ref: https://www.osti.gov/servlets/purl/548871&lang=en + # Alduchov OA, Eskridge RE. + # Improved Magnus form approximation of saturation vapor pressure. + # Journal of Applied Meteorology and Climatology. 
1996 Apr;35(4):601-9
+
+  b <- 17.625
+  c <- 243.04
+  hurs <- tas # for metadata (lon, lat, and attrs$Dates)
+  hurs$data <- 100 * exp(b * c * (tdps$data - tas$data) / ((c + tdps$data) * (c + tas$data)))
+  hurs$attrs$Units <- '%'
+
+  return(hurs) # metadata: coords and dates
+}
diff --git a/modules/Indicators/R/malaria_or_ticks.R b/modules/Indicators/R/malaria_or_ticks.R
new file mode 100644
index 00000000..9113ea25
--- /dev/null
+++ b/modules/Indicators/R/malaria_or_ticks.R
@@ -0,0 +1,83 @@
+malaria_or_ticks <- function(data, var.list, ssp) {
+  # define thresholds (suitability ranges per variable):
+  if (tolower(ssp) == "p.falciparum") {
+    thrs_tas <- c(18, 32)
+    thrs_prlr <- c(80, Inf)
+    thrs_hurs <- c(60, Inf)
+    var.thrs.list <- c("tas" = "thrs_tas",
+                       "prlr" = "thrs_prlr",
+                       "hurs" = "thrs_hurs")
+    # check in prepare_outputs that variables in names(var.list) are requested
+  } else if (tolower(ssp) == "p.vivax") {
+    thrs_tas <- c(14.5, 33)
+    thrs_prlr <- c(80, Inf)
+    thrs_hurs <- c(60, Inf)
+    var.thrs.list <- c("tas" = "thrs_tas",
+                       "prlr" = "thrs_prlr",
+                       "hurs" = "thrs_hurs")
+    # check in prepare_outputs that variables in names(var.list) are requested
+  } else if (tolower(ssp) == "i.ricinus") {
+    thrs_tas <- c(10, 26)
+    thrs_hurs <- c(45, Inf)
+    var.thrs.list <- c("tas" = "thrs_tas",
+                       "hurs" = "thrs_hurs")
+    # check in prepare_outputs that variables in names(var.list) are requested
+  } else {
+    var.thrs.list <- c() # no variables with suitability thresholds to apply
+  }
+
+  # obtain climatic suitability for obs, hcst and fcst (or elements of "data")
+  result <- data
+  for (ll in names(result)) {
+    data_element <- data[[ll]]
+    if (!is.null(data_element)) {
+      # prepare data
+      ## TODO: There is probably a way to simplify this into a single call
+      tas <- data_format_csindicators(data_element,
+                                      vars = c("tas"),
+                                      var.list = var.list)[[1]]
+      if ("hurs" %in% var.list) {
+        hurs <- data_format_csindicators(data_element,
+                                         vars = c("hurs"),
+                                         var.list = var.list)[[1]]
+      } else {
+        tdps <- data_format_csindicators(data_element,
+                                         vars = c("tdps"),
+                                         var.list = var.list)[[1]]
+        hurs <- data_transform_hurs(tas, tdps)
+      }
+      if ("prlr" %in% var.list) {
+        prlr <- data_format_csindicators(data_element,
+                                         vars = c("prlr"),
+                                         var.list = var.list)[[1]]
+      }
+
+      # create result object
+      if (length(var.thrs.list) > 0) {
+        result_element <- eval(parse(text = names(var.thrs.list)[1])) # tas
+        result_element$attrs$Units <- NULL
+        result_element$data[which(!is.na(result_element$data))] <- 1
+      } else {
+        result_element <- NULL
+      }
+
+      # apply thresholds
+      if (length(var.thrs.list) > 0) {
+        for (var in names(var.thrs.list)) {
+          thrs <- eval(parse(text = var.thrs.list[which(names(var.thrs.list) == var)]))
+          var.data <- eval(parse(text = var))
+          new.data <- array(0, dim = dim(result_element$data))
+          new.data[which(var.data$data >= thrs[1] & var.data$data <= thrs[2])] <- 1
+          result_element$data <- result_element$data * new.data
+          ## TODO: Change s2dv_cube metadata here?
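+          # How the combination works: result_element$data starts as an
+          # all-ones mask and is multiplied by one 0/1 mask per variable, so
+          # a point stays 1 only if every variable lies inside its
+          # suitability range. A minimal sketch of that step (hypothetical
+          # toy masks, not project data):
+          #   mask_tas  <- array(c(1, 0, 1), dim = c(syear = 3))
+          #   mask_hurs <- array(c(1, 1, 0), dim = c(syear = 3))
+          #   mask_tas * mask_hurs # -> 1 0 0: suitable only where all hold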
+ result_element$metadata <- append(result_element$metadata, + paste0(var, " suitability threshold: ", + thrs[1], " to ", thrs[2], + " (", var.data$attrs$Units, ")")) + } + } + } # end "if (!is.null...)" + result[[ll]] <- result_element + } # end "for" + return(result) +} diff --git a/modules/Indicators/R/spei_spi.R b/modules/Indicators/R/spei_spi.R new file mode 100644 index 00000000..ebe70126 --- /dev/null +++ b/modules/Indicators/R/spei_spi.R @@ -0,0 +1,193 @@ +spei_spi <- function(data, indicator, + var.list, + pet_method = NULL, + accum, + standardization, + stand_refperiod, + stand_handleinf, + ncores = NULL) { + + if (indicator == "spei") { + long_name <- "Standardized Precipitation-Evapotranspiration Index" + # obtain PET + if (is.null(pet_method)) { # alredy checked (prepare_outputs) that PET exists + # in the data when SPEI is requested without PET method + data_obs <- data_format_csindicators(data$obs, + vars = c("pet", "prlr"), + var.list = var.list) + if (!is.null(data$hcst)) { + data_hcst <- data_format_csindicators(data$hcst, + vars = c("pet", "prlr"), + var.list = var.list) + } else { + data_hcst <- NULL + } + if (!is.null(data$fcst)) { + data_fcst <- data_format_csindicators(data$fcst, + vars = c("pet", "prlr"), + var.list = var.list) + } else { + data_fcst <- NULL + } + } else { + if (pet_method == "hargreaves") { + vars <- c("tasmax", "tasmin") + } else if (pet_method == "hargreaves_modified") { + vars <- c("tasmax", "tasmin", "prlr") + } else if (pet_method == "thornthwaite") { + vars <- c("tas") + } + + # add prlr to the data for prlr-pet + if (!("prlr" %in% vars)) { + vars <- c(vars, "prlr") + } + + # call CST_PeriodPET from CSIndicators + data_obs <- data_format_csindicators(data$obs, + vars = vars, + var.list = var.list) + data_obs$pet <- CST_PeriodPET(data = data_obs, + pet_method = pet_method, + ncores = ncores) + + if (!is.null(data$hcst)) { + data_hcst <- data_format_csindicators(data$hcst, + vars = vars, + var.list = var.list) + data_hcst$pet <- CST_PeriodPET(data = data_hcst, + pet_method = pet_method, + ncores = ncores) + } + + if (!is.null(data$fcst)) { + data_fcst <- data_format_csindicators(data$fcst, + vars = vars, + var.list = var.list) + data_fcst$pet <- CST_PeriodPET(data = data_fcst, + pet_method = pet_method, + ncores = ncores) + } + } + + # Obtain difference Precipitation - PET + data_obs_diff <- data_obs$pr + data_obs_diff$data <- data_obs$pr$data - data_obs$pet$data + + if (!is.null(data$hcst)) { + data_hcst_diff <- data_hcst$pr + data_hcst_diff$data <- data_hcst$pr$data - data_hcst$pet$data + } + + if (!is.null(data$fcst)) { + data_fcst_diff <- data_fcst$pr + data_fcst_diff$data <- data_fcst$pr$data - data_fcst$pet$data + } + } else if (indicator == "spi") { + # spi (no PET calculation and use of precipitation directly instead of Precipitation - PET) + long_name <- "Standardized Precipitation Index" + data_obs <- data_format_csindicators(data$obs, + vars = "prlr", + var.list = var.list) + data_obs_diff <- data_obs$pr + + if (!is.null(data$hcst)) { + data_hcst <- data_format_csindicators(data$hcst, + vars = "prlr", + var.list = var.list) + data_hcst_diff <- data_hcst$pr + } + + if (!is.null(data$fcst)) { + data_fcst <- data_format_csindicators(data$fcst, + vars = "prlr", + var.list = var.list) + data_fcst_diff <- data_fcst$pr + } + } + + #### same workflow for SPEI and SPI starting here + + # call CST_PeriodAccumulation function from CSIndicators + data_obs_accum <- CST_PeriodAccumulation( + data = data_obs_diff, + rollwidth = accum, + sdate_dim = 
"syear", + ncores = ncores + ) + if (!is.null(data$hcst)) { + data_hcst_accum <- CST_PeriodAccumulation( + data = data_hcst_diff, + rollwidth = accum, + sdate_dim = "syear", + ncores = ncores + ) + } else { + data_hcst_accum <- NULL + } + if (!is.null(data$fcst)) { + data_fcst_accum <- CST_PeriodAccumulation( + data = data_fcst_diff, + rollwidth = accum, + sdate_dim = "syear", + ncores = ncores + ) + data$fcst$accum + } else { + data_fcst_accum <- NULL + } + + # call CST_PeriodStandardization function from CSIndicators + if (standardization) { + data_obs_ind <- CST_PeriodStandardization( + data = data_obs_accum, + data_cor = NULL, + ref_period = stand_refperiod, + handle_infinity = stand_handleinf, + ncores = ncores + ) + if (!is.null(data$hcst)) { + data_hcst_ind <- CST_PeriodStandardization( + data = data_hcst_accum, + data_cor = NULL, + ref_period = stand_refperiod, + handle_infinity = stand_handleinf, + ncores = ncores + ) + } else { + data_hcst_ind <- NULL + } + + if (!is.null(data$fcst)) { + data_fcst_ind <- CST_PeriodStandardization( + data = data_hcst_accum, + data_cor = data_fcst_accum, + ref_period = stand_refperiod, + handle_infinity = stand_handleinf, + ncores = ncores + ) + } else { + data_fcst_ind <- NULL + } + } else { + data_obs_ind <- data_obs_accum + data_hcst_ind <- data_hcst_accum + data_fcst_ind <- data_fcst_accum + } + + # result: spi or spei (create list of previous data s2dv_cubes) + result <- list(hcst = data_hcst_ind, + fcst = data_fcst_ind, + obs = data_obs_ind) + for (element in names(result[!is.null(result)])) { + old_var_name <- result[[element]]$attrs$Variable$varName + old_metadata <- result[[element]]$attrs$Variable$metadata[[old_var_name]] + result[[element]]$attrs$Variable$metadata[[old_var_name]] <- NULL + result[[element]]$coords$var <- indicator + result[[element]]$attrs$Variable$varName <- indicator + result[[element]]$attrs$Variable$metadata[[indicator]] <- old_metadata + result[[element]]$attrs$Variable$metadata[[indicator]] <- + list(long_name = long_name, units = "") + } + return(result) +} diff --git a/modules/Indicators/R/threshold.R b/modules/Indicators/R/threshold.R new file mode 100644 index 00000000..7a2768a1 --- /dev/null +++ b/modules/Indicators/R/threshold.R @@ -0,0 +1,61 @@ +threshold <- function(data, thrs, var.list, return.values = TRUE) { + if (!is.null(data$obs)) { + result_obs <- c() + dim_var <- which(names(dim(data$obs$data)) == "var") + for (var in var.list) { + data_tmp <- Subset(data$obs$data, along = "var", indices = which(var.list == var), drop = FALSE) + data_threshold <- data_tmp + data_threshold[which(data_tmp >= thrs[[which(var.list == var)]][[1]] & data_tmp <= thrs[[which(var.list == var)]][[2]])] <- TRUE + data_threshold[which(data_tmp < thrs[[which(var.list == var)]][[1]] | data_tmp > thrs[[which(var.list == var)]][[2]])] <- FALSE + data_tmp[which(!data_threshold)] <- NA + if (return.values) { + result_obs <- abind(result_obs, data_tmp, along = dim_var) + } else { + result_obs <- abind(result_obs, data_threshold, along = dim_var) + } + } + names(dim(result_obs)) <- names(dim(data$obs$data)) + data$obs$data <- result_obs + } + + if (!is.null(data$hcst)) { + result_hcst <- c() + dim_var <- which(names(dim(data$hcst$data)) == "var") + for (var in var.list) { + data_tmp <- Subset(data$hcst$data, along = "var", indices = which(var.list == var), drop = FALSE) + data_threshold <- data_tmp + data_threshold[which(data_tmp >= thrs[[which(var.list == var)]][[1]] & data_tmp <= thrs[[which(var.list == var)]][[2]])] <- TRUE + 
data_threshold[which(data_tmp < thrs[[which(var.list == var)]][[1]] | data_tmp > thrs[[which(var.list == var)]][[2]])] <- FALSE + data_tmp[which(!data_threshold)] <- NA + if (return.values) { + result_hcst <- abind(result_hcst, data_tmp, along = dim_var) + } else { + result_hcst <- abind(result_hcst, data_threshold, along = dim_var) + } + } + names(dim(result_hcst)) <- names(dim(data$hcst$data)) + data$hcst$data <- result_hcst + } + + if (!is.null(data$fcst)) { + result_fcst <- c() + dim_var <- which(names(dim(data$fcst$data)) == "var") + for (var in var.list) { + data_tmp <- Subset(data$fcst$data, along = "var", indices = which(var.list == var), drop = FALSE) + data_threshold <- data_tmp + data_threshold[which(data_tmp >= thrs[[which(var.list == var)]][[1]] & data_tmp <= thrs[[which(var.list == var)]][[2]])] <- TRUE + data_threshold[which(data_tmp < thrs[[which(var.list == var)]][[1]] | data_tmp > thrs[[which(var.list == var)]][[2]])] <- FALSE + data_tmp[which(!data_threshold)] <- NA + if (return.values) { + result_fcst <- abind(result_fcst, data_tmp, along = dim_var) + } else { + result_fcst <- abind(result_fcst, data_threshold, along = dim_var) + } + } + names(dim(result_fcst)) <- names(dim(data$fcst$data)) + data$fcst$data <- result_fcst + } + + # result <- list(obs = result_obs, hcst = result_hcst, fcst = result_fcst) + return(data) +} diff --git a/modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R b/modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R new file mode 100644 index 00000000..b71270f1 --- /dev/null +++ b/modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R @@ -0,0 +1,647 @@ +#'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'The Standardization of the data is the last step of computing the SPEI +#'(Standarized Precipitation-Evapotranspiration Index). With this function the +#'data is fit to a probability distribution to transform the original values to +#'standardized units that are comparable in space and time and at different SPEI +#'time scales. +#' +#'Next, some specifications for the calculation of the standardization will be +#'discussed. If there are NAs in the data and they are not removed with the +#'parameter 'na.rm', the standardization cannot be carried out for those +#'coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. When NAs are not removed, if the length of the data for +#'a computational step is smaller than 4, there will not be enough data for +#'standardization and the result will be also filled with NAs for those coordinates. +#'About the distribution used to fit the data, there are only two possibilities: +#''log-logistic' and 'Gamma'. The 'Gamma' method works only when precipitation +#'is the sole variable provided, and all other variables are 0 because it is positive +#'defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +#'standardization is computed with cross validation. This function is built to +#'be compatible with other tools in that work with 's2dv_cube' object +#'class. The input data must be this object class. If you don't work with +#''s2dv_cube', see PeriodStandardization. For more information on the SPEI +#'indicator calculation, see CST_PeriodPET and CST_PeriodAccumulation. +#' +#'@param data An 's2dv_cube' that element 'data' stores a multidimensional +#' array containing the data to be standardized. 
+#'@param data_cor An 's2dv_cube' that element 'data' stores a multidimensional +#' array containing the data in which the standardization should be applied +#' using the fitting parameters from 'data'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param params An optional parameter that needs to be a multidimensional array +#' with named dimensions. This option overrides computation of fitting +#' parameters. It needs to be of same time dimensions (specified in 'time_dim' +#' and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length +#' of the coefficients needed for the used distribution (for 'Gamma' coef +#' dimension is of lenght 2, for 'log-Logistic' is of length 3). It also needs +#' to have a leadtime dimension (specified in 'leadtime_dim') of length 1. It +#' will only be used if 'data_cor' is not provided. +#'@param handle_infinity A logical value wether to return infinite values (TRUE) +#' or not (FALSE). When it is TRUE, the positive infinite values (negative +#' infinite) are substituted by the maximum (minimum) values of each +#' computation step, a subset of the array of dimensions time_dim, leadtime_dim +#' and memb_dim. +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). +#'@param return_params A logical value indicating wether to return parameters +#' array (TRUE) or not (FALSE). It is FALSE by default. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' standardization cannot be carried out for those coordinates and therefore, +#' the result will be filled with NA for the specific coordinates. If it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return An object of class \code{s2dv_cube} containing the standardized data. +#'If 'data_cor' is provided the array stored in element data will be of the same +#'dimensions as 'data_cor'. If 'data_cor' is not provided, the array stored in +#'element data will be of the same dimensions as 'data'. 
The parameters of the +#'standardization will only be returned if 'return_params' is TRUE, in this +#'case, the output will be a list of two objects one for the standardized data +#'and one for the parameters. +#' +#'@examples +#'dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) +#'data <- NULL +#'data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) +#'class(data) <- 's2dv_cube' +#'SPEI <- CST_PeriodStandardization(data = data) +#'@export +CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + ref_period = NULL, + handle_infinity = FALSE, + method = 'parametric', + distribution = 'log-Logistic', + params = NULL, return_params = FALSE, + na.rm = FALSE, ncores = NULL) { + # Check 's2dv_cube' + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of 's2dv_cube' class.") + } + if (!is.null(data_cor)) { + if (!inherits(data_cor, 's2dv_cube')) { + stop("Parameter 'data_cor' must be of 's2dv_cube' class.") + } + } + res <- PeriodStandardization(data = data$data, data_cor = data_cor$data, + dates = data$attrs$Dates, + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, + ref_period = ref_period, + handle_infinity = handle_infinity, method = method, + distribution = distribution, + params = params, return_params = return_params, + na.rm = na.rm, ncores = ncores) + if (return_params) { + std <- res$spei + params <- res$params + } else { + std <- res + } + + if (is.null(data_cor)) { + data$data <- std + data$attrs$Variable$varName <- paste0(data$attrs$Variable$varName, ' standardized') + if (return_params) { + return(list(spei = data, params = params)) + } else { + return(data) + } + } else { + data_cor$data <- std + data_cor$attrs$Variable$varName <- paste0(data_cor$attrs$Variable$varName, ' standardized') + data_cor$attrs$Datasets <- c(data_cor$attrs$Datasets, data$attrs$Datasets) + data_cor$attrs$source_files <- c(data_cor$attrs$source_files, data$attrs$source_files) + return(data_cor) + } +} + +#'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'The Standardization of the data is the last step of computing the SPEI +#'indicator. With this function the data is fit to a probability distribution to +#'transform the original values to standardized units that are comparable in +#'space and time and at different SPEI time scales. +#' +#'Next, some specifications for the calculation of the standardization will be +#'discussed. If there are NAs in the data and they are not removed with the +#'parameter 'na.rm', the standardization cannot be carried out for those +#'coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. When NAs are not removed, if the length of the data for +#'a computational step is smaller than 4, there will not be enough data for +#'standarize and the result will be also filled with NAs for that coordinates. +#'About the distribution used to fit the data, there are only two possibilities: +#''log-logistic' and 'Gamma'. The 'Gamma' method only works when only +#'precipitation is provided and other variables are 0 because it is positive +#'defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +#'standardization is computed with cross validation. For more information about +#'SPEI, see functions PeriodPET and PeriodAccumulation. 
+#'
+#'@param data A multidimensional array containing the data to be standardized.
+#'@param data_cor A multidimensional array containing the data on which the
+#'  standardization should be applied using the fitting parameters from 'data'.
+#'@param dates An array containing the dates of the data with the same time
+#'  dimensions as the data. It is optional and only necessary for using the
+#'  parameter 'ref_period' to select a reference period directly from dates.
+#'@param time_dim A character string indicating the name of the temporal
+#'  dimension. By default, it is set to 'syear'.
+#'@param leadtime_dim A character string indicating the name of the lead-time
+#'  dimension. By default, it is set to 'time'.
+#'@param memb_dim A character string indicating the name of the dimension in
+#'  which the ensemble members are stored. When it is set to NULL, the
+#'  computation is done for individual members.
+#'@param ref_period A list with two numeric values with the starting and end
+#'  points of the reference period used for computing the index. The default
+#'  value is NULL, indicating that the first and last values in data will be
+#'  used as starting and end points.
+#'@param params An optional parameter that needs to be a multidimensional array
+#'  with named dimensions. This option overrides the computation of fitting
+#'  parameters. It needs to have the same time dimensions (specified in
+#'  'time_dim' and 'leadtime_dim') as 'data' and a dimension named 'coef' with
+#'  the length of the coefficients needed for the used distribution (for
+#'  'Gamma' the 'coef' dimension is of length 2, for 'log-Logistic' it is of
+#'  length 3). It also needs to have a leadtime dimension (specified in
+#'  'leadtime_dim') of length 1. It will only be used if 'data_cor' is not
+#'  provided.
+#'@param handle_infinity A logical value indicating whether to return infinite
+#'  values (TRUE) or not (FALSE). When it is TRUE, the positive infinite values
+#'  (negative infinite) are substituted by the maximum (minimum) values of each
+#'  computation step, a subset of the array of dimensions time_dim,
+#'  leadtime_dim and memb_dim.
+#'@param method A character string indicating the standardization method used.
+#'  It can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by
+#'  default.
+#'@param distribution A character string indicating the name of the
+#'  distribution function to be used for computing the SPEI. The accepted names
+#'  are: 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default.
+#'  The 'Gamma' method only works when only precipitation is provided and the
+#'  other variables are 0 because it is positive defined (SPI indicator).
+#'@param return_params A logical value indicating whether to return the
+#'  parameters array (TRUE) or not (FALSE). It is FALSE by default.
+#'@param na.rm A logical value indicating whether NA values should be removed
+#'  from data. It is FALSE by default. If it is FALSE and there are NA values,
+#'  standardization cannot be carried out for those coordinates and therefore,
+#'  the result will be filled with NA for the specific coordinates. If it is
+#'  TRUE and the data from the dimensions other than time_dim and leadtime_dim
+#'  do not reach 4 values, there are not enough values to estimate the
+#'  parameters and the result will include NA.
+#'@param ncores An integer value indicating the number of cores to use in
+#'  parallel computation.
+#'
+#'@return A multidimensional array containing the standardized data.
+#'If 'data_cor' is provided the array will be of the same dimensions as +#''data_cor'. If 'data_cor' is not provided, the array will be of the same +#'dimensions as 'data'. The parameters of the standardization will only be +#'returned if 'return_params' is TRUE, in this case, the output will be a list +#'of two objects one for the standardized data and one for the parameters. +#' +#'@examples +#'dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) +#'dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) +#'data <- array(rnorm(600, -194.5, 64.8), dim = dims) +#'datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) +#' +#'SPEI <- PeriodStandardization(data = data) +#'SPEIcor <- PeriodStandardization(data = data, data_cor = datacor) +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom lmomco pwm.pp pwm.ub pwm2lmom are.lmom.valid parglo pargam parpe3 +#'@importFrom lmom cdfglo cdfgam cdfpe3 pelglo pelgam pelpe3 +#'@importFrom SPEI parglo.maxlik +#'@importFrom stats qnorm sd window +#'@export +PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', + ref_period = NULL, handle_infinity = FALSE, + method = 'parametric', + distribution = 'log-Logistic', + params = NULL, return_params = FALSE, + na.rm = FALSE, ncores = NULL) { + # Check inputs + ## data + if (!is.array(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have dimension names.") + } + ## data_cor + if (!is.null(data_cor)) { + if (!is.array(data_cor)) { + stop("Parameter 'data_cor' must be a numeric array.") + } + if (is.null(names(dim(data_cor)))) { + stop("Parameter 'data_cor' must have dimension names.") + } + } + ## dates + if (!is.null(dates)) { + if (!any(inherits(dates, 'Date'), inherits(dates, 'POSIXct'))) { + stop("Parameter 'dates' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates)) | !leadtime_dim %in% names(dim(dates))) { + stop("Parameter 'dates' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } + if (dim(data)[c(time_dim)] != dim(dates)[c(time_dim)]) { + stop("Parameter 'dates' needs to have the same length of 'time_dim' ", + "as 'data'.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!time_dim %in% names(dim(data_cor))) { + stop("Parameter 'time_dim' is not found in 'data_cor' dimension.") + } + } + ## leadtime_dim + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!leadtime_dim %in% names(dim(data))) { + stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!leadtime_dim %in% names(dim(data_cor))) { + stop("Parameter 'leadtime_dim' is not found in 'data_cor' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) != 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!memb_dim %in% names(dim(data_cor))) { + stop("Parameter 'memb_dim' is not found in 'data_cor' dimension.") + 
} + } + ## data_cor (2) + if (!is.null(data_cor)) { + if (dim(data)[leadtime_dim] != dim(data_cor)[leadtime_dim]) { + stop("Parameter 'data' and 'data_cor' have dimension 'leadtime_dim' ", + "of different length.") + } + } + ## ref_period + if (!is.null(ref_period)) { + years_dates <- format(dates, "%Y") + if (is.null(dates)) { + warning("Parameter 'dates' is not provided so 'ref_period' can't be ", + "used.") + ref_period <- NULL + } else if (length(ref_period) != 2) { + warning("Parameter 'ref_period' must be of length two indicating the ", + "first and end years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (!all(sapply(ref_period, is.numeric))) { + warning("Parameter 'ref_period' must be a numeric vector indicating the ", + "'start' and 'end' years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (ref_period[[1]] > ref_period[[2]]) { + warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", + "will not be used.") + ref_period <- NULL + } else if (!all(unlist(ref_period) %in% years_dates)) { + warning("Parameter 'ref_period' contains years outside the dates. ", + "It will not be used.") + ref_period <- NULL + } else { + years <- format(Subset(dates, along = leadtime_dim, indices = 1), "%Y") + ref_period[[1]] <- which(ref_period[[1]] == years) + ref_period[[2]] <- which(ref_period[[2]] == years) + } + } + ## handle_infinity + if (!is.logical(handle_infinity)) { + stop("Parameter 'handle_infinity' must be a logical value.") + } + ## method + if (!(method %in% c('parametric', 'non-parametric'))) { + stop("Parameter 'method' must be a character string containing one of ", + "the following methods: 'parametric' or 'non-parametric'.") + } + ## distribution + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop("Parameter 'distribution' must be a character string containing one ", + "of the following distributions: 'log-Logistic', 'Gamma' or ", + "'PearsonIII'.") + } + ## params + if (!is.null(params)) { + if (!is.numeric(params)) { + stop("Parameter 'params' must be numeric.") + } + if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { + stop("Parameter 'params' must be a multidimensional array with named ", + "dimensions: '", time_dim, "', '", leadtime_dim, "' and 'coef'.") + } + dims_data <- dim(data)[-which(names(dim(data)) == memb_dim)] + dims_params <- dim(params)[-which(names(dim(params)) == 'coef')] + if (!all(dims_data == dims_params)) { + stop("Parameter 'data' and 'params' must have same common dimensions ", + "except 'memb_dim' and 'coef'.") + } + + if (distribution == "Gamma") { + if (dim(params)['coef'] != 2) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 2.") + } + } else { + if (dim(params)['coef'] != 3) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 3.") + } + } + } + ## return_params + if (!is.logical(return_params)) { + stop("Parameter 'return_params' must be logical.") + } + ## na.rm + if (!is.logical(na.rm)) { + stop("Parameter 'na.rm' must be logical.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + if (is.null(ref_period)) { + ref_start <- NULL + ref_end <- NULL + } else { + ref_start <- ref_period[[1]] + ref_end <- ref_period[[2]] + } + + # Standardization + if 
(is.null(data_cor)) { + if (is.null(params)) { + res <- Apply(data = list(data), + target_dims = c(leadtime_dim, time_dim, memb_dim), + fun = .standardization, data_cor = NULL, params = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } else { + res <- Apply(data = list(data = data, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(leadtime_dim, time_dim, 'coef')), + fun = .standardization, data_cor = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } + } else { + res <- Apply(data = list(data = data, data_cor = data_cor), + target_dims = c(leadtime_dim, time_dim, memb_dim), + fun = .standardization, params = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } + if (return_params) { + spei <- res$spei + params <- res$params + } else { + spei <- res$output1 + } + + if (is.null(data_cor)) { + pos <- match(names(dim(data)), names(dim(spei))) + spei <- aperm(spei, pos) + } else { + pos <- match(names(dim(data_cor)), names(dim(spei))) + spei <- aperm(spei, pos) + } + + if (return_params) { + pos <- match(c(names(dim(spei))[-which(names(dim(spei)) == memb_dim)], 'coef'), + names(dim(params))) + params <- aperm(params, pos) + return(list('spei' = spei, 'params' = params)) + } else { + return(spei) + } +} + +.standardization <- function(data, data_cor = NULL, params = NULL, + leadtime_dim = 'time', time_dim = 'syear', + ref_start = NULL, ref_end = NULL, handle_infinity = FALSE, + method = 'parametric', distribution = 'log-Logistic', + return_params = FALSE, na.rm = FALSE) { + # data (data_cor): [leadtime_dim, time_dim, memb_dim] + dims <- dim(data)[-1] + fit = 'ub-pwm' + + coef = switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) + + if (is.null(data_cor)) { + # cross_val = TRUE + spei_mod <- data*NA + if (return_params) { + params_result <- array(dim = c(dim(data)[-length(dim(data))], coef = length(coef))) + } + for (ff in 1:dim(data)[leadtime_dim]) { + data2 <- data[ff, , ] + dim(data2) <- dims + if (method == 'non-parametric') { + bp <- matrix(0, length(data2), 1) + for (i in 1:length(data2)) { + bp[i,1] = sum(data2[] <= data2[i], na.rm = na.rm); # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data2) + 0.12)) + dim(std_index) <- dims + spei_mod[ff, , ] <- std_index + } else { + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- window(data2, ref_start, ref_end) + } else { + data_fit <- data2 + } + for (nsd in 1:dim(data)[time_dim]) { + if (is.null(params)) { + acu <- as.vector(data_fit[-nsd, ]) + if (na.rm) { + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) + } + f_params <- NA + if (!any(is.na(acu_sorted)) & 
length(acu_sorted) != 0) {
+            acu_sd <- sd(acu_sorted)
+            if (!is.na(acu_sd) & acu_sd != 0) {
+              if (distribution != "log-Logistic") {
+                acu_sorted <- acu_sorted[acu_sorted > 0]
+              }
+              if (length(acu_sorted) >= 4) {
+                f_params <- .std(data = acu_sorted, fit = fit,
+                                 distribution = distribution)
+              }
+            }
+          }
+        } else {
+          f_params <- params[ff, nsd, ]
+        }
+        if (all(is.na(f_params))) {
+          cdf_res <- NA
+        } else {
+          f_params <- f_params[which(!is.na(f_params))]
+          cdf_res = switch(distribution,
+                           "log-Logistic" = lmom::cdfglo(data2, f_params),
+                           "Gamma" = lmom::cdfgam(data2, f_params),
+                           "PearsonIII" = lmom::cdfpe3(data2, f_params))
+        }
+        std_index_cv <- array(qnorm(cdf_res), dim = dims)
+        spei_mod[ff, nsd, ] <- std_index_cv[nsd, ]
+        if (return_params) params_result[ff, nsd, ] <- f_params
+      }
+    }
+  }
+} else {
+  # cross_val = FALSE
+  spei_mod <- data_cor*NA
+  dimscor <- dim(data_cor)[-1]
+  if (return_params) {
+    params_result <- array(dim = c(dim(data_cor)[-length(dim(data_cor))], coef = length(coef)))
+  }
+  for (ff in 1:dim(data)[leadtime_dim]) {
+    data_cor2 <- data_cor[ff, , ]
+    dim(data_cor2) <- dimscor
+    if (method == 'non-parametric') {
+      bp <- matrix(0, length(data_cor2), 1)
+      for (i in 1:length(data_cor2)) {
+        bp[i,1] = sum(data_cor2[] <= data_cor2[i], na.rm = na.rm); # Writes the rank of the data
+      }
+      std_index <- qnorm((bp - 0.44)/(length(data_cor2) + 0.12))
+      dim(std_index) <- dimscor
+      spei_mod[ff, , ] <- std_index
+    } else {
+      data2 <- data[ff, , ]
+      dim(data2) <- dims
+      if (!is.null(ref_start) && !is.null(ref_end)) {
+        data_fit <- window(data2, ref_start, ref_end)
+      } else {
+        data_fit <- data2
+      }
+      acu <- as.vector(data_fit)
+      if (na.rm) {
+        acu_sorted <- sort.default(acu, method = "quick")
+      } else {
+        acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE)
+      }
+      if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) {
+        acu_sd <- sd(acu_sorted)
+        if (!is.na(acu_sd) & acu_sd != 0) {
+          if (distribution != "log-Logistic") {
+            acu_sorted <- acu_sorted[acu_sorted > 0]
+          }
+          # Initialize to NA so a failed or skipped fit (fewer than 4 values)
+          # does not reuse a stale f_params from a previous iteration.
+          f_params <- NA
+          if (length(acu_sorted) >= 4) {
+            f_params <- .std(data = acu_sorted, fit = fit,
+                             distribution = distribution)
+          }
+          if (all(is.na(f_params))) {
+            cdf_res <- NA
+          } else {
+            f_params <- f_params[which(!is.na(f_params))]
+            cdf_res = switch(distribution,
+                             "log-Logistic" = lmom::cdfglo(data_cor2, f_params),
+                             "Gamma" = lmom::cdfgam(data_cor2, f_params),
+                             "PearsonIII" = lmom::cdfpe3(data_cor2, f_params))
+          }
+          std_index_cv <- array(qnorm(cdf_res), dim = dimscor)
+          spei_mod[ff, , ] <- std_index_cv
+          if (return_params) params_result[ff, , ] <- f_params
+        }
+      }
+    }
+  }
+}
+if (handle_infinity) {
+  # could also use "param_error" ?; we are giving it the min/max value of the grid point
+  spei_mod[is.infinite(spei_mod) & spei_mod < 0] <- min(spei_mod[!is.infinite(spei_mod)])
+  spei_mod[is.infinite(spei_mod) & spei_mod > 0] <- max(spei_mod[!is.infinite(spei_mod)])
+}
+if (return_params) {
+  return(list(spei = spei_mod, params = params_result))
+} else {
+  return(spei_mod)
+}
+}
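+
+# Illustrative aside (toy numbers, not executed): the non-parametric branch
+# above maps ranks to standard normal quantiles with the Gringorten plotting
+# position (rank - 0.44) / (n + 0.12), e.g. for a vector x:
+#   x <- c(10, 25, 3, 48, 31, 17)
+#   r <- sapply(x, function(v) sum(x <= v))       # rank of each value
+#   qnorm((r - 0.44) / (length(x) + 0.12))        # standardized index
+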
+.std <- function(data, fit = 'pp-pwm', distribution = 'log-Logistic') {
+  pwm = switch(fit,
+               'pp-pwm' = lmomco::pwm.pp(data, -0.35, 0, nmom = 3),
+               lmomco::pwm.ub(data, nmom = 3)
+               # TLMoments::PWM(data, order = 0:2)
+               )
+  lmom <- lmomco::pwm2lmom(pwm)
+  if (!any(!lmomco::are.lmom.valid(lmom), anyNA(lmom[[1]]), any(is.nan(lmom[[1]])))) {
+    fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3])
+    params_result = switch(distribution,
+                           'log-Logistic' = tryCatch(lmom::pelglo(fortran_vec),
+                                                     error = function(e){lmomco::parglo(lmom)$para}),
+                           'Gamma' = tryCatch(lmom::pelgam(fortran_vec),
+                                              error = function(e){lmomco::pargam(lmom)$para}),
+                           'PearsonIII' = tryCatch(lmom::pelpe3(fortran_vec),
+                                                   error = function(e){lmomco::parpe3(lmom)$para}))
+    if (distribution == 'log-Logistic' && fit == 'max-lik') {
+      params_result = SPEI::parglo.maxlik(data, params_result)$para
+    }
+    return(params_result)
+  } else {
+    return(NA)
+  }
+}
\ No newline at end of file
diff --git a/modules/Indices/R/tmp/EOF.R b/modules/Indices/R/tmp/EOF.R
new file mode 100644
index 00000000..a55b3a09
--- /dev/null
+++ b/modules/Indices/R/tmp/EOF.R
@@ -0,0 +1,293 @@
+#'Area-weighted empirical orthogonal function analysis using SVD
+#'
+#'Perform an area-weighted EOF analysis using singular value decomposition
+#'(SVD) based on a covariance matrix or a correlation matrix if parameter
+#''corr' is set to TRUE.
+#'
+#'@param ano A numerical array of anomalies with named dimensions to calculate
+#'  EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs
+#'  could exist but they should be consistent along time_dim. That is, if one
+#'  grid point has NAs, all the time steps at this point should be NAs.
+#'@param lat A vector of the latitudes of 'ano'.
+#'@param lon A vector of the longitudes of 'ano'.
+#'@param time_dim A character string indicating the name of the time dimension
+#'  of 'ano'. The default value is 'sdate'.
+#'@param space_dim A vector of two character strings. The first is the
+#'  dimension name of latitude of 'ano' and the second is the dimension name of
+#'  longitude of 'ano'. The default value is c('lat', 'lon').
+#'@param neofs A positive integer of the modes to be kept. The default value is
+#'  15. If the time length or the product of the lengths of space_dim is
+#'  smaller than neofs, neofs will be changed to the minimum of the three
+#'  values.
+#'@param corr A logical value indicating whether to base on a correlation
+#'  (TRUE) or on a covariance matrix (FALSE). The default value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A list containing:
+#'\item{EOFs}{
+#'  An array of EOF patterns normalized to 1 (unitless) with dimensions
+#'  (number of modes, rest of the dimensions of 'ano' except 'time_dim').
+#'  Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed
+#'  field.
+#'}
+#'\item{PCs}{
+#'  An array of principal components with the units of the original field to
+#'  the power of 2, with dimensions (time_dim, number of modes, rest of the
+#'  dimensions of 'ano' except 'space_dim').
+#'  'PCs' already contains the percentage of explained variance so, to
+#'  reconstruct the original field, it is only needed to multiply 'EOFs' by
+#'  'PCs'.
+#'}
+#'\item{var}{
+#'  An array of the percentage (%) of the total variance explained by each
+#'  mode. The dimensions are (number of modes, rest of the dimensions of 'ano'
+#'  except 'time_dim' and 'space_dim').
+#'}
+#'\item{mask}{
+#'  An array of the mask with dimensions (space_dim, rest of the dimensions of
+#'  'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that
+#'  'ano' has value and NA for the positions that 'ano' has NA. It is used to
+#'  replace NAs with 0s for EOF calculation and mask the result with NAs again
+#'  after the calculation.
+#'}
+#'\item{wght}{
+#'  An array of the area weighting with dimensions 'space_dim'. It is
+#'  calculated from the cosine of 'lat' and used to compute the fraction of
+#'  variance explained by each EOF.
+#'}
+#'\item{tot_var}{
+#'  A number or a numeric array of the total variance explained by all the
+#'  modes. The dimensions are the same as 'ano' except 'time_dim' and
+#'  'space_dim'.
+#'}
+#'
+#'@seealso ProjectField, NAO, PlotBoxWhisker
+#'@examples
+#'# This example computes the EOFs along forecast horizons and plots the one
+#'# that explains the greatest amount of variability. The example data has low
+#'# resolution so the result may not be explanatory, but it displays how to
+#'# use this function.
+#'\dontshow{
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     leadtimemin = 1,
+#'                                     leadtimemax = 4,
+#'                                     output = 'lonlat',
+#'                                     latmin = 27, latmax = 48,
+#'                                     lonmin = -12, lonmax = 40)
+#'}
+#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs)
+#'tmp <- MeanDims(ano$exp, c('dataset', 'member'))
+#'ano <- tmp[1, , ,]
+#'names(dim(ano)) <- names(dim(tmp))[-2]
+#'eof <- EOF(ano, sampleData$lat, sampleData$lon)
+#'\dontrun{
+#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat)
+#'}
+#'
+#'@import multiApply
+#'@importFrom stats sd
+#'@export
+EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'),
+                neofs = 15, corr = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## ano
+  if (is.null(ano)) {
+    stop("Parameter 'ano' cannot be NULL.")
+  }
+  if (!is.numeric(ano)) {
+    stop("Parameter 'ano' must be a numeric array.")
+  }
+  if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) {
+    stop("Parameter 'ano' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(ano))) {
+    stop("Parameter 'time_dim' is not found in 'ano' dimension.")
+  }
+  ## space_dim
+  if (!is.character(space_dim) | length(space_dim) != 2) {
+    stop("Parameter 'space_dim' must be a character vector of 2.")
+  }
+  if (any(!space_dim %in% names(dim(ano)))) {
+    stop("Parameter 'space_dim' is not found in 'ano' dimension.")
+  }
+  ## lat
+  if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) {
+    stop(paste0("Parameter 'lat' must be a numeric vector with the same ",
+                "length as the latitude dimension of 'ano'."))
+  }
+  if (any(lat > 90 | lat < -90)) {
+    stop("Parameter 'lat' must contain values within the range [-90, 90].")
+  }
+  ## lon
+  if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) {
+    stop(paste0("Parameter 'lon' must be a numeric vector with the same ",
+                "length as the longitude dimension of 'ano'."))
+  }
+  if (any(lon > 360 | lon < -360)) {
+    .warning("Some 'lon' is out of the range [-360, 360].")
+  }
+  ## neofs
+  if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) {
+    stop("Parameter 'neofs' must be a positive integer.")
+  }
+  ## corr
+  if (!is.logical(corr) | length(corr) > 1) {
+    stop("Parameter 'corr' must be one logical value.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ###############################
+  # Calculate EOF
+
+  # # Replace mask of NAs with 0s for EOF analysis.
+  # ano[!is.finite(ano)] <- 0
+
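+  # Aside (illustrative values, not executed): with lat = c(0, 60), the
+  # sqrt(cos) factor computed below is c(1, 0.707), so the covariance
+  # contribution of the 60-degree row is halved relative to the equator:
+  #   sqrt(cos(c(0, 60) * pi / 180))
+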
+  # Area weighting. Weights for EOF; needed to compute the
+  # fraction of variance explained by each EOF
+  space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a))
+  wght <- array(cos(lat * pi/180), dim = dim(ano)[space_ind])
+
+  # We want the covariance matrix to be weighted by the grid
+  # cell area so the anomaly field is weighted by its square
+  # root since the covariance matrix equals transpose(ano)
+  # times ano.
+  wght <- sqrt(wght)
+
+  # neofs is bounded
+  if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) {
+    neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)
+    .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ",
+                    "the length of time_dim, the product of the lengths of space_dim, ",
+                    "and neofs."))
+  }
+
+  res <- Apply(ano,
+               target_dims = c(time_dim, space_dim),
+               output_dims = list(EOFs = c('mode', space_dim),
+                                  PCs = c(time_dim, 'mode'),
+                                  var = 'mode',
+                                  tot_var = NULL,
+                                  mask = space_dim),
+               fun = .EOF,
+               corr = corr, neofs = neofs,
+               wght = wght,
+               ncores = ncores)
+
+  return(c(res, wght = list(wght)))
+
+}
+
+.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) {
+  # ano: [time, lat, lon]
+
+  # Dimensions
+  nt <- dim(ano)[1]
+  ny <- dim(ano)[2]
+  nx <- dim(ano)[3]
+
+  # Check if all the time steps at one grid point are NA-consistent.
+  # The grid point should have all NAs or no NA along time dim.
+  if (anyNA(ano)) {
+    ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon]
+    na_ind <- which(is.na(ano_latlon), arr.ind = T)
+    if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) {
+      stop("Detected grid points that have NAs but are not consistent across the ",
+           "time dimension. If a grid point has NA, it should have NA at all time steps.")
+    }
+  }
+
+  # Build the mask
+  mask <- ano[1, , ]
+  mask[!is.finite(mask)] <- NA
+  mask[is.finite(mask)] <- 1
+  dim(mask) <- c(ny, nx)
+
+  # Replace mask of NAs with 0s for EOF analysis.
+  ano[!is.finite(ano)] <- 0
+
+  ano <- ano * InsertDim(wght, 1, nt)
+
+  # The use of the correlation matrix is done under the option corr.
+  if (corr == TRUE) {
+    stdv <- apply(ano, c(2, 3), sd, na.rm = T)
+    ano <- ano/InsertDim(stdv, 1, nt)
+  }
+
+  # Time/space matrix for SVD
+  dim(ano) <- c(nt, ny * nx)
+  dim.dat <- dim(ano)
+
+  # 'transpose' means the array needs to be transposed before
+  # calling La.svd for computational efficiency because the
+  # spatial dimension is larger than the time dimension. This
+  # goes with transposing the outputs of La.svd also.
+  if (dim.dat[2] > dim.dat[1]) {
+    transpose <- TRUE
+  } else {
+    transpose <- FALSE
+  }
+  if (transpose) {
+    pca <- La.svd(t(ano))
+  } else {
+    pca <- La.svd(ano)
+  }
+
+  # La.svd conventions: decomposition X = U D t(V). La.svd$u returns U,
+  # La.svd$d returns the diagonal values of D, and La.svd$vt returns t(V).
+  # !! The usual convention is PC = U and EOF = V.
+  # If La.svd is called for ano (transpose = FALSE case): EOFs: $vt, PCs: $u.
+  # If La.svd is called for t(ano) (transpose = TRUE case): EOFs: t($u), PCs: t($vt).
+
+  if (transpose) {
+    pca.EOFs <- t(pca$u)
+    pca.PCs <- t(pca$vt)
+  } else {
+    pca.EOFs <- pca$vt
+    pca.PCs <- pca$u
+  }
+
+  # The number of modes kept is limited to neofs
+  PC <- pca.PCs[, 1:neofs]
+  EOF <- pca.EOFs[1:neofs, ]
+  dim(EOF) <- c(neofs, ny, nx)
+
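+  # Aside (illustrative, not executed): La.svd decomposes X = U D t(V) and
+  # returns u, d and vt, e.g.
+  #   m <- matrix(rnorm(12), 3, 4); s <- La.svd(m)
+  #   max(abs(m - s$u %*% diag(s$d) %*% s$vt))  # ~0 up to rounding
+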
+  # To sort out crash when neofs = 1.
+  if (neofs == 1) {
+    PC <- InsertDim(PC, 2, 1, name = 'new')
+  }
+
+  # Computation of the % of variance associated with each mode
+  W <- pca$d[1:neofs]
+  tot.var <- sum(pca$d^2)
+  var.eof <- 100 * pca$d[1:neofs]^2/tot.var
+
+  for (e in 1:neofs) {
+    # Set all masked grid points to NA in the EOFs
+    # Divide patterns by area weights so that EOF * PC gives unweighted (original) data
+    EOF[e, , ] <- EOF[e, , ] * mask / wght
+    # PC is multiplied by the explained variance,
+    # so that the reconstruction is only EOF * PC
+    PC[, e] <- PC[, e] * W[e]
+  }
+
+  if (neofs == 1) {
+    var.eof <- as.array(var.eof)
+  }
+
+  return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask)))
+}
+
diff --git a/modules/Indices/R/tmp/GetProbs.R b/modules/Indices/R/tmp/GetProbs.R
new file mode 100644
index 00000000..983d9665
--- /dev/null
+++ b/modules/Indices/R/tmp/GetProbs.R
@@ -0,0 +1,346 @@
+#'Compute probabilistic forecasts or the corresponding observations
+#'
+#'Compute probabilistic forecasts from an ensemble based on the relative
+#'thresholds, or the probabilistic observations (i.e., which probabilistic
+#'category was observed). A reference period can be specified to calculate the
+#'absolute thresholds between each probabilistic category. The absolute
+#'thresholds can be computed in cross-validation mode. If data is an ensemble,
+#'the probabilities are calculated as the percentage of members that fall into
+#'each category. For observations (or forecasts without a member dimension), 1
+#'means that the event happened, while 0 indicates that the event did not
+#'happen. Weighted probabilities can be computed if the weights are provided
+#'for each ensemble member and time step. The absolute thresholds can also be
+#'provided directly for the probabilities calculation.
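+#'
+#'As an illustrative sketch of the observation case described above (toy
+#'sizes and made-up values): without a member dimension, each category is
+#'either 0 or 1 at every time step.
+#'\preformatted{
+#'obs <- array(rnorm(20), dim = c(sdate = 20))
+#'GetProbs(data = obs, time_dim = 'sdate', memb_dim = NULL)
+#'}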
+#'
+#'@param data A named numerical array of the forecasts or observations with, at
+#'  least, time dimension.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension to compute the probabilities of the forecast, or NULL if there is
+#'  no member dimension (e.g., for observations, or for forecasts with only one
+#'  ensemble member). The default value is 'member'.
+#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to
+#'  1) between the categories. The default value is c(1/3, 2/3), which
+#'  corresponds to tercile equiprobable categories.
+#'@param abs_thresholds A numeric array or vector of the absolute thresholds in
+#'  the same units as \code{data}. If an array is provided, it should have at
+#'  least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different
+#'  thresholds for different locations, i.e. lon and lat dimensions), they
+#'  should match the dimensions of \code{data}, except the member dimension
+#'  which should not be included. The default value is NULL and, in this case,
+#'  'prob_thresholds' is used for calculating the probabilities.
+#'@param bin_dim_abs A character string of the dimension name of the
+#'  'abs_thresholds' array in which category limits are stored. It will also be
+#'  the probabilistic category dimension name in the output. The default value
+#'  is 'bin'.
+#'@param indices_for_quantiles A vector of the indices to be taken along
+#'  'time_dim' for computing the absolute thresholds between the probabilistic
+#'  categories. If NULL (default), the whole period is used. It is only used
+#'  when 'prob_thresholds' is provided.
+#'@param weights A named numerical array of the weights for 'data' with
+#'  dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default
+#'  value is NULL. The ensemble should have at least 70 members or span at
+#'  least 10 time steps and have more than 45 members if consistency between
+#'  the weighted and unweighted methodologies is desired.
+#'@param cross.val A logical indicating whether to compute the thresholds
+#'  between probabilistic categories in cross-validation mode. The default
+#'  value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest of
+#'the dimensions of 'data' except 'memb_dim'). The 'bin' dimension has the
+#'length of the probabilistic categories, i.e., \code{length(prob_thresholds) + 1}.
+#'
+#'@examples
+#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4))
+#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble',
+#'                indices_for_quantiles = 4:17)
+#'
+#'# abs_thresholds is provided
+#'abs_thr1 <- c(-0.2, 0.3)
+#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20))
+#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble',
+#'                 prob_thresholds = NULL, abs_thresholds = abs_thr1)
+#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble',
+#'                 prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat')
+#'
+#'@import multiApply
+#'@importFrom easyVerification convert2prob
+#'@export
+GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member',
+                     indices_for_quantiles = NULL,
+                     prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL,
+                     bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## data
+  if (is.null(data)) {
+    stop("Parameter 'data' cannot be NULL.")
+  }
+  if (!is.numeric(data)) {
+    stop("Parameter 'data' must be a numeric array.")
+  }
+  if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) {
+    stop("Parameter 'data' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1) {
+    stop('Parameter "time_dim" must be a character string.')
+  }
+  if (!time_dim %in% names(dim(data))) {
+    stop("Parameter 'time_dim' is not found in 'data' dimensions.")
+  }
+  ## memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim(data))) {
+      stop("Parameter 'memb_dim' is not found in 'data' dimensions. If no member ",
+           "dimension exists, set it as NULL.")
+    }
+  }
+  ## bin_dim_abs
+  if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) {
+    stop('Parameter "bin_dim_abs" must be a character string.')
+  }
+  ## prob_thresholds, abs_thresholds
+  if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) {
+    .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. 
", + "Only the first one is used.")) + abs_thresholds <- NULL + } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { + stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") + } + if (!is.null(prob_thresholds)) { + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- 1:dim(data)[time_dim] + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + + } else { # abs_thresholds + + if (is.null(dim(abs_thresholds))) { # a vector + dim(abs_thresholds) <- length(abs_thresholds) + names(dim(abs_thresholds)) <- bin_dim_abs + } + # bin_dim_abs + if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { + stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") + } + if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { + stop("Parameter abs_thresholds' cannot have member dimension.") + } + dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] + if (any(!dim_name_abs %in% names(dim(data)))) { + stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") + } else { + if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { + stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") + } + } + if (!is.null(indices_for_quantiles)) { + warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") + } + abs_target_dims <- bin_dim_abs + if (time_dim %in% names(dim(abs_thresholds))) { + abs_target_dims <- c(bin_dim_abs, time_dim) + } + + } + + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop("Parameter 'weights' must be a named numeric array.") + + # if (is.null(dat_dim)) { + if (!is.null(memb_dim)) { + lendim_weights <- 2 + namesdim_weights <- c(time_dim, memb_dim) + } else { + lendim_weights <- 1 + namesdim_weights <- c(time_dim) + } + if (length(dim(weights)) != lendim_weights | + any(!names(dim(weights)) %in% namesdim_weights)) { + stop(paste0("Parameter 'weights' must have dimension ", + paste0(namesdim_weights, collapse = ' and '), ".")) + } + if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { + stop(paste0("Parameter 'weights' must have the same dimension length as ", + paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.")) + } + weights <- Reorder(weights, namesdim_weights) + + # } else { + # if (length(dim(weights)) != 3 | any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) + # stop("Parameter 'weights' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") + # if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | + # dim(weights)[time_dim] != dim(exp)[time_dim] | + # dim(weights)[dat_dim] != dim(exp)[dat_dim]) { + # stop(paste0("Parameter 'weights' must have the same dimension lengths ", + # "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) + # } + # 
weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) + # } + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + if (is.null(abs_thresholds)) { + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + } else { + res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), + target_dims = list(c(time_dim, memb_dim), abs_target_dims), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = NULL, + indices_for_quantiles = NULL, + weights = NULL, cross.val = FALSE, ncores = ncores)$output1 + } + + return(res) +} + +.GetProbs <- function(data, indices_for_quantiles, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + weights = NULL, cross.val = FALSE) { + # .GetProbs() is used in RPS, RPSS, ROCSS + # data + ## if data is exp: [sdate, memb] + ## if data is obs: [sdate, (memb)] + # weights: [sdate, (memb)], same as data + # if abs_thresholds is not NULL: [bin, (sdate)] + + # Add dim [memb = 1] to data if it doesn't have memb_dim + if (length(dim(data)) == 1) { + dim(data) <- c(dim(data), 1) + if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) + } + + # Calculate absolute thresholds + if (is.null(abs_thresholds)) { + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in 1:dim(data)[1]) { + if (is.null(weights)) { + quantiles[, i_time] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ], + weights[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + } + + } else { + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]), + dim = c(bin = length(quantiles), dim(data)[1])) + } + + } else { # abs_thresholds provided + quantiles <- abs_thresholds + if (length(dim(quantiles)) == 1) { + quantiles <- InsertDim(quantiles, len = dim(data)[1], + pos = 2, name = names(dim(data))[1]) + } + } + # quantiles: [bin-1, sdate] + + # Probabilities + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] + for (i_time in 1:dim(data)[1]) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = 
dim(quantiles)[1] + 1) + } else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[, i_time])) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in 1:dim(quantiles)[1]) { + # for thresholds falling under the distribution + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 + } else { + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, + quantiles[i_quant, i_time], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs[, i_time], 1) - append(0, integrated_probs[, i_time]) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) + } + } + } + } + + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) +} + + + diff --git a/modules/Indices/R/tmp/NAO.R b/modules/Indices/R/tmp/NAO.R new file mode 100644 index 00000000..ab2c1811 --- /dev/null +++ b/modules/Indices/R/tmp/NAO.R @@ -0,0 +1,574 @@ +#'Compute the North Atlantic Oscillation (NAO) Index +#' +#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF +#'of the sea level pressure (SLP) anomalies over the north Atlantic region +#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and +#'observed anomalies onto the observed EOF pattern or the forecast +#'anomalies onto the EOF pattern of the other years of the forecast. +#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month +#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns +#'cross-validated PCs of the NAO index for hindcast (exp) and observations +#'(obs) based on the leading EOF pattern, or, if forecast (exp_cor) is provided, +#'the NAO index for forecast and the corresponding data (exp and obs). +#' +#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' hindcast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of observational data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of experimental data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. 
+#'@param exp_cor A named numeric array of the North Atlantic SLP (20N-80N,
+#'  80W-40E) forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()}
+#'  with dimension 'time_dim' of length 1 (as in the case of an operational
+#'  forecast), 'memb_dim', 'ftime_dim', and 'space_dim' at least.
+#'  If only the NAO of the reference period needs to be computed, this
+#'  parameter can be left to NULL. The default value is NULL.
+#'@param lat A vector of the latitudes of 'exp' and 'obs'.
+#'@param lon A vector of the longitudes of 'exp' and 'obs'.
+#'@param time_dim A character string indicating the name of the time dimension
+#'  of 'exp' and 'obs'. The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length
+#'  must be 1. The default value is 'member'.
+#'@param space_dim A vector of two character strings. The first is the
+#'  dimension name of latitude of 'exp' and 'obs' and the second is the
+#'  dimension name of longitude of 'exp' and 'obs'. The default value is
+#'  c('lat', 'lon').
+#'@param ftime_dim A character string indicating the name of the forecast time
+#'  dimension of 'exp' and 'obs'. The default value is 'ftime'.
+#'@param ftime_avg A numeric vector of the forecast time steps to average
+#'  across the target period. If no average is needed, set it to NULL. The
+#'  default value is 2:4, i.e., from the 2nd to the 4th forecast time steps.
+#'@param obsproj A logical value indicating whether to compute the NAO index by
+#'  projecting the forecast anomalies onto the leading EOF of the observational
+#'  reference (TRUE, default) or to compute the NAO by first computing the
+#'  leading EOF of the forecast anomalies (in cross-validation mode, i.e.,
+#'  leaving the evaluated year out), then projecting the forecast anomalies
+#'  onto this EOF (FALSE). If 'exp_cor' is provided, 'obs' will be used when
+#'  obsproj is TRUE and 'exp' will be used when obsproj is FALSE, and no
+#'  cross-validation is applied.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A list which contains some of the following items depending on the data inputs:
+#'\item{exp}{
+#'  A numeric array of the hindcast NAO index in verification format with the
+#'  same dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is
+#'  NULL, ftime_dim remains.
+#'  }
+#'\item{obs}{
+#'  A numeric array of the observation NAO index in verification format with
+#'  the same dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg
+#'  is NULL, ftime_dim remains.
+#'}
+#'\item{exp_cor}{
+#'  A numeric array of the forecast NAO index in verification format with the
+#'  same dimensions as 'exp_cor' except space_dim and ftime_dim. If ftime_avg
+#'  is NULL, ftime_dim remains.
+#'  }
+#'
+#'@references
+#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of
+#'  multi-model seasonal forecasts of the wintertime North Atlantic
+#'  Oscillation. Climate Dynamics, 21, 501-514.
+#' DOI: 10.1007/s00382-003-0350-4 +#' +#'@examples +#'# Make up synthetic data +#'set.seed(1) +#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'set.seed(2) +#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'lat <- seq(20, 80, length.out = 6) +#'lon <- seq(-80, 40, length.out = 9) +#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) +#' +#'exp_cor <- array(rnorm(540), dim = c(member = 2, sdate = 1, ftime = 5, lat = 6, lon = 9)) +#'nao <- NAO(exp = exp, obs = obs, exp_cor = exp_cor, lat = lat, lon = lon, obsproj = TRUE) +#'# plot the NAO index +#' \dontrun{ +#'nao$exp <- Reorder(nao$exp, c(2, 1)) +#'nao$obs <- Reorder(nao$obs, c(2, 1)) +#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", +#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X") +#' } +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, lon, time_dim = 'sdate', + memb_dim = 'member', space_dim = c('lat', 'lon'), + ftime_dim = 'ftime', ftime_avg = 2:4, + obsproj = TRUE, ncores = NULL) { + # Check inputs + ## exp, obs, and exp_cor (1) + if (is.null(obs) & is.null(exp)) { + stop("Parameter 'exp' and 'obs' cannot both be NULL.") + } + if (!is.null(exp)) { + if (!is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (is.null(dim(exp))) { + stop(paste0("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.")) + } + if(any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { + stop("Parameter 'exp' must have dimension names.") + } + } + if (!is.null(obs)) { + if (!is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (is.null(dim(obs))) { + stop(paste0("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.")) + } + if(any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'obs' must have dimension names.") + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(exp_cor)) { + stop("Parameter 'exp_cor' must be a numeric array.") + } + if (is.null(dim(exp_cor))) { + stop(paste0("Parameter 'exp_cor' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.")) + } + if (any(is.null(names(dim(exp_cor)))) | any(nchar(names(dim(exp_cor))) == 0)) { + stop("Parameter 'exp_cor' must have dimension names.") + } + if (is.null(exp) || is.null(obs)) { + stop("Parameters 'exp' and 'obs' are required when 'exp_cor' is not provided.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' dimension.") + } + } + if (!is.null(obs)) { + if (!time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!time_dim %in% names(dim(exp_cor))) { + stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.") + } + if (dim(exp_cor)[time_dim] > 1) { + stop("Parameter 'exp_cor' is expected to have length 1 in ", + time_dim, "dimension.") + } + } + + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") 
+ } + } + add_member_back <- FALSE + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } + } + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (any(!space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (any(!space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (any(!space_dim %in% names(dim(exp_cor)))) { + stop("Parameter 'space_dim' is not found in 'exp_cor' dimensions.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!ftime_dim %in% names(dim(exp_cor))) { + stop("Parameter 'ftime_dim' is not found in 'exp_cor' dimensions.") + } + } + ## exp and obs (2) + #TODO: Add checks for exp_cor + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'memb_dim'.") + } + } + ## ftime_avg + if (!is.null(ftime_avg)) { + if (!is.vector(ftime_avg) | !is.numeric(ftime_avg)) { + stop("Parameter 'ftime_avg' must be an integer vector.") + } + if (!is.null(exp)) { + if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(obs)) { + if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(exp_cor)) { + if (max(ftime_avg) > dim(exp_cor)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + } + ## sdate >= 2 + if (!is.null(exp)) { + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } else { + if (dim(obs)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } + ## lat and lon + if (!is.null(exp)) { + if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.")) + } + if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with 
the same ", + "length as the longitude dimension of 'exp' and 'obs'.")) + } + } + if (!is.null(obs)) { + if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.")) + } + if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.")) + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(lat) | length(lat) != dim(exp_cor)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp_cor'.")) + } + if (!is.numeric(lon) | length(lon) != dim(exp_cor)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp_cor'.")) + } + } + stop_needed <- FALSE + if (max(lat) > 80 | min(lat) < 20) { + stop_needed <- TRUE + } + #NOTE: different from s2dverification + # lon is not used in the calculation actually. EOF only uses lat to do the + # weight. So we just need to ensure the data is in this region, regardless + # the order. + if (any(lon < 0)) { #[-180, 180] + if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { + stop_needed <- TRUE + } + } else { #[0, 360] + if (any(lon >= 50 & lon <= 270)) { + stop_needed <- TRUE + } else { + lon_E <- lon[which(lon < 50)] + lon_W <- lon[-which(lon < 50)] + if (max(lon_E) < 30 | min(lon_W) > 290) { + stop_needed <- TRUE + } + } + } + if (stop_needed) { + stop(paste0("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.")) + } + ## obsproj + if (!is.logical(obsproj) | length(obsproj) > 1) { + stop("Parameter 'obsproj' must be either TRUE or FALSE.") + } + if (obsproj) { + if (is.null(obs)) { + stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") + } + if (is.null(exp) & is.null(exp_cor)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' nor 'exp_cor' provided.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores == 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Average ftime + if (!is.null(ftime_avg)) { + if (!is.null(exp)) { + exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) + exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) + ## Cross-validated PCs. Fabian. This should be extended to + ## nmod and nlt by simple loops. 
Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + if (!is.null(exp_cor)) { + exp_cor_sub <- ClimProjDiags::Subset(exp_cor, ftime_dim, ftime_avg, drop = FALSE) + exp_cor <- MeanDims(exp_cor_sub, ftime_dim, na.rm = TRUE) + } + } + + # wght + wght <- array(sqrt(cos(lat * pi/180)), dim = c(length(lat), length(lon))) + if (is.null(exp_cor)) { + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + } else { # exp_cor provided + res <- Apply(list(exp = exp, obs = obs, exp_cor = exp_cor), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim), + exp_cor = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, wght, obsproj = TRUE, + add_member_back = FALSE) { + # exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # exp_cor: [memb, sdate = 1, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + if (!is.null(exp_cor)) { + ntime_exp_cor <- dim(exp_cor)[2] # should be 1 + nmemb_exp_cor <- dim(exp_cor)[1] + } + + if (!is.null(obs)) nao_obs <- array(NA, dim = ntime) + if (!is.null(exp)) nao_exp <- array(NA, dim = c(ntime, nmemb_exp)) + if (!is.null(exp_cor)) { + nao_exp_cor <- array(NA, dim = c(ntime_exp_cor, nmemb_exp_cor)) + #NOTE: The dimensions are flipped to fill in data correctly. Need to flip it back later. + } + + if (is.null(exp_cor)) { + + for (tt in 1:ntime) { # cross-validation + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[c(1:ntime)[-tt], , , drop = FALSE] + EOF_obs <- .EOF(obs_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode = 1, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. 
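+ ## Note on the cross-validation above: the EOF is fitted on all start
+ ## dates except 'tt', but the projection is applied to the full 'obs'
+ ## series and only the PC of the held-out year is stored below, so each
+ ## NAO value is independent of the year it describes.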
+ nao_obs[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, c(1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + EOF_exp <- .EOF(exp_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + + ## Correct polarity of pattern + ##NOTE: different from s2dverification, which doesn't use mean(). + # if (0 < EOF_exp[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_exp[1, , ], wght = wght) # [sdate, memb] + nao_exp[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + nao_exp[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + } else { # exp_cor provided + + ## Calculate observation EOF. Without cross-validation + EOF_obs <- .EOF(obs, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies + PF <- .ProjectField(obs, eof_mode = EOF_obs, wght = wght) # [mode = 1, sdate] + nao_obs[] <- PF[1, ] + + if (!obsproj) { + # Calculate EOF_exp + tmp <- array(exp, dim = c(nmemb_exp * ntime, nlat, nlon)) + EOF_exp <- .EOF(tmp, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + eof_mode_input <- EOF_exp[1, , ] + } else { + eof_mode_input <- EOF_obs[1, , ] + } + + # Calculate NAO_exp + for (imemb in 1:dim(exp)[1]) { + exp_sub <- ClimProjDiags::Subset(exp, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp[ , imemb] <- PF + } + + # Calculate NAO_exp_cor + for (imemb in 1:dim(exp_cor)[1]) { + exp_sub <- ClimProjDiags::Subset(exp_cor, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp_cor[, imemb] <- PF + } + + } + # add_member_back + if (add_member_back) { + memb_dim_name <- ifelse(!is.null(names(dim(exp))[1]), names(dim(exp))[1], 'member') + nao_obs <- InsertDim(nao_obs, 2, 1, name = memb_dim_name) + } + + # Return results + if (is.null(exp_cor)) { + res <- NULL + if (!is.null(exp)) { + res <- c(res, list(exp = nao_exp)) + } + if (!is.null(obs)) { + res <- c(res, list(obs = nao_obs)) + } + return(res) + + } else { + return(list(exp = nao_exp, obs = nao_obs, exp_cor = nao_exp_cor)) + } +} + diff --git a/modules/Indices/R/tmp/ProjectField.R b/modules/Indices/R/tmp/ProjectField.R new file mode 100644 index 00000000..f4b8903a --- /dev/null +++ b/modules/Indices/R/tmp/ProjectField.R @@ -0,0 +1,272 @@ +#'Project anomalies onto modes of variability +#' +#'Project anomalies onto modes of variability to get the temporal evolution of +#'the EOF mode selected. It returns principal components (PCs) by area-weighted +#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +#'\code{REOF()} or \code{EuroAtlanticTC()}). 
The calculation removes NA and +#'returns NA if the whole spatial pattern is NA. +#' +#'@param ano A numerical array of anomalies with named dimensions. The +#' dimensions must have at least 'time_dim' and 'space_dim'. It can be +#' generated by Ano(). +#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated +#' by EOF() or REOF(). +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param mode An integer of the variability mode number in the EOF to be +#' projected on. The default value is NULL, which means all the modes of 'eof' +#' is calculated. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numerical array of the principal components in the verification +#' format. The dimensions are the same as 'ano' except 'space_dim'. +#' +#'@seealso EOF, NAO, PlotBoxWhisker +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) +#' +#'\dontrun{ +#' # Plot the forecast and the observation of the first mode for the last year +#' # of forecast +#' sdate_dim_length <- dim(mode1_obs)['sdate'] +#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), +#' lwd = 2) +#' for (i in 1:dim(mode1_exp)['member']) { +#' par(new = TRUE) +#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], +#' ylim = c(-15000, 15000)) +#' } +#'} +#' +#'@import multiApply +#'@export +ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), + mode = NULL, ncores = NULL) { + + # Check inputs + ## ano (1) + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## eof (1) + if (is.null(eof)) { + stop("Parameter 'eof' cannot be NULL.") + } + if (!is.list(eof)) { + stop("Parameter 'eof' must be a list generated by EOF() or REOF().") + } + if ('EOFs' %in% names(eof)) { + EOFs <- "EOFs" + } else if ('REOFs' %in% names(eof)) { + EOFs <- "REOFs" + } else if ('patterns' %in% names(eof)) { + EOFs <- "patterns" + } else { + stop(paste0("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().")) + } + if (!'wght' %in% names(eof)) { + stop(paste0("Parameter 'eof' must be a list that contains 'wght'. 
", + "It can be generated by EOF() or REOF().")) + } + if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") + } + if (!is.numeric(eof$wght) || !is.array(eof$wght)) { + stop("The component 'wght' of parameter 'eof' must be a numeric array.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (any(!space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## ano (2) + if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { + stop(paste0("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.")) + } + ## eof (2) + if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | + !'mode' %in% names(dim(eof[[EOFs]]))) { + stop(paste0("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.")) + } + if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { + stop(paste0("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.")) + } + ## mode + if (!is.null(mode)) { + if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { + stop("Parameter 'mode' must be NULL or a positive integer.") + } + if (mode > dim(eof[[EOFs]])['mode']) { + stop(paste0("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.")) + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + #------------------------------------------------------- + + # Keep the chosen mode + if (!is.null(mode)) { + eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') + } else { + eof_mode <- eof[[EOFs]] + } + + if ('mode' %in% names(dim(eof_mode))) { + dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] + } else { + dimnames_without_mode <- names(dim(eof_mode)) + } + + if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + + } else { + + if (!all(dimnames_without_mode %in% names(dim(ano)))) { + stop(paste0("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. Check if 'ano' and 'eof' are compatible.")) + } + + common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] + if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != + dim(eof_mode)[dimnames_without_mode])) { + stop(paste0("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", + "with different length. 
Check if 'ano' and 'eof' are compatible.")) + } + + # Enlarge eof/ano is needed. The margin_dims of Apply() must be consistent + # between ano and eof. + additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] + additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] + if (length(additional_dims) != 0) { + for (i in 1:length(additional_dims)) { + eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), + lendim = additional_dims[i], name = names(additional_dims)[i]) + } + } + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + } + + return(res) +} + + +.ProjectField <- function(ano, eof_mode, wght) { + # ano: [sdate, lat, lon] + # eof_mode: [lat, lon] or [mode, lat, lon] + # wght: [lat, lon] + + ntime <- dim(ano)[1] + if (length(dim(eof_mode)) == 2) { # mode != NULL + # Initialization of pc.ver. + pc.ver <- array(NA, dim = ntime) #[sdate] + + # Weight + e.1 <- eof_mode * wght + ano <- ano * InsertDim(wght, 1, ntime) + #ano <- aaply(ano, 1, '*', wght) # much heavier + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + #na <- apply(ano, 1, mean, na.rm = TRUE) # much heavier + tmp <- ano * InsertDim(e.1, 1, ntime) # [sdate, lat, lon] + rm(ano) + #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE) # much heavier + pc.ver <- rowSums(tmp, na.rm = TRUE) + pc.ver[which(is.na(na))] <- NA + + } else { # mode = NULL + # Weight + e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1]) + dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3])) # [mode, lat*lon] + ano <- ano * InsertDim(wght, 1, ntime) + dim(ano) <- c(ntime, prod(dim(ano)[2:3])) # [sdate, lat*lon] + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1)) + + # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate] + # Result: [mode, sdate] + pc.ver <- e.1 %*% t(ano) + pc.ver[which(is.na(na))] <- NA + + # # Change back dimensions to feet original input + # dim(projection) <- c(moredims, mode = unname(neofs)) + # return(projection) + } + + return(pc.ver) +} + + diff --git a/modules/Indices/R/tmp/Utils.R b/modules/Indices/R/tmp/Utils.R new file mode 100644 index 00000000..6e9b56b4 --- /dev/null +++ b/modules/Indices/R/tmp/Utils.R @@ -0,0 +1,1779 @@ +#'@importFrom abind abind +#'@import plyr ncdf4 +#'@importFrom grDevices png jpeg pdf svg bmp tiff +#'@importFrom easyVerification convert2prob + +## Function to tell if a regexpr() match is a complete match to a specified name +.IsFullMatch <- function(x, name) { + ifelse(x > 0 && attributes(x)$match.length == nchar(name), TRUE, FALSE) +} + +.ConfigReplaceVariablesInString <- function(string, replace_values, allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. 
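+ # A minimal usage sketch, with hypothetical tag values:
+ # replace_values <- list(VAR_NAME = 'tas', START_DATE = '19931101')
+ # .ConfigReplaceVariablesInString("$VAR_NAME$_$START_DATE$.nc", replace_values)
+ # # returns "tas_19931101.nc"
+ # Values are resolved recursively (a value may itself contain $TAG$
+ # references); an unknown tag raises an error unless
+ # allow_undefined_key_vars = TRUE, in which case "$TAG$" is kept as is.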
+ if (length(strsplit(string, "\\$")[[1]]) > 1) { + parts <- strsplit(string, "\\$")[[1]] + output <- "" + i <- 0 + for (part in parts) { + if (i %% 2 == 0) { + output <- paste(output, part, sep = "") + } else { + if (part %in% names(replace_values)) { + output <- paste(output, .ConfigReplaceVariablesInString(replace_values[[part]], replace_values, allow_undefined_key_vars), sep = "") + } else if (allow_undefined_key_vars) { + output <- paste0(output, "$", part, "$") + } else { + stop(paste('Error: The variable $', part, '$ was not defined in the configuration file.', sep = '')) + } + } + i <- i + 1 + } + output + } else { + string + } +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') +} + +.t2nlatlon <- function(t) { + ## As seen in cdo's griddes.c: ntr2nlat() + nlats <- (t * 3 + 1) / 2 + if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) { + nlats <- ceiling(nlats) + } else { + nlats <- round(nlats) + } + if (nlats %% 2 > 0) { + nlats <- nlats + 1 + } + ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF + nlons <- 2 * nlats + keep_going <- TRUE + while (keep_going) { + n <- nlons + if (n %% 8 == 0) n <- trunc(n / 8) + while (n %% 6 == 0) n <- trunc(n / 6) + while (n %% 5 == 0) n <- trunc(n / 5) + while (n %% 4 == 0) n <- trunc(n / 4) + while (n %% 3 == 0) n <- trunc(n / 3) + if (n %% 2 == 0) n <- trunc(n / 2) + if (n <= 8) { + keep_going <- FALSE + } else { + nlons <- nlons + 2 + if (nlons > 9999) { + stop("Error: pick another gaussian grid truncation. It doesn't fulfill the standards to apply FFT.") + } + } + } + c(nlats, nlons) +} + +.nlat2t <- function(nlats) { + trunc((nlats * 2 - 1) / 3) +} + +.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) { + # The purpose, working modes, inputs and outputs of this function are + # explained in ?LoadDataFile + #suppressPackageStartupMessages({library(ncdf4)}) + #suppressPackageStartupMessages({library(bigmemory)}) + #suppressPackageStartupMessages({library(plyr)}) + # Auxiliar function to convert array indices to lineal indices + arrayIndex2VectorIndex <- function(indices, dims) { + if (length(indices) > length(dims)) { + stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.") + } + position <- 1 + dims <- rev(dims) + indices <- rev(indices) + for (i in 1:length(indices)) { + position <- position + (indices[i] - 1) * prod(dims[-c(1:i)]) + } + position + } + + found_file <- NULL + dims <- NULL + grid_name <- units <- var_long_name <- NULL + is_2d_var <- array_across_gw <- NULL + data_across_gw <- NULL + + filename <- work_piece[['filename']] + namevar <- work_piece[['namevar']] + output <- work_piece[['output']] + # The names of all data files in the directory of the repository that match + # the pattern are obtained. + if (length(grep("^http", filename)) > 0) { + is_url <- TRUE + files <- filename + ## TODO: Check that the user is not using shell globbing exps. + } else { + is_url <- FALSE + files <- Sys.glob(filename) + } + + # If we don't find any, we leave the flag 'found_file' with a NULL value. + if (length(files) > 0) { + # The first file that matches the pattern is chosen and read. 
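+ # For example, a hypothetical pattern such as
+ # "/esarchive/exp/*/tas/tas_19931101*.nc" may expand to several paths;
+ # only the first match is read below, so patterns should be specific
+ # enough to identify a single file per work piece.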
+ filename <- head(files, 1) + filein <- filename + found_file <- filename + mask <- work_piece[['mask']] + + if (!silent) { + if (explore_dims) { + .message(paste("Exploring dimensions...", filename)) + } + ##} else { + ## cat(paste("* Reading & processing data...", filename, '\n')) + ##} + } + + # We will fill in 'expected_dims' with the names of the expected dimensions of + # the data array we'll retrieve from the file. + expected_dims <- NULL + remap_needed <- FALSE + # But first we open the file and work out whether the requested variable is 2d + fnc <- nc_open(filein) + if (!(namevar %in% names(fnc$var))) { + stop(paste("Error: The variable", namevar, "is not defined in the file", filename)) + } + var_long_name <- fnc$var[[namevar]]$longname + units <- fnc$var[[namevar]]$units + file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of + # 'longitude'/'latitude'. + if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) && + (work_piece[['dimnames']][['lon']] == 'longitude') && + ('lon' %in% file_dimnames)) { + work_piece[['dimnames']][['lon']] <- 'lon' + } + if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) && + (work_piece[['dimnames']][['lat']] == 'latitude') && + ('lat' %in% file_dimnames)) { + work_piece[['dimnames']][['lat']] <- 'lat' + } + if (is.null(work_piece[['is_2d_var']])) { + is_2d_var <- all(c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) %in% + unlist(lapply(fnc$var[[namevar]][['dim']], + '[[', 'name'))) + } else { + is_2d_var <- work_piece[['is_2d_var']] + } + if ((is_2d_var || work_piece[['is_file_per_dataset']])) { + if (Sys.which("cdo")[[1]] == "") { + stop("Error: CDO libraries not available") + } + + cdo_version <- strsplit(suppressWarnings(system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] + + cdo_version <- as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) + + } + # If the variable to load is 2-d, we need to determine whether: + # - interpolation is needed + # - subsetting is requested + if (is_2d_var) { + ## We read the longitudes and latitudes from the file. + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + first_lon_in_original_file <- lon[1] + # If a common grid is requested or we are exploring the file dimensions + # we need to read the grid type and size of the file to finally work out the + # CDO grid name. 
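+ # Sketch of the naming convention derived below, for the two supported
+ # grid types (sizes are illustrative):
+ # lonlat, 256 lons x 128 lats -> "r256x128"
+ # gaussian, 128 lats -> paste0("t", .nlat2t(128), "grid"), i.e. "t85grid"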
+ if (!is.null(work_piece[['grid']]) || explore_dims) { + # Here we read the grid type and its number of longitudes and latitudes + file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) + grids_positions <- grep('# gridID', file_info) + if (length(grids_positions) < 1) { + stop("The grid should be defined in the files.") + } + grids_first_lines <- grids_positions + 2 + grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) + grids_info <- as.list(1:length(grids_positions)) + grids_info <- lapply(grids_info, function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) + grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) + grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) + grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) + grids_matches <- unlist(lapply(grids_info, function (x) { + nlons <- if (length(grep('xsize', x)) > 0) { + as.numeric(x[grep('xsize', x) + 1]) + } else { + NA + } + nlats <- if (length(grep('ysize', x)) > 0) { + as.numeric(x[grep('ysize', x) + 1]) + } else { + NA + } + result <- FALSE + if (!anyNA(c(nlons, nlats))) { + if ((nlons == length(lon)) && + (nlats == length(lat))) { + result <- TRUE + } + } + result + })) + grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] + if (length(grids_matches) == 0) { + stop("Error: Only 'gaussian' and 'lonlat' grids supported. See e.g: cdo sinfo ", filename) + } + if (sum(grids_matches) > 1) { + if ((all(grids_types[which(grids_matches)] == 'gaussian') || + all(grids_types[which(grids_matches)] == 'lonlat')) && + all(unlist(lapply(grids_info[which(grids_matches)], identical, + grids_info[which(grids_matches)][[1]])))) { + grid_type <- grids_types[which(grids_matches)][1] + } else { + stop("Error: Load() can't disambiguate: More than one lonlat/gaussian grids with the same size as the requested variable defined in ", filename) + } + } else if (sum(grids_matches) == 1) { + grid_type <- grids_types[which(grids_matches)] + } else { + stop("Unexpected error.") + } + grid_lons <- length(lon) + grid_lats <- length(lat) + # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() + if (grid_type == 'lonlat') { + grid_name <- paste0('r', grid_lons, 'x', grid_lats) + } else { + grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') + } + if (is.null(work_piece[['grid']])) { + .warning(paste0("Detect the grid type to be '", grid_name, "'. ", + "If it is not expected, assign parameter 'grid' to avoid wrong result.")) + } + } + # If a common grid is requested, we will also calculate its size which we will use + # later on. 
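+ # E.g. (illustrative values): grid = "t106grid" is gaussian, and
+ # .t2nlatlon(106) returns c(160, 320), i.e. 160 latitudes and 320
+ # longitudes; grid = "r360x180" is lonlat with 360 lons and 180 lats.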
+ if (!is.null(work_piece[['grid']])) { + # Now we calculate the common grid type and its lons and lats + if (length(grep('^t\\d{1,+}grid$', work_piece[['grid']])) > 0) { + common_grid_type <- 'gaussian' + common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + nlonlat <- .t2nlatlon(common_grid_res) + common_grid_lats <- nlonlat[1] + common_grid_lons <- nlonlat[2] + } else if (length(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']])) > 0) { + common_grid_type <- 'lonlat' + common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) + } else { + stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") + } + } else { + ## If no 'grid' is specified, there is no common grid. + ## But these variables are filled in for consistency in the code. + common_grid_lons <- length(lon) + common_grid_lats <- length(lat) + } + first_common_grid_lon <- 0 + last_common_grid_lon <- 360 - 360/common_grid_lons + ## This is not true for gaussian grids or for some regular grids, but + ## is a safe estimation + first_common_grid_lat <- -90 + last_common_grid_lat <- 90 + # And finally determine whether interpolation is needed or not + remove_shift <- FALSE + if (!is.null(work_piece[['grid']])) { + if ((grid_lons != common_grid_lons) || + (grid_lats != common_grid_lats) || + (grid_type != common_grid_type) || + (lon[1] != first_common_grid_lon)) { + if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { + remove_shift <- TRUE + } + remap_needed <- TRUE + common_grid_name <- work_piece[['grid']] + } + } else if ((lon[1] != first_common_grid_lon) && explore_dims && + !work_piece[['single_dataset']]) { + remap_needed <- TRUE + common_grid_name <- grid_name + remove_shift <- TRUE + } + if (remap_needed && (work_piece[['remap']] == 'con') && + (cdo_version >= as.numeric_version('1.7.0'))) { + work_piece[['remap']] <- 'ycon' + } + if (remove_shift && !explore_dims) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + .warning(paste0("The dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], "' doesn't start at longitude 0 and will be re-interpolated in order to align its longitudes with the standard CDO grids definable with the names 'tgrid' or 'rx', which are by definition starting at the longitude 0.\n")) + if (!is.null(mask)) { + .warning(paste0("A mask was provided for the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], "'. This dataset has been re-interpolated to align its longitudes to start at 0. You must re-interpolate the corresponding mask to align its longitudes to start at 0 as well, if you haven't done so yet. Running cdo remapcon,", common_grid_name, " original_mask_file.nc new_mask_file.nc will fix it.\n")) + } + } + if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + if (!explore_dims) { + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is originally on ", + "a grid coarser than the common grid and it has been ", + "extrapolated. Check the results carefully. 
It is ", + "recommended to specify as common grid the coarsest grid ", + "among all requested datasets via the parameter 'grid'.\n")) + } + } + # Now calculate if the user requests for a lonlat subset or for the + # entire field + lonmin <- work_piece[['lon_limits']][1] + lonmax <- work_piece[['lon_limits']][2] + latmin <- work_piece[['lat_limits']][1] + latmax <- work_piece[['lat_limits']][2] + lon_subsetting_requested <- FALSE + lonlat_subsetting_requested <- FALSE + if (lonmin <= lonmax) { + if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { + lon_subsetting_requested <- TRUE + } + } else { + if ((lonmin - lonmax) > 360/common_grid_lons) { + lon_subsetting_requested <- TRUE + } else { + gap_width <- floor(lonmin / (360/common_grid_lons)) - + floor(lonmax / (360/common_grid_lons)) + if (gap_width > 0) { + if (!(gap_width == 1 && (lonmin %% (360/common_grid_lons) == 0) && + (lonmax %% (360/common_grid_lons) == 0))) { + lon_subsetting_requested <- TRUE + } + } + } + } + if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) + || (lon_subsetting_requested)) { + lonlat_subsetting_requested <- TRUE + } + # Now that we know if subsetting was requested, we can say if final data + # will go across greenwich + if (lonmax < lonmin) { + data_across_gw <- TRUE + } else { + data_across_gw <- !lon_subsetting_requested + } + + # When remap is needed but no subsetting, the file is copied locally + # so that cdo works faster, and then interpolated. + # Otherwise the file is kept as is and the subset will have to be + # interpolated still. + if (!lonlat_subsetting_requested && remap_needed) { + nc_close(fnc) + filecopy <- tempfile(pattern = "load", fileext = ".nc") + file.copy(filein, filecopy) + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the + # underlying hdf5 library. + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " -selname,", namevar, " ", filecopy, " ", filein, + " 2>/dev/null", sep = "")) + file.remove(filecopy) + work_piece[['dimnames']][['lon']] <- 'lon' + work_piece[['dimnames']][['lat']] <- 'lat' + fnc <- nc_open(filein) + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + } + + # Read and check also the mask + if (!is.null(mask)) { + ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + if (is.list(mask)) { + if (!file.exists(mask[['path']])) { + stop(paste("Error: Couldn't find the mask file", mask[['path']])) + } + mask_file <- mask[['path']] + ###file.copy(work_piece[['mask']][['path']], mask_file) + fnc_mask <- nc_open(mask_file) + vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') + if ('nc_var_name' %in% names(mask)) { + if (!(mask[['nc_var_name']] %in% + vars_in_mask)) { + stop(paste("Error: couldn't find variable", mask[['nc_var_name']], + "in the mask file", mask[['path']])) + } + } else { + if (length(vars_in_mask) != 1) { + stop(paste("Error: one and only one non-coordinate variable should be defined in the mask file", + mask[['path']], "if the component 'nc_var_name' is not specified. 
Currently found: ", + paste(vars_in_mask, collapse = ', '), ".")) + } else { + mask[['nc_var_name']] <- vars_in_mask + } + } + if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { + stop(paste0("Error: the variable '", + mask[['nc_var_name']], + "' must be defined only over the dimensions '", + work_piece[['dimnames']][['lon']], "' and '", + work_piece[['dimnames']][['lat']], + "' in the mask file ", + mask[['path']])) + } + mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) + nc_close(fnc_mask) + ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) + ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) + ###} else { + ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) + ### nc_close(fnc_mask) + ### fnc_mask <- nc_open(mask_file) + ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') + ### mask_lon <- lon + ### mask_lat <- lat + ###} + ###} + ### Now ready to check that the mask is right + ##if (!(lonlat_subsetting_requested && remap_needed)) { + ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { + ### stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + ### } + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be identical to the ones in the corresponding data files if output = 'areave' or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in the mask file must start by 0 and the latitudes must be ordered from highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} + } + } + + lon_indices <- 1:length(lon) + if (!(lonlat_subsetting_requested && remap_needed)) { + lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 + } + if (lonmax >= lonmin) { + lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] + } else if (!remap_needed) { + lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] + } + lat_indices <- which(lat >= latmin & lat <= latmax) + ## In most of the cases the latitudes are ordered from -90 to 90. + ## We will reorder them to be in the order from 90 to -90, so mostly + ## always the latitudes are reordered. + ## TODO: This could be avoided in future. + if (lat[1] < lat[length(lat)]) { + lat_indices <- lat_indices[length(lat_indices):1] + } + if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { + if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { + stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. 
It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + } + mask <- mask[lon_indices, lat_indices] + } + ## If the user requests subsetting, we must extend the lon and lat limits if possible + ## so that the interpolation after is done properly + maximum_extra_points <- work_piece[['remapcells']] + if (lonlat_subsetting_requested && remap_needed) { + if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || + (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { + ## if the requested number of points goes beyond the left or right + ## sides of the map, we need to take the entire map so that the + ## interpolation works properly + lon_indices <- 1:length(lon) + } else { + extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) + if (extra_points > 0) { + lon_indices <- c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + } + extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) + if (extra_points > 0) { + lon_indices <- c(lon_indices, (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + } + } + min_lat_ind <- min(lat_indices) + max_lat_ind <- max(lat_indices) + extra_points <- min(maximum_extra_points, min_lat_ind - 1) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) + } else { + lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) + } + } + extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) + } else { + lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) + } + } + } + lon <- lon[lon_indices] + lat <- lat[lat_indices] + expected_dims <- c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) + } else { + lon <- 0 + lat <- 0 + } + # We keep on filling the expected dimensions + var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + nmemb <- nltime <- NULL + ## Sometimes CDO renames 'members' dimension to 'lev' + old_members_dimname <- NULL + if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { + old_members_dimname <- work_piece[['dimnames']][['member']] + work_piece[['dimnames']][['member']] <- 'lev' + } + if (work_piece[['dimnames']][['member']] %in% var_dimnames) { + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], var_dimnames)]]$len + expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) + } else { + nmemb <- 1 + } + if (length(expected_dims) > 0) { + dim_matches <- match(expected_dims, var_dimnames) + if (anyNA(dim_matches)) { + if (!is.null(old_members_dimname)) { + expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop(paste("Error: the expected dimension(s)", + paste(expected_dims[which(is.na(dim_matches))], collapse = ', '), + "were not found in", filename)) + } + time_dimname <- var_dimnames[-dim_matches] + } else { + time_dimname <- var_dimnames + } + if (length(time_dimname) > 0) { + if (length(time_dimname) == 1) { + nltime <- 
fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len + expected_dims <- c(expected_dims, time_dimname) + dim_matches <- match(expected_dims, var_dimnames) + } else { + if (!is.null(old_members_dimname)) { + expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop(paste("Error: the variable", namevar, + "is defined over more dimensions than the expected (", + paste(c(expected_dims, 'time'), collapse = ', '), + "). It could also be that the members, longitude or latitude dimensions are named incorrectly. In that case, either rename the dimensions in the file or adjust Load() to recognize the actual name with the parameter 'dimnames'. See file", filename)) + } + } else { + nltime <- 1 + } + + # Now we must retrieve the data from the file, but only the asked indices. + # So we build up the indices to retrieve. + # Longitudes or latitudes have been retrieved already. + if (explore_dims) { + # If we're exploring the file we only want one time step from one member, + # to regrid it and work out the number of longitudes and latitudes. + # We don't need more. + members <- 1 + ltimes_list <- list(c(1)) + } else { + # The data is arranged in the array 'tmp' with the dimensions in a + # common order: + # 1) Longitudes + # 2) Latitudes + # 3) Members (even if is not a file per member experiment) + # 4) Lead-times + if (work_piece[['is_file_per_dataset']]) { + time_indices <- 1:nltime + mons <- strsplit(system(paste('cdo showmon ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + years <- strsplit(system(paste('cdo showyear ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + mons <- as.numeric(mons[[1]][which(mons[[1]] != "")]) + years <- as.numeric(years[[1]][which(years[[1]] != "")]) + time_indices <- ts(time_indices, start = c(years[1], mons[1]), + end = c(years[length(years)], mons[length(mons)]), + frequency = 12) + ltimes_list <- list() + for (sdate in work_piece[['startdates']]) { + selected_time_indices <- window(time_indices, start = c(as.numeric( + substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))), + end = c(3000, 12), frequency = 12, extend = TRUE) + selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]] + ltimes_list <- c(ltimes_list, list(selected_time_indices)) + } + } else { + ltimes <- work_piece[['leadtimes']] + #if (work_piece[['dataset_type']] == 'exp') { + ltimes_list <- list(ltimes[which(ltimes <= nltime)]) + #} + } + ## TODO: Put, when reading matrices, this kind of warnings + # if (nmember < nmemb) { + # cat("Warning: + members <- 1:work_piece[['nmember']] + members <- members[which(members <= nmemb)] + } + + # Now, for each list of leadtimes to load (usually only one list with all leadtimes), + # we'll join the indices and retrieve data + found_disordered_dims <- FALSE + for (ltimes in ltimes_list) { + if (is_2d_var) { + start <- c(min(lon_indices), min(lat_indices)) + end <- c(max(lon_indices), max(lat_indices)) + if (lonlat_subsetting_requested && remap_needed) { + subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ncdf_dims <- list(dim_longitudes, dim_latitudes) + } else { + subset_indices <- list(lon_indices - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + ncdf_dims <- list() + } + final_dims <- c(length(subset_indices[[1]]), 
length(subset_indices[[2]]), 1, 1) + } else { + start <- end <- c() + subset_indices <- list() + ncdf_dims <- list() + final_dims <- c(1, 1, 1, 1) + } + + if (work_piece[['dimnames']][['member']] %in% expected_dims) { + start <- c(start, head(members, 1)) + end <- c(end, tail(members, 1)) + subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) + dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) + ncdf_dims <- c(ncdf_dims, list(dim_members)) + final_dims[3] <- length(members) + } + if (time_dimname %in% expected_dims) { + if (any(!is.na(ltimes))) { + start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) + end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) + subset_indices <- c(subset_indices, list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + } else { + start <- c(start, NA) + end <- c(end, NA) + subset_indices <- c(subset_indices, list(ltimes)) + } + dim_time <- ncdim_def(time_dimname, "", 1:length(ltimes), unlim = TRUE) + ncdf_dims <- c(ncdf_dims, list(dim_time)) + final_dims[4] <- length(ltimes) + } + count <- end - start + 1 + start <- start[dim_matches] + count <- count[dim_matches] + subset_indices <- subset_indices[dim_matches] + # Now that we have the indices to retrieve, we retrieve the data + if (prod(final_dims) > 0) { + tmp <- take(ncvar_get(fnc, namevar, start, count, + collapse_degen = FALSE), + 1:length(subset_indices), subset_indices) + # The data is regridded if it corresponds to an atmospheric variable. When + # the chosen output type is 'areave' the data is not regridded to not + # waste computing time unless the user specified a common grid. + if (is_2d_var) { + ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### start[dim_matches[1:2]], count[dim_matches[1:2]], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + if (lonlat_subsetting_requested && remap_needed) { + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") + ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], + fnc$var[[namevar]]$missval, + prec = if (fnc$var[[namevar]]$prec == 'int') { + 'integer' + } else { + fnc$var[[namevar]]$prec + }) + scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) + add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) + if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { + tmp <- (tmp - add_offset) / scale_factor + } + #nc_close(fnc) + fnc2 <- nc_create(filein2, list(ncdf_var)) + ncvar_put(fnc2, ncdf_var, tmp) + if (add_offset != 0) { + ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) + } + if (scale_factor != 1) { + ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) + } + nc_close(fnc2) + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { + "0,360," + } else { + paste0(lonmin, ",", lonmax, ",") + }, latmin, ",", latmax, + " -remap", work_piece[['remap']], ",", common_grid_name, + " ", filein2, " ", filein, " 2>/dev/null", sep = "")) + file.remove(filein2) + fnc2 <- nc_open(filein) + sub_lon <- ncvar_get(fnc2, 'lon') + sub_lat <- ncvar_get(fnc2, 'lat') + ## We read the longitudes and latitudes from the file. 
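+ ## Below, longitudes are wrapped to [0, 360) so that a subset crossing
+ ## Greenwich (e.g. lonmin = 340, lonmax = 20) can be selected as the
+ ## union of lon >= lonmin and lon <= lonmax.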
+ ## In principle cdo should put in order the longitudes + ## and slice them properly unless data is across greenwich + sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 + sub_lon_indices <- 1:length(sub_lon) + if (lonmax < lonmin) { + sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] + } + sub_lat_indices <- 1:length(sub_lat) + ## In principle cdo should put in order the latitudes + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + sub_lat_indices <- length(sub_lat):1 + } + final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) + subset_indices[[dim_matches[1]]] <- sub_lon_indices + subset_indices[[dim_matches[2]]] <- sub_lat_indices + + tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), + 1:length(subset_indices), subset_indices) + + if (!is.null(mask)) { + ## We create a very simple 2d netcdf file that is then interpolated to the common + ## grid to know what are the lons and lats of our slice of data + mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", c(-90, 90)) + ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) + nc_close(fnc_mask) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null", sep = "")) + fnc_mask <- nc_open(mask_file_remap) + mask_lons <- ncvar_get(fnc_mask, 'lon') + mask_lats <- ncvar_get(fnc_mask, 'lat') + nc_close(fnc_mask) + file.remove(mask_file, mask_file_remap) + if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) { + stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. 
For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + } + mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360 + if (lonmax >= lonmin) { + mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax)) + } else { + mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax)) + } + mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax)) + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + mask_lat_indices <- mask_lat_indices[length(mask_lat_indices):1] + } + mask <- mask[mask_lon_indices, mask_lat_indices] + } + sub_lon <- sub_lon[sub_lon_indices] + sub_lat <- sub_lat[sub_lat_indices] + ### nc_close(fnc_mask) + ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) { + ### "0,360," + ### } else { + ### paste0(lonmin, ",", lonmax, ",") + ### }, latmin, ",", latmax, + ### " -remap", work_piece[['remap']], ",", common_grid_name, + ###This is wrong: same files + ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = "")) + ### fnc_mask <- nc_open(mask_file) + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + } + } + if (!all(dim_matches == sort(dim_matches))) { + if (!found_disordered_dims && rev(work_piece[['indices']])[2] == 1 && rev(work_piece[['indices']])[3] == 1) { + found_disordered_dims <- TRUE + .warning(paste0("The dimensions for the variable ", namevar, " in the files of the experiment with index ", tail(work_piece[['indices']], 1), " are not in the optimal order for loading with Load(). The optimal order would be '", paste(expected_dims, collapse = ', '), "'. One of the files of the dataset is stored in ", filename)) + } + tmp <- aperm(tmp, dim_matches) + } + dim(tmp) <- final_dims + # If we are exploring the file we don't need to process and arrange + # the retrieved data. We only need to keep the dimension sizes. + if (is_2d_var && lonlat_subsetting_requested && remap_needed) { + final_lons <- sub_lon + final_lats <- sub_lat + } else { + final_lons <- lon + final_lats <- lat + } + if (explore_dims) { + if (work_piece[['is_file_per_member']]) { + ## TODO: When the exp_full_path contains asterisks and is file_per_member + ## members from different datasets may be accounted. + ## Also if one file member is missing the accounting will be wrong. + ## Should parse the file name and extract number of members. + if (is_url) { + nmemb <- NULL + } else { + nmemb <- length(files) + } + } + dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats) + } else { + # If we are not exploring, then we have to process the retrieved data + if (is_2d_var) { + tmp <- apply(tmp, c(3, 4), function(x) { + # Disable of large values. 
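+ # Values outside work_piece[['var_limits']] (e.g. a hypothetical
+ # c(0, 400) range for temperature in K) are set to NA, as are grid
+ # points where the mask, if provided, is below 0.5.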
+ if (!is.na(work_piece[['var_limits']][2])) { + x[which(x > work_piece[['var_limits']][2])] <- NA + } + if (!is.na(work_piece[['var_limits']][1])) { + x[which(x < work_piece[['var_limits']][1])] <- NA + } + if (!is.null(mask)) { + x[which(mask < 0.5)] <- NA + } + + if (output == 'areave' || output == 'lon') { + weights <- InsertDim(cos(final_lats * pi / 180), 1, length(final_lons), name = 'lon') + weights[which(is.na(x))] <- NA + if (output == 'areave') { + weights <- weights / mean(weights, na.rm = TRUE) + mean(x * weights, na.rm = TRUE) + } else { + weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, length(final_lats), name = 'lat') + MeanDims(x * weights, 2, na.rm = TRUE) + } + } else if (output == 'lat') { + MeanDims(x, 1, na.rm = TRUE) + } else if (output == 'lonlat') { + signif(x, 5) + } + }) + if (output == 'areave') { + dim(tmp) <- c(1, 1, final_dims[3:4]) + } else if (output == 'lon') { + dim(tmp) <- c(final_dims[1], 1, final_dims[3:4]) + } else if (output == 'lat') { + dim(tmp) <- c(1, final_dims[c(2, 3, 4)]) + } else if (output == 'lonlat') { + dim(tmp) <- final_dims + } + } + var_data <- attach.big.matrix(work_piece[['out_pointer']]) + if (work_piece[['dims']][['member']] > 1 && nmemb > 1 && + work_piece[['dims']][['ftime']] > 1 && + nltime < work_piece[['dims']][['ftime']]) { + work_piece[['indices']][2] <- work_piece[['indices']][2] - 1 + for (jmemb in members) { + work_piece[['indices']][2] <- work_piece[['indices']][2] + 1 + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1) + var_data[out_indices] <- as.vector(tmp[, , jmemb, ]) + } + work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1 + } else { + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp) - 1) + a <- aperm(tmp, c(1, 2, 4, 3)) + as.vector(a) + var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3))) + } + work_piece[['indices']][3] <- work_piece[['indices']][3] + 1 + } + } + } + nc_close(fnc) + if (is_2d_var) { + if (remap_needed) { + array_across_gw <- FALSE + file.remove(filein) + ###if (!is.null(mask) && lonlat_subsetting_requested) { + ### file.remove(mask_file) + ###} + } else { + if (first_lon_in_original_file < 0) { + array_across_gw <- data_across_gw + } else { + array_across_gw <- FALSE + } + } + } + } + if (explore_dims) { + list(dims = dims, is_2d_var = is_2d_var, grid = grid_name, + units = units, var_long_name = var_long_name, + data_across_gw = data_across_gw, array_across_gw = array_across_gw) + } else { + ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) { + ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) + ###} + if (!silent && !is.null(work_piece[['progress_amount']])) { + message(paste0(work_piece[['progress_amount']]), appendLF = FALSE) + } + found_file + } +} + +.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates, + nmember = NULL, nmemberobs = NULL, + nleadtime = NULL, leadtimemin = 1, + leadtimemax = NULL, storefreq = 'monthly', + sampleperiod = 1, lonmin = 0, lonmax = 360, + latmin = -90, latmax = 90, output = 'areave', + method = 'conservative', grid = NULL, + maskmod = vector("list", 15), + maskobs = vector("list", 15), + configfile = NULL, suffixexp = NULL, + suffixobs = NULL, varmin = NULL, varmax = NULL, + silent = FALSE, nprocs = NULL) { + ## This 
function loads and selects sample data stored in sampleMap and + ## sampleTimeSeries and is used in the examples instead of Load() so as + ## to avoid nco and cdo system calls and computation time in the stage + ## of running examples in the CHECK process on CRAN. + selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', + '20001101', '20051101')) + start_dates_position <- 3 + lead_times_position <- 4 + + if (output == 'lonlat') { + sampleData <- s2dv::sampleMap + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] + } + else if (output == 'areave') { + sampleData <- s2dv::sampleTimeSeries + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] + } + + dims_out <- dim(sampleData$mod) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$mod) <- dims_out + + dims_out <- dim(sampleData$obs) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$obs) <- dims_out + + invisible(list(mod = dataOut$mod, obs = dataOut$obs, + lat = dataOut$lat, lon = dataOut$lon)) +} + +.ConfigGetDatasetInfo <- function(matching_entries, table_name) { + # This function obtains the information of a dataset and variable pair, + # applying all the entries that match in the configuration file. + if (table_name == 'experiments') { + id <- 'EXP' + } else { + id <- 'OBS' + } + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), paste0('$DEFAULT_', id, '_FILE_PATH$'), '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + info <- NULL + + for (entry in matching_entries) { + if (is.null(info)) { + info <- entry[-1:-2] + info[which(info == '*')] <- defaults[which(info == '*')] + } else { + info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] + } + } + + info <- as.list(info) + names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') + info +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. 
The path with expressions and tags, e.g.:
+  #   /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc
+  # and a complete known path to one of the matching files, e.g.:
+  #   /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc
+  # and it returns the path pattern but without shell globbing expressions:
+  #   /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc
+  #
+  # To do that, it also needs as input the list of replace values (the
+  # association of each tag with its value).
+  #
+  # All the tags not present in the parameter tags_to_keep will be replaced.
+  #
+  # Not all cases can be resolved with the implemented algorithm. In an
+  # unsolvable case a warning is given and one possible guess is returned.
+  #
+  # In some cases it is interesting to replace only the expressions in the
+  # path to the file, but not the ones in the file name itself. To keep the
+  # expressions in the file name, the parameter permissive can be set to
+  # TRUE. To replace all the expressions it can be set to FALSE.
+  clean <- function(x) {
+    if (nchar(x) > 0) {
+      x <- gsub('\\\\', '', x)
+      x <- gsub('\\^', '', x)
+      x <- gsub('\\$', '', x)
+      x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y)))
+      do.call(paste0, as.list(x))
+    } else {
+      x
+    }
+  }
+
+  strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "")
+
+  if (permissive) {
+    actual_path_chunks <- strsplit(actual_path, '/')[[1]]
+    actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/')
+    file_name <- tail(actual_path_chunks, 1)
+    if (length(actual_path_chunks) > 1) {
+      file_name <- paste0('/', file_name)
+    }
+    path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]]
+    path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)],
+                             collapse = '/')
+    path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values)
+    file_name_with_globs <- tail(path_with_globs_chunks, 1)
+    if (length(path_with_globs_chunks) > 1) {
+      file_name_with_globs <- paste0('/', file_name_with_globs)
+    }
+    right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1)
+    right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values)
+    path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags))
+    match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE), paste0(actual_path, file_name))
+    if (match != 1) {
+      stop("Incorrect parameters to replace glob expressions. 
The path with expressions does not match the actual path.") + } + if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') + file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + } + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- c() + tags_to_replace_starts <- c() + tags_to_replace_ends <- c() + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in 1:length(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, matches[i] - 1), replace_values) + left_known <- strReverse(head(strsplit(strReverse(left), strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- .ConfigReplaceVariablesInString(substr(path_with_globs_rx, matches[i] + lengths[i], nchar(path_with_globs_rx)), replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + final_match <- NULL + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- c(left_match + match_len - 1 - nchar(clean(right_known)) - nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path, head(tags_to_replace_ends, 1) + 1, nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - (head(tags_to_replace_ends, 1) - head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path, file_name_with_globs) + } else { + actual_path + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + tag <- paste0('\\$', tag, '\\$') + path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') + parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] + parts <- as.list(parts[grep(tag, parts)]) + longest_couples <- c() + pos_longest_couples <- c() + found_value <- NULL + for (i in 1:length(parts)) { + parts[[i]] <- strsplit(parts[[i]], tag)[[1]] + if (length(parts[[i]]) == 1) { + parts[[i]] <- c(parts[[i]], '') + } + len_parts <- sapply(parts[[i]], nchar) + len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] + pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) + longest_couples <- c(longest_couples, max(len_couples)) + } + chosen_part <- which.max(longest_couples) + parts[[chosen_part]] <- parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + actual_path <- substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + found_value <- substr(actual_path, 0, match_right - 1) + } + } + } else { + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + actual_path <- substr(actual_path, 0, match_right - 1) + } + } + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + found_value <- substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + } + found_value +} + +.FilterUserGraphicArgs <- 
function(excludedArgs, ...) {
+  # This function filters the extra graphical parameters passed by the user to
+  # a plot function, excluding the ones that the plot function uses by default.
+  # Each plot function has a different set of arguments that are not allowed to
+  # be modified.
+  args <- list(...)
+  userArgs <- list()
+  for (name in names(args)) {
+    if ((name != "") & !is.element(name, excludedArgs)) {
+      # If the argument has a name and it is not in the list of excluded
+      # arguments, then it is added to the list that will be used
+      userArgs[[name]] <- args[[name]]
+    } else {
+      .warning(paste0("the argument '", name, "' cannot be ",
+                      "modified and the new value will be ignored"))
+    }
+  }
+  userArgs
+}
+
+.SelectDevice <- function(fileout, width, height, units, res) {
+  # This function is used in the plot functions to check the extension of the
+  # files where the graphics will be stored and select the right R device to
+  # save them.
+  # If the vector of filenames ('fileout') has files with different
+  # extensions, then it will only accept the first one, changing all the rest
+  # of the filenames to use that extension.
+
+  # We extract the extension of the filenames: '.png', '.pdf', ...
+  ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout))
+
+  if (length(ext) != 0) {
+    # If there is an extension specified, select the correct device
+    ## units of width and height set to accept inches
+    if (ext[1] == ".png") {
+      saveToFile <- function(fileout) {
+        png(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".jpeg") {
+      saveToFile <- function(fileout) {
+        jpeg(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] %in% c(".eps", ".ps")) {
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".pdf") {
+      saveToFile <- function(fileout) {
+        pdf(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".svg") {
+      saveToFile <- function(fileout) {
+        svg(filename = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".bmp") {
+      saveToFile <- function(fileout) {
+        bmp(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".tiff") {
+      saveToFile <- function(fileout) {
+        tiff(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else {
+      .warning("file extension not supported, '.eps' will be used by default.")
+      ## In case there is only one filename
+      fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1])
+      ext[1] <- ".eps"
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    }
+    # Change filenames when necessary
+    if (any(ext != ext[1])) {
+      .warning(paste0("some extensions of the filenames provided in 'fileout' are not ",
+                      ext[1], ". The extensions are being converted to ", ext[1], "."))
+      fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout)
+    }
+  } else {
+    # Default filenames when there is no specification
+    .warning("there are no extensions specified in the filenames, defaulting to '.eps'.")
+    fileout <- paste0(fileout, ".eps")
+    saveToFile <- postscript
+  }
+
+  # Return the correct function with the graphical device, and the correct
+  # filenames
+  list(fun = saveToFile, files = fileout)
+}
+
+.message <- function(...) 
{ + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + message(paste0(tag, paste(strwrap( + args, indent = indent, exdent = exdent + ), collapse = collapse)), appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + warning(paste0(tag, paste(strwrap( + args, indent = indent, exdent = exdent + ), collapse = collapse)), call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.IsColor <- function(x) { + res <- try(col2rgb(x), silent = TRUE) + return(!"try-error" %in% class(res)) +} + +# This function switches to a specified figure at position (row, col) in a layout. +# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is +# activated, i.e., all drawn elements will be superimposed. 
Additionally, after
+# using this function, the automatic pointing to the next figure in the layout
+# will be spoiled: once the last figure in the layout is drawn, the pointer won't
+# move to the first figure in the layout.
+# Only figures with numbers other than 0 (when creating the layout) will be
+# accessible.
+# Inputs: either row and col, or n and mat
+.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) {
+  if (!is.null(n) && !is.null(mat)) {
+    if (!is.numeric(n) || length(n) != 1) {
+      stop("Parameter 'n' must be a single numeric value.")
+    }
+    n <- round(n)
+    if (!is.array(mat)) {
+      stop("Parameter 'mat' must be an array.")
+    }
+    target <- which(mat == n, arr.ind = TRUE)[1, ]
+    row <- target[1]
+    col <- target[2]
+  } else if (!is.null(row) && !is.null(col)) {
+    if (!is.numeric(row) || length(row) != 1) {
+      stop("Parameter 'row' must be a single numeric value.")
+    }
+    row <- round(row)
+    if (!is.numeric(col) || length(col) != 1) {
+      stop("Parameter 'col' must be a single numeric value.")
+    }
+    col <- round(col)
+  } else {
+    stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.")
+  }
+  next_attempt <- c(row, col)
+  par(mfg = next_attempt)
+  i <- 1
+  layout_size <- par('mfrow')
+  layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2],
+                         byrow = TRUE)
+  while (any((par('mfg')[1:2] != c(row, col)))) {
+    next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ]
+    par(mfg = next_attempt)
+    i <- i + 1
+    if (i > prod(layout_size)) {
+      stop("Figure not accessible.")
+    }
+  }
+  plot(0, type = 'n', axes = FALSE, ann = FALSE)
+  par(mfg = next_attempt)
+}
+
+# Function to permute arrays of non-atomic elements (e.g. POSIXct)
+.aperm2 <- function(x, new_order) {
+  old_dims <- dim(x)
+  attr_bk <- attributes(x)
+  if ('dim' %in% names(attr_bk)) {
+    attr_bk[['dim']] <- NULL
+  }
+  if (is.numeric(x)) {
+    x <- aperm(x, new_order)
+  } else {
+    y <- array(1:length(x), dim = dim(x))
+    y <- aperm(y, new_order)
+    x <- x[as.vector(y)]
+  }
+  dim(x) <- old_dims[new_order]
+  attributes(x) <- c(attributes(x), attr_bk)
+  x
+}
+
+# This function is a helper for the function .MergeArrays.
+# It expects as inputs two named numeric vectors, and it extends them
+# with dimensions of length 1 until an ordered common dimension
+# format is reached.
+# The first output is dims1 extended with 1s.
+# The second output is dims2 extended with 1s.
+# The third output is a merged dimension vector. If dimensions with
+# the same name are found in the two inputs, and they have a different
+# length, the maximum is taken.
+.MergeArrayDims <- function(dims1, dims2) {
+  new_dims1 <- c()
+  new_dims2 <- c()
+  while (length(dims1) > 0) {
+    if (names(dims1)[1] %in% names(dims2)) {
+      pos <- which(names(dims2) == names(dims1)[1])
+      dims_to_add <- rep(1, pos - 1)
+      if (length(dims_to_add) > 0) {
+        names(dims_to_add) <- names(dims2[1:(pos - 1)])
+      }
+      new_dims1 <- c(new_dims1, dims_to_add, dims1[1])
+      new_dims2 <- c(new_dims2, dims2[1:pos])
+      dims1 <- dims1[-1]
+      dims2 <- dims2[-c(1:pos)]
+    } else {
+      new_dims1 <- c(new_dims1, dims1[1])
+      new_dims2 <- c(new_dims2, 1)
+      names(new_dims2)[length(new_dims2)] <- names(dims1)[1]
+      dims1 <- dims1[-1]
+    }
+  }
+  if (length(dims2) > 0) {
+    dims_to_add <- rep(1, length(dims2))
+    names(dims_to_add) <- names(dims2)
+    new_dims1 <- c(new_dims1, dims_to_add)
+    new_dims2 <- c(new_dims2, dims2)
+  }
+  list(new_dims1, new_dims2, pmax(new_dims1, new_dims2))
+}
+
+# This function takes two named arrays and merges them, filling with
+# NA where needed. 
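+# A minimal usage sketch (hypothetical inputs; assuming the abind package is
+# attached, since .MergeArrays calls abind() internally):
+#   x1 <- array(1, dim = c(b = 1, c = 3))
+#   x2 <- array(2, dim = c(a = 2, b = 3))
+#   dim(.MergeArrays(x1, x2, along = 'b'))  # a: 2, b: 4, c: 3
+# The dims below show a larger worked example of the same mechanics: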
+# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in 1:length(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# only can be used in Trend(). Needs generalization or be replaced by other function. +.reorder <- function(output, time_dim, dim_names) { + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { + names(dim(output)) <- time_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { #regular case + names(dim(output))[1] <- time_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' + return(output) +} + +# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { + + if (type == 'dcpp') { + + fyear_dim <- 'fyear' + data <- Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data))==fmonth_dim)] <- fyear_dim + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + + anom <- data + + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) + + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- seq(from = indices_for_clim[length(indices_for_clim)], by = 
-1, length.out = n_fyears) + } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i,first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i,] <- data[i,] - clim + } + } + + } else if (type %in% c('obs','hist')) { + + data <- multiApply::Apply(data = data, target_dims = month_dim, fun = mean, na.rm = na.rm)$output1 + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } else { ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } + + anom <- data - clim + + } else {stop('type must be dcpp, hist or obs')} + + return(anom) +} + +#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. +# The function is temporarily here because PlotLayout() needs to draw the colorbars of +# PlotMostLikelyQuantileMap(). +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, label_scale = 1, extra_margin = rep(0, 4), + ...) { + # bar_limits + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(1:length(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in 1:length(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } else { + bar_titles <- 
paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { + s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, + # bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + #TODO: col_inf and col_sup + return(list(brks = brks, cols = cols)) + } + +} + + diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 7c829c25..bb143ca5 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -354,6 +354,18 @@ Skill <- function(recipe, data, agg = 'global') { skill_metrics[[ metric ]] <- skill } } + # NaN values and NAs in logical arrays cause problems when saving + skill_metrics <- lapply(skill_metrics, + function(x) { + if (is.logical(x)) { + # x[] <- as.numeric(x) + x[is.na(x)] <- FALSE + } else { + x[is.nan(x)] <- NA + } + return(x) + }) + info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####") .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") # Save outputs diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index e3c27e97..82b6c6bd 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -69,6 +69,18 @@ Statistics <- function(recipe, data, agg = 'global') { } } ## close on stat + # NaN values and NAs in logical arrays cause problems when saving + statistics <- lapply(statistics, + function(x) { + if (is.logical(x)) { + # x[] <- as.numeric(x) + x[is.na(x)] <- FALSE + } else { + x[is.nan(x)] <- NA + } + return(x) + }) + info(recipe$Run$logger, "##### STATISTICS COMPUTATION COMPLETE #####") .log_memory_usage(recipe$Run$logger, when = "After statistics computation") diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml index 4e8e16dc..f216bda1 100644 --- a/modules/Visualization/output_size.yml +++ b/modules/Visualization/output_size.yml @@ -1,5 +1,5 @@ region: #units inches - EU: #latmin: 20, latmax: 80, lonmin: -20, lonmax: 40 + Europe: #latmin: 20, latmax: 80, lonmin: -20, lonmax: 40 PlotEquiMap: skill_metrics: width: 8.5 @@ -11,6 +11,7 @@ region: #units inches dot_symbol: 4 font.main: 1 colNA: "white" + country.borders: TRUE forecast_ensemble_mean: width: 8.5 height: 8.5 @@ -21,12 +22,14 @@ region: #units inches dot_size: 1.7 font.main: 1 colNA: "white" + country.borders: TRUE most_likely_terciles: width: 8.5 height: 8.5 dot_size: 2 plot_margin: !expr c(0, 4.1, 4.1, 2.1) colNA: "white" + country.borders: TRUE Multipanel: forecast_ensemble_mean: width: 8.5 @@ -84,4 +87,41 @@ region: #units inches colNA: "white" Mediterranean: Global: + Ethiopia: + PlotEquiMap: + skill_metrics: + width: 8.5 + height: 8 + axes_label_scale: 0.8 + bar_label_scale: 1.2 + bar_extra_margin: !expr c(2,1,0.5,1) + dot_size: 1.7 + dot_symbol: 4 + font.main: 1 + colNA: "white" + country.borders: TRUE + intylat: 5 + intxlon: 5 + forecast_ensemble_mean: + width: 8.5 + height: 8 + axes_label_scale: 0.8 + bar_label_scale: 1.2 + bar_extra_margin: !expr c(1.7,1,0.5,1) + dot_symbol: 4 + dot_size: 1.7 + font.main: 1 + colNA: "white" + country.borders: TRUE + intylat: 5 + intxlon: 5 + most_likely_terciles: + width: 8.5 + height: 8 + dot_size: 2 + plot_margin: !expr c(0, 4.1, 4.1, 2.1) + colNA: "white" + country.borders: TRUE + intylat: 5 + intxlon: 5 # Add other regions diff --git a/tools/check_recipe.R 
b/tools/check_recipe.R
index 6cb4dfea..98c1c035 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -680,6 +680,200 @@ check_recipe <- function(recipe) {
     }
   }
 
+  # Indicators
+  if ("Indicators" %in% names(recipe$Analysis$Workflow)) {
+    # list of variables requested to be loaded:
+    var.list <- unlist(strsplit(recipe_variables, ", | |,"))
+    # SPEI/SPI checks
+    # Check that precipitation is a requested variable
+    # when drought indices (SPEI or SPI) are requested
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) {
+      if (!('prlr' %in% var.list)) {
+        error(recipe$Run$logger,
+              paste0("Precipitation is necessary to calculate ",
+                     "SPEI and it is not a variable in the recipe"))
+        error_status <- TRUE
+      }
+    }
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SPI$return_spi)) {
+      if (!('prlr' %in% var.list)) {
+        error(recipe$Run$logger,
+              paste0("Precipitation is necessary to calculate ",
+                     "SPI and it is not a variable in the recipe"))
+        error_status <- TRUE
+      }
+    }
+    # SPEI: check that necessary variables for the selected PET method are in the recipe
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) {
+      pet_method <- recipe$Analysis$Workflow$Indicators$SPEI$PET_method
+      if (!is.null(pet_method)) {
+        if (pet_method == 'none') {
+          # check that "pet" is in the variable list
+          ## NOTE: "pet" is not the correct abbr but no examples exist in esarchive now
+          if (!('pet' %in% var.list)) {
+            error(recipe$Run$logger,
+                  paste0("a PET method is necessary to estimate potential ",
+                         "evapotranspiration in the calculation of SPEI"))
+            error_status <- TRUE
+          }
+        } else {
+          if (pet_method == 'hargreaves') {
+            var.list.method <- c('tasmax', 'tasmin')
+            known_pet_method <- TRUE
+          } else if (pet_method == 'hargreaves_modified') {
+            var.list.method <- c('tasmax', 'tasmin', 'prlr')
+            known_pet_method <- TRUE
+          } else if (pet_method == 'thornthwaite') {
+            var.list.method <- c('tas')
+            known_pet_method <- TRUE
+          } else {
+            known_pet_method <- FALSE
+            error(recipe$Run$logger,
+                  paste0("PET method ", pet_method, " unknown"))
+            error_status <- TRUE
+          }
+          if (known_pet_method) {
+            # check that the necessary variables are requested
+            missing.vars <- c()
+            for (var in var.list.method) {
+              if (identical(which(var.list == var), integer(0))) {
+                missing.vars <- c(missing.vars, var)
+              }
+            }
+            if (length(missing.vars) > 0) {
+              error(recipe$Run$logger,
+                    paste0(paste(missing.vars, collapse = ', '),
+                           " are necessary for ", pet_method,
+                           " method and they are NOT selected in the recipe"))
+              error_status <- TRUE
+            }
+          }
+        }
+      } else { # pet_method is NULL: handle it like pet_method == 'none'
+        # check that "pet" is in the variable list
+        ## NOTE: "pet" is not the correct abbr but no examples exist in esarchive now
+        if (!('pet' %in% var.list)) {
+          error(recipe$Run$logger,
+                paste0("a PET method is necessary to estimate potential ",
+                       "evapotranspiration in the calculation of SPEI"))
+          error_status <- TRUE
+        }
+      }
+    }
+
+    # SPEI/SPI check accum number
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) {
+      accum <- recipe$Analysis$Workflow$Indicators$SPEI$Nmonths_accum
+      ftime_interval <- recipe$Analysis$Time$ftime_max - recipe$Analysis$Time$ftime_min + 1
+      if ((accum > 12 & ftime_interval < 12) || (accum > ftime_interval)) {
+        error(recipe$Run$logger,
+              paste0("not possible to accumulate ", accum, " months with the specified ftime ",
+                     "in the calculation of SPEI"))
+        error_status <- TRUE
+      }
+    }
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SPI$return_spi)) {
+      accum <- 
recipe$Analysis$Workflow$Indicators$SPI$Nmonths_accum
+      ftime_interval <- recipe$Analysis$Time$ftime_max - recipe$Analysis$Time$ftime_min + 1
+      if ((accum > 12 & ftime_interval < 12) || (accum > ftime_interval)) {
+        error(recipe$Run$logger,
+              paste0("not possible to accumulate ", accum, " months with the specified ftime ",
+                     "in the calculation of SPI"))
+        error_status <- TRUE
+      }
+    }
+
+    # Check standardization reference period
+    # SPEI
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) {
+      stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPEI$standardization_ref_period
+      year_start <- recipe$Analysis$Time$hcst_start
+      year_end <- recipe$Analysis$Time$hcst_end
+      if (!is.null(stand_refperiod)) {
+        if (!(stand_refperiod[1] >= year_start & stand_refperiod[2] <= year_end)) {
+          error(recipe$Run$logger,
+                paste0("the standardization_ref_period needs to be contained ",
+                       "in hcst_start and hcst_end period for the calculation of SPEI"))
+          error_status <- TRUE
+        }
+      }
+    }
+    # SPI
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SPI$return_spi)) {
+      stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPI$standardization_ref_period
+      year_start <- recipe$Analysis$Time$hcst_start
+      year_end <- recipe$Analysis$Time$hcst_end
+      if (!is.null(stand_refperiod)) {
+        if (!(stand_refperiod[1] >= year_start & stand_refperiod[2] <= year_end)) {
+          error(recipe$Run$logger,
+                paste0("the standardization_ref_period needs to be contained ",
+                       "in hcst_start and hcst_end period for the calculation of SPI"))
+          error_status <- TRUE
+        }
+      }
+    }
+
+    # Threshold indicator: check that the number of requested thresholds matches the number of variables
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SelectedThreshold$return_thresholdbased)) {
+      thrs <- recipe$Analysis$Workflow$Indicators$SelectedThreshold$threshold
+      if (is.null(thrs)) {
+        error(recipe$Run$logger,
+              paste0("Threshold-based indicator is requested but no threshold ",
+                     "has been indicated"))
+        error_status <- TRUE
+      } else {
+        if (length(thrs) != length(var.list)) {
+          error(recipe$Run$logger,
+                paste0("Threshold-based indicator is requested but the number of ",
+                       "thresholds does NOT match the number of requested variables"))
+          error_status <- TRUE
+        }
+      }
+    }
+
+    # Threshold-based predefined indicators (Malaria and Ticks)
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$Malaria$return_climate_suitability)) {
+      # check that necessary variables are requested
+      if ((!all(c("tas", "tdps", "prlr") %in% var.list)) &
+          (!all(c("tas", "hurs", "prlr") %in% var.list))) {
+        error(recipe$Run$logger,
+              paste0("Necessary variables for Malaria indicator are ",
+                     "tas, tdps or hurs, and prlr, NOT included in requested ",
+                     "variables: ", paste(var.list, collapse = ', ')))
+        error_status <- TRUE
+      }
+      # check that ssp is known
+      for (ssp in recipe$Analysis$Workflow$Indicators$Malaria$ssp) {
+        if (ssp != 'p.falciparum' & ssp != 'p.vivax') {
+          error(recipe$Run$logger,
+                paste0("Unknown requested ssp ", ssp))
+          error_status <- TRUE
+        }
+      }
+    }
+    # Tick-borne disease indicator:
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$Ticks$return_climate_suitability)) {
+      # check that necessary variables are requested
+      if ((!all(c("tas", "tdps") %in% var.list)) &
+          (!all(c("tas", "hurs") %in% var.list))) {
+        error(recipe$Run$logger,
+              paste0("Necessary variables for Tick indicator are ",
+                     "tas, and tdps or hurs, NOT included in requested ",
+                     "variables: ", paste(var.list, collapse = ', ')))
+        error_status <- TRUE
+      }
+      # check that ssp is known
+      for (ssp in recipe$Analysis$Workflow$Indicators$Ticks$ssp) {
+        if (ssp != 'i.ricinus') 
{ + error(recipe$Run$logger, + paste0("Unknown requested ssp ", ssp)) + error_status <- TRUE + } + } + } + } # end checks Indicators + # Visualization if ("Visualization" %in% names(recipe$Analysis$Workflow)) { PLOT_OPTIONS <- c("skill_metrics", "forecast_ensemble_mean", diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index ceb696f4..bbd60b9c 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -172,9 +172,15 @@ divide_recipe <- function(recipe) { sdate_weekday <- DAYS_OF_WEEK[[tolower(recipe$Analysis$Time$week_day)]] days <- as.POSIXlt(paste(recipe$Analysis$Time$sdate[sdate], 1:366, sep="-"), format="%Y-%j") + # Remove extra NA days if sdate is not a leap year: + days <- days[!is.na(days)] day_of_the_week_ini <- days[days$wday == sdate_weekday] } else if (nchar(recipe$Analysis$Time$sdate[sdate]) == 8) { - day_of_the_week_ini <- recipe$Analysis$Time$sdate[sdate] + day_of_the_week_ini <- as.POSIXct(as.character(recipe$Analysis$Time$sdate[sdate]), + format = "%Y%m%d", + origin = '1970-01-01', + tz = 'UTC') + } else { stop("what other cases can exist in subaseasonal?") } @@ -211,9 +217,11 @@ divide_recipe <- function(recipe) { ## TODO: Document recipe_model <- paste0("sys-", gsub('\\.', '', all_recipes[[reci]]$Analysis$Datasets$System$name)) - # + variable_name <- gsub(pattern = ", |,| ", + replacement = "-", + x = all_recipes[[reci]]$Analysis$Variables$name) recipe_split <- paste0("_ref-", all_recipes[[reci]]$Analysis$Datasets$Reference$name, - "_var-", all_recipes[[reci]]$Analysis$Variables$name, + "_var-", variable_name, "_reg-", all_recipes[[reci]]$Analysis$Region$name, "_sdate-", all_recipes[[reci]]$Analysis$Time$sdate) recipe_name <- paste0(recipe_model, recipe_split) diff --git a/tools/libs.R b/tools/libs.R index 40146786..9cf1b60f 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -24,6 +24,7 @@ library(ncdf4) library(formattable) ## to plot horizontal color bars - used ?? library(kableExtra) library(memuse) # To check mem usage. 
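+# CSIndicators is assumed here to supply the indicator calculations (SPEI, SPI,
+# threshold-based indices) whose recipe options are validated by the new
+# Indicators checks in check_recipe.R above.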
+library(CSIndicators) # Functions ## To be removed when new package is done by library(CSOperational) -- GitLab From 8db11470453bde980fae9a136515852ab82422a6 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 28 Oct 2024 15:39:18 +0100 Subject: [PATCH 34/53] remove psl units conversion --- modules/Scorecards/R/tmp/ScorecardsMulti.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index c17025fa..146220fd 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -207,9 +207,6 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, ## Find position of mean bias metric to calculate breaks if ('mean_bias' %in% metrics) { pos_bias <- which(metrics == 'mean_bias') - if(var == 'psl'){ - data[,,pos_bias,,,] <- data[,,pos_bias,,,]/100 ## temporary - } breaks_bias <- .SCBiasBreaks(Subset(data, along = c('metric','region'), indices = list(pos_bias,reg))) } -- GitLab From 7c685c5632f459dbe411e61889978e5b4d837611 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 30 Oct 2024 15:48:41 +0100 Subject: [PATCH 35/53] Change plot captions to include correct start date and forecast time for monthly data --- modules/Visualization/R/plot_ensemble_mean.R | 5 ++++- modules/Visualization/R/plot_metrics.R | 15 ++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 953aafd1..cb672523 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -205,6 +205,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o " / Start date: ", format(as.Date(i_syear, format = "%Y%m%d"), "%d-%m-%Y")) + forecast_time_caption <- paste0("Forecast month: ", sprintf("%02d", i)) } else if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), @@ -212,6 +213,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o "Issued on ", format(ymd(start_date), "%d-%m-%Y"), "\n", time_labels[i], years[i]) + forecast_time_caption <- paste0("Forecast week: ", sprintf("%02d", i)) } else { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), @@ -219,6 +221,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o time_labels[i], " ", years[i], " / Start date: ", i_syear) + forecast_time_caption <- paste0("Forecast month: ", sprintf("%02d", i)) } # Define caption if (identical(fun, PlotRobinson)) { @@ -226,7 +229,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o base_args[['caption']] <- paste0("Nominal start date: ", format(ymd(start_date), "%d-%m-%Y"), "\n", - "Forecast week: ", sprintf("%02d", i), "\n", + forecast_time_caption, "\n", "Reference: ", recipe$Analysis$Datasets$Reference, "\n", "Units: ", units) } diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 6ffddafc..c01ca545 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -52,7 +52,7 @@ plot_metrics <- function(recipe, data_cube, metrics, weeks <- paste0(0, ftime_min:ftime_max) # This week_label appears on the name of the file. It's just the start date. 
week_label <- recipe$Analysis$Time$sdate - } else { # Decadal + } else { # Decadal init_month <- 1 init_week <- 1 months <- lubridate::month(Subset(data_cube$attrs$Dates, @@ -350,6 +350,8 @@ plot_metrics <- function(recipe, data_cube, metrics, hcst_period) } } + nominal_startdate_caption <- paste0("1st of ", str_to_title(month_label)) + forecast_time_caption <- paste0("Forecast month: " forecast_time) } else if (tolower(recipe$Analysis$Horizon == "subseasonal")) { # Get forecast time label forecast_time <- weeks[i] @@ -361,8 +363,9 @@ plot_metrics <- function(recipe, data_cube, metrics, paste("Valid from", format(week_valid_ini[i], "%d-%m"), "to", format(week_valid_end[i], "%d-%m"), "of", year(ymd(start_date)))) - # "/ valid week", format(weeks[i], - # "%Y-%m-%d"), "/", hcst_period) + + nominal_startdate_caption <- ymd(start_date) + forecast_time_caption <- paste0("Forecast week: ", sprintf("%02d", i)) } else if (recipe$Analysis$Horizon == "decadal") { # Case without time aggregation: if (is.null(attributes(data_cube$attrs$time_bounds))) { @@ -400,6 +403,8 @@ plot_metrics <- function(recipe, data_cube, metrics, hcst_period) } } + nominal_startdate_caption <- start_date + forecast_time_caption <- paste0("Forecast month: " forecast_time) } else { warning("Unknown time horizon?") } @@ -414,8 +419,8 @@ plot_metrics <- function(recipe, data_cube, metrics, if (identical(fun, PlotRobinson)) { ## TODO: Customize alpha and other technical details depending on the metric base_args[['caption']] <- - paste0("Nominal start date: ", ymd(start_date), "\n", - "Forecast week: ", sprintf("%02d", i), "\n", ## This is specific for subseasonal, would need a loop to specify time horizon + paste0("Nominal start date: ", nominal_startdate_caption, "\n", + forecast_time_caption, "\n", "Reference: ", recipe$Analysis$Datasets$Reference, "\n", "Units: ", data_cube$attrs$Variable$metadata[[var_name]]$units, "\n", significance_caption) -- GitLab From e035c040500406f9877cb54e8e2b3f785b979f85 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 30 Oct 2024 15:59:05 +0100 Subject: [PATCH 36/53] Add missing comma --- modules/Visualization/R/plot_metrics.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index c01ca545..c19b91aa 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -351,7 +351,7 @@ plot_metrics <- function(recipe, data_cube, metrics, } } nominal_startdate_caption <- paste0("1st of ", str_to_title(month_label)) - forecast_time_caption <- paste0("Forecast month: " forecast_time) + forecast_time_caption <- paste0("Forecast month: ", forecast_time) } else if (tolower(recipe$Analysis$Horizon == "subseasonal")) { # Get forecast time label forecast_time <- weeks[i] @@ -404,7 +404,7 @@ plot_metrics <- function(recipe, data_cube, metrics, } } nominal_startdate_caption <- start_date - forecast_time_caption <- paste0("Forecast month: " forecast_time) + forecast_time_caption <- paste0("Forecast month: ", forecast_time) } else { warning("Unknown time horizon?") } -- GitLab From 0c238b8bcda7a5cf84d6ef5576ed5bedc6fc5bc8 Mon Sep 17 00:00:00 2001 From: abatalla Date: Thu, 31 Oct 2024 14:34:19 +0100 Subject: [PATCH 37/53] Mask and/or dots for metrics visualization (new MR) --- example_scripts/exec_units.R | 75 +++++++++++++++---- modules/Visualization/R/plot_ensemble_mean.R | 9 ++- modules/Visualization/R/plot_metrics.R | 68 ++++++++++++----- .../R/plot_most_likely_terciles_map.R | 
5 +- modules/Visualization/R/tmp/PlotRobinson.R | 8 +- modules/Visualization/Visualization.R | 36 ++++++++- modules/Visualization/output_size.yml | 32 ++++++-- recipe_template.yml | 3 +- .../examples/recipe_tas_seasonal_units.yml | 4 +- 9 files changed, 184 insertions(+), 56 deletions(-) diff --git a/example_scripts/exec_units.R b/example_scripts/exec_units.R index 819121c9..e0c49958 100644 --- a/example_scripts/exec_units.R +++ b/example_scripts/exec_units.R @@ -1,6 +1,5 @@ rm(list=ls()) gc() -setwd("/esarchive/scratch/nperez/git/auto-s2s") source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") @@ -11,28 +10,74 @@ source("modules/Visualization/Visualization.R") source("tools/prepare_outputs.R") # Read recipe -#args = commandArgs(trailingOnly = TRUE) -#recipe_file <- args[1] -#recipe <- read_atomic_recipe(recipe_file) -## to test a single recipe: - # recipe_file <- "recipes/examples/recipe_tas_seasonal_units.yml" - # recipe_file <- "recipes/examples/recipe_prlr_seasonal_units.yml" +## To run many recipes using launch_SUNSET.sh: +# args = commandArgs(trailingOnly = TRUE) +# recipe_file <- args[1] +# recipe <- read_atomic_recipe(recipe_file) +## To test a single recipe: +# recipe_file <- "recipes/examples/recipe_tas_seasonal_units.yml" +# recipe_file <- "recipes/examples/recipe_prlr_seasonal_units.yml" recipe <- prepare_outputs(recipe_file) # Load datasets -data <- load_datasets(recipe) +data <- Loading(recipe) # Units transformation source("modules/Units/Units.R") test <- Units(recipe, data) # Calibrate datasets -data <- calibrate_datasets(recipe, test) +data <- Calibration(recipe, test) # Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, data) +skill_metrics <- Skill(recipe, data) # Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, data) -# Export all data to netCDF -## TODO: Fix plotting -# save_data(recipe, data, skill_metrics, probabilities) +probabilities <- Probabilities(recipe, data) + # Plot data -plot_data(recipe, data, skill_metrics, probabilities, significance = T) +# Below, many examples of different ways to plot the data are shown, using +# different projections and ways of adding statistical significance masking. 
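+# A compact equivalent of the calls below (a sketch; 'sig' is a hypothetical
+# local variable, not part of the suite) would be:
+# for (sig in list(FALSE, TRUE, 'dots', 'mask', 'both')) {
+#   Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+#                 probabilities = probabilities, significance = sig)
+# }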
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = F)
+list.files(paste0(recipe$Run$output_dir, "/plots/ECMWF-SEAS51/ERA5/evmos/tas/"))
+# signif TRUE:
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = T)
+list.files(paste0(recipe$Run$output_dir, "/plots/ECMWF-SEAS51/ERA5/evmos/tas/"))
+# signif mask: this option is not available in PlotEquiMap, it returns dots
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = 'mask')
+list.files(paste0(recipe$Run$output_dir, "/plots/ECMWF-SEAS51/ERA5/evmos/tas/"))
+## The difference between TRUE and mask for PlotEquiMap is the file name
+
+# signif dots:
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = 'dots')
+list.files(paste0(recipe$Run$output_dir, "/plots/ECMWF-SEAS51/ERA5/evmos/tas/"))
+
+# 'both' duplicates some files, but avoiding that would make the code very complex
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = 'both')
+
+recipe$Analysis$Workflow$Visualization$multi_panel <- TRUE
+recipe$Analysis$Workflow$Visualization$mask_terciles <- NULL
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = F)
+
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = T)
+
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = 'mask')
+
+recipe$Analysis$Workflow$Visualization$multi_panel <- FALSE
+recipe$Analysis$Workflow$Visualization$projection <- 'Robinson'
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = F)
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = T)
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = 'dots')
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = 'mask')
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = 'both')
+
diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R
index 953aafd1..42c2b53b 100644
--- a/modules/Visualization/R/plot_ensemble_mean.R
+++ b/modules/Visualization/R/plot_ensemble_mean.R
@@ -141,7 +141,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
                        "\n", "Forecast Ensemble Mean / ", "Init.: ", i_syear)
     }
     # Plots
-    output_configuration <- output_conf$Multipanel$forecast_ensemble_mean
+    output_configuration <- output_conf$multipanel$forecast_ensemble_mean
     base_args <- list(fun = "PlotEquiMap",
                       plot_dims = c('longitude', 'latitude'),
                       var = i_var_ens_mean, lon = longitude,
@@ -159,7 +159,6 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
   # Define function and parameters depending on projection
   if (projection == 'cylindrical_equidistant') {
     fun <- PlotEquiMap
-    output_configuration <- output_conf$PlotEquiMap$forecast_ensemble_mean
     base_args <- list(var = NULL, dots = NULL, mask = 
NULL, lon = longitude, lat = latitude, dot_symbol = 20, title_scale = 0.6, @@ -167,7 +166,6 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o filled.continents = F, brks = brks, cols = cols, bar_label_digits = 4, bar_label_scale = 1.5, axes_label_scale = 1, units = units) - base_args[names(output_configuration)] <- output_configuration } else { fun <- PlotRobinson common_projections <- c("robinson", "stereographic", "lambert_europe") @@ -180,8 +178,11 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o lon = longitude, lat = latitude, lon_dim = 'longitude', lat_dim = 'latitude', target_proj = target_proj, legend = 's2dv', - style = 'point', brks = brks, cols = cols) + style = 'point', brks = brks, cols = cols, + dots_size = 0.2, dots_shape = 47) } + output_configuration <- output_conf[[projection]]$forecast_ensemble_mean + base_args[names(output_configuration)] <- output_configuration # Loop over forecast times for (i in 1:length(time_labels)) { # Get mask subset diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 6ffddafc..4223af16 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -191,19 +191,23 @@ plot_metrics <- function(recipe, data_cube, metrics, # Reorder dimensions metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, - # retrieve it and reorder its dimensions. - significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(metrics))) { - metric_significance <- var_metric[[significance_name]] - metric_significance <- Reorder(metric_significance, c("time", - "longitude", - "latitude")) - # Split significance into list of lists, along the time dimension - # This allows for plotting the significance dots correctly. - metric_significance <- ClimProjDiags::ArrayToList(metric_significance, - dim = "time", - level = "sublist", - names = "dots") + # retrieve it and reorder its dimensions. + if (significance != FALSE) { # Both, dots, mask or TRUE + significance_name <- paste0(name, "_significance") + if ((significance_name %in% names(metrics))) { + metric_significance <- var_metric[[significance_name]] + metric_significance <- Reorder(metric_significance, c("time", + "longitude", + "latitude")) + # Split significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. + metric_significance <- ClimProjDiags::ArrayToList(metric_significance, + dim = "time", + level = "sublist", + names = "dots") + } else { + metric_significance <- NULL + } } else { metric_significance <- NULL } @@ -256,7 +260,7 @@ plot_metrics <- function(recipe, data_cube, metrics, titles <- "Unknown" } ## TODO: Combine PlotLayout with PlotRobinson? 
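+    # Note: plot layout defaults are now looked up by projection and plot type,
+    # e.g. output_conf[['multipanel']][['skill_metrics']] below, or
+    # output_conf[[projection]][['skill_metrics']] for single-panel maps
+    # (a sketch of the lookup scheme; see output_size.yml in this patch).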
- output_configuration <- output_conf$Multipanel$plot_metrics + output_configuration <- output_conf$multipanel$skill_metrics base_args <- list(fun = "PlotEquiMap", plot_dims = c('longitude', 'latitude'), var = asplit(metric, MARGIN = 1), @@ -277,7 +281,6 @@ plot_metrics <- function(recipe, data_cube, metrics, # Define function and parameters depending on projection if (projection == 'cylindrical_equidistant') { fun <- PlotEquiMap - output_configuration <- output_conf$PlotEquiMap$skill_metric base_args <- list(var = NULL, dots = NULL, lon = longitude, lat = latitude, dot_symbol = 20, dot_size = 1, @@ -287,7 +290,6 @@ plot_metrics <- function(recipe, data_cube, metrics, units = units, font.main = 2, bar_label_digits = 3, bar_label_scale = 1.5, axes_label_scale = 1, width = 8, height = 5) - base_args[names(output_configuration)] <- output_configuration } else { fun <- PlotRobinson common_projections <- c("robinson", "stereographic", "lambert_europe") @@ -301,10 +303,14 @@ plot_metrics <- function(recipe, data_cube, metrics, lon = longitude, lat = latitude, lon_dim = 'longitude', lat_dim = 'latitude', target_proj = target_proj, legend = 's2dv', - style = 'point', brks = brks, cols = cols, + style = 'point', dots = NULL, brks = brks, + cols = cols, col_inf = col_inf, col_sup = col_sup, - units = units) + units = units, + dots_size = 0.2, dots_shape = 47) } + output_configuration <- output_conf[[projection]]$skill_metrics + base_args[names(output_configuration)] <- output_configuration # Loop over forecast times for (i in 1:dim(metric)[['time']]) { # Get forecast time label @@ -406,10 +412,31 @@ plot_metrics <- function(recipe, data_cube, metrics, # Modify base arguments base_args[[1]] <- metric[i, , ] if (!is.null(metric_significance)) { - base_args[[2]] <- metric_significance[[i]][[1]] + sign_file_label <- NULL + if (is.logical(significance)) { + if (significance) { + base_args[[2]] <- metric_significance[[i]][[1]] + sign_file_label <- '_mask' + } + } else { + if (significance == 'dots') { + if (projection != 'cylindrical_equidistant') { + base_args[[10]] <- metric_significance[[i]][[1]] + } else { + # The position of arguments in base_args requires this cond + # so PlotEquiMap plots dots when requested + base_args[[2]] <- metric_significance[[i]][[1]] + } + sign_file_label <- '_dots' + } else if (significance == 'mask') { + base_args[[2]] <- metric_significance[[i]][[1]] + sign_file_label <- '_mask' + } + } significance_caption <- "alpha = 0.05" } else { significance_caption <- NULL + sign_file_label <- NULL } if (identical(fun, PlotRobinson)) { ## TODO: Customize alpha and other technical details depending on the metric @@ -420,7 +447,8 @@ plot_metrics <- function(recipe, data_cube, metrics, "Units: ", data_cube$attrs$Variable$metadata[[var_name]]$units, "\n", significance_caption) } - fileout <- paste0(outfile, "_ft", forecast_time, ".pdf") + fileout <- paste0(outfile, "_ft", forecast_time, + sign_file_label, ".pdf") # Plot info(recipe$Run$logger, paste("Plotting", display_name)) diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index 9d03347f..67c3d978 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -171,7 +171,7 @@ plot_most_likely_terciles <- function(recipe, triangle_ends = c(F, F)) # , width = 11, height = 8) ) } else { - output_configuration <- output_conf$PlotEquiMap$most_likely_terciles + output_configuration <- 
output_conf$cylindrical_equidistant$most_likely_terciles
    base_args <- list(cat_dim = 'bin',
                      probs = NULL, lon = longitude, lat = latitude,
@@ -194,7 +194,8 @@ plot_most_likely_terciles <- function(recipe,
                      plot_margin = c(1, 5, 5, 5),
                      # plot_margin = c(5.1, 4.1, 4.1, 2.1),
                      return_leg = T,
-                     triangle_ends = c(F, T), width = 10, height = 8)
+                     triangle_ends = c(F, T), width = 10, height = 8,
+                     dots_size = 0.2, dots_shape = 47)
    base_args[names(output_configuration)] <- output_configuration
    for (i in 1:length(time_labels)) {
      ## variables ending in *_i represent each forecast time
diff --git a/modules/Visualization/R/tmp/PlotRobinson.R b/modules/Visualization/R/tmp/PlotRobinson.R
index bd427448..67ca034d 100644
--- a/modules/Visualization/R/tmp/PlotRobinson.R
+++ b/modules/Visualization/R/tmp/PlotRobinson.R
@@ -118,11 +118,11 @@ PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL,
                          target_proj = 54030, legend = 's2dv', style = 'point',
                          dots = NULL, mask = NULL, brks = NULL, cols = NULL,
                          bar_limits = NULL, triangle_ends = NULL,
                          col_inf = NULL, col_sup = NULL, colNA = NULL,
-                         color_fun = clim.palette(), bar_extra_margin = rep(0, 4), vertical = TRUE,
+                         color_fun = clim.palette(), bar_extra_margin = c(3.5, 0, 3.5, 0), vertical = TRUE,
                          toptitle = NULL, caption = NULL, units = NULL, crop_coastlines = NULL,
-                         point_size = "auto", title_size = 16, dots_size = 0.5,
+                         point_size = "auto", title_size = 10, dots_size = 0.2,
                          dots_shape = 47, coastlines_width = 0.3,
-                         fileout = NULL, width = 8, height = 4, size_units = "in",
+                         fileout = NULL, width = 8, height = 5, size_units = "in",
                          res = 300) {
   # Sanity check
@@ -234,12 +234,14 @@ PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL,
     if (!identical(dim(mask), dim(data))) {
       stop("Parameter 'mask' must have the same dimensions as 'data'.")
     } else if (is.numeric(mask)) {
+      mask[which(is.na(mask))] <- 0
       if (all(mask %in% c(0, 1))) {
         mask <- array(as.logical(mask), dim = dim(mask))
       } else {
         stop("Parameter 'mask' must have only TRUE/FALSE or 0/1.")
       }
     } else if (is.logical(mask)) {
+      mask[which(is.na(mask))] <- F
       if (!all(mask %in% c(T, F))) {
         stop("Parameter 'mask' must have only TRUE/FALSE or 0/1.")
       }
diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R
index be46f68e..749fe3b6 100644
--- a/modules/Visualization/Visualization.R
+++ b/modules/Visualization/Visualization.R
@@ -71,16 +71,46 @@ Visualization <- function(recipe,
                   "NULL, so there is no data that can be plotted."))
     stop()
   }
+  # Set default single-panel plots if not specified
  if (is.null(recipe$Analysis$Workflow$Visualization$multi_panel)) {
    recipe$Analysis$Workflow$Visualization$multi_panel <- FALSE
  }
+
+  # Warn if the significance value given in the function call conflicts
+  # with the value specified in the recipe
+  if (!missing(significance) && !is.null(recipe$Analysis$Workflow$Visualization$significance)) {
+    if (significance != recipe$Analysis$Workflow$Visualization$significance) {
+      warning("The significance value provided in the function call does not match ",
+              "the value in the recipe. The function call value will be used.")
+    }
+  }
+
+  # If not specified in function call, set significance to recipe value
+  if (missing(significance) && !is.null(recipe$Analysis$Workflow$Visualization$significance)) {
+    significance <- recipe$Analysis$Workflow$Visualization$significance
+  }
+
  # Plot skill metrics
  if ("skill_metrics" %in% plots) {
    if (!is.null(skill_metrics)) {
-      plot_metrics(recipe = recipe, data_cube = data$hcst,
-                   metrics = skill_metrics, outdir = outdir,
-                   significance = significance, output_conf = output_conf)
+      if (is.logical(significance)) {
+        plot_metrics(recipe = recipe, data_cube = data$hcst,
+                     metrics = skill_metrics, outdir = outdir,
+                     significance = significance, output_conf = output_conf)
+        info(recipe$Run$logger,
+             paste("Skill metrics significance set as", significance))
+      } else {
+        if (significance %in% c('both', 'dots')) {
+          plot_metrics(recipe, data$hcst, skill_metrics, outdir,
+                       significance = 'dots', output_conf = output_conf)
+          info(recipe$Run$logger, "Skill metrics significance as dots")
+        }
+        if (significance %in% c('both', 'mask')) {
+          plot_metrics(recipe, data$hcst, skill_metrics, outdir,
+                       significance = 'mask', output_conf = output_conf)
+          info(recipe$Run$logger, "Skill metrics significance as mask")
+        }
+      }
    } else {
      error(recipe$Run$logger,
            paste0("The skill metric plots have been requested, but the ",
diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml
index 4e8e16dc..65265916 100644
--- a/modules/Visualization/output_size.yml
+++ b/modules/Visualization/output_size.yml
@@ -1,6 +1,26 @@
+# This file allows you to define custom plotting parameters for any region. To add
+# custom parameters for a region, simply include it following this file structure.
+# If a parameter or region is not defined in this file, the default values will be
+# used.
+# The structure is as follows:
+# Replace elements enclosed in <...> with the appropriate value.
+# region:
+#   <region_name>: (name of the region you will specify in the recipe)
+#     <projection>: (cylindrical_equidistant, robinson, stereographic, lambert_europe)
+#       <plot_type>: (can be skill_metrics, forecast_ensemble_mean or most_likely_terciles)
+#         parameter1: value
+#         parameter2: value
+#         ...
+#
+# * How do I know which parameters are available? 
+# - cylindrical_equidistant: uses s2dv::PlotEquiMap()
+# - robinson, stereographic, lambert_europe: uses s2dv::PlotRobinson()
+# - multipanel: uses s2dv::PlotEquiMap() and s2dv::PlotLayout()
+# - For most_likely_terciles, only 'cylindrical_equidistant' is available
+
 region: #units inches
   EU: #latmin: 20, latmax: 80, lonmin: -20, lonmax: 40
-    PlotEquiMap:
+    cylindrical_equidistant: # Projection
       skill_metrics:
         width: 8.5
         height: 8.5
@@ -27,16 +47,16 @@ region: #units inches
        dot_size: 2
        plot_margin: !expr c(0, 4.1, 4.1, 2.1)
        colNA: "white"
-    Multipanel:
+    multipanel:
      forecast_ensemble_mean:
        width: 8.5
        height: 8.5
-    Robinson:
+    robinson:
      skill_metrics: {width: 8, height: 5}
      colNA: "white"
  NA-EU: #North Atlantic European region
  Iberia: #latmin: 34, latmax: 46, lonmin: -10, lonmax: 5
-    PlotEquiMap:
+    cylindrical_equidistant:
      skill_metrics:
        width: 8
        height: 7.5
@@ -70,7 +90,7 @@ region: #units inches
        plot_margin: !expr c(2, 5, 7.5, 5)
        bar_extra_margin: !expr c(2.5, 1, 0.5, 1) # colorbar proportions
        colNA: "white"
-    Multipanel:
+    multipanel:
      forecast_ensemble_mean:
        width: 8.5
        height: 9.5
@@ -79,7 +99,7 @@ region: #units inches
        width: 8.5
        height: 9.5
        title_margin_scale: 4
-    Robinson:
+    robinson:
      skill_metrics: {width: 8, height: 5}
      colNA: "white"
  Mediterranean:
diff --git a/recipe_template.yml b/recipe_template.yml
index 0db9e8fb..a9d383ce 100644
--- a/recipe_template.yml
+++ b/recipe_template.yml
@@ -139,7 +139,8 @@ Analysis:
   plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str)
   multi_panel: yes # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool)
   projection: 'cylindrical_equidistant' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str)
-  mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+  significance: 'dots' # Type of mask for statistical significance. Options: 'dots', 'mask', 'both', or yes/no. Default is 'dots'. 'mask' and 'both' are not available for projections other than cylindrical_equidistant. (Optional, str)
+  mask_terciles: no # Whether to dot or mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
   dots_terciles: yes # Whether to dot the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
   mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
   file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. 
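For reference, a minimal Visualization section exercising the new significance option documented above might look like the sketch below. This is an illustrative assumption, not a shipped example recipe; per the plot_metrics() changes in this patch series, 'both' writes two files per metric and forecast time, suffixed '_dots.pdf' and '_mask.pdf' before any format conversion.

  Visualization:
    plots: skill_metrics
    projection: 'cylindrical_equidistant'  # 'mask' and 'both' require this projection
    significance: 'both'  # produces both a dotted and a masked version of each metric plot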
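Similarly, a hypothetical custom region entry following the structure now documented at the top of output_size.yml could read as follows. The region name 'Pyrenees' and the dimension values are assumptions for illustration; the parameter names (width, height, dot_size, colNA) are taken from entries already present in the file.

  region:
    Pyrenees:  # name of the region as specified in the recipe (assumed example)
      cylindrical_equidistant:  # parameters passed to s2dv::PlotEquiMap()
        skill_metrics:
          width: 8
          height: 6
          dot_size: 1.5
          colNA: "white"
      robinson:  # parameters passed to s2dv::PlotRobinson()
        skill_metrics: {width: 8, height: 5}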
diff --git a/recipes/examples/recipe_tas_seasonal_units.yml b/recipes/examples/recipe_tas_seasonal_units.yml index d2b25321..17df1efd 100644 --- a/recipes/examples/recipe_tas_seasonal_units.yml +++ b/recipes/examples/recipe_tas_seasonal_units.yml @@ -59,5 +59,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE - output_dir: /esarchive/scratch/nperez/cs_oper/ - code_dir: /esarchive/scratch/nperez/git/s2s-suite/ + output_dir: /esarchive/scratch/nperez/ + code_dir: /esarchive/scratch/nperez/git6/sunset/ -- GitLab From aca037a0b386a3b22dacd35efc1e9369714db119 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 4 Nov 2024 08:56:19 +0100 Subject: [PATCH 38/53] Fix bug in plot_metrics(): typo in 'forecast_time_ini' --- modules/Visualization/R/plot_metrics.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index c19b91aa..ea205d2e 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -384,10 +384,10 @@ plot_metrics <- function(recipe, data_cube, metrics, forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end) # title names: forecast_time_ini <- init_month + forecast_time_ini - 1 - forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) + forecast_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) forecast_time_ini <- month.name[forecast_time_ini] forecast_time_end <- init_month + forecast_time_end - 1 - forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) + forecast_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) forecast_time_end <- month.name[forecast_time_end] toptitle <- paste(system_name, "/", str_to_title(var_long_name), -- GitLab From 56908c60518b6488f777c67654845ac87b5fe7e0 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 20 Nov 2024 16:38:47 +0100 Subject: [PATCH 39/53] Add retrieve = TRUE to logger calls in Visualization() --- modules/Visualization/Visualization.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 6a8c1e4d..fc361db1 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -102,20 +102,20 @@ Visualization <- function(recipe, if (is.logical(significance)) { plot_metrics(recipe = recipe, metrics = skill_metrics, outdir = outdir, significance = significance, output_conf = output_conf) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste("##### Skill metrics significance set as", significance, " #####")) } else { if (significance %in% c('both', 'dots')) { plot_metrics(recipe = recipe, metrics = skill_metrics, outdir = outdir, significance = 'dots', output_conf = output_conf) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### Skill metrics significance as dots #####") } if (significance %in% c('both', 'mask')) { plot_metrics(recipe = recipe, metrics = skill_metrics, outdir = outdir, significance = 'mask', output_conf = output_conf) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### Skill metrics significance as mask #####") } -- GitLab From 36fe86a1fbbcff8b4a400e0ab659d3ecfd945165 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 22 Nov 2024 15:04:31 +0100 Subject: [PATCH 40/53] Fix conditionals in plot_metrics() --- modules/Visualization/R/plot_metrics.R | 
32 +++++++++++++++++--------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 2e755986..d1d9aed8 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -227,28 +227,37 @@ plot_metrics <- function(recipe, metrics, # Multi-panel or single-panel plots if (recipe$Analysis$Workflow$Visualization$multi_panel) { # Define titles - toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), - "\n", display_name, " / ", hcst_period) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + "\n", display_name, " / ", hcst_period) ## time_bounds in cube_info to know if Time_aggregation was applied if (!is.null(attributes(cube_info$attrs$time_bounds))) { info(recipe$Run$logger, retrieve = FALSE, "Using plotting attrs from time_bounds.") if (length(attributes(cube_info$attrs$time_bounds)$plotting_attr) > 1) { titles <- unlist( - lapply(1:length(attributes(cube_info$attrs$time_bounds)$plotting_attr$ini_ftime), - function(x) { - paste("Forecast time", - attributes(cube_info$attrs$time_bounds)$plotting_attr$ini_ftime[x], - "to", - attributes(cube_info$attrs$time_bounds)$plotting_attr$end_ftime[x])})) + lapply(1:length(attributes(cube_info$attrs$time_bounds)$plotting_attr$ini_ftime), + function(x) { + paste("Forecast time", + attributes(cube_info$attrs$time_bounds)$plotting_attr$ini_ftime[x], + "to", + attributes(cube_info$attrs$time_bounds)$plotting_attr$end_ftime[x])})) } else { titles <- attributes(cube_info$attrs$time_bounds)$plotting_attr[[1]] } } else { - titles <- attributes(cube_info$attrs$time_bounds)$plotting_attr[[1]] + titles <- as.vector(months) } - } else { - titles <- as.vector(months) + } else if (tolower(recipe$Analysis$Horizon) == "subseasonal") { + toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + "\n", display_name, " / ", "Issued on ", + format(ymd(start_date), "%d-%m-%Y"), + " / ", hcst_period) + titles <- paste("Valid from", format(week_valid_ini, "%d-%m"), + "to", format(week_valid_end, "%d-%m")) + } else { + toptitle <- "Unknown" + titles <- "Unknown" } ## TODO: Combine PlotLayout with PlotRobinson? 
output_configuration <- output_conf$multipanel$skill_metrics @@ -477,6 +486,7 @@ plot_metrics <- function(recipe, metrics, } } } + } info(recipe$Run$logger, retrieve = TRUE, "##### METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") -- GitLab From f788f57ee3e51e32bbdf99aa599c01a643e56a73 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 22 Nov 2024 17:08:30 +0100 Subject: [PATCH 41/53] Adapt Crossval_metrics and Scorecards_calculations, generalize metric retrieval after Compute() --- build_compute_workflow_crossval.R | 86 ++++++++++++++------ modules/Crossval/Crossval_metrics.R | 37 ++++++--- modules/Scorecards/Scorecards_calculations.R | 17 ++-- 3 files changed, 95 insertions(+), 45 deletions(-) diff --git a/build_compute_workflow_crossval.R b/build_compute_workflow_crossval.R index 9b1cbb3d..bad9c116 100644 --- a/build_compute_workflow_crossval.R +++ b/build_compute_workflow_crossval.R @@ -11,7 +11,10 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, appenders = list(console_appender(layout = .custom_log_layout()))) modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) - return_datasets <- strsplit(recipe$Run$startR_workflow$return, ", | |,")[[1]] + return_datasets <- NULL + if (!is.null(recipe$Run$startR_workflow$return)) { + return_datasets <- strsplit(recipe$Run$startR_workflow$return, ", | |,")[[1]] + } ## TODO: Decide on file path # for (module in modules) { @@ -71,7 +74,8 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, return_list <- list(hcst = data$hcst$data, fcst = data$fcst$data, obs = data$obs$data, - skill = skill_metrics, + skill = skill_metrics$skill, + skill_syear = skill_metrics$skill_syear, statistics = statistics, probabilities = probabilities) # Eliminate NULL elements from the return list @@ -103,7 +107,10 @@ run_compute_workflow <- function(recipe, data, last_module = NULL) { # Step 1: Retrieve the modules that will be called inside the workflow modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) - return_datasets <- strsplit(recipe$Run$startR_workflow$return, ", | |,")[[1]] + return_datasets <- NULL + if (!is.null(recipe$Run$startR_workflow$return)) { + return_datasets <- strsplit(recipe$Run$startR_workflow$return, ", | |,")[[1]] + } # --------------------------------------------------------------------------- # Step 2: Define the inputs and outputs for Compute() @@ -156,6 +163,12 @@ run_compute_workflow <- function(recipe, data, last_module = NULL) { skill_output_dims <- skill_dims[!skill_dims %in% chunking_dims] output_dims <- c(output_dims, list(skill = skill_output_dims)) + if (grepl("syear", recipe$Analysis$Workflow$Skill$metric)) { + skill_syear_dims <- c('metric', 'syear', 'var', 'time', spatial_output_dims) + skill_syear_output_dims <- skill_syear_dims[!skill_syear_dims %in% chunking_dims] + output_dims <- c(output_dims, + list(skill_syear = skill_syear_output_dims)) + } } # Add statistics if statistics module is called if ("statistics" %in% modules) { @@ -277,31 +290,52 @@ run_compute_workflow <- function(recipe, data, last_module = NULL) { } ## TODO: Replicate for probabilities and other modules; refactor - if (!is.null(res$skill)) { - tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Skill") - metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) - res$skill <- ClimProjDiags::ArrayToList(res$skill, - dim = 'metric', - level = 'list', - names = metric_list) - # Put chunked metadata back together - res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, 
- chunks = recipe$Run$startR_workflow$chunk_along, - array_dims = dim(res$skill[[1]]), - metadata_file_pattern = "metric_metadata") + for (metrics in c("skill", "skill_syear", "statistics")) { + if (!is.null(res[[metrics]])) { + tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", + str_to_title(metrics)) + metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) + res[[metrics]] <- ClimProjDiags::ArrayToList(res[[metrics]], + dim = 'metric', + level = 'list', + names = metric_list) + res[[metrics]]$metadata <- retrieve_metadata(tmp_dir = tmp_dir, + chunks = recipe$Run$startR_workflow$chunk_along, + array_dims = dim(res[[metrics]][[1]]), + metadata_file_pattern = "metric_metadata") + } } - if (!is.null(res$statistics)) { - tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Statistics") - metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) - res$statistics <- ClimProjDiags::ArrayToList(res$statistics, - dim = 'metric', - level = 'list', - names = metric_list) - res$statistics$metadata <- retrieve_metadata(tmp_dir = tmp_dir, - chunks = recipe$Run$startR_workflow$chunk_along, - array_dims = dim(res$statistics[[1]]), - metadata_file_pattern = "metric_metadata") + if (!is.null(res$skill_syear)) { + res$skill_syear$metadata <- NULL + res$skill <- c(res$skill[-which(names(res$skill) == "metadata")], + res$skill_syear, res$skill["metadata"]) } + + # if (!is.null(res$skill)) { + # tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Skill") + # metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) + # res$skill <- ClimProjDiags::ArrayToList(res$skill, + # dim = 'metric', + # level = 'list', + # names = metric_list) + # # Put chunked metadata back together + # res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, + # chunks = recipe$Run$startR_workflow$chunk_along, + # array_dims = dim(res$skill[[1]]), + # metadata_file_pattern = "metric_metadata") + # } + # if (!is.null(res$statistics)) { + # tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Statistics") + # metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) + # res$statistics <- ClimProjDiags::ArrayToList(res$statistics, + # dim = 'metric', + # level = 'list', + # names = metric_list) + # res$statistics$metadata <- retrieve_metadata(tmp_dir = tmp_dir, + # chunks = recipe$Run$startR_workflow$chunk_along, + # array_dims = dim(res$statistics[[1]]), + # metadata_file_pattern = "metric_metadata") + # } # --------------------------------------------------------------------------- # Step 5: Remove temporary files diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index 120ea037..02acc31b 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -86,7 +86,6 @@ Crossval_metrics <- function(recipe, data_crossval, cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', Fair = fair, nmemb = nmemb, return_mean = FALSE, ncores = ncores) - rps_syear <- .drop_dims(rps_syear) skill_metrics$rps_syear <- rps_syear } if ('rps_clim_syear' %in% requested_metrics) { @@ -95,7 +94,6 @@ Crossval_metrics <- function(recipe, data_crossval, RPS_clim, bin_dim_abs = 'cat', Fair = fair, cross.val = FALSE, return_mean = FALSE, output_dims = 'syear', ncores = ncores)$output1 - rps_clim_syear <- .drop_dims(rps_clim_syear) skill_metrics$rps_clim_syear <- rps_clim_syear } if ('rpss' %in% requested_metrics) { @@ -112,8 +110,6 @@ Crossval_metrics <- function(recipe, data_crossval, sig_method.type = 'two.sided.approx', alpha = 
alpha, ncores = ncores) ## TODO: These lines are probably not needed - rpss <- lapply(rpss, function(x) { - .drop_dims(x)}) skill_metrics$rpss <- rpss$rpss skill_metrics$rpss_significance <- rpss$sign # TO USE IT when visualization works for more rpsss @@ -142,7 +138,6 @@ Crossval_metrics <- function(recipe, data_crossval, time_dim = 'syear', memb_dim = 'ensemble', Fair = fair, return_mean = FALSE, ncores = ncores) - crps_syear <- .drop_dims(crps_syear) skill_metrics$crps_syear <- crps_syear } if ('crps_clim_syear' %in% requested_metrics) { @@ -150,7 +145,6 @@ Crossval_metrics <- function(recipe, data_crossval, obs = data_crossval$obs$data, time_dim = 'syear', memb_dim = 'ensemble', Fair = fair, return_mean = FALSE, ncores = ncores) - crps_clim_syear <- .drop_dims(crps_clim_syear) skill_metrics$crps_clim_syear <- crps_clim_syear } if ('crpss' %in% requested_metrics) { @@ -287,6 +281,12 @@ Crossval_metrics <- function(recipe, data_crossval, # reduce dimension to work with Visualization module: skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)}) + ## TODO: Generalize this or find a better way to handle it + # Separate skill metrics that have syear dimension + syear_metrics <- grepl("syear", names(skill_metrics)) + skill_metrics_syear <- skill_metrics[syear_metrics] + skill_metrics <- skill_metrics[!syear_metrics] + # Save metrics if (retrieve) { ## TODO: Update this part @@ -298,11 +298,26 @@ Crossval_metrics <- function(recipe, data_crossval, outdir = recipe$Run$output_dir) } else { source("tools/save_metadata_chunks.R") - skill_metrics <- save_metadata_chunks(recipe = recipe, - metrics = skill_metrics, - data_cube = data_crossval$hcst, - module = "Skill", - nchunks = nchunks) + if (length(skill_metrics) > 0) { + skill_metrics <- save_metadata_chunks(recipe = recipe, + metrics = skill_metrics, + data_cube = data_crossval$hcst, + module = "Skill", + nchunks = nchunks) + } else { + skill_metrics <- NULL + } + if (length(skill_metrics_syear) > 0) { + skill_metrics_syear <- save_metadata_chunks(recipe = recipe, + metrics = skill_metrics_syear, + data_cube = data_crossval$hcst, + module = "Skill_syear", + nchunks = nchunks) + } else { + skill_metrics_syear <- NULL + } + skill_metrics <- list(skill = skill_metrics, + skill_syear = skill_metrics_syear) } return(skill_metrics) } diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index cbf66525..c3a9e746 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -44,7 +44,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, system <- recipe$Analysis$Datasets$System$name reference <- recipe$Analysis$Datasets$Reference$name - var <- data$hcst$attrs$Variable$varName + var <- skill_metrics$metadata$attrs$Variable$varName # var <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) @@ -59,6 +59,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + ## Define array to filled with data aggr_significance <- array(data = NA, @@ -66,7 +67,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, time = length(forecast.months), region = length(regions), metric = length(metrics))) - + ## For data that is already 
aggregated by region if ("region" %in% names(dim(skill_metrics[[1]]))) { aggr_metrics <- NULL @@ -119,7 +120,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Score Aggregation if (metric.aggregation == 'score') { ## Spatially aggregate data for each metric - for (met in metrics) { + for (met in metrics) { if (met == 'rpss') { rps_syear <- sapply(X = 1:length(regions), FUN = function(X) { @@ -262,7 +263,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Include name of region dimension names(dim(cov))[length(dim(cov))] <- 'region' names(dim(var_hcst))[length(dim(var_hcst))] <- 'region' - nvar <- data$hcst$attrs$Variable$varNameames(dim(var_obs))[length(dim(var_obs))] <- 'region' + names(dim(var_obs))[length(dim(var_obs))] <- 'region' names(dim(n_eff))[length(dim(n_eff))] <- 'region' } @@ -280,10 +281,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) - sign_corr<- array(data = NA, - dim = c(var = length(var), - time = length(forecast.months), - region = length(regions))) + sign_corr <- array(data = NA, + dim = c(var = length(var), + time = length(forecast.months), + region = length(regions))) for (var in 1:dim(sign_corr)[['var']]) { for (time in 1:dim(sign_corr)[['time']]) { -- GitLab From 30b1c49760efdc2e4697397849ece3436f4f1677 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 25 Nov 2024 14:02:07 +0100 Subject: [PATCH 42/53] Add systems and references from Nadia's working branch --- conf/archive_reference.yml | 2 + conf/archive_seasonal.yml | 93 ++++++++++++++++++++++++++++++++------ 2 files changed, 80 insertions(+), 15 deletions(-) diff --git a/conf/archive_reference.yml b/conf/archive_reference.yml index ac7bf27f..a7dd5413 100644 --- a/conf/archive_reference.yml +++ b/conf/archive_reference.yml @@ -12,6 +12,7 @@ gpfs: calendar: "standard" reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/orography.nc" esarchive: src_ref: "/esarchive/" @@ -53,6 +54,7 @@ esarchive: calendar: "gregorian" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + orography: "/esarchive/recon/ecmwf/era5/constant/orography.nc" ERA5-Land: name: "ERA5-Land" institution: "European Centre for Medium-Range Weather Forecasts" diff --git a/conf/archive_seasonal.yml b/conf/archive_seasonal.yml index 3ce0db5c..2332bdad 100644 --- a/conf/archive_seasonal.yml +++ b/conf/archive_seasonal.yml @@ -35,6 +35,23 @@ gpfs: time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_system51c3s.txt" land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/orography.nc" + ECMWF-SEAS5: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ecmwf/system5c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: 
"/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system5c3s/constant/lsm/lsm.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system5c3s/constant/orography.nc" CMCC-SPS3.5: name: "CMCC-SPS3.5" institution: "European Centre for Medium-Range Weather Forecasts" @@ -49,6 +66,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system35c3s.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/cmcc/system35c3s/constant/orography.nc" Meteo-France-System8: name: "Meteo-France System 8" institution: "European Centre for Medium-Range Weather Forecasts" @@ -63,6 +81,8 @@ gpfs: time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/meteofrance/system8c3s/constant/lsm/lsm.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/meteofrance/system8c3s/constant/orography.nc" UK-MetOffice-Glosea601: name: "UK MetOffice GloSea 6 (v6.01)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -77,6 +97,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo600.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ukmo/glosea6_system601-c3s/constant/orography.nc" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? @@ -91,6 +112,7 @@ gpfs: calendar: "gregorian" time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ncep/cfs-v2/constant/orography.nc" DWD-GCFS2.1: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" @@ -107,6 +129,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system21_m1.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/dwd/system21c3s/constant/orography.nc" ECCC-CanCM4i: name: "ECCC CanCM4i (v3)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -121,18 +144,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" - Reference: - ERA5: - name: "ERA5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/ecmwf/era5/" - monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", - "psl":"monthly_mean/psl_f1h-r1440x721cds/", - "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", - "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} - calendar: "standard" - reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" - land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/eccc/eccc3/constant/orography.nc" ######################################################################### esarchive: @@ -166,6 +178,7 @@ esarchive: time_stamp_lag: "0" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" land_sea_mask: "/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc" + orography: "/esarchive/exp/ecmwf/system5c3s/constant/orography.nc" ECMWF-SEAS5.1: name: "ECMWF SEAS5 (v5.1)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -185,6 +198,8 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "0" 
reference_grid: "conf/grid_description/griddes_system51c3s.txt" + land_sea_mask: "/esarchive/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" + orography: "/esarchive/exp/ecmwf/system51c3s/constant/orography.nc" Meteo-France-System7: name: "Meteo-France System 7" institution: "European Centre for Medium-Range Weather Forecasts" @@ -199,6 +214,21 @@ esarchive: time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" + Meteo-France-System8: + name: "Meteo-France System 8" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/meteofrance/system8c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "g500":"monthly_mean/g500_f12h/", + "prlr":"monthly_mean/prlr_f24h/", "sfcWind": "monthly_mean/sfcWind_f6h/", + "tasmax":"monthly_mean/tasmax_f24h/", "tasmin": "monthly_mean/tasmin_f24h/", + "tos":"monthly_mean/tos_f6h/", "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system8c3s.txt" + orography: "/esarchive/exp/meteofrance/system8c3s/constant/orography.nc" DWD-GCFS2.1: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" @@ -212,6 +242,7 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system21_m1.txt" + orography: "/esarchive/exp/dwd/system21_m1/constant/orography.nc" CMCC-SPS3.5: name: "CMCC-SPS3.5" institution: "European Centre for Medium-Range Weather Forecasts" @@ -225,6 +256,7 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system35c3s.txt" + orography: "/esarchive/exp/cmcc/system35c3s/constant/orography.nc" JMA-CPS2: name: "JMA System 2" institution: "European Centre for Medium-Range Weather Forecasts" @@ -240,15 +272,17 @@ esarchive: ECCC-CanCM4i: name: "ECCC CanCM4i" institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/eccc/eccc1/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", - "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} + src: "exp/eccc/eccc3/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "tasmin":"monthly_mean/tasmin_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 10 hcst: 10 calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" + orography: "/esarchive/exp/eccc/eccc3/constant/orography.nc" UK-MetOffice-Glosea600: name: "UK MetOffice GloSea 6 (v6.0)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -261,6 +295,34 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo600.txt" + UK-MetOffice-Glosea601: + name: "UK MetOffice GloSea 6 (v6.01)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system601-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo601.txt" + orography: 
"/esarchive/exp/ukmo/glosea6_system601-c3s/constant/orography.nc" + UK-MetOffice-Glosea602: + name: "UK MetOffice GloSea 602" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system602-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo602.txt" + orography: "/esarchive/exp/ukmo/glosea6_system602-c3s/constant/orography.nc" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? @@ -273,6 +335,7 @@ esarchive: calendar: "gregorian" time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + orography: "/esarchive/exp/ncep/cfs-v2/constant/orography.nc" mars: src_sys: "/esarchive/scratch/aho/tmp/GRIB/" #"/mars/" System: -- GitLab From f899cce3a81397659965bb661b417f34cbaf718c Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 27 Nov 2024 13:05:57 +0100 Subject: [PATCH 43/53] Use Apply() for orography correction and change dimension names --- modules/Loading/R/orography_correction.R | 55 +++++++++++------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/modules/Loading/R/orography_correction.R b/modules/Loading/R/orography_correction.R index 76802622..29ff0a54 100644 --- a/modules/Loading/R/orography_correction.R +++ b/modules/Loading/R/orography_correction.R @@ -10,8 +10,9 @@ orography_correction <- function(recipe, data) { lats.max <- recipe$Analysis$Region$latmax lons.min <- recipe$Analysis$Region$lonmin lons.max <- recipe$Analysis$Region$lonmax - - archive <- read_yaml("conf/archive.yml")[[recipe$Run$filesystem]] + + archive <- get_archive(recipe) + # archive <- read_yaml("conf/archive.yml")[[recipe$Run$filesystem]] # Define regrid parameters: regrid_params <- get_regrid_params(recipe, archive) @@ -21,17 +22,17 @@ orography_correction <- function(recipe, data) { ## Load exp orography orography_exp <- Start(dat = archive$System[[exp.name]]$orography, var = 'orography', - lon = values(list(lons.min, lons.max)), - lon_reorder = circularsort, - lat = values(list(lats.min, lats.max)), - lat_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), transform = regrid_params$fcst.transform, transform_params = list(grid = regrid_params$fcst.gridtype, method = regrid_params$fcst.gridmethod), - transform_vars = c('lat','lon'), - return_vars = list(lat = NULL, lon = NULL), - synonims = list(lon = c('lon','longitude'), - lat = c('lat','latitude')), + transform_vars = c('latitude', 'longitude'), + return_vars = list(latitude = NULL, longitude = NULL), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), num_procs = 1, retrieve = TRUE) ## Set negative values to zero @@ -40,17 +41,17 @@ orography_correction <- function(recipe, data) { ## load obs orography orography_obs <- Start(dat = archive$Reference[[ref.name]]$orography, var = 'orography', - lon = values(list(lons.min, lons.max)), - lon_reorder = circularsort, - lat = values(list(lats.min, lats.max)), - lat_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), 
transform = regrid_params$obs.transform, transform_params = list(grid = regrid_params$obs.gridtype, method = regrid_params$obs.gridmethod), - transform_vars = c('lat','lon'), - return_vars = list(lat = NULL, lon = NULL), - synonims = list(lon = c('lon','longitude'), - lat = c('lat','latitude')), + transform_vars = c('latitude', 'longitude'), + return_vars = list(latitude = NULL, longitude = NULL), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), num_procs = 1, retrieve = TRUE) ## Set negative values to zero @@ -61,19 +62,15 @@ orography_correction <- function(recipe, data) { ## Apply lapse rate factor to correct temperature = -0.0065 K/m (-6.5 K/km) oro_obs_corr <- oro_diff * -0.0065 - oro_obs_corr <- Reorder(data = drop(oro_obs_corr), order = c('lat', 'lon')) + oro_obs_corr <- Reorder(data = drop(oro_obs_corr), order = c('latitude', 'longitude')) ## Apply correction to obs temperature data - for(ens in 1:dim(data$obs$data)['ensemble']){ - for(time in 1:dim(data$obs$data)['time']){ - for(syear in 1:dim(data$obs$data)['syear']){ - data$obs$data[,,,,syear,time,,,ens] <- data$obs$data[,,,,syear,time,,,ens] + oro_obs_corr - - } - } - } - + data$obs$data <- Apply(data = list(data$obs$data, oro_obs_corr), + target_dims = c("latitude", "longitude"), + fun = "+", + ncores = recipe$Analysis$ncores)$output1 + data$obs$data <- Reorder(data$obs$data, names(data$obs$dims)) + return(data) - } -- GitLab From a69ee81a92856728046b3b2157da7e875fd1c3fb Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 27 Nov 2024 14:19:38 +0100 Subject: [PATCH 44/53] Remove commented code --- build_compute_workflow_crossval.R | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/build_compute_workflow_crossval.R b/build_compute_workflow_crossval.R index bad9c116..5e42f477 100644 --- a/build_compute_workflow_crossval.R +++ b/build_compute_workflow_crossval.R @@ -311,32 +311,6 @@ run_compute_workflow <- function(recipe, data, last_module = NULL) { res$skill_syear, res$skill["metadata"]) } - # if (!is.null(res$skill)) { - # tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Skill") - # metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) - # res$skill <- ClimProjDiags::ArrayToList(res$skill, - # dim = 'metric', - # level = 'list', - # names = metric_list) - # # Put chunked metadata back together - # res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, - # chunks = recipe$Run$startR_workflow$chunk_along, - # array_dims = dim(res$skill[[1]]), - # metadata_file_pattern = "metric_metadata") - # } - # if (!is.null(res$statistics)) { - # tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Statistics") - # metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) - # res$statistics <- ClimProjDiags::ArrayToList(res$statistics, - # dim = 'metric', - # level = 'list', - # names = metric_list) - # res$statistics$metadata <- retrieve_metadata(tmp_dir = tmp_dir, - # chunks = recipe$Run$startR_workflow$chunk_along, - # array_dims = dim(res$statistics[[1]]), - # metadata_file_pattern = "metric_metadata") - # } - # --------------------------------------------------------------------------- # Step 5: Remove temporary files unlink(file.path(recipe$Run$output_dir, "outputs", "tmp"), recursive = TRUE) -- GitLab From 4e8d8ddd00bd30a5c9427ef02f556fc87017f628 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 27 Nov 2024 16:19:05 +0100 Subject: [PATCH 45/53] Add orography correction to Compute() workflow (preliminary version) --- 
build_compute_workflow_crossval.R | 6 +++++- tools/check_recipe.R | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/build_compute_workflow_crossval.R b/build_compute_workflow_crossval.R index 5e42f477..026fa130 100644 --- a/build_compute_workflow_crossval.R +++ b/build_compute_workflow_crossval.R @@ -24,6 +24,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Crossval/Crossval_anomalies.R") source("modules/Crossval/Crossval_metrics.R") source("modules/old_modules/Statistics/Statistics.R") + source("modules/Loading/R/orography_correction.R") # Define appender with custom log layout so that it knows not to append # within Compute(). # Create data list @@ -54,7 +55,10 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, data <- Units(recipe, data, retrieve = F) # Call workflows for (module in modules) { - if (module == "aggregation") { + if (module == "orography") { + ## TODO: This should work differently + data <- orography_correction(recipe, data) + } else if (module == "aggregation") { data <- Aggregation(recipe, data, retrieve = F, nchunks = nchunks) } else if (module == "crossval_anomalies") { data <- Crossval_anomalies(recipe = recipe, data = data, retrieve = FALSE) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index f70fc026..605c4a9d 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1167,9 +1167,10 @@ check_recipe <- function(recipe) { STARTR_PARAMS <- c("modules", "chunk_along") STARTR_MODULES <- c("calibration", "anomalies", "downscaling", "skill", "probabilities", "indices", "aggregation", - "crossval_anomalies", "crossval_metrics", "statistics") + "crossval_anomalies", "crossval_metrics", "statistics", + "orography") CHUNK_DIMS <- c("var", "time", "latitude", "longitude") - MODULES_USING_LATLON <- c("downscaling", "indices") + MODULES_USING_LATLON <- c("downscaling", "indices", "orography") MODULES_USING_TIME <- c("indicators", "aggregation") MODULES_USING_VAR <- c("indicators") # Check that all required fields are present -- GitLab From 0023edf763c9c47c88e8684be957aa51ee99348f Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 27 Nov 2024 16:32:49 +0100 Subject: [PATCH 46/53] Add condition for orography --- build_compute_workflow_crossval.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/build_compute_workflow_crossval.R b/build_compute_workflow_crossval.R index 026fa130..e524c285 100644 --- a/build_compute_workflow_crossval.R +++ b/build_compute_workflow_crossval.R @@ -24,7 +24,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Crossval/Crossval_anomalies.R") source("modules/Crossval/Crossval_metrics.R") source("modules/old_modules/Statistics/Statistics.R") - source("modules/Loading/R/orography_correction.R") # Define appender with custom log layout so that it knows not to append # within Compute(). 
# Create data list @@ -55,11 +54,12 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, data <- Units(recipe, data, retrieve = F) # Call workflows for (module in modules) { - if (module == "orography") { + if (module == "orography" && recipe$Analysis$Variables$name == 'tas') { ## TODO: This should work differently - data <- orography_correction(recipe, data) + source("modules/Loading/R/orography_correction.R") + data <- orography_correction(recipe = recipe, data = data) } else if (module == "aggregation") { - data <- Aggregation(recipe, data, retrieve = F, nchunks = nchunks) + data <- Aggregation(recipe = recipe, data = data, retrieve = F, nchunks = nchunks) } else if (module == "crossval_anomalies") { data <- Crossval_anomalies(recipe = recipe, data = data, retrieve = FALSE) } else if (module == "crossval_metrics") { -- GitLab From 2041d3137d345c2d661201028b4ca7b93bbff177 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 29 Nov 2024 10:09:24 +0100 Subject: [PATCH 47/53] Adapt to multiple atomic recipes --- build_compute_workflow_crossval.R | 8 ++++++-- tools/divide_recipe.R | 12 +++++++----- tools/save_metadata_chunks.R | 7 ++++++- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/build_compute_workflow_crossval.R b/build_compute_workflow_crossval.R index e524c285..7dd49c06 100644 --- a/build_compute_workflow_crossval.R +++ b/build_compute_workflow_crossval.R @@ -294,10 +294,14 @@ run_compute_workflow <- function(recipe, data, last_module = NULL) { } ## TODO: Replicate for probabilities and other modules; refactor + atomic_name <- "" + if (!is.null(recipe$Run$atomic_name)) { + atomic_name <- recipe$Run$atomic_name + } for (metrics in c("skill", "skill_syear", "statistics")) { if (!is.null(res[[metrics]])) { tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", - str_to_title(metrics)) + recipe$Run$atomic_name, str_to_title(metrics)) metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) res[[metrics]] <- ClimProjDiags::ArrayToList(res[[metrics]], dim = 'metric', @@ -317,7 +321,7 @@ run_compute_workflow <- function(recipe, data, last_module = NULL) { # --------------------------------------------------------------------------- # Step 5: Remove temporary files - unlink(file.path(recipe$Run$output_dir, "outputs", "tmp"), recursive = TRUE) + unlink(file.path(recipe$Run$output_dir, "outputs", "tmp", atomic_name), recursive = TRUE) # --------------------------------------------------------------------------- # Step 6: Save data diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 18d10c01..3344e79b 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -2,7 +2,8 @@ divide_recipe <- function(recipe) { ## TODO: Implement dependent vs independent verifications? 
- info(recipe$Run$logger, "Splitting recipe in single verifications.") + info(recipe$Run$logger, retrieve = TRUE, + "Splitting recipe in single verifications.") beta_recipe <- list(Description = append(recipe$Description, list(Origin = paste("Atomic recipe,", "split from:", @@ -20,7 +21,8 @@ divide_recipe <- function(recipe) { Output_format = recipe$Analysis$Output_format), Run = recipe$Run[c("Loglevel", "output_dir", "Terminal", - "code_dir", "logfile", "filesystem")]) + "code_dir", "logfile", "filesystem", + "startR_workflow")]) # duplicate recipe by independent variables: # If a single variable is not given inside a list, rebuild structure @@ -225,7 +227,7 @@ divide_recipe <- function(recipe) { "_reg-", all_recipes[[reci]]$Analysis$Region$name, "_sdate-", all_recipes[[reci]]$Analysis$Time$sdate) recipe_name <- paste0(recipe_model, recipe_split) - + all_recipes[[reci]]$Run$atomic_name <- recipe_name if (all_recipes[[reci]]$Analysis$Datasets$System$name == 'Multimodel') { recipe_dir <- paste0(recipe$Run$output_dir, "/logs/recipes/multimodel/") @@ -241,13 +243,13 @@ divide_recipe <- function(recipe) { } # Print information for user - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste("The main recipe has been divided into", length(chunk_to_recipe), "single model atomic recipes, plus", length(split_to_recipe), "multi-model atomic recipes.")) text <- paste0("Check output directory ", recipe$Run$output_dir, "/logs/recipes/ to see all the individual atomic recipes.") - info(recipe$Run$logger, text) + info(recipe$Run$logger, retrieve = TRUE, text) ## TODO: Change returns? return(list(n_atomic_recipes = length(chunk_to_recipe), # length(all_recipes) outdir = recipe$Run$output_dir, diff --git a/tools/save_metadata_chunks.R b/tools/save_metadata_chunks.R index 5edc5c22..489549d4 100644 --- a/tools/save_metadata_chunks.R +++ b/tools/save_metadata_chunks.R @@ -22,7 +22,12 @@ save_metadata_chunks <- function(recipe, metrics, data_cube, module, nchunks) { # Retrieve dimname attributes metric_names <- dimnames(metrics)[[1]] ## TODO: Force module into Camel Case - tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", module) + if (!is.null(recipe$Run$atomic_name)) { + tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", + recipe$Run$atomic_name, module) + } else { + tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", module) + } # Save metric names (only done once) if (!dir.exists(tmp_dir)) { dir.create(tmp_dir, recursive = TRUE) -- GitLab From a05f27211dc574cbd5e3336cc17ede121c537813 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 29 Nov 2024 10:09:44 +0100 Subject: [PATCH 48/53] Update example --- example_scripts/test_compute_crossval.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/example_scripts/test_compute_crossval.R b/example_scripts/test_compute_crossval.R index 4751dcf6..11b3a8e3 100644 --- a/example_scripts/test_compute_crossval.R +++ b/example_scripts/test_compute_crossval.R @@ -6,18 +6,22 @@ source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") source("build_compute_workflow_crossval.R") -recipe_file <- "../vic-personal-code/auto-s2s-tests/cross-validation-global/recipe_scorecards_example.yml" +# recipe_file <- "/gpfs/projects/bsc32/bsc032762/git/vic-personal-code/auto-s2s-tests/cross-validation-global/recipe_scorecards_example.yml" # recipe_file <- "/home/Earth/vagudets/oldhome/git/victoria-personal-code/auto-s2s-tests/cross-validation-global/recipe_scorecards_example.yml" 
-recipe <- prepare_outputs(recipe_file) +# recipe <- prepare_outputs(recipe_file) + +args <- commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file)#### LOAD MODULES #### ## TODOs: ## 1. Statistics -> DONE ## 2. Do not return hcst, fcst and obs -> DONE-ish (Should be generalized) ## 3. Recipe checks -> DONE-ish (should be generalized) -## 4. Test without orography correction (full workflow) +## 4. Test without orography correction (full workflow) -> DONE ## 5. Fix everything to make sure retrieve = TRUE works well (full workflow) -## 6. orography correction -## 7. enssprerr and SprErr do not have the same name: problem? Check Nadia's branch +## 6. orography correction -> DONE +## 7. enssprerr and SprErr do not have the same name: problem? Check Nadia's branch -> DONE-ish (needs merging) ## 8. Workflow requirements (check multimodel case) # Load datasets @@ -39,8 +43,8 @@ if (recipe$Analysis$Workflow$Scorecards$metric_aggregation == 'score') { } ## Add logo to plots -# source("tools/add_logo.R") -# add_logo(recipe, "rsz_rsz_bsc_logo.png") +source("tools/add_logo.R") +add_logo(recipe, "rsz_rsz_bsc_logo.png") if (recipe$Analysis$Dataset$System$name == 'UK-MetOffice-Glosea601' & recipe$Analysis$Time$sdate == '0101' -- GitLab From 7d6a026d244b508d4c70974b4be565f683e9cf07 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 4 Dec 2024 10:50:29 +0100 Subject: [PATCH 49/53] Refactor image file conversion into function --- modules/Visualization/Visualization.R | 19 +++---------------- tools/convert_plot_format.R | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 16 deletions(-) create mode 100644 tools/convert_plot_format.R diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index fc361db1..6f223338 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -265,20 +265,7 @@ Visualization <- function(recipe, } # Convert plots to user-chosen format - if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && - tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { - extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) - plot_files <- list.files(path = recipe$Run$output_dir, pattern = "\\.pdf$", - recursive = TRUE, full.names = TRUE) - for (file in plot_files) { - system_command <- paste("convert -density 300", file, - "-resize 40% -alpha remove", - paste0(tools::file_path_sans_ext(file), ".", - extension)) - system(system_command) - } - unlink(plot_files) - info(recipe$Run$logger, retrieve = TRUE, - paste0("##### PLOT FILES CONVERTED TO ", toupper(extension), " #####")) - } + ## TODO: Think of a better system for parallel executions + source("tools/convert_plot_format.R") + convert_plot_format(recipe) } diff --git a/tools/convert_plot_format.R b/tools/convert_plot_format.R new file mode 100644 index 00000000..5eb8b7ed --- /dev/null +++ b/tools/convert_plot_format.R @@ -0,0 +1,20 @@ +convert_plot_format <- function(recipe) { + # Convert plots to user-chosen format + if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && + tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { + extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) + plot_files <- list.files(path = paste0(recipe$Run$output_dir), + pattern = "\\.pdf$", + recursive = TRUE, full.names = TRUE) + for (file in plot_files) { + system_command <- paste("convert -density 300", file, + "-resize 40% -alpha remove", + 
paste0(tools::file_path_sans_ext(file), ".", + extension)) + system(system_command) + } + unlink(plot_files) + info(recipe$Run$logger, retrieve = TRUE, + paste0("##### PLOT FILES CONVERTED TO ", toupper(extension), " #####")) + } +} -- GitLab From 156c4eac012758f91fc7228b4827b10c36ced9eb Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 4 Dec 2024 10:50:45 +0100 Subject: [PATCH 50/53] Get system name from recipe --- tools/add_logo.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/add_logo.R b/tools/add_logo.R index b254e819..9490ffe1 100644 --- a/tools/add_logo.R +++ b/tools/add_logo.R @@ -2,8 +2,8 @@ add_logo <- function(recipe, logo) { # recipe: SUNSET recipe # logo: URL to the logo - system <- list.files(paste0(recipe$Run$output_dir, "/plots/")) - reference <- recipe$Analysis$Datasets$Reference$name + system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) + reference <- gsub('.','', recipe$Analysis$Datasets$Reference$name, fixed = T) calibration <- recipe$Analysis$Workflow$Calibration$method variable <- recipe$Analysis$Variable$name files <- lapply(variable, function(x) { @@ -22,7 +22,7 @@ add_logo_scorecards <- function(recipe, logo) { # recipe: SUNSET recipe # logo: URL to the logo scorecards_f <- list.files(paste0(recipe$Run$output_dir, "/plots/Scorecards/")) - scorecards <- paste0(recipe$Run$output_dir, "/plots/Scorecards/", scorecards_f)[[1]] + scorecards <- paste0(recipe$Run$output_dir, "/plots/Scorecards/", scorecards_f) dim(scorecards) <- c(file = length(scorecards)) Apply(list(scorecards), target_dims = NULL, function(x) { -- GitLab From 6ce636b568aec199356b4f24c213ea9de61e669f Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 4 Dec 2024 10:51:14 +0100 Subject: [PATCH 51/53] Testing --- example_scripts/test_compute_crossval.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/example_scripts/test_compute_crossval.R b/example_scripts/test_compute_crossval.R index 11b3a8e3..0f90da99 100644 --- a/example_scripts/test_compute_crossval.R +++ b/example_scripts/test_compute_crossval.R @@ -12,7 +12,9 @@ source("build_compute_workflow_crossval.R") args <- commandArgs(trailingOnly = TRUE) recipe_file <- args[1] -recipe <- read_atomic_recipe(recipe_file)#### LOAD MODULES #### +recipe <- read_atomic_recipe(recipe_file) + +#### LOAD MODULES #### ## TODOs: ## 1. 
Statistics -> DONE -- GitLab From ce5e32fe9a82870c744916afab29a9bbd243859e Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 4 Dec 2024 10:51:23 +0100 Subject: [PATCH 52/53] Fix output dir --- modules/Scorecards/Scorecards_calculations.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index c3a9e746..517985d5 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -451,10 +451,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, metadata = skill_metrics$metadata) ## Save metric data arrays - recipe$Run$output_dir <- file.path(recipe$Run$output_dir, - "outputs", "Scorecards/") save_metrics(recipe = recipe, metrics = aggr_scorecards, - agg = "region", module = "scorecards") + agg = "region", module = "Scorecards") # save_metrics(recipe = recipe, metrics = aggr_scorecards, # data_cube = data$hcst, agg = 'region', # module = "scorecards") -- GitLab From 17c299f9d3aa7e132b3ac7eb5931c6105017b266 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 4 Dec 2024 10:52:30 +0100 Subject: [PATCH 53/53] Avoid duplicating skill outputs --- build_compute_workflow_crossval.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build_compute_workflow_crossval.R b/build_compute_workflow_crossval.R index 7dd49c06..0519e1a5 100644 --- a/build_compute_workflow_crossval.R +++ b/build_compute_workflow_crossval.R @@ -301,7 +301,7 @@ run_compute_workflow <- function(recipe, data, last_module = NULL) { for (metrics in c("skill", "skill_syear", "statistics")) { if (!is.null(res[[metrics]])) { tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", - recipe$Run$atomic_name, str_to_title(metrics)) + atomic_name, str_to_title(metrics)) metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) res[[metrics]] <- ClimProjDiags::ArrayToList(res[[metrics]], dim = 'metric', @@ -317,6 +317,7 @@ run_compute_workflow <- function(recipe, data, last_module = NULL) { res$skill_syear$metadata <- NULL res$skill <- c(res$skill[-which(names(res$skill) == "metadata")], res$skill_syear, res$skill["metadata"]) + res$skill_syear <- NULL } # --------------------------------------------------------------------------- -- GitLab
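As a closing note on the metric-retrieval pattern the last few commits converge on: after Compute() returns, each metrics array is split back into a named list along its 'metric' dimension with ClimProjDiags::ArrayToList(), using the metric names saved to metric_names.Rds before the chunked execution. A minimal, self-contained R sketch of that step follows; the toy array and metric names are assumptions for illustration, and only the ArrayToList() call mirrors the workflow code in run_compute_workflow().

  library(ClimProjDiags)

  # Stand-in for one Compute() output: two metrics on a small grid
  metric_list <- c("rpss", "crpss")  # normally read from metric_names.Rds
  res_skill <- array(rnorm(2 * 3 * 4),
                     dim = c(metric = 2, latitude = 3, longitude = 4))

  # Split along the 'metric' dimension into a named list of arrays,
  # as done above for res$skill, res$skill_syear and res$statistics
  skill <- ArrayToList(res_skill, dim = "metric",
                       level = "list", names = metric_list)

  str(skill$rpss)  # one [latitude, longitude] array per metric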