diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 707790e9c2e5e0eb98b1e72b0c7010e90318155d..97ce1f51ea976bc7da561bb1d968fbf6dbb37dfa 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -1,5 +1,3 @@ -source("modules/Anomalies/tmp/CST_Anomaly.R") - # Compute the hcst, obs and fcst anomalies with or without cross-validation # and return them, along with the hcst and obs climatologies. @@ -7,8 +5,8 @@ compute_anomalies <- function(recipe, data) { if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, - paste("The anomaly module has been called, but the element", - "'Workflow:Anomalies:compute' is missing from the recipe.")) + paste("The anomaly module has been called, but the element", + "'Workflow:Anomalies:compute' is missing from the recipe.")) stop() } @@ -24,13 +22,13 @@ compute_anomalies <- function(recipe, data) { # Compute anomalies anom <- CST_Anomaly(data$hcst, data$obs, - cross = cross, - memb = TRUE, - memb_dim = 'ensemble', - dim_anom = 'syear', - dat_dim = c('dat', 'ensemble'), - ftime_dim = 'time', - ncores = recipe$Analysis$ncores) + cross = cross, + memb = TRUE, + memb_dim = 'ensemble', + dim_anom = 'syear', + dat_dim = c('dat', 'ensemble'), + ftime_dim = 'time', + ncores = recipe$Analysis$ncores) # Reorder dims anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) @@ -57,12 +55,12 @@ compute_anomalies <- function(recipe, data) { if (!is.null(data$fcst)) { # Compute hindcast climatology ensemble mean clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, - time_dim = "syear", - dat_dim = c("dat", "ensemble"), - memb = FALSE, - memb_dim = "ensemble", - ftime_dim = "time", - ncores = recipe$Analysis$ncores) + time_dim = "syear", + dat_dim = c("dat", "ensemble"), + memb = FALSE, + memb_dim = "ensemble", + ftime_dim = "time", + ncores = recipe$Analysis$ncores) clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, name = "syear") dims <- dim(clim_hcst) @@ -79,24 +77,38 @@ compute_anomalies <- function(recipe, data) { } info(recipe$Run$logger, - paste("The anomalies have been computed,", cross_msg, - "cross-validation. The original full fields are returned as", - "$hcst.full_val and $obs.full_val.")) + paste("The anomalies have been computed,", cross_msg, + "cross-validation. 
The original full fields are returned as", + "$hcst.full_val and $obs.full_val.")) info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") - + # Save outputs + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = data$hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = data$obs) + } } else { warn(recipe$Run$logger, paste("The Anomalies module has been called, but", - "recipe parameter Analysis:Variables:anomaly is set to FALSE.", - "The full fields will be returned.")) + "recipe parameter Analysis:Variables:anomaly is set to FALSE.", + "The full fields will be returned.")) hcst_fullvalue <- NULL obs_fullvalue <- NULL info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") } - ## TODO: Return fcst full value? - return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, - hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) + hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) } diff --git a/modules/Anomalies/tmp/CST_Anomaly.R b/modules/Anomalies/tmp/CST_Anomaly.R deleted file mode 100644 index f38e39b050f7c46be452ac6e6571542c465264b9..0000000000000000000000000000000000000000 --- a/modules/Anomalies/tmp/CST_Anomaly.R +++ /dev/null @@ -1,246 +0,0 @@ -#'Anomalies relative to a climatology along selected dimension with or without cross-validation -#' -#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -#'@author Pena Jesus, \email{jesus.pena@bsc.es} -#'@description This function computes the anomalies relative to a climatology -#'computed along the selected dimension (usually starting dates or forecast -#'time) allowing the application or not of crossvalidated climatologies. The -#'computation is carried out independently for experimental and observational -#'data products. -#' -#'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the seasonal forecast experiment data in the element -#' named \code{$data}. -#'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the observed data in the element named \code{$data}. -#'@param dim_anom A character string indicating the name of the dimension -#' along which the climatology will be computed. The default value is 'sdate'. -#'@param cross A logical value indicating whether cross-validation should be -#' applied or not. Default = FALSE. -#'@param memb_dim A character string indicating the name of the member -#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no -#' member dimension, set NULL. The default value is 'member'. -#'@param memb A logical value indicating whether to subtract the climatology -#' based on the individual members (TRUE) or the ensemble mean over all -#' members (FALSE) when calculating the anomalies. The default value is TRUE. -#'@param dat_dim A character vector indicating the name of the dataset and -#' member dimensions. If there is no dataset dimension, it can be NULL. -#' The default value is "c('dataset', 'member')". -#'@param filter_span A numeric value indicating the degree of smoothing. 
This -#' option is only available if parameter \code{cross} is set to FALSE. -#'@param ftime_dim A character string indicating the name of the temporal -#' dimension where the smoothing with 'filter_span' will be applied. It cannot -#' be NULL if 'filter_span' is provided. The default value is 'ftime'. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. It will be used only when -#' 'filter_span' is not NULL. -#' -#'@return A list with two S3 objects, 'exp' and 'obs', of the class -#''s2dv_cube', containing experimental and date-corresponding observational -#'anomalies, respectively. These 's2dv_cube's can be ingested by other functions -#'in CSTools. -#' -#'@examples -#'# Example 1: -#'mod <- 1 : (2 * 3 * 4 * 5 * 6 * 7) -#'dim(mod) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'obs <- 1 : (1 * 1 * 4 * 5 * 6 * 7) -#'dim(obs) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'lon <- seq(0, 30, 5) -#'lat <- seq(0, 25, 5) -#'exp <- list(data = mod, lat = lat, lon = lon) -#'obs <- list(data = obs, lat = lat, lon = lon) -#'attr(exp, 'class') <- 's2dv_cube' -#'attr(obs, 'class') <- 's2dv_cube' -#' -#'anom <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = TRUE) -#' -#'@seealso \code{\link[s2dv]{Ano_CrossValid}}, \code{\link[s2dv]{Clim}} and \code{\link{CST_Load}} -#' -#'@import multiApply -#'@importFrom s2dv InsertDim Clim Ano_CrossValid Reorder -#'@export -CST_Anomaly <- function(exp = NULL, obs = NULL, dim_anom = 'sdate', cross = FALSE, - memb_dim = 'member', memb = TRUE, dat_dim = c('dataset', 'member'), - filter_span = NULL, ftime_dim = 'ftime', ncores = NULL) { - # s2dv_cube - if (!inherits(exp, 's2dv_cube') & !is.null(exp) || - !inherits(obs, 's2dv_cube') & !is.null(obs)) { - stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - # exp and obs - if (is.null(exp$data) & is.null(obs$data)) { - stop("One of the parameter 'exp' or 'obs' cannot be NULL.") - } - case_exp = case_obs = 0 - if (is.null(exp)) { - exp <- obs - case_obs = 1 - warning("Parameter 'exp' is not provided and 'obs' will be used instead.") - } - if (is.null(obs)) { - obs <- exp - case_exp = 1 - warning("Parameter 'obs' is not provided and 'exp' will be used instead.") - } - if(any(is.null(names(dim(exp$data))))| any(nchar(names(dim(exp$data))) == 0) | - any(is.null(names(dim(obs$data))))| any(nchar(names(dim(obs$data))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names in element 'data'.") - } - if(!all(names(dim(exp$data)) %in% names(dim(obs$data))) | - !all(names(dim(obs$data)) %in% names(dim(exp$data)))) { - stop("Parameter 'exp' and 'obs' must have same dimension names in element 'data'.") - } - dim_exp <- dim(exp$data) - dim_obs <- dim(obs$data) - dimnames_data <- names(dim_exp) - # dim_anom - if (is.numeric(dim_anom) & length(dim_anom) == 1) { - warning("Parameter 'dim_anom' must be a character string and a numeric value will not be ", - "accepted in the next release. 
The corresponding dimension name is assigned.") - dim_anom <- dimnames_data[dim_anom] - } - if (!is.character(dim_anom)) { - stop("Parameter 'dim_anom' must be a character string.") - } - if (!dim_anom %in% names(dim_exp) | !dim_anom %in% names(dim_obs)) { - stop("Parameter 'dim_anom' is not found in 'exp' or in 'obs' dimension in element 'data'.") - } - if (dim_exp[dim_anom] <= 1 | dim_obs[dim_anom] <= 1) { - stop("The length of dimension 'dim_anom' in label 'data' of the parameter ", - "'exp' and 'obs' must be greater than 1.") - } - # cross - if (!is.logical(cross) | !is.logical(memb) ) { - stop("Parameters 'cross' and 'memb' must be logical.") - } - if (length(cross) > 1 | length(memb) > 1 ) { - cross <- cross[1] - warning("Parameter 'cross' has length greater than 1 and only the first element", - "will be used.") - } - # memb - if (length(memb) > 1) { - memb <- memb[1] - warning("Parameter 'memb' has length greater than 1 and only the first element", - "will be used.") - } - # memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim_exp) | !memb_dim %in% names(dim_obs)) { - stop("Parameter 'memb_dim' is not found in 'exp' or in 'obs' dimension.") - } - } - # dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim)) { - stop("Parameter 'dat_dim' must be a character vector.") - } - if (!all(dat_dim %in% names(dim_exp)) | !all(dat_dim %in% names(dim_obs))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension in element 'data'.", - " Set it as NULL if there is no dataset dimension.") - } - } - # filter_span - if (!is.null(filter_span)) { - if (!is.numeric(filter_span)) { - warning("Paramater 'filter_span' is not numeric and any filter", - " is being applied.") - filter_span <- NULL - } - # ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - # ftime_dim - if (!is.character(ftime_dim)) { - stop("Parameter 'ftime_dim' must be a character string.") - } - if (!ftime_dim %in% names(dim_exp) | !memb_dim %in% names(dim_obs)) { - stop("Parameter 'ftime_dim' is not found in 'exp' or in 'obs' dimension in element 'data'.") - } - } - - # Computating anomalies - #---------------------- - - # With cross-validation - if (cross) { - ano <- Ano_CrossValid(exp = exp$data, obs = obs$data, - time_dim = dim_anom, - memb_dim = memb_dim, - memb = memb, - dat_dim = dat_dim, - ncores = ncores) - - # Without cross-validation - } else { - tmp <- Clim(exp = exp$data, obs = obs$data, - time_dim = dim_anom, - memb_dim = memb_dim, - memb = memb, - dat_dim = dat_dim, - ncores = ncores) - if (!is.null(filter_span)) { - tmp$clim_exp <- Apply(tmp$clim_exp, - target_dims = c(ftime_dim), - output_dims = c(ftime_dim), - fun = .Loess, - loess_span = filter_span, - ncores = ncores)$output1 - tmp$clim_obs <- Apply(tmp$clim_obs, - target_dims = c(ftime_dim), - output_dims = c(ftime_dim), - fun = .Loess, - loess_span = filter_span, - ncores = ncores)$output1 - } - if (memb) { - clim_exp <- tmp$clim_exp - clim_obs <- tmp$clim_obs - } else { - clim_exp <- InsertDim(tmp$clim_exp, 1, dim_exp[memb_dim]) - clim_obs <- InsertDim(tmp$clim_obs, 1, dim_obs[memb_dim]) - } - clim_exp <- InsertDim(clim_exp, 1, dim_exp[dim_anom]) - clim_obs <- InsertDim(clim_obs, 1, dim_obs[dim_anom]) - ano <- NULL - - # Permuting back dimensions to original order - clim_exp <- 
Reorder(clim_exp, dimnames_data) - clim_obs <- Reorder(clim_obs, dimnames_data) - - ano$exp <- exp$data - clim_exp - ano$obs <- obs$data - clim_obs - } - - exp$data <- ano$exp - obs$data <- ano$obs - - # Outputs - # ~~~~~~~~~ - if (case_obs == 1) { - return(obs) - } - else if (case_exp == 1) { - return(exp) - } - else { - return(list(exp = exp, obs = obs)) - } -} - -.Loess <- function(clim, loess_span) { - data <- data.frame(ensmean = clim, day = 1 : length(clim)) - loess_filt <- loess(ensmean ~ day, data, span = loess_span) - output <- predict(loess_filt) - return(output) -} - diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 2b6e18abb4168c38b0ee8bedabc6a5bc0d713323..dedfbd855c45718fbb81da3892cd466939e20918 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -11,9 +11,9 @@ calibrate_datasets <- function(recipe, data) { if (method == "raw") { warn(recipe$Run$logger, - paste("The Calibration module has been called, but the calibration", - "method in the recipe is 'raw'. The hcst and fcst will not be", - "calibrated.")) + paste("The Calibration module has been called, but the calibration", + "method in the recipe is 'raw'. The hcst and fcst will not be", + "calibrated.")) fcst_calibrated <- data$fcst hcst_calibrated <- data$hcst if (!is.null(data$hcst.full_val)) { @@ -57,9 +57,9 @@ calibrate_datasets <- function(recipe, data) { CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") ## TODO: implement other calibration methods if (!(method %in% CST_CALIB_METHODS)) { - error(recipe$Run$logger, - paste("Calibration method in the recipe is not available for", - "monthly data.")) + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "monthly data.")) stop() } else { # Calibrate the hindcast @@ -74,24 +74,24 @@ calibrate_datasets <- function(recipe, data) { memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) - # In the case where anomalies have been computed, calibrate full values - if (!is.null(data$hcst.full_val)) { - hcst_full_calibrated <- CST_Calibration(data$hcst.full_val, - data$obs.full_val, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - } else { - hcst_full_calibrated <- NULL - } + # In the case where anomalies have been computed, calibrate full values + if (!is.null(data$hcst.full_val)) { + hcst_full_calibrated <- CST_Calibration(data$hcst.full_val, + data$obs.full_val, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } else { + hcst_full_calibrated <- NULL + } - # Calibrate the forecast + # Calibrate the forecast if (!is.null(data$fcst)) { fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, cal.method = method, @@ -111,66 +111,81 @@ calibrate_datasets <- function(recipe, data) { } else if (recipe$Analysis$Variables$freq == "daily_mean") { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { - error(recipe$Run$logger, - paste("Calibration method in the recipe is not available for", - "daily data. Only quantile mapping 'qmap is implemented.")) + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "daily data. 
Only quantile mapping 'qmap' is implemented.")) stop() } # Calibrate the hindcast dim_order <- names(dim(data$hcst$data)) hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, - exp_cor = NULL, - sdate_dim = "syear", - memb_dim = "ensemble", - # window_dim = "time", - method = "QUANT", - ncores = ncores, - na.rm = na.rm, - wet.day = F) + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) # Restore dimension order hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) # In the case where anomalies have been computed, calibrate full values if (!is.null(data$hcst.full_val)) { - hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val, - data$obs.full_val, - exp_cor = NULL, - sdate_dim = "syear", - memb_dim = "ensemble", - method = "QUANT", - ncores = ncores, - na.rm = na.rm, - wet.day = F) + hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val, + data$obs.full_val, + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) } else { - hcst_full_calibrated <- NULL + hcst_full_calibrated <- NULL } if (!is.null(data$fcst)) { # Calibrate the forecast fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, - exp_cor = data$fcst, - sdate_dim = "syear", - memb_dim = "ensemble", - # window_dim = "time", - method = "QUANT", - ncores = ncores, - na.rm = na.rm, - wet.day = F) + exp_cor = data$fcst, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) # Restore dimension order - fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) + fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) } else { fcst_calibrated <- NULL } } } info(recipe$Run$logger, CALIB_MSG) + ## TODO: What do we do with the full values?
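+  # Note (illustrative sketch, values assumed): the save calls below expect a
+  # 'save' field under Analysis:Workflow:Calibration in the recipe, e.g. in
+  # the recipe YAML:
+  #   Workflow:
+  #     Calibration:
+  #       method: mse_min
+  #       save: 'all'   # 'all', 'exp_only' or 'fcst_only'; any other value
+  #                     # skips saving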
+ recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Calibration/") + if (recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only', 'fcst_only')) { + save_forecast(recipe = recipe, data_cube = fcst_calibrated, type = 'fcst') + } + if (recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = hcst_calibrated, type = 'hcst') + } + if (recipe$Analysis$Workflow$Calibration$save == 'all') { + save_observations(recipe = recipe, data_cube = data$obs) + } + ## TODO: Sort out returns return_list <- list(hcst = hcst_calibrated, - obs = data$obs, - fcst = fcst_calibrated) + obs = data$obs, + fcst = fcst_calibrated) if (!is.null(hcst_full_calibrated)) { return_list <- append(return_list, - list(hcst.full_val = hcst_full_calibrated, - obs.full_val = data$obs.full_val)) + list(hcst.full_val = hcst_full_calibrated, + obs.full_val = data$obs.full_val)) } return(return_list) } diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 55b13451adfd22f623f2ad01e1567fe124ad84ef..c5eb41e6aed1740c11dfc99eae751240947c5dcf 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -72,17 +72,17 @@ load_datasets <- function(recipe) { obs.path <- paste0(archive$src, obs.dir, store.freq, "/$var$", reference_descrip[[store.freq]][[variable]], - "$var$_$file_date$.nc") + "$var$_$file_date$.nc") hcst.path <- paste0(archive$src, hcst.dir, store.freq, "/$var$", exp_descrip[[store.freq]][[variable]], - "$var$_$file_date$.nc") + "$var$_$file_date$.nc") fcst.path <- paste0(archive$src, hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], - "$var$_$file_date$.nc") + exp_descrip[[store.freq]][[variable]], + "$var$_$file_date$.nc") # Define regrid parameters: #------------------------------------------------------------------- @@ -114,7 +114,7 @@ load_datasets <- function(recipe) { transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), + ensemble = c('member', 'ensemble')), ensemble = indices(1:hcst.nmember), return_vars = list(latitude = 'dat', longitude = 'dat', @@ -134,7 +134,7 @@ load_datasets <- function(recipe) { # Change time attribute dimensions default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) names(dim(attr(hcst, "Variables")$common$time))[which(names( - dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- dim(attr(hcst, "Variables")$common$time) dim(attr(hcst, "Variables")$common$time) <- default_time_dims @@ -157,26 +157,26 @@ load_datasets <- function(recipe) { # multiple dims split fcst <- Start(dat = fcst.path, - var = variable, - file_date = sdates$fcst, + var = variable, + file_date = sdates$fcst, time = idxs$fcst, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = indices(1:fcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 
'file_date'), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) + retrieve = TRUE) if (recipe$Analysis$Variables$freq == "daily_mean") { # Adjusts dims for daily case, could be removed if startR allows @@ -201,7 +201,7 @@ load_datasets <- function(recipe) { # Adjust dates for models where the time stamp goes into the next month if (recipe$Analysis$Variables$freq == "monthly_mean") { fcst$attrs$Dates[] <- - fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) } } else { @@ -215,9 +215,9 @@ load_datasets <- function(recipe) { # the corresponding observations are loaded correctly. dates <- hcst$attrs$Dates dim(dates) <- dim(Subset(hcst$data, - along=c('dat', 'var', - 'latitude', 'longitude', 'ensemble'), - list(1,1,1,1,1), drop="selected")) + along=c('dat', 'var', + 'latitude', 'longitude', 'ensemble'), + list(1,1,1,1,1), drop="selected")) # Separate Start() call for monthly vs daily data if (store.freq == "monthly_mean") { @@ -227,22 +227,22 @@ load_datasets <- function(recipe) { obs <- Start(dat = obs.path, var = variable, - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'y', 'latitude'), - longitude = c('lon', 'x', 'longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'y', 'latitude'), + longitude = c('lon', 'x', 'longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) } else if (store.freq == "daily_mean") { @@ -256,28 +256,28 @@ load_datasets <- function(recipe) { dim(dates) <- dim(dates_file) obs <- Start(dat = obs.path, - var = variable, - file_date = sort(unique(dates_file)), - time = dates, - time_var = 'time', - time_across = 'file_date', - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = 
regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) } # Adds ensemble dim to obs (for consistency with hcst/fcst) @@ -294,27 +294,27 @@ load_datasets <- function(recipe) { if (!(recipe$Analysis$Regrid$type == 'none')) { if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") + "Please check the original grids and the", + "regrid parameters in your recipe.") error(recipe$Run$logger, lat_error_msg) hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) info(recipe$Run$logger, hcst_lat_msg) obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], - "; Last obs lat: ", obs$lat[length(obs$lat)]) + "; Last obs lat: ", obs$lat[length(obs$lat)]) info(recipe$Run$logger, obs_lat_msg) stop("hcst and obs don't share the same latitudes.") } if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") + "Please check the original grids and the", + "regrid parameters in your recipe.") error(recipe$Run$logger, lon_error_msg) hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) info(recipe$Run$logger, hcst_lon_msg) obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], - "; Last obs lon: ", obs$lon[length(obs$lon)]) + "; Last obs lon: ", obs$lon[length(obs$lon)]) info(recipe$Run$logger, obs_lon_msg) stop("hcst and obs don't share the same longitudes.") @@ -325,7 +325,7 @@ load_datasets <- function(recipe) { dictionary <- read_yaml("conf/variable-dictionary.yml") if (dictionary$vars[[variable]]$accum) { info(recipe$Run$logger, - "Accumulated variable: setting negative values to zero.") + "Accumulated variable: setting negative values to zero.") obs$data[obs$data < 0] <- 0 hcst$data[hcst$data < 0] <- 0 if (!is.null(fcst)) { @@ -339,9 +339,9 @@ load_datasets <- function(recipe) { if (variable == "prlr") { # Verify that the units are m/s and the same in obs and hcst if (((obs$attrs$Variable$metadata[[variable]]$units == "m s-1") || - (obs$attrs$Variable$metadata[[variable]]$units == "m s**-1")) && - ((hcst$attrs$Variable$metadata[[variable]]$units == "m s-1") || - 
(hcst$attrs$Variable$metadata[[variable]]$units == "m s**-1"))) { + (obs$attrs$Variable$metadata[[variable]]$units == "m s**-1")) && + ((hcst$attrs$Variable$metadata[[variable]]$units == "m s-1") || + (hcst$attrs$Variable$metadata[[variable]]$units == "m s**-1"))) { info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") obs$data <- obs$data*86400*1000 diff --git a/modules/Saving/R/Utils.R b/modules/Saving/R/Utils.R new file mode 100644 index 0000000000000000000000000000000000000000..9ff695a649f6ee8a202aecb56013b70ee74ca8cc --- /dev/null +++ b/modules/Saving/R/Utils.R @@ -0,0 +1,69 @@ +.get_global_attributes <- function(recipe, archive) { + # Generates metadata of interest to add to the global attributes of the + # netCDF files. + parameters <- recipe$Analysis + hcst_period <- paste0(parameters$Time$hcst_start, " to ", + parameters$Time$hcst_end) + current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) + system_name <- parameters$Datasets$System$name + reference_name <- parameters$Datasets$Reference$name + + attrs <- list(reference_period = hcst_period, + institution_system = archive$System[[system_name]]$institution, + institution_reference = archive$Reference[[reference_name]]$institution, + system = system_name, + reference = reference_name, + calibration_method = parameters$Workflow$Calibration$method, + computed_on = current_time) + + return(attrs) +} + +.get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { + # Generates time dimensions and the corresponding metadata. + ## TODO: Subseasonal + + switch(fcst.horizon, + "seasonal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}, + "subseasonal" = {len <- 4; ref <- 'hours since '; + stdname <- ''}, + "decadal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}) + + dim(time) <- length(time) + sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting + metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), + calendar = calendar)) + attr(time, 'variables') <- metadata + names(dim(time)) <- 'time' + + sdate <- 1:length(sdate) + dim(sdate) <- length(sdate) + metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), + collapse=", "), + units = paste0('Init date'))) + attr(sdate, 'variables') <- metadata + names(dim(sdate)) <- 'sdate' + + return(list(time=time)) +} + +.get_latlon <- function(latitude, longitude) { + # Adds dimensions and metadata to lat and lon + # latitude: array containing the latitude values + # longitude: array containing the longitude values + + dim(longitude) <- length(longitude) + metadata <- list(longitude = list(units = 'degrees_east')) + attr(longitude, 'variables') <- metadata + names(dim(longitude)) <- 'longitude' + + dim(latitude) <- length(latitude) + metadata <- list(latitude = list(units = 'degrees_north')) + attr(latitude, 'variables') <- metadata + names(dim(latitude)) <- 'latitude' + + return(list(lat=latitude, lon=longitude)) + +} diff --git a/modules/Saving/R/get_dir.R b/modules/Saving/R/get_dir.R new file mode 100644 index 0000000000000000000000000000000000000000..ac63396815957fa71933f4731bb5931327eeaeb5 --- /dev/null +++ b/modules/Saving/R/get_dir.R @@ -0,0 +1,57 @@ +## TODO: Separate by time aggregation +## TODO: Build a default path that accounts for: +## variable, system, reference, start date and region name + +get_dir <- function(recipe, agg = "global") { + # This function builds the path for the output directory. 
The output + # directories will be subdirectories within outdir, organized by variable, + # startdate, and aggregation. + + ## TODO: Get aggregation from recipe + outdir <- recipe$Run$output_dir + ## TODO: multivar case + variable <- recipe$Analysis$Variables$name + system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) + + if (tolower(recipe$Analysis$Output_format) == 'scorecards') { + # Define output dir name according to Scorecards format + dict <- read_yaml("conf/output_dictionaries/scorecards.yml") + # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name + dir <- paste0(outdir, "/", system, "/", variable, "/") + } else { + # Default generic output format based on FOCUS + # Get startdate or hindcast period + if (!is.null(recipe$Analysis$Time$fcst_year)) { + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + # decadal doesn't have sdate + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') + } else { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } + } else { + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + # decadal doesn't have sdate + fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$hcst_end, + sep = '_')) + } else { + fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) + } + } + + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + store.freq <- recipe$Analysis$Variables$freq + ## TODO: Change "_country" + switch(tolower(agg), + "country" = {dir <- paste0(outdir, "/", system, "/", calib.method, + "-", store.freq, "/", variable, + "_country/", fcst.sdate, "/")}, + "global" = {dir <- paste0(outdir, "/", system, "/", calib.method, + "-", store.freq, "/", variable, "/", + fcst.sdate, "/")}) + } + ## TODO: Multivar case + dir.create(dir, showWarnings = FALSE, recursive = TRUE) + return(dir) +} diff --git a/modules/Saving/paths2save.R b/modules/Saving/R/get_filename.R similarity index 50% rename from modules/Saving/paths2save.R rename to modules/Saving/R/get_filename.R index d22d9856d290894fc7194218dc6f4bb8561f5da0..1c92565179ab74ee0968323258b3efa18e396f77 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/R/get_filename.R @@ -48,64 +48,10 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { "exp" = {file <- paste0(var, gg, "_", date)}, "obs" = {file <- paste0(var, gg, "-obs_", date)}, "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, - shortdate)}, + shortdate)}, "probs" = {file <- paste0(var, gg, "-probs_", date)}, "bias" = {file <- paste0(var, gg, "-bias_", date)}) } return(paste0(dir, file, ".nc")) } -get_dir <- function(recipe, agg = "global") { - # This function builds the path for the output directory. The output - # directories will be subdirectories within outdir, organized by variable, - # startdate, and aggregation.
- - ## TODO: Get aggregation from recipe - outdir <- paste0(recipe$Run$output_dir, "/outputs/") - ## TODO: multivar case - variable <- recipe$Analysis$Variables$name - system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) - - if (tolower(recipe$Analysis$Output_format) == 'scorecards') { - # Define output dir name accordint to Scorecards format - dict <- read_yaml("conf/output_dictionaries/scorecards.yml") - # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name - dir <- paste0(outdir, "/", system, "/", variable, "/") - } else { - # Default generic output format based on FOCUS - # Get startdate or hindcast period - if (!is.null(recipe$Analysis$Time$fcst_year)) { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - # decadal doesn't have sdate - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') - } else { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } - } else { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - # decadal doesn't have sdate - fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$hcst_end, - sep = '_')) - } else { - fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) - } - } - - calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - store.freq <- recipe$Analysis$Variables$freq - ## TODO: Change "_country" - if (!is.null(recipe$Analysis$Region$name)) { - outdir <- paste0(outdir, "/", recipe$Analysis$Region$name) - } - switch(tolower(agg), - "country" = {dir <- paste0(outdir, "/", system, "/", calib.method, - "-", store.freq, "/", variable, - "_country/", fcst.sdate, "/")}, - "global" = {dir <- paste0(outdir, "/", system, "/", calib.method, - "-", store.freq, "/", variable, "/", - fcst.sdate, "/")}) - } - return(dir) -} diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R new file mode 100644 index 0000000000000000000000000000000000000000..8b9453186578e1610bae65bba9f7cb578782a69a --- /dev/null +++ b/modules/Saving/R/save_corr.R @@ -0,0 +1,117 @@ +save_corr <- function(recipe, + skill, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the ensemble correlation in 'skill' + # and exports it to a netCDF file inside 'outdir'. 
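+  # Minimal sketch of the expected 'skill' input (metric name and dimension
+  # sizes are assumed): a named list of arrays with longitude, latitude,
+  # ensemble and time dimensions, e.g.
+  #   skill <- list(corr = array(rnorm(2 * 2 * 3 * 1),
+  #                              dim = c(longitude = 2, latitude = 2,
+  #                                      ensemble = 3, time = 1)))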
+ + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + skill <- lapply(skill, function(x) { + Reorder(x, c(lalo, 'ensemble', 'time'))}) + } + # Add global and variable attributes + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(global_attributes, + list(from_anomalies = "Yes")) + } else { + global_attributes <- c(global_attributes, + list(from_anomalies = "No")) + } + attr(skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(skill)) { + metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'ensemble', 'time') + } else { + sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) + dims <- c(lalo, 'ensemble', 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
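+  # For example (hypothetical recipe values): fcst_year = 2020 with
+  # sdate = '1101' yields "20201101"; if no fcst_year is given, the
+  # reference start date falls back to "19701101".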
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "corr") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- .get_countries(grid) + ArrayToNc(append(country, time, skill), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, + "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R new file mode 100644 index 0000000000000000000000000000000000000000..9ec8499c181e96afc4f5580080232f34ca64dcd6 --- /dev/null +++ b/modules/Saving/R/save_forecast.R @@ -0,0 +1,137 @@ +save_forecast <- function(recipe, + data_cube, + type = "hcst", + agg = "global", + outdir = NULL) { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name + global_attributes <- .get_global_attributes(recipe, archive) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + ## Method 1: Use the first date as init_date. 
But it may be better to use + ## the real initialized date (ask users) + # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + ## Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + if (type == 'hcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else if (type == 'fcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } + } else { + if (type == 'hcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } else if (type == 'fcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + } + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + # expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'ensemble', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- attr(data_cube$Variable, 'variable')$units + } else { + dims <- c(lalo, 'ensemble', 'time') + var.expname <- variable + var.sdname <- var.sdname + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal + ## data has been reshaped + # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') + + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "exp") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a 
list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + info(recipe$Run$logger, paste("#####", toupper(type), + "SAVED TO NETCDF FILE #####")) +} diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R new file mode 100644 index 0000000000000000000000000000000000000000..d48d9d8d6e83b24606f68e664151225bfd602dba --- /dev/null +++ b/modules/Saving/R/save_metrics.R @@ -0,0 +1,119 @@ +save_metrics <- function(recipe, + skill, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the skill metrics in 'skill' + # and exports them to a netCDF file inside 'outdir'. + + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + + + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + skill <- lapply(skill, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + # Add global and variable attributes + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + attr(skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(skill)) { + metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'time') + } else { + sdname <- paste0(metric) #, " grid point metric") + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
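+  # Illustrative example (assumed dates) for the 'leadtimes' vector computed
+  # above: a monthly hindcast initialised on 1993-11-01 with time stamps on
+  # 1993-12-01 and 1994-01-01 gives leadtimes of c(720, 1464) hours.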
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "skill") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, skill), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R new file mode 100644 index 0000000000000000000000000000000000000000..dcaf0765b783e6ebe1f4e5061ea69b2d469b84a3 --- /dev/null +++ b/modules/Saving/R/save_observations.R @@ -0,0 +1,137 @@ +save_observations <- function(recipe, + data_cube, + agg = "global", + outdir = NULL) { + # Loops over the years in the s2dv_cube containing the observations and + # exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name + global_attributes <- .get_global_attributes(recipe, archive) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$Reference[[global_attributes$reference]]$calendar + + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + ## Method 1: Use the first date as init_date. 
But it may be better to use + ## the real initialized date (ask users) +# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + ## Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } else { + dims <- c(lalo, 'time') + var.expname <- variable + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date. The date is computed for each year, and adapted for + # consistency with the hcst/fcst dates, so that both sets of files have + # the same name pattern. + ## Because observations are loaded differently in the daily vs. monthly + ## cases, different approaches are necessary. 
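+    # For instance (hypothetical values): in the monthly case a file_date
+    # such as "199311" becomes the date 1993-11-01, whereas in the daily case
+    # the i-th entry of data_cube$attrs$Dates is used directly.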
+ if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + } else { + + if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) + } + } + + # Ensure the year is correct if the first leadtime goes to the next year + init_date <- as.POSIXct(init_date) + if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { + lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 + } + # Ensure that the initialization month is consistent with the hindcast + lubridate::month(fcst.sdate) <- lubridate::month(init_date) + fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "obs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R new file mode 100644 index 0000000000000000000000000000000000000000..b4aab60550000a5a930e764632cff9de0326905a --- /dev/null +++ b/modules/Saving/R/save_percentiles.R @@ -0,0 +1,107 @@ +save_percentiles <- function(recipe, + percentiles, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the percentiles + # and exports them to a netCDF file inside 'outdir'. 
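+  # Minimal sketch of the expected 'percentiles' input (names and dimension
+  # sizes are assumed): a named list of arrays with longitude, latitude and
+  # time dimensions, e.g.
+  #   percentiles <- list(percentile_33 = array(0, dim = c(longitude = 2,
+  #                                                        latitude = 2,
+  #                                                        time = 1)),
+  #                       percentile_66 = array(0, dim = c(longitude = 2,
+  #                                                        latitude = 2,
+  #                                                        time = 1)))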
+ archive <- get_archive(recipe) + + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + percentiles <- lapply(percentiles, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + + # Add global and variable attributes + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + attr(percentiles[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(percentiles)) { + ## TODO: replace with proper standard names + percentile <- names(percentiles[i]) + long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = percentile, long_name = long_name)) + attr(percentiles[[i]], 'variables') <- metadata + names(dim(percentiles[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "percentiles") + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, percentiles), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, percentiles) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R new file mode 100644 index 0000000000000000000000000000000000000000..20af9be7858388c08f8d7de62d90092134599495 --- /dev/null +++ b/modules/Saving/R/save_probabilities.R @@ -0,0 +1,124 @@ +save_probabilities <- function(recipe, + probs, + data_cube, + agg = "global", + type = "hcst", + outdir = NULL) { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports the corresponding category probabilities to a netCDF file. + # probs: array containing the probability data + # recipe: the auto-s2s recipe + # data_cube: s2dv_cube containing the data and metadata + # outdir: directory where the files should be saved + # type: 'exp' (hcst and fcst) or 'obs' + # agg: aggregation, "global" or "country" + # type: 'hcst' or 'fcst' + + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name + global_attributes <- .get_global_attributes(recipe, archive) + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + # Add anomaly computation to global attributes + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? 
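One possible shape for that separate function, sketched here under a hypothetical name .get_leadtimes() and showing only the non-decadal branch; it simply wraps the same calls used below (PCICt is assumed to be attached, as in the surrounding code).

.get_leadtimes <- function(recipe, data_cube, calendar) {
  # Hours elapsed between the recipe start date and each date in the data cube
  dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1),
                    cal = calendar)
  init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start,
                               recipe$Analysis$Time$sdate),
                        format = '%Y%m%d', cal = calendar)
  as.numeric(dates - init_date) / 3600
}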
+ dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') + if (tolower(agg) == "global") { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c(lalo, 'time'))}) + } else { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c('country', 'time'))}) + } + + ## TODO: Replace for loop with something more efficient? + for (bin in 1:length(probs_syear)) { + prob_bin <- names(probs_syear[bin]) + long_name <- paste0(prob_bin, " probability category") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = prob_bin, long_name = long_name)) + attr(probs_syear[[bin]], 'variables') <- metadata + names(dim(probs_syear[[bin]])) <- dims # is this necessary? + } + + # Add global attributes + attr(probs_syear[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "probs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, probs_syear), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, probs_syear) + ArrayToNc(vars, outfile) + } + } + + info(recipe$Run$logger, + paste("#####", toupper(type), + "PROBABILITIES SAVED TO NETCDF FILE #####")) +} diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 85c3a3a043bc9dffca0aaad3219d9cba410f8a58..75c7908cc83fd3999173f7b1bd4c0bd9cd68577c 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -1,10 +1,18 @@ ## TODO: Save obs percentiles -source("modules/Saving/paths2save.R") +source("modules/Saving/R/get_dir.R") +source("modules/Saving/R/get_filename.R") +source("modules/Saving/R/Utils.R") +source("modules/Saving/R/save_forecast.R") +source("modules/Saving/R/save_observations.R") +source("modules/Saving/R/save_metrics.R") 
+source("modules/Saving/R/save_corr.R") +source("modules/Saving/R/save_probabilities.R") +source("modules/Saving/R/save_percentiles.R") save_data <- function(recipe, data, - skill_metrics = NULL, - probabilities = NULL) { + skill_metrics = NULL, + probabilities = NULL) { # Wrapper for the saving functions. # recipe: The auto-s2s recipe # archive: The auto-s2s archive @@ -22,8 +30,8 @@ save_data <- function(recipe, data, if (is.null(data)) { error(recipe$Run$logger, - paste("The 'data' parameter is mandatory. It should be a list", - "of at least two s2dv_cubes containing the hcst and obs.")) + paste("The 'data' parameter is mandatory. It should be a list", + "of at least two s2dv_cubes containing the hcst and obs.")) stop() } # Create output directory @@ -32,15 +40,15 @@ save_data <- function(recipe, data, # Export hindcast, forecast and observations onto outfile save_forecast(recipe = recipe, data_cube = data$hcst, - type = 'hcst', - outdir = outdir) + type = 'hcst', + outdir = outdir) if (!is.null(data$fcst)) { save_forecast(recipe = recipe, data_cube = data$fcst, - type = 'fcst', - outdir = outdir) + type = 'fcst', + outdir = outdir) } save_observations(recipe = recipe, data_cube = data$obs, - outdir = outdir) + outdir = outdir) # Separate ensemble correlation from the rest of the metrics, as it has one # extra dimension "ensemble" and must be saved to a different file @@ -58,29 +66,29 @@ save_data <- function(recipe, data, # Export skill metrics onto outfile if (!is.null(skill_metrics)) { save_metrics(recipe = recipe, skill = skill_metrics, - data_cube = data$hcst, - outdir = outdir) + data_cube = data$hcst, + outdir = outdir) } if (!is.null(corr_metrics)) { save_corr(recipe = recipe, skill = corr_metrics, - data_cube = data$hcst, - outdir = outdir) + data_cube = data$hcst, + outdir = outdir) } # Export probabilities onto outfile if (!is.null(probabilities)) { save_percentiles(recipe = recipe, percentiles = probabilities$percentiles, - data_cube = data$hcst, - outdir = outdir) + data_cube = data$hcst, + outdir = outdir) save_probabilities(recipe = recipe, probs = probabilities$probs, - data_cube = data$hcst, - type = "hcst", - outdir = outdir) + data_cube = data$hcst, + type = "hcst", + outdir = outdir) if (!is.null(probabilities$probs_fcst)) { save_probabilities(recipe = recipe, probs = probabilities$probs_fcst, - data_cube = data$fcst, - type = "fcst", - outdir = outdir) + data_cube = data$fcst, + type = "fcst", + outdir = outdir) } } } @@ -90,18 +98,18 @@ get_global_attributes <- function(recipe, archive) { # netCDF files. 
parameters <- recipe$Analysis hcst_period <- paste0(parameters$Time$hcst_start, " to ", - parameters$Time$hcst_end) + parameters$Time$hcst_end) current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) system_name <- parameters$Datasets$System$name reference_name <- parameters$Datasets$Reference$name attrs <- list(reference_period = hcst_period, - institution_system = archive$System[[system_name]]$institution, - institution_reference = archive$Reference[[reference_name]]$institution, - system = system_name, - reference = reference_name, - calibration_method = parameters$Workflow$Calibration$method, - computed_on = current_time) + institution_system = archive$System[[system_name]]$institution, + institution_reference = archive$Reference[[reference_name]]$institution, + system = system_name, + reference = reference_name, + calibration_method = parameters$Workflow$Calibration$method, + computed_on = current_time) return(attrs) } @@ -121,14 +129,14 @@ get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { dim(time) <- length(time) sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), - calendar = calendar)) + calendar = calendar)) attr(time, 'variables') <- metadata names(dim(time)) <- 'time' sdate <- 1:length(sdate) dim(sdate) <- length(sdate) metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), - collapse=", "), + collapse=", "), units = paste0('Init date'))) attr(sdate, 'variables') <- metadata names(dim(sdate)) <- 'sdate' @@ -154,760 +162,3 @@ get_latlon <- function(latitude, longitude) { return(list(lat=latitude, lon=longitude)) } - -save_forecast <- function(recipe, - data_cube, - type = "hcst", - agg = "global", - outdir = NULL) { - # Loops over the years in the s2dv_cube containing a hindcast or forecast - # and exports each year to a netCDF file. - # data_cube: s2dv_cube containing the data and metadata - # recipe: the auto-s2s recipe - # outdir: directory where the files should be saved - # agg: aggregation, "global" or "country" - - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name - global_attributes <- get_global_attributes(recipe, archive) - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - ## Method 1: Use the first date as init_date. 
But it may be better to use - ## the real initialized date (ask users) - # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - if (type == 'hcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else if (type == 'fcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } - } else { - if (type == 'hcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } else if (type == 'fcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - # expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) - } - - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'ensemble', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- attr(data_cube$Variable, 'variable')$units - } else { - dims <- c(lalo, 'ensemble', 'time') - var.expname <- variable - var.sdname <- var.sdname - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date - if (fcst.horizon == 'decadal') { - ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal - ## data has been reshaped - # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') - - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "exp") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a 
list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) - } - } - info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) -} - - -save_observations <- function(recipe, - data_cube, - agg = "global", - outdir = NULL) { - # Loops over the years in the s2dv_cube containing the observations and - # exports each year to a netCDF file. - # data_cube: s2dv_cube containing the data and metadata - # recipe: the auto-s2s recipe - # outdir: directory where the files should be saved - # agg: aggregation, "global" or "country" - - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name - global_attributes <- get_global_attributes(recipe, archive) - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$Reference[[global_attributes$reference]]$calendar - - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - - # Generate vector containing leadtimes - ## TODO: Move to a separate function? - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - ## Method 1: Use the first date as init_date. But it may be better to use - ## the real initialized date (ask users) -# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - ## expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'time'))) - } - - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } else { - dims <- c(lalo, 'time') - var.expname <- variable - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date. The date is computed for each year, and adapted for - # consistency with the hcst/fcst dates, so that both sets of files have - # the same name pattern. 
- ## Because observations are loaded differently in the daily vs. monthly - ## cases, different approaches are necessary. - if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - } else { - - if (store.freq == "monthly_mean") { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') - } else { - fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) - } - } - - # Ensure the year is correct if the first leadtime goes to the next year - init_date <- as.POSIXct(init_date) - if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { - lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 - } - # Ensure that the initialization month is consistent with the hindcast - lubridate::month(fcst.sdate) <- lubridate::month(init_date) - fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "obs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) - } - } - info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") -} - -## TODO: Place inside a function somewhere -# if (tolower(agg) == "country") { -# load(mask.path) -# grid <- europe.countries.iso -# } else { -# grid <- list(lon=attr(var.obs, 'Variables')$dat1$longitude, -# lat=attr(var.obs, 'Variables')$dat1$latitude) -# } - -save_metrics <- function(recipe, - skill, - data_cube, - agg = "global", - outdir = NULL) { - # This function adds metadata to the skill metrics in 'skill' - # and exports them to a netCDF file inside 'outdir'. 
- - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - - - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - attr(skill[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(skill)) { - metric <- names(skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - skill[[i]][is.na(skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'time') - } else { - sdname <- paste0(metric) #, " grid point metric") - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
- if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "skill") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, skill), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") -} - -save_corr <- function(recipe, - skill, - data_cube, - agg = "global", - outdir = NULL) { - # This function adds metadata to the ensemble correlation in 'skill' - # and exports it to a netCDF file inside 'outdir'. - - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x) { - Reorder(x, c(lalo, 'ensemble', 'time'))}) - } - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(global_attributes, - list(from_anomalies = "Yes")) - } else { - global_attributes <- c(global_attributes, - list(from_anomalies = "No")) - } - attr(skill[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(skill)) { - metric <- names(skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - skill[[i]][is.na(skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'ensemble', 'time') - } else { - sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) - dims <- c(lalo, 'ensemble', 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- 
archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. - if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "corr") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, skill), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, - "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") -} - -save_percentiles <- function(recipe, - percentiles, - data_cube, - agg = "global", - outdir = NULL) { - # This function adds metadata to the percentiles - # and exports them to a netCDF file inside 'outdir'. 
- archive <- get_archive(recipe) - - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - percentiles <- lapply(percentiles, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - attr(percentiles[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(percentiles)) { - ## TODO: replace with proper standard names - percentile <- names(percentiles[i]) - long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = percentile, long_name = long_name)) - attr(percentiles[[i]], 'variables') <- metadata - names(dim(percentiles[[i]])) <- dims - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
- if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "percentiles") - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, percentiles), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, percentiles) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") -} - -save_probabilities <- function(recipe, - probs, - data_cube, - agg = "global", - type = "hcst", - outdir = NULL) { - # Loops over the years in the s2dv_cube containing a hindcast or forecast - # and exports the corresponding category probabilities to a netCDF file. - # probs: array containing the probability data - # recipe: the auto-s2s recipe - # data_cube: s2dv_cube containing the data and metadata - # outdir: directory where the files should be saved - # type: 'exp' (hcst and fcst) or 'obs' - # agg: aggregation, "global" or "country" - # type: 'hcst' or 'fcst' - - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name - global_attributes <- get_global_attributes(recipe, archive) - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - # Add anomaly computation to global attributes - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - ## TODO: Move to a separate function? 
- dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - ## expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') - if (tolower(agg) == "global") { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c(lalo, 'time'))}) - } else { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c('country', 'time'))}) - } - - ## TODO: Replace for loop with something more efficient? - for (bin in 1:length(probs_syear)) { - prob_bin <- names(probs_syear[bin]) - long_name <- paste0(prob_bin, " probability category") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = prob_bin, long_name = long_name)) - attr(probs_syear[[bin]], 'variables') <- metadata - names(dim(probs_syear[[bin]])) <- dims # is this necessary? - } - - # Add global attributes - attr(probs_syear[[1]], 'global_attrs') <- global_attributes - - # Select start date - if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "probs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, probs_syear), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, probs_syear) - ArrayToNc(vars, outfile) - } - } - - info(recipe$Run$logger, - paste("#####", toupper(type), - "PROBABILITIES SAVED TO NETCDF FILE #####")) -} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 3eab1287fceeefe0b85a64c2aab28184839fabf2..d3f74b1374e4fc8607abe7a73a5cf4db48d17f99 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -52,8 +52,8 @@ source("modules/Skill/R/tmp/RandomWalkTest.R") # " it can call ", metric_fun )) # compute_skill_metrics <- function(recipe, data$hcst, obs, -# clim_data$hcst = NULL, -# clim_obs = NULL) { +# clim_data$hcst = NULL, +# clim_obs = NULL) { compute_skill_metrics <- function(recipe, data) { # data$hcst: s2dv_cube containing the hindcast 
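As a reminder of the input this function operates on, a sketch of the 'data' argument; the element names match those used further down, the *_cube placeholders stand for s2dv_cube objects produced by earlier modules, and the full_val entries may be NULL when anomalies were not computed.

data <- list(hcst          = hcst_cube,       # hindcast s2dv_cube
             obs           = obs_cube,        # observations s2dv_cube
             fcst          = fcst_cube,       # forecast s2dv_cube, or NULL
             hcst.full_val = hcst_full_cube,  # full-field hindcast (bias metrics)
             obs.full_val  = obs_full_cube)   # full-field observations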
@@ -68,14 +68,14 @@ compute_skill_metrics <- function(recipe, data) { # if (recipe$Analysis$Workflow$Anomalies$compute) { # if (is.null(clim_data$hcst) || is.null(clim_obs)) { # warn(recipe$Run$logger, "Anomalies have been requested in the recipe, -# but the climatologies have not been provided in the -# compute_skill_metrics call. Be aware that some metrics like the -# Mean Bias may not be correct.") +# but the climatologies have not been provided in the +# compute_skill_metrics call. Be aware that some metrics like the +# Mean Bias may not be correct.") # } # } else { # warn(recipe$Run$logger, "Anomaly computation was not requested in the -# recipe. Be aware that some metrics, such as the CRPSS may not be -# correct.") +# recipe. Be aware that some metrics, such as the CRPSS may not be +# correct.") # } time_dim <- 'syear' memb_dim <- 'ensemble' @@ -94,7 +94,7 @@ compute_skill_metrics <- function(recipe, data) { for (metric in strsplit(metrics, ", | |,")[[1]]) { # Whether the fair version of the metric is to be computed if (metric %in% c('frps', 'frpss', 'bss10', 'bss90', - 'fcrps', 'fcrpss')) { + 'fcrps', 'fcrpss')) { Fair <- T } else { Fair <- F @@ -108,65 +108,65 @@ compute_skill_metrics <- function(recipe, data) { # Ranked Probability Score and Fair version if (metric %in% c('rps', 'frps')) { skill <- RPS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 10th percentile } else if (metric == 'bss10') { skill <- RPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - prob_thresholds = 0.1, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.1, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 90th percentile } else if (metric == 'bss90') { skill <- RPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - prob_thresholds = 0.9, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.9, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # CRPS and FCRPS } else if (metric %in% c('crps', 'fcrps')) { skill <- CRPS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # CRPSS and FCRPSS } else if (metric %in% c('crpss', 'fcrpss')) { skill <- CRPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = 
memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$crpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Mean bias (climatology) @@ -174,35 +174,35 @@ compute_skill_metrics <- function(recipe, data) { ## TODO: Eliminate option to compute from anomalies # Compute from full field if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, - time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) } else { skill <- Bias(data$hcst$data, data$obs$data, time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + memb_dim = memb_dim, + ncores = ncores) } skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # Mean bias skill score } else if (metric == 'mean_bias_ss') { if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data, - time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) } else { skill <- AbsBiasSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) } skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$biasSS skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Ensemble mean correlation @@ -214,43 +214,43 @@ compute_skill_metrics <- function(recipe, data) { time_dim = time_dim, method = 'pearson', memb_dim = memb_dim, - memb = memb, - conf = F, - pval = F, - sign = T, - alpha = 0.05, + memb = memb, + conf = F, + pval = F, + sign = T, + alpha = 0.05, ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$corr skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'rmsss') { # Compute RMSSS skill <- RMSSS(data$hcst$data, data$obs$data, - dat_dim = 'dat', - time_dim = time_dim, - memb_dim = memb_dim, - pval = FALSE, - sign = TRUE, - sig_method = 'Random Walk', - ncores = ncores) + dat_dim = 'dat', + time_dim = time_dim, + memb_dim = memb_dim, + pval = FALSE, + sign = TRUE, + sig_method = 'Random Walk', + ncores = ncores) # Compute ensemble mean and modify dimensions skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rmsss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'enssprerr') { # Remove ensemble dim from obs to avoid veriApply warning obs_noensdim <- ClimProjDiags::Subset(data$obs$data, "ensemble", 1, - drop = "selected") + drop = "selected") capture.output( skill <- easyVerification::veriApply(verifun = 'EnsSprErr', - fcst = data$hcst$data, - obs = obs_noensdim, - tdim = which(names(dim(data$hcst$data))==time_dim), - ensdim = which(names(dim(data$hcst$data))==memb_dim), - na.rm = 
na.rm, - ncpus = ncores) + fcst = data$hcst$data, + obs = obs_noensdim, + tdim = which(names(dim(data$hcst$data))==time_dim), + ensdim = which(names(dim(data$hcst$data))==memb_dim), + na.rm = na.rm, + ncpus = ncores) ) remove(obs_noensdim) skill <- .drop_dims(skill) @@ -261,26 +261,49 @@ compute_skill_metrics <- function(recipe, data) { ## Retain _specs in metric name for clarity metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', - 'rpss'))) { - warn(recipe$Run$logger, - "Some of the requested SpecsVerification metrics are not available.") + 'rpss'))) { + warn(recipe$Run$logger, + "Some of the requested SpecsVerification metrics are not available.") } capture.output( skill <- Compute_verif_metrics(data$hcst$data, data$obs$data, - skill_metrics = metric_name, - verif.dims=c("syear", "sday", "sweek"), - na.rm = na.rm, - ncores = ncores) + skill_metrics = metric_name, + verif.dims=c("syear", "sday", "sweek"), + na.rm = na.rm, + ncores = ncores) ) skill <- .drop_dims(skill) if (metric_name == "frps") { - # Compute yearly mean for FRPS - skill <- colMeans(skill, dims = 1) + # Compute yearly mean for FRPS + skill <- colMeans(skill, dims = 1) } skill_metrics[[ metric ]] <- skill } } info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####") + # Save outputs + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Skill/") + # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim + if (recipe$Analysis$Workflow$Skill$save == 'all') { + corr_metric_names <- grep("^corr", names(skill_metrics)) + if (length(corr_metric_names) == 0) { + save_metrics(recipe = recipe, skill = skill_metrics, + data_cube = data$hcst) + } else { + # Save corr + if (length(skill_metrics[corr_metric_names]) > 0) { + save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names], + data_cube = data$hcst) + } + # Save other skill metrics + if (length(skill_metrics[-corr_metric_names]) > 0) { + save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], + data_cube = data$hcst) + } + } + } + # Return results return(skill_metrics) } @@ -305,46 +328,46 @@ compute_probabilities <- function(recipe, data) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, "Quantiles and probability bins have been - requested, but no thresholds are provided in the recipe.") + requested, but no thresholds are provided in the recipe.") stop() } else { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { # Parse thresholds in recipe thresholds <- sapply(element, function (x) eval(parse(text = x))) quants <- compute_quants(data$hcst$data, thresholds, - ncores = ncores, - na.rm = na.rm) + ncores = ncores, + na.rm = na.rm) probs <- compute_probs(data$hcst$data, quants, - ncores = ncores, - na.rm = na.rm) + ncores = ncores, + na.rm = na.rm) for (i in seq(1:dim(quants)['bin'][[1]])) { - named_quantiles <- append(named_quantiles, - list(ClimProjDiags::Subset(quants, - 'bin', i))) - names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", - as.integer(thresholds[i]*100)) + named_quantiles <- append(named_quantiles, + list(ClimProjDiags::Subset(quants, + 'bin', i))) + names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", + as.integer(thresholds[i]*100)) } for (i in seq(1:dim(probs)['bin'][[1]])) { - if (i == 1) { - name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) - } else if (i == dim(probs)['bin'][[1]]) { - 
name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) - } else { - name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", - as.integer(thresholds[i]*100)) - } - named_probs <- append(named_probs, - list(ClimProjDiags::Subset(probs, - 'bin', i))) - names(named_probs)[length(named_probs)] <- name_i + if (i == 1) { + name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == dim(probs)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs <- append(named_probs, + list(ClimProjDiags::Subset(probs, + 'bin', i))) + names(named_probs)[length(named_probs)] <- name_i } # Compute fcst probability bins if (!is.null(data$fcst)) { - probs_fcst <- compute_probs(data$fcst$data, quants, - ncores = ncores, - na.rm = na.rm) + probs_fcst <- compute_probs(data$fcst$data, quants, + ncores = ncores, + na.rm = na.rm) for (i in seq(1:dim(probs_fcst)['bin'][[1]])) { if (i == 1) { @@ -353,11 +376,11 @@ compute_probabilities <- function(recipe, data) { name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) } else { name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", - as.integer(thresholds[i]*100)) + as.integer(thresholds[i]*100)) } named_probs_fcst <- append(named_probs_fcst, - list(ClimProjDiags::Subset(probs_fcst, - 'bin', i))) + list(ClimProjDiags::Subset(probs_fcst, + 'bin', i))) names(named_probs_fcst)[length(named_probs_fcst)] <- name_i } } @@ -369,20 +392,40 @@ compute_probabilities <- function(recipe, data) { if (!is.null(data$fcst)) { fcst_years <- dim(data$fcst$data)[['syear']] named_probs_fcst <- lapply(named_probs_fcst, - function(x) {Subset(x, - along = 'syear', - indices = 1:fcst_years, - drop = 'non-selected')}) + function(x) {Subset(x, + along = 'syear', + indices = 1:fcst_years, + drop = 'non-selected')}) results <- list(probs = named_probs, - probs_fcst = named_probs_fcst, - percentiles = named_quantiles) + probs_fcst = named_probs_fcst, + percentiles = named_quantiles) } else { results <- list(probs = named_probs, - percentiles = named_quantiles) + percentiles = named_quantiles) } info(recipe$Run$logger, - "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") + "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") + # Save outputs + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Skill/") + # Save percentiles + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'percentiles_only')) { + save_percentiles(recipe = recipe, percentiles = results$percentiles, + data_cube = data$hcst) + } + # Save probability bins + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = results$probs, + data_cube = data$hcst, type = "hcst") + if (!is.null(results$probs_fcst)) { + save_probabilities(recipe = recipe, probs = results$probs_fcst, + data_cube = data$fcst, type = "fcst") + } + } + # Return results return(results) } } @@ -398,7 +441,7 @@ compute_probabilities <- function(recipe, data) { # If array has memb dim (Corr case), change name to 'ensemble' if ("exp_memb" %in% names(dim(metric_array))) { names(dim(metric_array))[which(names(dim(metric_array)) == - "exp_memb")] <- "ensemble" + "exp_memb")] <- "ensemble" # } else { # dim(metric_array) <- c(dim(metric_array), "ensemble" = 1) } diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R 
new file mode 100644 index 0000000000000000000000000000000000000000..e0fa8b8432dcd1972752313634d7461504afc14f --- /dev/null +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -0,0 +1,89 @@ +plot_ensemble_mean <- function(recipe, fcst, outdir) { + + ## TODO: Add 'anomaly' to plot title + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } + + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon + archive <- get_archive(recipe) + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + variable <- recipe$Analysis$Variables$name + units <- attr(fcst$Variable, "variable")$units + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + # Compute ensemble mean + ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') + # Drop extra dims, add time dim if missing: + ensemble_mean <- drop(ensemble_mean) + + if (!("time" %in% names(dim(ensemble_mean)))) { + dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) + } + if (!'syear' %in% names(dim(ensemble_mean))) { + ensemble_mean <- Reorder(ensemble_mean, c("time", + "longitude", + "latitude")) + } else { + ensemble_mean <- Reorder(ensemble_mean, c("syear", + "time", + "longitude", + "latitude")) + } + ## TODO: Redefine column colors, possibly depending on variable + if (variable == 'prlr') { + palette = "BrBG" + rev = F + } else { + palette = "RdBu" + rev = T + } + # Define brks, centered on in the case of anomalies + ## + if (grepl("anomaly", + fcst$attrs$Variable$metadata[[variable]]$long_name)) { + variable <- paste(variable, "anomaly") + max_value <- max(abs(ensemble_mean)) + ugly_intervals <- seq(-max_value, max_value, max_value/20) + brks <- pretty(ugly_intervals, n = 12, min.n = 8) + } else { + brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) + } + cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) + options(bitmapType = "cairo") + + for (i_syear in start_date) { + # Define name of output file and titles + if (length(start_date) == 1) { + i_ensemble_mean <- ensemble_mean + outfile <- paste0(outdir, "forecast_ensemble_mean-", start_date, ".png") + } else { + i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ] + outfile <- paste0(outdir, "forecast_ensemble_mean-", i_syear, ".png") + } + toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, + "- Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + titles <- as.vector(months) + # Plots + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + i_ensemble_mean, longitude, latitude, + filled.continents = F, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + units = units, + cols = cols, + brks = brks, + fileout = outfile, + bar_label_digits = 4, + bar_extra_margin = rep(0.7, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + } + info(recipe$Run$logger, + "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R new file mode 100644 index 0000000000000000000000000000000000000000..9ab0199e2026be5ab9e5e6487a83d206fd68c4f9 --- /dev/null +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -0,0 +1,88 @@ +plot_most_likely_terciles <- function(recipe, + fcst, + probabilities, + outdir) { + + ## TODO: Add 'anomaly' to plot title 
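For context, a minimal sketch of the 'probabilities' argument this function checks for; the bin names follow the prob_b33 / prob_33_to_66 / prob_a66 convention produced by compute_probabilities(), and the dimension sizes and values are illustrative only.

dims <- c(syear = 1, time = 3, longitude = 10, latitude = 10)
probabilities <- list(
  probs_fcst = list(prob_b33      = array(1/3, dim = dims),
                    prob_33_to_66 = array(1/3, dim = dims),
                    prob_a66      = array(1/3, dim = dims))
)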
+ # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } + + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon + archive <- get_archive(recipe) + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + variable <- recipe$Analysis$Variables$name + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + + # Retrieve and rearrange probability bins for the forecast + if (is.null(probabilities$probs_fcst$prob_b33) || + is.null(probabilities$probs_fcst$prob_33_to_66) || + is.null(probabilities$probs_fcst$prob_a66)) { + stop("The forecast tercile probability bins are not present inside ", + "'probabilities', the most likely tercile map cannot be plotted.") + } + + probs_fcst <- abind(probabilities$probs_fcst$prob_b33, + probabilities$probs_fcst$prob_33_to_66, + probabilities$probs_fcst$prob_a66, + along = 0) + names(dim(probs_fcst)) <- c("bin", + names(dim(probabilities$probs_fcst$prob_b33))) + + ## TODO: Improve this section + # Drop extra dims, add time dim if missing: + probs_fcst <- drop(probs_fcst) + if (!("time" %in% names(dim(probs_fcst)))) { + dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) + } + if (!'syear' %in% names(dim(probs_fcst))) { + probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) + } else { + probs_fcst <- Reorder(probs_fcst, + c("syear", "time", "bin", "longitude", "latitude")) + } + + for (i_syear in start_date) { + # Define name of output file and titles + if (length(start_date) == 1) { + i_probs_fcst <- probs_fcst + outfile <- paste0(outdir, "forecast_most_likely_tercile-", start_date, + ".png") + } else { + i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] + outfile <- paste0(outdir, "forecast_most_likely_tercile-", i_syear, ".png") + } + toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", + "Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + ## TODO: Ensure this works for daily and sub-daily cases + titles <- as.vector(months) + + # Plots + ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked + ## on. 
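    ## At this point i_probs_fcst carries 'time', 'bin', 'longitude' and
    ## 'latitude' dimensions (any 'syear' dimension was subset away above);
    ## with c('bin', 'longitude', 'latitude') passed as the plotting dimensions,
    ## PlotLayout() should produce one most-likely-tercile panel per forecast
    ## time step.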
+ suppressWarnings( + PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), + cat_dim = 'bin', + i_probs_fcst, longitude, latitude, + coast_width = 1.5, + title_scale = 0.6, + legend_scale = 0.8, #cex_bar_titles = 0.6, + toptitle = toptitle, + titles = titles, + fileout = outfile, + bar_label_digits = 2, + bar_scale = rep(0.7, 4), + bar_label_scale = 1.2, + axes_label_scale = 1.3, + triangle_ends = c(F, F), width = 11, height = 8) + ) + } + + info(recipe$Run$logger, + "##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R new file mode 100644 index 0000000000000000000000000000000000000000..f8be19d9055f483f8cfe8dea391c29eee645e540 --- /dev/null +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -0,0 +1,162 @@ +plot_skill_metrics <- function(recipe, data_cube, skill_metrics, + outdir, significance = F) { + # recipe: Auto-S2S recipe + # archive: Auto-S2S archive + # data_cube: s2dv_cube object with the corresponding hindcast data + # skill_metrics: list of named skill metrics arrays + # outdir: output directory + # significance: T/F, whether to display the significance dots in the plots + + ## TODO: OPTION for CERISE: Using PuOr + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + error(recipe$Run$logger, "Visualization functions not yet implemented + for daily data.") + stop() + } + # Abort if skill_metrics is not list + if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { + stop("The element 'skill_metrics' must be a list of named arrays.") + } + + latitude <- data_cube$coords$lat + longitude <- data_cube$coords$lon + archive <- get_archive(recipe) + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, + start = 1, stop = 2)) + month_label <- tolower(month.name[init_month]) + month_abbreviation <- month.abb[init_month] + + # Define color palette and number of breaks according to output format + ## TODO: Make separate function + if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { + diverging_palette <- "purpleorange" + sequential_palette <- "Oranges" + } else { + diverging_palette <- "bluered" + sequential_palette <- "Reds" + } + + # Group different metrics by type + skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", + "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", + "enscorr_specs", "rmsss") + scores <- c("rps", "frps", "crps", "frps_specs") + # Assign colorbar to each metric type + ## TODO: Triangle ends + for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { + if (name %in% names(skill_metrics)) { + # Define plot characteristics and metric name to display in plot + if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", + "rpss_specs", "bss90_specs", "bss10_specs", + "rmsss")) { + display_name <- toupper(strsplit(name, "_")[[1]][1]) + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } else if (name == "mean_bias_ss") { + display_name <- "Mean Bias Skill Score" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + 
cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } else if (name %in% c("enscorr", "enscorr_specs")) { + display_name <- "Ensemble Mean Correlation" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.2) + cols <- clim.colors(length(brks) - 1, diverging_palette) + col_inf <- NULL + col_sup <- NULL + } else if (name %in% scores) { + skill <- skill_metrics[[name]] + display_name <- toupper(strsplit(name, "_")[[1]][1]) + brks <- seq(0, 1, by = 0.1) + colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) + cols <- colorbar[1:(length(colorbar) - 1)] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name == "enssprerr") { + skill <- skill_metrics[[name]] + display_name <- "Spread-to-Error Ratio" + brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) + colorbar <- clim.colors(length(brks), diverging_palette) + cols <- colorbar[1:length(colorbar) - 1] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name == "mean_bias") { + skill <- skill_metrics[[name]] + display_name <- "Mean Bias" + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + } + options(bitmapType = "cairo") + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. + significance_name <- paste0(name, "_significance") + if ((significance) && (significance_name %in% names(skill_metrics))) { + skill_significance <- skill_metrics[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) + # Split skill significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. 
+ skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + dim = 'time', + level = "sublist", + names = "dots") + } else { + skill_significance <- NULL + } + # Define output file name and titles + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + outfile <- paste0(outdir, name, "-", month_label, ".png") + } else { + outfile <- paste0(outdir, name, ".png") + } + toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, + "-", system_name, "-", month_abbreviation, + hcst_period) + months <- unique(lubridate::month(data_cube$attrs$Dates, + label = T, abb = F)) + titles <- as.vector(months) + # Plot + suppressWarnings( + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + asplit(skill, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = skill_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents=F, + brks = brks, + cols = cols, + col_inf = col_inf, + col_sup = col_sup, + fileout = outfile, + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + ) + } + } + info(recipe$Run$logger, + "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index f7960bccd7abc8f898db04dbabb1595a085dfcdd..1ae481093f30e7b7fbdcc6f384390d311cdef0c1 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -3,394 +3,68 @@ ## TODO: Add param 'raw'? ## TODO: Decadal plot names +source("modules/Visualization/R/plot_ensemble_mean.R") +source("modules/Visualization/R/plot_most_likely_terciles_map.R") +source("modules/Visualization/R/plot_skill_metrics.R") + plot_data <- function(recipe, - data, - skill_metrics = NULL, - probabilities = NULL, - archive = NULL, - significance = F) { + data, + skill_metrics = NULL, + probabilities = NULL, + significance = F) { # Try to produce and save several basic plots. # recipe: the auto-s2s recipe as read by read_yaml() - # archive: the auto-s2s archive as read by read_yaml() # data: list containing the hcst, obs and (optional) fcst s2dv_cube objects # calibrated_data: list containing the calibrated hcst and (optional) fcst # s2dv_cube objects # skill_metrics: list of arrays containing the computed skill metrics # significance: Bool. 
Whether to include significance dots where applicable - outdir <- paste0(get_dir(recipe), "/plots/") + plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/plots/") + outdir <- paste0(get_dir(recipe)) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) if ((is.null(skill_metrics)) && (is.null(data$fcst))) { error(recipe$Run$logger, "The Visualization module has been called, - but there is no fcst in 'data', and 'skill_metrics' is NULL - so there is no data that can be plotted.") + but there is no fcst in 'data', and 'skill_metrics' is NULL + so there is no data that can be plotted.") stop() } - if (is.null(archive)) { - if (tolower(recipe$Analysis$Horizon) == "seasonal") { - archive <- - read_yaml(paste0("conf/archive.yml"))[[recipe$Run$filesystem]] - } else if (tolower(recipe$Analysis$Horizon) == "decadal") { - archive <- - read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] - } - } - # Plot skill metrics - if (!is.null(skill_metrics)) { - plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, - significance) - } - - # Plot forecast ensemble mean - if (!is.null(data$fcst)) { - plot_ensemble_mean(recipe, archive, data$fcst, outdir) - } - - # Plot Most Likely Terciles - if ((!is.null(probabilities)) && (!is.null(data$fcst))) { - plot_most_likely_terciles(recipe, archive, data$fcst, - probabilities, outdir) - } -} - -plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, - outdir, significance = F) { - # recipe: Auto-S2S recipe - # archive: Auto-S2S archive - # data_cube: s2dv_cube object with the corresponding hindcast data - # skill_metrics: list of named skill metrics arrays - # outdir: output directory - # significance: T/F, whether to display the significance dots in the plots - - ## TODO: OPTION for CERISE: Using PuOr - # Abort if frequency is daily - if (recipe$Analysis$Variables$freq == "daily_mean") { - error(recipe$Run$logger, "Visualization functions not yet implemented - for daily data.") - stop() - } - # Abort if skill_metrics is not list - if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { - stop("The element 'skill_metrics' must be a list of named arrays.") - } - - latitude <- data_cube$coords$lat - longitude <- data_cube$coords$lon - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", - recipe$Analysis$Time$hcst_end) - init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, - start = 1, stop = 2)) - month_label <- tolower(month.name[init_month]) - month_abbreviation <- month.abb[init_month] - - # Define color palette and number of breaks according to output format - ## TODO: Make separate function - if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { - diverging_palette <- "purpleorange" - sequential_palette <- "Oranges" - } else { - diverging_palette <- "bluered" - sequential_palette <- "Reds" - } - - # Group different metrics by type - skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", - "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", - "enscorr_specs", "rmsss") - scores <- c("rps", "frps", "crps", "frps_specs") - # Assign colorbar to each metric type - ## TODO: Triangle ends - for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { - if (name %in% names(skill_metrics)) { - # Define plot characteristics and metric name to display in plot - if (name %in% c("rpss", 
"bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90_specs", "bss10_specs", - "rmsss")) { - display_name <- toupper(strsplit(name, "_")[[1]][1]) - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name == "mean_bias_ss") { - display_name <- "Mean Bias Skill Score" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name %in% c("enscorr", "enscorr_specs")) { - display_name <- "Ensemble Mean Correlation" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - cols <- clim.colors(length(brks) - 1, diverging_palette) - col_inf <- NULL - col_sup <- NULL - } else if (name %in% scores) { - skill <- skill_metrics[[name]] - display_name <- toupper(strsplit(name, "_")[[1]][1]) - brks <- seq(0, 1, by = 0.1) - colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) - cols <- colorbar[1:(length(colorbar) - 1)] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "enssprerr") { - skill <- skill_metrics[[name]] - display_name <- "Spread-to-Error Ratio" - brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) - colorbar <- clim.colors(length(brks), diverging_palette) - cols <- colorbar[1:length(colorbar) - 1] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "mean_bias") { - skill <- skill_metrics[[name]] - display_name <- "Mean Bias" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) - brks <- max_value * seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- colorbar[length(colorbar)] - } - options(bitmapType = "cairo") - # Reorder dimensions - skill <- Reorder(skill, c("time", "longitude", "latitude")) - # If the significance has been requested and the variable has it, - # retrieve it and reorder its dimensions. - significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(skill_metrics))) { - skill_significance <- skill_metrics[[significance_name]] - skill_significance <- Reorder(skill_significance, c("time", - "longitude", - "latitude")) - # Split skill significance into list of lists, along the time dimension - # This allows for plotting the significance dots correctly. 
- skill_significance <- ClimProjDiags::ArrayToList(skill_significance, - dim = 'time', - level = "sublist", - names = "dots") - } else { - skill_significance <- NULL - } - # Define output file name and titles - if (tolower(recipe$Analysis$Horizon) == "seasonal") { - outfile <- paste0(outdir, name, "-", month_label, ".png") - } else { - outfile <- paste0(outdir, name, ".png") - } - toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, - "-", system_name, "-", month_abbreviation, - hcst_period) - months <- unique(lubridate::month(data_cube$attrs$Dates, - label = T, abb = F)) - titles <- as.vector(months) - # Plot - suppressWarnings( - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - asplit(skill, MARGIN=1), # Splitting array into a list - longitude, latitude, - special_args = skill_significance, - dot_symbol = 20, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - filled.continents=F, - brks = brks, - cols = cols, - col_inf = col_inf, - col_sup = col_sup, - fileout = outfile, - bar_label_digits = 3, - bar_extra_margin = rep(0.9, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) - ) + if ("skill_metrics" %in% plots) { + if (!is.null(skill_metrics)) { + plot_skill_metrics(recipe, data$hcst, skill_metrics, outdir, + significance) + } else { + error(recipe$Run$logger, + paste0("The skill metric plots have been requested, but the ", + "parameter 'skill_metrics' is NULL")) } } - info(recipe$Run$logger, - "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") -} - -plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { - - ## TODO: Add 'anomaly' to plot title - # Abort if frequency is daily - if (recipe$Analysis$Variables$freq == "daily_mean") { - stop("Visualization functions not yet implemented for daily data.") - } - latitude <- fcst$coords$lat - longitude <- fcst$coords$lon - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - variable <- recipe$Analysis$Variables$name - units <- attr(fcst$Variable, "variable")$units - start_date <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - # Compute ensemble mean - ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') - # Drop extra dims, add time dim if missing: - ensemble_mean <- drop(ensemble_mean) - - if (!("time" %in% names(dim(ensemble_mean)))) { - dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) - } - if (!'syear' %in% names(dim(ensemble_mean))) { - ensemble_mean <- Reorder(ensemble_mean, c("time", - "longitude", - "latitude")) - } else { - ensemble_mean <- Reorder(ensemble_mean, c("syear", - "time", - "longitude", - "latitude")) - } - ## TODO: Redefine column colors, possibly depending on variable - if (variable == 'prlr') { - palette = "BrBG" - rev = F - } else { - palette = "RdBu" - rev = T - } - # Define brks, centered on in the case of anomalies - ## - if (grepl("anomaly", - fcst$attrs$Variable$metadata[[variable]]$long_name)) { - variable <- paste(variable, "anomaly") - max_value <- max(abs(ensemble_mean)) - ugly_intervals <- seq(-max_value, max_value, max_value/20) - brks <- pretty(ugly_intervals, n = 12, min.n = 8) - } else { - brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) - } - cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) - options(bitmapType = "cairo") - - for (i_syear in start_date) { - # Define name of output file and titles - if (length(start_date) == 1) { - i_ensemble_mean <- ensemble_mean - outfile <- paste0(outdir, "forecast_ensemble_mean-", start_date, ".png") + # Plot forecast 
ensemble mean + if ("forecast_ensemble_mean" %in% plots) { + if (!is.null(data$fcst)) { + plot_ensemble_mean(recipe, data$fcst, outdir) } else { - i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ] - outfile <- paste0(outdir, "forecast_ensemble_mean-", i_syear, ".png") + error(recipe$Run$logger, + paste0("The forecast ensemble mean plot has been requested, but ", + "there is no fcst element in 'data'")) } - toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, - "- Initialization:", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - titles <- as.vector(months) - # Plots - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - i_ensemble_mean, longitude, latitude, - filled.continents = F, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - units = units, - cols = cols, - brks = brks, - fileout = outfile, - bar_label_digits = 4, - bar_extra_margin = rep(0.7, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) - } - info(recipe$Run$logger, - "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") -} - -plot_most_likely_terciles <- function(recipe, archive, - fcst, - probabilities, - outdir) { - - ## TODO: Add 'anomaly' to plot title - # Abort if frequency is daily - if (recipe$Analysis$Variables$freq == "daily_mean") { - stop("Visualization functions not yet implemented for daily data.") } - latitude <- fcst$coords$lat - longitude <- fcst$coords$lon - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - variable <- recipe$Analysis$Variables$name - start_date <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - - # Retrieve and rearrange probability bins for the forecast - if (is.null(probabilities$probs_fcst$prob_b33) || - is.null(probabilities$probs_fcst$prob_33_to_66) || - is.null(probabilities$probs_fcst$prob_a66)) { - stop("The forecast tercile probability bins are not present inside ", - "'probabilities', the most likely tercile map cannot be plotted.") - } - - probs_fcst <- abind(probabilities$probs_fcst$prob_b33, - probabilities$probs_fcst$prob_33_to_66, - probabilities$probs_fcst$prob_a66, - along = 0) - names(dim(probs_fcst)) <- c("bin", - names(dim(probabilities$probs_fcst$prob_b33))) - - ## TODO: Improve this section - # Drop extra dims, add time dim if missing: - probs_fcst <- drop(probs_fcst) - if (!("time" %in% names(dim(probs_fcst)))) { - dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) - } - if (!'syear' %in% names(dim(probs_fcst))) { - probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) - } else { - probs_fcst <- Reorder(probs_fcst, - c("syear", "time", "bin", "longitude", "latitude")) - } - - for (i_syear in start_date) { - # Define name of output file and titles - if (length(start_date) == 1) { - i_probs_fcst <- probs_fcst - outfile <- paste0(outdir, "forecast_most_likely_tercile-", start_date, - ".png") + # Plot Most Likely Terciles + if ("most_likely_terciles" %in% plots) { + if ((!is.null(probabilities)) && (!is.null(data$fcst))) { + plot_most_likely_terciles(recipe, data$fcst, + probabilities, outdir) } else { - i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] - outfile <- paste0(outdir, "forecast_most_likely_tercile-", i_syear, ".png") + error(recipe$Run$logger, + paste0("For the most likely terciles plot, both the fcst and the ", + "probabilities must be provided.")) } - toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", - 
"Initialization:", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - ## TODO: Ensure this works for daily and sub-daily cases - titles <- as.vector(months) - - # Plots - ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked - ## on. - suppressWarnings( - PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), - cat_dim = 'bin', - i_probs_fcst, longitude, latitude, - coast_width = 1.5, - title_scale = 0.6, - legend_scale = 0.8, #cex_bar_titles = 0.6, - toptitle = toptitle, - titles = titles, - fileout = outfile, - bar_label_digits = 2, - bar_scale = rep(0.7, 4), - bar_label_scale = 1.2, - axes_label_scale = 1.3, - triangle_ends = c(F, F), width = 11, height = 8) - ) } - - info(recipe$Run$logger, - "##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") } + diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 4dd34b6187a35fe9157f6d27fb478a8178be8a25..4535d41bccab6628aeedc12c147df5a3e57fdad8 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -19,7 +19,7 @@ skill_metrics <- compute_skill_metrics(recipe, data) # Compute percentiles and probability bins probabilities <- compute_probabilities(recipe, data) # Export all data to netCDF -save_data(recipe, data, skill_metrics, probabilities) +# save_data(recipe, data, skill_metrics, probabilities) # Plot data plot_data(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml index 986578f7cc7a74e44604c83fb6080ada63637406..2dca96c06666efb1cb20a4f3b4af5d31f0863d21 100644 --- a/recipes/atomic_recipes/recipe_decadal.yml +++ b/recipes/atomic_recipes/recipe_decadal.yml @@ -32,14 +32,20 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: method: bias + save: 'all' Skill: metric: RPSS Corr + save: 'all' Probabilities: percentiles: [[1/3, 2/3]] + save: 'all' Indicators: index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E diff --git a/recipes/atomic_recipes/recipe_system5c3s-tas.yml b/recipes/atomic_recipes/recipe_system5c3s-tas.yml index 31ae079d2dfe76c600ecef3083db5943e5f2ae20..c4606d59c61edeae34fa4cdd677f6bff00e05e9e 100644 --- a/recipes/atomic_recipes/recipe_system5c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system5c3s-tas.yml @@ -31,12 +31,19 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: method: raw + save: fcst_only Skill: metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + save: all Probabilities: percentiles: [[1/3, 2/3]] + save: all + Visualization: + plots: skill_metrics forecast_ensemble_mean + Indicators: index: no Output_format: S2S4E diff --git a/recipes/atomic_recipes/recipe_system7c3s-prlr.yml b/recipes/atomic_recipes/recipe_system7c3s-prlr.yml index 58030bf3b0697a177d901bbb3b2cbbca0411c779..fa7bee7f97d64570aeed769f1c4776027b230ab4 100644 --- a/recipes/atomic_recipes/recipe_system7c3s-prlr.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-prlr.yml @@ -31,15 +31,21 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: method: mse_min + save: 'all' Skill: metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 
2/4, 3/4]] + save: 'all' + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles Indicators: index: no - ncores: 1 + ncores: 12 remove_NAs: no Output_format: S2S4E Run: diff --git a/recipes/atomic_recipes/recipe_system7c3s-tas.yml b/recipes/atomic_recipes/recipe_system7c3s-tas.yml index a2a87c23dd2ce092b77f42c2f9984e5c3463d47d..e5cd2abaf0bb84decf9c5430743055a95f57c03b 100644 --- a/recipes/atomic_recipes/recipe_system7c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-tas.yml @@ -31,12 +31,18 @@ Analysis: Anomalies: compute: yes # yes/no, default yes cross_validation: yes # yes/no, default yes + save: 'all' # 'all'/'none'/'exp_only'/'fcst_only' Calibration: method: mse_min + save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' Skill: metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + save: 'all' # 'all'/'none' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles Indicators: index: no ncores: 10 diff --git a/recipes/recipe_decadal_split.yml b/recipes/recipe_decadal_split.yml index 708037f75dea5828e3574afb29e7482815ab0d83..a94ffd0307e5fe9db4be48d5a53f04c9d81f61da 100644 --- a/recipes/recipe_decadal_split.yml +++ b/recipes/recipe_decadal_split.yml @@ -28,13 +28,19 @@ Analysis: Workflow: Anomalies: compute: no - cross_validation: + cross_validation: + save: Calibration: method: 'bias' + save: 'all' Skill: metric: EnsCorr RPSS + save: 'all' Probabilities: percentiles: [[1/3, 2/3]] + save: 'all' + Visualization: + plots: skill_metrics Indicators: index: FALSE ncores: 8 # Optional, int: number of cores, defaults to 1 diff --git a/recipes/recipe_splitting_example.yml b/recipes/recipe_splitting_example.yml index 94a944682b26e7533188a917be0fc32c2479ff0c..93e5994e9885598fc7c8d4af495b476e7b301565 100644 --- a/recipes/recipe_splitting_example.yml +++ b/recipes/recipe_splitting_example.yml @@ -38,12 +38,19 @@ Analysis: method: bilinear ## TODO: allow multiple methods? type: to_system Workflow: + Anomalies: + compute: no Calibration: method: mse_min ## TODO: list, split? + save: 'none' Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean Indicators: index: no # ? ncores: 7 diff --git a/recipes/tests/recipe_autosubmit_marstest.yml b/recipes/tests/recipe_autosubmit_marstest.yml index dfd2159f9f734fb1a0aa2f63aa49d736495f596d..5bf62fb9d1012fae7e9aa85823db2d157b51f0d8 100644 --- a/recipes/tests/recipe_autosubmit_marstest.yml +++ b/recipes/tests/recipe_autosubmit_marstest.yml @@ -40,12 +40,18 @@ Analysis: Anomalies: compute: yes cross_validation: yes + save: 'all' Calibration: method: raw ## TODO: list, split? + save: 'none' Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics Indicators: index: no # ? 
ncores: 8 diff --git a/recipes/tests/recipe_multiregion.yml b/recipes/tests/recipe_multiregion.yml index bcb4d126b96410d183c338e8d700c400073cd762..69d8621d9d6b6aa0ae6ea7394b689e7a0e69144e 100644 --- a/recipes/tests/recipe_multiregion.yml +++ b/recipes/tests/recipe_multiregion.yml @@ -40,12 +40,18 @@ Analysis: Anomalies: compute: yes cross_validation: yes + save: none Calibration: method: raw + save: none Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS + save: all Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + save: all + Visualization: + plots: skill_metrics Indicators: index: no ncores: 8 diff --git a/recipes/tests/recipe_seasonal_example.yml b/recipes/tests/recipe_seasonal_example.yml index cb941f8422bbccddd9ca32e4f1f7cff07a95467e..f4f3a8f51656e9e694ed453bf98ec5441300891c 100644 --- a/recipes/tests/recipe_seasonal_example.yml +++ b/recipes/tests/recipe_seasonal_example.yml @@ -40,12 +40,18 @@ Analysis: Anomalies: compute: yes cross_validation: yes + save: 'all' Calibration: method: raw + save: 'none' Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + save: 'all' + Visualization: + plots: skill_metrics Indicators: index: no ncores: 8 diff --git a/recipes/tests/recipe_seasonal_two-variables.yml b/recipes/tests/recipe_seasonal_two-variables.yml index 5dbd892f2bc783276e02e18de3f0e518f5f40d5d..1cb5c3b236a0e99c027c3afab6ed23ed6fe51151 100644 --- a/recipes/tests/recipe_seasonal_two-variables.yml +++ b/recipes/tests/recipe_seasonal_two-variables.yml @@ -38,12 +38,18 @@ Analysis: Anomalies: compute: yes cross_validation: yes + save: 'all' Calibration: method: raw ## TODO: list, split? + save: 'none' Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles Indicators: index: no # ? 
ncores: 7 diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index 7a2a575baefd1881ecf2616a654b944e32f11b9c..88b87622cc066e665d69e5d42bc06185ed2fbf19 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -31,13 +31,17 @@ Analysis: Workflow: Anomalies: compute: no - cross_validation: + cross_validation: + save: 'none' Calibration: method: qmap + save: 'none' Skill: metric: RPSS + save: 'none' Probabilities: percentiles: [[1/10, 9/10]] + save: 'none' Indicators: index: FALSE ncores: # Optional, int: number of cores, defaults to 1 diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index 35b55b1a1985142d0bb6833ccd925da5142cfd3e..a2849c272a0e0c0da75e2af5ecb83347af9a0ce8 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -31,15 +31,21 @@ Analysis: Workflow: Anomalies: compute: no - cross-validation: + cross-validation: + save: Calibration: method: bias + save: 'all' Skill: metric: RPSS + save: 'all' Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' Indicators: index: FALSE + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_1b.yml b/tests/recipes/recipe-decadal_monthly_1b.yml index 5551d9c7c9611a79b73646bc846bbda629e9cbd8..5a1ce4fde98f0b79633ec5de810efeb66a0817ff 100644 --- a/tests/recipes/recipe-decadal_monthly_1b.yml +++ b/tests/recipes/recipe-decadal_monthly_1b.yml @@ -31,13 +31,17 @@ Analysis: Workflow: Anomalies: compute: no - cross_validation: + cross_validation: + save: 'none' Calibration: method: bias + save: 'none' Skill: metric: RPSS + save: 'none' Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'none' Indicators: index: FALSE ncores: # Optional, int: number of cores, defaults to 1 diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 45eb01dd44e20486eec242362f39df05f459603f..b0892a0c899da38bb605ec9127e2d78c22861d41 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -32,14 +32,20 @@ Analysis: Anomalies: compute: no cross_validation: + save: 'all' Calibration: method: raw + save: 'all' Skill: metric: RPSS_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + save: 'all' Probabilities: percentiles: [[1/3, 2/3]] + save: 'all' Indicators: index: FALSE + Visualization: + plots: most_likely_terciles skill_metrics forecast_ensemble_mean ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_3.yml b/tests/recipes/recipe-decadal_monthly_3.yml index 94bdfebc7fb5d1b56552f0ce983fcd091466f95f..fc42cd2d85261399547a90d124d32439344d4b8d 100644 --- a/tests/recipes/recipe-decadal_monthly_3.yml +++ b/tests/recipes/recipe-decadal_monthly_3.yml @@ -31,13 +31,17 @@ Analysis: Workflow: Anomalies: compute: no - cross_validation: + cross_validation: + save: 'none' Calibration: method: 'evmos' + save: 'none' Skill: metric: BSS10 Corr + save: 'none' Probabilities: percentiles: [[1/3, 2/3]] + save: 'none' 
Indicators: index: FALSE ncores: # Optional, int: number of cores, defaults to 1 diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml index afa0f4966bf6b242e40f1179f79882fbc60c1022..f70f0c0369d0ff16dc45b00a29a2fbbd63eaaee2 100644 --- a/tests/recipes/recipe-seasonal_daily_1.yml +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -31,10 +31,13 @@ Analysis: Anomalies: compute: no cross_validation: + save: 'none' Calibration: method: qmap + save: 'none' Skill: metric: EnsCorr_specs + save: 'none' Indicators: index: no Output_format: S2S4E diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index 68c58f83603baf157662d5c44f885a814cb914ec..21321fab7a0de8d347ce0b38d188979bba3897ac 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -28,17 +28,23 @@ Analysis: method: bilinear type: to_system Workflow: - Anomalies: - compute: no - cross_validation: + # Anomalies: + # compute: no + # cross_validation: + # save: 'none' Calibration: method: mse_min + save: 'all' Skill: metric: RPSS CRPSS EnsCorr Corr Enscorr_specs + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' Indicators: index: no + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean Output_format: S2S4E Run: Loglevel: INFO diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index 400b864d1576aa1b51d6b169cbd8a1a8d7f46f8d..c26b89785a8c31afcac87b98441a3b0d758e9421 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -219,4 +219,4 @@ as.POSIXct("1992-03-30 12:00:00", tz = 'UTC') # #}) - +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 4fc92b3d3cdb8bf8b7ba31c4c96b63b00b5945d1..9b46cce8e0d80e0727b137a32add29196229d54f 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -30,21 +30,13 @@ suppressWarnings({invisible(capture.output( probs <- compute_probabilities(recipe, calibrated_data) ))}) -# Saving -suppressWarnings({invisible(capture.output( -save_data(recipe = recipe, data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs) -))}) - # Plotting suppressWarnings({invisible(capture.output( -plot_data(recipe = recipe, archive = archive, data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs, significance = T) +plot_data(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T) ))}) - -outdir <- get_dir(recipe) - #====================================== test_that("1. Loading", { @@ -256,10 +248,10 @@ tolerance = 0.0001 #====================================== test_that("4. 
Saving", { - +outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( -all(list.files(outdir) %in% -c("plots", "tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-percentiles_month11.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc")), @@ -270,30 +262,30 @@ TRUE #) expect_equal( -length(list.files(outdir)), -17 +length(list.files(outputs, recursive = T)), +16 ) }) test_that("5. Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") expect_equal( -all(list.files(paste0(outdir, "/plots/")) %in% +all(basename(list.files(plots, recursive = T)) %in% c("forecast_ensemble_mean-2021.png", "forecast_most_likely_tercile-2021.png", "rpss.png")), TRUE ) expect_equal( -length(list.files(paste0(outdir, "/plots/"))), +length(list.files(plots, recursive = T)), 3 ) }) # Delete files -unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) - +unlink(recipe$Run$output_dir, recursive = TRUE) #============================================================== @@ -345,4 +337,5 @@ lapply(probs_b$probs_fcst, ClimProjDiags::Subset, 'syear', 2), probs$probs_fcst ) +unlink(recipe$Run$output_dir, recursive = TRUE) }) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 40a56abf37a826fe268d14560390aa9f6e1f0c2b..a6ca9254464918bd99295501dcb9599117a7b2c6 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -29,11 +29,6 @@ suppressWarnings({invisible(capture.output( probs <- compute_probabilities(recipe, calibrated_data) ))}) -# Saving -suppressWarnings({invisible(capture.output( -save_data(recipe, calibrated_data, skill_metrics, probs) -))}) - # Plotting suppressWarnings({invisible(capture.output( plot_data(recipe = recipe, data = calibrated_data, @@ -250,18 +245,18 @@ tolerance = 0.0001 #====================================== test_that("4. Saving", { - +outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( -all(list.files(outdir) %in% -c("plots", "tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_20201101.nc", "tas_20211101.nc", +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_20201101.nc", "tas_20211101.nc", "tas-obs_19901101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-percentiles_month11.nc", "tas-probs_19901101.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", "tas-probs_20201101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc")), TRUE ) expect_equal( -length(list.files(outdir)), -16 +length(list.files(outputs, recursive = T)), +15 ) }) @@ -269,19 +264,20 @@ length(list.files(outdir)), #====================================== test_that("5. 
Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") expect_equal( -all(list.files(paste0(outdir, "/plots/")) %in% +all(basename(list.files(plots, recursive = T)) %in% c("bss10_specs.png", "enscorr_specs.png", "forecast_ensemble_mean-2020.png", "forecast_ensemble_mean-2021.png", "forecast_most_likely_tercile-2020.png", "forecast_most_likely_tercile-2021.png", "frps_specs.png", "frps.png", "rpss_specs.png") ), TRUE ) expect_equal( -length(list.files(paste0(outdir, "/plots/"))), +length(list.files(plots, recursive = T)), 9 ) }) # Delete files -unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 85c15c88925d6db7b1d204f25cd007062337a7ea..988172c6ad6ed3a21571816874fc9aa7c023568a 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -196,4 +196,4 @@ tolerance = 0.0001 }) - +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 10cba33e9df2ef6cd18a4659127b4098581a0916..da0e789bd0e7acd50ca0556b5e9387c9e04d58aa 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -164,4 +164,4 @@ tolerance=0.0001 ) }) -unlink(recipe$Run$output_dir) +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 6a166503762df3d63835d4d4f2ab208db89d7988..83b5ceab15c0ccc2c4dafa7ba593aa6a66be6e97 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -212,10 +212,10 @@ rep(FALSE, 3) }) test_that("4. Saving", { - +outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( -all(list.files(outdir) %in% -c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", @@ -224,26 +224,27 @@ c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", TRUE ) expect_equal( -length(list.files(outdir)), -18 +length(list.files(outputs, recursive = T)), +17 ) }) test_that("5. 
Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") expect_equal( -all(list.files(paste0(outdir, "/plots/")) %in% +all(basename(list.files(plots, recursive = T)) %in% c("crpss-november.png", "enscorr_specs-november.png", "enscorr-november.png", "forecast_ensemble_mean-20201101.png", "forecast_most_likely_tercile-20201101.png", "rpss-november.png")), TRUE ) expect_equal( -length(list.files(paste0(outdir, "/plots/"))), +length(list.files(plots, recursive = T)), 6 ) }) # Delete files -unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) +unlink(recipe$Run$output_dir, recursive = T) diff --git a/tools/check_number_of_dependent_verifications.R b/tools/check_number_of_dependent_verifications.R new file mode 100644 index 0000000000000000000000000000000000000000..0c85d09f63c4d3c473c98f11a3e4ab26a6b82f56 --- /dev/null +++ b/tools/check_number_of_dependent_verifications.R @@ -0,0 +1,134 @@ +check_number_of_dependent_verifications <- function(recipe) { + # Number of verifications depends on the variables and indicators requested + # and the order of the workflow: + # workflow: correction + indicator --> only 1 variable is calibrated + # workflow: indicator + correction --> the indicator and the ecv are calibrated + independent_verifications <- NULL + dependent_verifications <- NULL + dep <- 1 + # check workflow order: + if (all(c('Calibration', 'Indicators') %in% names(recipe$Analysis$Workflow))) { + cal_pos <- which(names(recipe$Analysis$Workflow) == 'Calibration') + ind_pos <- which(names(recipe$Analysis$Workflow) == 'Indicators') + if (cal_pos < ind_pos) { + workflow_independent <- FALSE + } else { + workflow_independent <- TRUE + } + } + if (workflow_independent) { + independent_verifications <- append(recipe$Analysis$Variables$ECVs, + recipe$Analysis$Variables$Indicators) + } else { + if (is.null(recipe$Analysis$Variables$Indicators) || + (length(recipe$Analysis$Variables$Indicators) == 1 && + is.null(recipe$Analysis$Variables$ECVs))) { + independent_verifications <- append(recipe$Analysis$Variables$ECVs, + recipe$Analysis$Variables$Indicators) + } else { + ecvs <- recipe$Analysi$Variables$ECVs + inds <- recipe$Analysi$Variables$Indicators + ind_table <- read_yaml(paste0(recipe$Run$code_dir, + "conf/indicators_table.yml")) + # first, loop on ecvs if any and compare to indicators + done <- NULL # to gather the indicators reviewed + if (!is.null(ecvs)) { + for (i in 1:length(ecvs)) { + dependent <- list(ecvs[[i]]) + for (j in 1:length(inds)) { + if (ind_table[inds[[j]]$name][[1]]$ECVs == ecvs[[i]]$name) { + if (ind_table[inds[[j]]$name][[1]]$freq == ecvs[[i]]$freq) { + # they are dependent + dependent <- append(dependent, inds[[j]]) + done <- append(done, inds[[j]]) + } + } + } + if (length(dependent) == 1) { + dependent <- NULL + independent_verifications <- append(independent_verifications, + list(ecvs[[i]])) + } else { + dependent_verifications <- append(dependent_verifications, + list(dependent)) + } + } + # There are indicators not reviewed yet? 
+ if (length(done) < length(inds)) { + if (length(inds) == 1) { + independent_verifications <- append(independent_verifications, + inds) + } else { + done <- NULL + for (i in 1:(length(inds) - 1)) { + dependent <- list(inds[[i]]$name) + if (is.na(match(unlist(dependent), unlist(done)))) { + for (j in (i+1):length(inds)) { + if (ind_table[inds[[i]]$name][[1]]$ECVs == + ind_table[inds[[j]]$name][[1]]$ECVs) { + if (ind_table[inds[[i]]$name][[1]]$freq == + ind_table[inds[[j]]$name][[1]]$freq) { + dependent <- append(dependent, inds[[j]]$name) + done <- dependent + } + } + } + } + if (length(dependent) == 1) { + independent_verifications <- dependent + dependent <- NULL + } else { + dependent_verifications <- dependent + } + } + } + } + } else { # there are only Indicators: + done <- NULL + for (i in 1:(length(inds) - 1)) { + dependent <- list(inds[[i]]$name) + if (is.na(match(unlist(dependent), unlist(done)))) { + for (j in (i+1):length(inds)) { + if (ind_table[inds[[i]]$name][[1]]$ECVs == + ind_table[inds[[j]]$name][[1]]$ECVs) { + if (ind_table[inds[[i]]$name][[1]]$freq == + ind_table[inds[[j]]$name][[1]]$freq) { + dependent <- append(dependent, inds[[j]]$name) + done <- dependent + } + } + } + } + if (length(dependent) == 1) { + independent_verifications <- dependent + dependent <- NULL + } else { + dependent_verifications <- dependent + } + } + } + } + } + if (!is.null(independent_verifications)) { + info(logger, paste("The variables for independent verification are ", + paste(independent_verifications, collapse = " "))) + } + if (!is.null(dependent_verifications)) { + info(logger, paste("The variables for dependent verification are: ", + paste(dependent_verifications, collapse = " "))) + } + # remove unnecessary names in objects to be removed + return(list(independent = independent_verifications, + dependent = dependent_verifications)) +} +#workflow <- list(Calibration = list(method = 'SBC'), +# Skill = list(metric = 'RPSS')) +#ApplyWorkflow <- function(workflow) { + +#res <- do.call('CST_BiasCorrection', +# args = list(exp = lonlat_data$exp, +# obs = lonlat_data$obs)) + + + + diff --git a/tools/check_recipe.R b/tools/check_recipe.R index eaec8f9a393868a47247ba5aea6ab5f7c308be9c..a9170707d0ce9b0ebc5ab7410737ce6b33af893e 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -10,10 +10,10 @@ check_recipe <- function(recipe) { # --------------------------------------------------------------------- TIME_SETTINGS_SEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", - "hcst_end") + "hcst_end") TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") PARAMS <- c("Horizon", "Time", "Variables", "Region", "Regrid", "Workflow", - "Datasets") + "Datasets") HORIZONS <- c("subseasonal", "seasonal", "decadal") ARCHIVE_SEASONAL <- "conf/archive.yml" ARCHIVE_DECADAL <- "conf/archive_decadal.yml" @@ -24,21 +24,21 @@ check_recipe <- function(recipe) { # Check basic elements in recipe:Analysis: if (!("Analysis" %in% names(recipe))) { error(recipe$Run$logger, - "The recipe must contain an element called 'Analysis'.") + "The recipe must contain an element called 'Analysis'.") error_status <- T } if (!all(PARAMS %in% names(recipe$Analysis))) { error(recipe$Run$logger, paste0("The element 'Analysis' in the recipe must contain all of ", - "the following: ", paste(PARAMS, collapse = ", "), ".")) + "the following: ", paste(PARAMS, collapse = ", "), ".")) error_status <- T } if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { error(recipe$Run$logger, paste0("The 
element 'Horizon' in the recipe must be one of the ", - "following: ", paste(HORIZONS, collapse = ", "), ".")) + "following: ", paste(HORIZONS, collapse = ", "), ".")) error_status <- T } # Check time settings @@ -47,18 +47,18 @@ check_recipe <- function(recipe) { archive <- read_yaml(ARCHIVE_SEASONAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { error(recipe$Run$logger, - paste0("The element 'Time' in the recipe must contain all of the ", - "following: ", paste(TIME_SETTINGS_SEASONAL, - collapse = ", "), ".")) + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_SEASONAL, + collapse = ", "), ".")) error_status <- T } } else if (tolower(recipe$Analysis$Horizon) == "decadal") { archive <- read_yaml(ARCHIVE_DECADAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { error(recipe$Run$logger, - paste0("The element 'Time' in the recipe must contain all of the ", - "following: ", paste(TIME_SETTINGS_DECADAL, - collapse = ", "), ".")) + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_DECADAL, + collapse = ", "), ".")) error_status <- T } } else { @@ -68,14 +68,14 @@ check_recipe <- function(recipe) { if (!is.null(archive)) { if (!all(recipe$Analysis$Datasets$System$name %in% names(archive$System))) { error(recipe$Run$logger, - "The specified System name was not found in the archive.") + "The specified System name was not found in the archive.") error_status <- T } # Check reference names if (!all(recipe$Analysis$Datasets$Reference$name %in% names(archive$Reference))) { error(recipe$Run$logger, - "The specified Reference name was not found in the archive.") + "The specified Reference name was not found in the archive.") error_status <- T } } @@ -83,36 +83,36 @@ check_recipe <- function(recipe) { if ((!(recipe$Analysis$Time$ftime_min > 0)) || (!is.integer(recipe$Analysis$Time$ftime_min))) { error(recipe$Run$logger, - "The element 'ftime_min' must be an integer larger than 0.") + "The element 'ftime_min' must be an integer larger than 0.") error_status <- T } if ((!(recipe$Analysis$Time$ftime_max > 0)) || (!is.integer(recipe$Analysis$Time$ftime_max))) { error(recipe$Run$logger, - "The element 'ftime_max' must be an integer larger than 0.") + "The element 'ftime_max' must be an integer larger than 0.") error_status <- T } if (recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { error(recipe$Run$logger, - "'ftime_max' cannot be smaller than 'ftime_min'.") + "'ftime_max' cannot be smaller than 'ftime_min'.") error_status <- T } # Check consistency of hindcast years if (!(as.numeric(recipe$Analysis$Time$hcst_start) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_start > 0))) { error(recipe$Run$logger, - "The element 'hcst_start' must be a valid year.") + "The element 'hcst_start' must be a valid year.") error_status <- T } if (!(as.numeric(recipe$Analysis$Time$hcst_end) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_end > 0))) { error(recipe$Run$logger, - "The element 'hcst_end' must be a valid year.") + "The element 'hcst_end' must be a valid year.") error_status <- T } if (recipe$Analysis$Time$hcst_end < recipe$Analysis$Time$hcst_start) { error(recipe$Run$logger, - "'hcst_end' cannot be smaller than 'hcst_start'.") + "'hcst_end' cannot be smaller than 'hcst_start'.") error_status <- T } ## TODO: Is this needed? 
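The hunks further down in this file add an almost identical 'save' validation block for Calibration, Anomalies, Skill and Probabilities. As a non-authoritative sketch only (the helper name check_save_option and its use are hypothetical and not part of this changeset), that repeated pattern could be factored into a shared helper along these lines:

check_save_option <- function(recipe, module, save_options) {
  # Hypothetical helper: validate the 'save' field of one Workflow module
  # against its accepted values; returns TRUE when the check fails, so the
  # caller can accumulate it into error_status.
  save_value <- recipe$Analysis$Workflow[[module]]$save
  if (is.null(save_value) || !(save_value %in% save_options)) {
    error(recipe$Run$logger,
          paste0("Please specify which ", module, " module outputs you want ",
                 "to save with the 'save' parameter. The options are: ",
                 paste(save_options, collapse = ", "), "."))
    return(TRUE)
  }
  return(FALSE)
}

# Possible usage, mirroring the per-module checks added below:
# error_status <- check_save_option(recipe, "Calibration",
#                                   c("all", "none", "exp_only", "fcst_only")) ||
#                 error_status

Whether a shared helper is worth it depends on how much the per-module messages need to differ; the changeset keeps them inline.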
@@ -141,7 +141,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Time$fcst_year)) { warn(recipe$Run$logger, paste("The element 'fcst_year' is not defined in the recipe.", - "No forecast year will be used.")) + "No forecast year will be used.")) } ## TODO: Adapt and move this inside 'if'? # fcst.sdate <- NULL @@ -165,7 +165,7 @@ check_recipe <- function(recipe) { # calculate number of workflows to create for each variable and if (length(recipe$Analysis$Horizon) > 1) { error(recipe$Run$logger, - "Only one single Horizon can be specified in the recipe") + "Only one single Horizon can be specified in the recipe") error_status <- T } @@ -197,7 +197,7 @@ check_recipe <- function(recipe) { if (!all(LIMITS %in% names(region))) { error(recipe$Run$logger, paste0("There must be 4 elements in 'Region': ", - paste(LIMITS, collapse = ", "), ".")) + paste(LIMITS, collapse = ", "), ".")) error_status <- T } } @@ -206,15 +206,15 @@ check_recipe <- function(recipe) { if (!("name" %in% names(region)) || (is.null(region$name))) { error(recipe$Run$logger, paste("If more than one region has been defined, every region", - "must have a unique name.")) + "must have a unique name.")) } } } # Atomic recipe } else if (!all(LIMITS %in% names(recipe$Analysis$Region))) { error(recipe$Run$logger, - paste0("There must be 4 elements in 'Region': ", - paste(LIMITS, collapse = ", "), ".")) + paste0("There must be 4 elements in 'Region': ", + paste(LIMITS, collapse = ", "), ".")) error_status <- T } ## TODO: Implement multiple regions @@ -253,47 +253,110 @@ check_recipe <- function(recipe) { "The 'Calibration' element 'method' must be specified.") error_status <- T } + SAVING_OPTIONS_CALIB <- c("all", "none", "exp_only", "fcst_only") + if ((is.null(recipe$Analysis$Workflow$Calibration$save)) || + (!(recipe$Analysis$Workflow$Calibration$save %in% SAVING_OPTIONS_CALIB))) { + error(recipe$Run$logger, + paste0("Please specify which Calibration module outputs you want ", + "to save with the 'save' parameter. 
The options are: ", + paste(SAVING_OPTIONS_CALIB, collapse = ", "), ".")) + error_status <- T + } } # Anomalies if ("Anomalies" %in% names(recipe$Analysis$Workflow)) { + # Computation and cross-validation checks if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, - "Parameter 'compute' must be defined under 'Anomalies'.") + "Parameter 'compute' must be defined under 'Anomalies'.") error_status <- T } else if (!(is.logical(recipe$Analysis$Workflow$Anomalies$compute))) { error(recipe$Run$logger, - paste("Parameter 'Anomalies:compute' must be a logical value", - "(True/False or yes/no).")) - error_status <- T - } else if ((recipe$Analysis$Workflow$Anomalies$compute) && - (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation))) { - error(recipe$Run$logger, - paste("If anomaly computation is requested, parameter", - "'cross_validation' must be defined under 'Anomalies', - and it must be a logical value (True/False or yes/no).")) + paste("Parameter 'Anomalies:compute' must be a logical value", + "(True/False or yes/no).")) error_status <- T + } else if ((recipe$Analysis$Workflow$Anomalies$compute)) { + # Cross-validation check + if (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation)) { + error(recipe$Run$logger, + paste("If anomaly computation is requested, parameter", + "'cross_validation' must be defined under 'Anomalies', + and it must be a logical value (True/False or yes/no).")) + error_status <- T + } + # Saving checks + SAVING_OPTIONS_ANOM <- c("all", "none", "exp_only", "fcst_only") + if ((is.null(recipe$Analysis$Workflow$Anomalies$save)) || + (!(recipe$Analysis$Workflow$Anomalies$save %in% SAVING_OPTIONS_ANOM))) { + error(recipe$Run$logger, + paste0("Please specify which Anomalies module outputs you want ", + "to save with the 'save' parameter. The options are: ", + paste(SAVING_OPTIONS_ANOM, collapse = ", "), ".")) + error_status <- T + } } } # Skill if (("Skill" %in% names(recipe$Analysis$Workflow)) && (is.null(recipe$Analysis$Workflow$Skill$metric))) { error(recipe$Run$logger, - "Parameter 'metric' must be defined under 'Skill'.") + "Parameter 'metric' must be defined under 'Skill'.") error_status <- T + # Saving checks + SAVING_OPTIONS_SKILL <- c("all", "none") + if ((is.null(recipe$Analysis$Workflow$Skill$save)) || + (!(recipe$Analysis$Workflow$Skill$save %in% SAVING_OPTIONS_SKILL))) { + error(recipe$Run$logger, + paste0("Please specify whether you want to save the Skill metrics ", + "with the 'save' parameter. 
The options are: ", + paste(SAVING_OPTIONS_SKILL, collapse = ", "), ".")) + error_status <- T + } } # Probabilities if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, - "Parameter 'percentiles' must be defined under 'Probabilities'.") + "Parameter 'percentiles' must be defined under 'Probabilities'.") error_status <- T } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, - paste("Parameter 'Probabilities:percentiles' expects a list.", - "See documentation in the wiki for examples.")) + paste("Parameter 'Probabilities:percentiles' expects a list.", + "See documentation in the wiki for examples.")) + error_status <- T + } + # Saving checks + SAVING_OPTIONS_PROBS <- c("all", "none", "bins_only", "percentiles_only") + if ((is.null(recipe$Analysis$Workflow$Probabilities$save)) || + (!(recipe$Analysis$Workflow$Probabilities$save %in% SAVING_OPTIONS_PROBS))) { + error(recipe$Run$logger, + paste0("Please specify whether you want to save the percentiles ", + "and probability bins with the 'save' parameter. The ", + "options are: ", + paste(SAVING_OPTIONS_PROBS, collapse = ", "), ".")) error_status <- T } } + # Visualization + if ("Visualization" %in% names(recipe$Analysis$Workflow)) { + PLOT_OPTIONS <- c("skill_metrics", "forecast_ensemble_mean", + "most_likely_terciles") + ## Separate plots parameter and check if all elements are in PLOT_OPTIONS + if (is.null(recipe$Analysis$Workflow$Visualization$plots)) { + error(recipe$Run$logger, + "The 'plots' element must be defined under 'Visualization'.") + error_status <- T + } else { + plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, + ", | |,")[[1]] + if (!all(plots %in% PLOT_OPTIONS)) { + error(recipe$Run$logger, + paste0("The options available for the plots are: ", + paste(PLOT_OPTIONS, collapse = ", "), ".")) + error_status <- T + } + } + } # --------------------------------------------------------------------- # RUN CHECKS @@ -307,7 +370,7 @@ check_recipe <- function(recipe) { } if (!all(RUN_FIELDS %in% names(recipe$Run))) { error(recipe$Run$logger, paste("Recipe element 'Run' must contain", - "all of the following fields:", + "all of the following fields:", paste(RUN_FIELDS, collapse=", "), ".")) error_status <- T } @@ -347,8 +410,8 @@ check_recipe <- function(recipe) { # --------------------------------------------------------------------- AUTO_PARAMS <- c("script", "expid", "hpc_user", "wallclock", - "processors_per_job", "platform", "email_notifications", - "email_address", "notify_completed", "notify_failed") + "processors_per_job", "platform", "email_notifications", + "email_address", "notify_completed", "notify_failed") # Autosubmit false by default if (is.null(recipe$Run$autosubmit)) { recipe$Run$autosubmit <- F @@ -360,53 +423,53 @@ check_recipe <- function(recipe) { # Check that the autosubmit configuration parameters are present if (!("auto_conf" %in% names(recipe$Run))) { error(recipe$Run$logger, - "The 'auto_conf' is missing from the 'Run' section of the recipe.") + "The 'auto_conf' is missing from the 'Run' section of the recipe.") error_status <- T } else if (!all(AUTO_PARAMS %in% names(recipe$Run$auto_conf))) { error(recipe$Run$logger, - paste0("The element 'Run:auto_conf' must contain all of the ", - "following: ", paste(AUTO_PARAMS, collapse = ", "), ".")) + paste0("The element 'Run:auto_conf' must contain all of the ", + "following: ", paste(AUTO_PARAMS, collapse = ", 
"), ".")) error_status <- T } # Check that the script is not NULL and exists if (is.null(recipe$Run$auto_conf$script)) { error(recipe$Run$logger, - "A script must be provided to run the recipe with autosubmit.") + "A script must be provided to run the recipe with autosubmit.") error_status <- T } else if (!file.exists(recipe$Run$auto_conf$script)) { error(recipe$Run$logger, - "Could not find the file for the script in 'auto_conf'.") + "Could not find the file for the script in 'auto_conf'.") error_status <- T } # Check that the experiment ID exists if (is.null(recipe$Run$auto_conf$expid)) { error(recipe$Run$logger, - paste("The Autosubmit EXPID is missing. You can create one by", - "running the following commands on the autosubmit machine:")) + paste("The Autosubmit EXPID is missing. You can create one by", + "running the following commands on the autosubmit machine:")) error(recipe$Run$logger, - paste("module load", auto_specs$module_version)) + paste("module load", auto_specs$module_version)) error(recipe$Run$logger, - paste("autosubmit expid -H", auto_specs$platform, - "-d ")) + paste("autosubmit expid -H", auto_specs$platform, + "-d ")) } else if (!dir.exists(paste0(auto_specs$experiment_dir, - recipe$Run$auto_conf$expid))) { + recipe$Run$auto_conf$expid))) { error(recipe$Run$logger, - paste0("No folder in ", auto_specs$experiment_dir, - " for the EXPID", recipe$Run$auto_conf$expid, - ". Please make sure it is correct.")) + paste0("No folder in ", auto_specs$experiment_dir, + " for the EXPID", recipe$Run$auto_conf$expid, + ". Please make sure it is correct.")) } if ((recipe$Run$auto_conf$email_notifications) && - (is.null(recipe$Run$auto_conf$email_address))) { + (is.null(recipe$Run$auto_conf$email_address))) { error(recipe$Run$logger, - "Autosubmit notifications are enabled but email address is empty!") + "Autosubmit notifications are enabled but email address is empty!") } if (is.null(recipe$Run$auto_conf$hpc_user)) { error(recipe$Run$logger, - "The 'Run:auto_conf:hpc_user' field can not be empty.") + "The 'Run:auto_conf:hpc_user' field can not be empty.") } else if ((recipe$Run$filesystem == "esarchive") && - (!substr(recipe$Run$auto_conf$hpc_user, 1, 5) == "bsc32")) { + (!substr(recipe$Run$auto_conf$hpc_user, 1, 5) == "bsc32")) { error(recipe$Run$logger, - "Please check your hpc_user ID. It should look like: 'bsc32xxx'") + "Please check your hpc_user ID. It should look like: 'bsc32xxx'") } } @@ -418,150 +481,15 @@ check_recipe <- function(recipe) { ## TODO: Implement number of dependent verifications #nverifications <- check_number_of_dependent_verifications(recipe) # info(recipe$Run$logger, paste("Start Dates:", - # paste(fcst.sdate, collapse = " "))) + # paste(fcst.sdate, collapse = " "))) # Return error if any check has failed if (error_status) { error(recipe$Run$logger, "RECIPE CHECK FAILED.") stop("The recipe contains some errors. 
Find the full list in the", - "startup.log file.") + " startup.log file.") } else { info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFULL #####") # return(append(nverifications, fcst.sdate)) } } - -check_number_of_dependent_verifications <- function(recipe) { - # Number of verifications depends on the variables and indicators requested - # and the order of the workflow: - # workflow: correction + indicator --> only 1 variable is calibrated - # workflow: indicator + correction --> the indicator and the ecv are calibrated - independent_verifications <- NULL - dependent_verifications <- NULL - dep <- 1 - # check workflow order: - if (all(c('Calibration', 'Indicators') %in% names(recipe$Analysis$Workflow))) { - cal_pos <- which(names(recipe$Analysis$Workflow) == 'Calibration') - ind_pos <- which(names(recipe$Analysis$Workflow) == 'Indicators') - if (cal_pos < ind_pos) { - workflow_independent <- FALSE - } else { - workflow_independent <- TRUE - } - } - if (workflow_independent) { - independent_verifications <- append(recipe$Analysis$Variables$ECVs, - recipe$Analysis$Variables$Indicators) - } else { - if (is.null(recipe$Analysis$Variables$Indicators) || - (length(recipe$Analysis$Variables$Indicators) == 1 && - is.null(recipe$Analysis$Variables$ECVs))) { - independent_verifications <- append(recipe$Analysis$Variables$ECVs, - recipe$Analysis$Variables$Indicators) - } else { - ecvs <- recipe$Analysi$Variables$ECVs - inds <- recipe$Analysi$Variables$Indicators - ind_table <- read_yaml(paste0(recipe$Run$code_dir, - "conf/indicators_table.yml")) - # first, loop on ecvs if any and compare to indicators - done <- NULL # to gather the indicators reviewed - if (!is.null(ecvs)) { - for (i in 1:length(ecvs)) { - dependent <- list(ecvs[[i]]) - for (j in 1:length(inds)) { - if (ind_table[inds[[j]]$name][[1]]$ECVs == ecvs[[i]]$name) { - if (ind_table[inds[[j]]$name][[1]]$freq == ecvs[[i]]$freq) { - # they are dependent - dependent <- append(dependent, inds[[j]]) - done <- append(done, inds[[j]]) - } - } - } - if (length(dependent) == 1) { - dependent <- NULL - independent_verifications <- append(independent_verifications, - list(ecvs[[i]])) - } else { - dependent_verifications <- append(dependent_verifications, - list(dependent)) - } - } - # There are indicators not reviewed yet? 
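# Illustrative sketch, not part of the patch: a minimal Workflow fragment with
# the new 'save' and 'plots' fields that the checks in check_recipe() above
# validate. The method, metric, percentile and plot values are example choices
# only; the accepted 'save' and 'plots' values are the SAVING_OPTIONS_* and
# PLOT_OPTIONS vectors defined in the checks.
example_workflow <- list(
  Calibration   = list(method = "mse_min", save = "all"),
  Anomalies     = list(compute = TRUE, cross_validation = TRUE,
                       save = "exp_only"),
  Skill         = list(metric = "RPSS", save = "all"),
  Probabilities = list(percentiles = list(c(1/3, 2/3)), save = "all"),
  Visualization = list(plots = "skill_metrics, forecast_ensemble_mean")
)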
- if (length(done) < length(inds)) { - if (length(inds) == 1) { - independent_verifications <- append(independent_verifications, - inds) - } else { - done <- NULL - for (i in 1:(length(inds) - 1)) { - dependent <- list(inds[[i]]$name) - if (is.na(match(unlist(dependent), unlist(done)))) { - for (j in (i+1):length(inds)) { - if (ind_table[inds[[i]]$name][[1]]$ECVs == - ind_table[inds[[j]]$name][[1]]$ECVs) { - if (ind_table[inds[[i]]$name][[1]]$freq == - ind_table[inds[[j]]$name][[1]]$freq) { - dependent <- append(dependent, inds[[j]]$name) - done <- dependent - } - } - } - } - if (length(dependent) == 1) { - independent_verifications <- dependent - dependent <- NULL - } else { - dependent_verifications <- dependent - } - } - } - } - } else { # there are only Indicators: - done <- NULL - for (i in 1:(length(inds) - 1)) { - dependent <- list(inds[[i]]$name) - if (is.na(match(unlist(dependent), unlist(done)))) { - for (j in (i+1):length(inds)) { - if (ind_table[inds[[i]]$name][[1]]$ECVs == - ind_table[inds[[j]]$name][[1]]$ECVs) { - if (ind_table[inds[[i]]$name][[1]]$freq == - ind_table[inds[[j]]$name][[1]]$freq) { - dependent <- append(dependent, inds[[j]]$name) - done <- dependent - } - } - } - } - if (length(dependent) == 1) { - independent_verifications <- dependent - dependent <- NULL - } else { - dependent_verifications <- dependent - } - } - } - } - } - if (!is.null(independent_verifications)) { - info(logger, paste("The variables for independent verification are ", - paste(independent_verifications, collapse = " "))) - } - if (!is.null(dependent_verifications)) { - info(logger, paste("The variables for dependent verification are: ", - paste(dependent_verifications, collapse = " "))) - } - # remove unnecessary names in objects to be removed - return(list(independent = independent_verifications, - dependent = dependent_verifications)) -} -#workflow <- list(Calibration = list(method = 'SBC'), -# Skill = list(metric = 'RPSS')) -#ApplyWorkflow <- function(workflow) { - -#res <- do.call('CST_BiasCorrection', -# args = list(exp = lonlat_data$exp, -# obs = lonlat_data$obs)) - - - - diff --git a/tools/data_summary.R b/tools/data_summary.R index 5f532dcfc641eb9798de8e44cf5d850511c31c1a..92dcb353c751abfae966868e9f6d1b8549eabd68 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -19,7 +19,7 @@ data_summary <- function(data_cube, recipe) { info(recipe$Run$logger, "DATA SUMMARY:") info(recipe$Run$logger, paste(object_name, "months:", months)) info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to", - sdate_max)) + sdate_max)) info(recipe$Run$logger, paste(object_name, "dimensions:")) # Use capture.output() and for loop to display results neatly output_string <- capture.output(dim(data_cube$data)) @@ -27,7 +27,7 @@ data_summary <- function(data_cube, recipe) { info(recipe$Run$logger, i) } info(recipe$Run$logger, paste0("Statistical summary of the data in ", - object_name, ":")) + object_name, ":")) output_string <- capture.output(summary(data_cube$data)) for (i in output_string) { info(recipe$Run$logger, i) diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 61825738fa417ea3000ce4473d64d1dfeaa22e17..d0857730ac821d8f4dde6b44a8478befff124a67 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -22,8 +22,8 @@ #'@export prepare_outputs <- function(recipe_file, - disable_checks = FALSE, - uniqueID = TRUE) { + disable_checks = FALSE, + uniqueID = TRUE) { # recipe_file: path to recipe YAML file # disable_checks: If TRUE, does not perform checks on 
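# Usage sketch, illustrative only: calling prepare_outputs() from a launcher
# script. The recipe path is a hypothetical example; 'disable_checks' and
# 'uniqueID' take the default values shown in the signature above, and the
# returned object is assumed to be the prepared recipe.
recipe <- prepare_outputs(recipe_file = "recipes/recipe_example.yml",
                          disable_checks = FALSE,
                          uniqueID = TRUE)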
recipe # disable_uniqueID: If TRUE, does not add a unique ID to output dir @@ -39,7 +39,7 @@ prepare_outputs <- function(recipe_file, } else { folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)), "_", gsub(" ", "", gsub(":", "", gsub("-", "", - Sys.time())))) + Sys.time())))) } print("Saving all outputs to:") print(paste0(output_dir, folder_name)) @@ -49,7 +49,7 @@ prepare_outputs <- function(recipe_file, ## TODO: Move this part to main recipe # Copy recipe to output folder file.copy(recipe$recipe_path, file.path(output_dir, folder_name, 'logs', - 'recipes')) + 'recipes')) # Create log output file logfile <- file.path(output_dir, folder_name, 'logs', 'main.log') file.create(logfile) @@ -84,12 +84,12 @@ prepare_outputs <- function(recipe_file, if (is.null(recipe$Run$filesystem)) { recipe$Run$filesystem <- "esarchive" warn(recipe$Run$logger, - "Filesystem not specified in the recipe. Setting it to 'esarchive'.") + "Filesystem not specified in the recipe. Setting it to 'esarchive'.") } # Run recipe checker if (disable_checks) { warn(recipe$Run$logger, - "Recipe checks disabled. The recipe will not be checked for errors.") + "Recipe checks disabled. The recipe will not be checked for errors.") } else { check_recipe(recipe) } diff --git a/tools/read_atomic_recipe.R b/tools/read_atomic_recipe.R index 1eadb707b3c3293c12a2450d171471ef5070c1e9..de2ad5b554e81dec54146afeefd2724cf8b0c069 100644 --- a/tools/read_atomic_recipe.R +++ b/tools/read_atomic_recipe.R @@ -28,7 +28,7 @@ read_atomic_recipe <- function(recipe_file) { recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) # Create log file for atomic recipe logfile <- file.path(recipe$Run$output_dir, 'logs', - paste0(recipe$name, '.log')) + paste0(recipe$name, '.log')) file.create(logfile) # Set default behaviour of logger if (is.null(recipe$Run)) { diff --git a/tools/write_autosubmit_conf.R b/tools/write_autosubmit_conf.R index a0208a9e12d1b0bd5640f57482d5366b93a0b5cc..a425566d62278e109a6e9b1d53c78eef9f7e700e 100644 --- a/tools/write_autosubmit_conf.R +++ b/tools/write_autosubmit_conf.R @@ -20,9 +20,9 @@ write_autosubmit_conf <- function(recipe, nchunks) { ## expid, email notifications and address conf$config$EXPID <- expid if (recipe$Run$auto_conf$email_notifications) { - conf$mail$NOTIFICATIONS <- "True" + conf$mail$NOTIFICATIONS <- "True" } else { - conf$mail$NOTIFICATIONS <- "False" + conf$mail$NOTIFICATIONS <- "False" } conf$mail$TO <- recipe$Run$auto_conf$email_address } else if (conf_type == "expdef") { @@ -37,34 +37,34 @@ write_autosubmit_conf <- function(recipe, nchunks) { ## wallclock, notify_on, platform?, processors # Different file structure depending on autosubmit version if (auto_specs$auto_version == "4.0.0") { - jobs <- conf$JOBS + jobs <- conf$JOBS } else { - jobs <- conf + jobs <- conf } jobs$verification$WALLCLOCK <- recipe$Run$auto_conf$wallclock if (recipe$Run$auto_conf$notify_completed) { jobs$verification$NOTIFY_ON <- paste(jobs$verification$NOTIFY_ON, - "COMPLETED") + "COMPLETED") } if (recipe$Run$auto_conf$notify_failed) { - jobs$verification$NOTIFY_ON <- paste(jobs$verification$NOTIFY_ON, - "FAILED") + jobs$verification$NOTIFY_ON <- paste(jobs$verification$NOTIFY_ON, + "FAILED") } jobs$verification$PROCESSORS <- recipe$Run$auto_conf$processors_per_job # ncores? 
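# Illustration, not part of the patch: the nested gsub() calls used above in
# prepare_outputs() strip "-", ":" and spaces from Sys.time() to build the
# unique, timestamp-suffixed output folder name. With an example timestamp:
gsub(" ", "", gsub(":", "", gsub("-", "", "2023-07-04 10:15:30")))
# returns "20230704101530", which is appended to the sanitised recipe name.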
# Return to original list if (auto_specs$auto_version == "4.0.0") { - conf$JOBS <- jobs + conf$JOBS <- jobs } else { - conf <- jobs + conf <- jobs } } else if (conf_type == "platforms") { # Section 4: platform configuration ## nord3v2 configuration... platform name? user, processors_per_node if (auto_specs$auto_version == "4.0.0") { - conf$Platforms[[auto_specs$platform]]$USER <- - recipe$Run$auto_conf$hpc_user + conf$Platforms[[auto_specs$platform]]$USER <- + recipe$Run$auto_conf$hpc_user } else { - conf[[auto_specs$platform]]$USER <- recipe$Run$auto_conf$hpc_user + conf[[auto_specs$platform]]$USER <- recipe$Run$auto_conf$hpc_user } } else if (conf_type == "proj") { # Section 5: proj @@ -75,31 +75,31 @@ write_autosubmit_conf <- function(recipe, nchunks) { # Write config file inside autosubmit dir ## TODO: Change write.type depending on autosubmit version write.config(conf, paste0(dest_dir, dest_file), - write.type = auto_specs$conf_format) + write.type = auto_specs$conf_format) Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F) } info(recipe$Run$logger, paste("##### AUTOSUBMIT CONFIGURATION WRITTEN FOR", expid, "#####")) info(recipe$Run$logger, paste0("You can check your experiment configuration at: ", - "/esarchive/autosubmit/", expid, "/conf/")) + "/esarchive/autosubmit/", expid, "/conf/")) # Print instructions/commands for user if (recipe$Run$Terminal) { ## TODO: Change SSH message for other environments (outside BSC) info(recipe$Run$logger, - paste("Please SSH into bscesautosubmit01 or bscesautosubmit02 and run", - "the following commands:")) + paste("Please SSH into bscesautosubmit01 or bscesautosubmit02 and run", + "the following commands:")) info(recipe$Run$logger, - paste("module load", auto_specs$module_version)) + paste("module load", auto_specs$module_version)) info(recipe$Run$logger, - paste("autosubmit create", expid)) + paste("autosubmit create", expid)) info(recipe$Run$logger, - paste("autosubmit refresh", expid)) + paste("autosubmit refresh", expid)) info(recipe$Run$logger, - paste("nohup autosubmit run", expid, "& disown")) + paste("nohup autosubmit run", expid, "& disown")) } else { print(paste("Please SSH into bscesautosubmit01 or bscesautosubmit02 and run", - "the following commands:")) + "the following commands:")) print(paste("module load", auto_specs$module_version)) print(paste("autosubmit create", expid)) print(paste("autosubmit refresh", expid))
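# Usage sketch, illustrative only: 'recipe' is assumed to be a checked recipe
# list and 'nchunks' the number of verification chunks produced upstream; the
# value 4 is a placeholder.
write_autosubmit_conf(recipe = recipe, nchunks = 4)
# The function writes the Autosubmit configuration files for the experiment and
# then logs (if recipe$Run$Terminal is TRUE) or prints the module load /
# autosubmit create / refresh / run commands shown above.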