diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R
new file mode 100644
index 0000000000000000000000000000000000000000..66df62b868b25564f863970c97473c047d3534b3
--- /dev/null
+++ b/example_scripts/exec_timeagg.R
@@ -0,0 +1,42 @@
+rm(list=ls())
+gc()
+
+source("modules/Loading/Loading.R")
+source("modules/Calibration/Calibration.R")
+source("modules/Anomalies/Anomalies.R")
+source("modules/Skill/Skill.R")
+source("modules/Saving/Saving.R")
+source("modules/Visualization/Visualization.R")
+source("tools/prepare_outputs.R")
+source("modules/Units/Units.R")
+source("modules/Aggregation/Aggregation.R")
+
+# Read recipe
+args = commandArgs(trailingOnly = TRUE)
+recipe_file <- args[1]
+#recipe <- read_atomic_recipe(recipe_file)
+## to test a single recipe:
+# recipe_file <- "recipes/examples/recipe_tas_seasonal_timeagg.yml"
+# recipe_file <- "recipes/examples/recipe_prlr_seasonal_timeagg.yml"
+
+recipe <- prepare_outputs(recipe_file)
+
+# Load datasets
+data <- Loading(recipe)
+# Units transformation
+data <- Units(recipe, data)
+# Temporal aggregation
+data <- Aggregation(recipe = recipe, data = data)
+# Calibrate datasets
+data <- Calibration(recipe, data)
+# Compute skill metrics
+skill_metrics <- Skill(recipe, data)
+# Compute percentiles and probability bins
+probabilities <- Probabilities(recipe, data)
+# Export all data to netCDF
+## TODO: Fix saving
+# save_data(recipe, data, skill_metrics, probabilities)
+# Plot data
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = T)
+
diff --git a/example_scripts/test_decadal.R b/example_scripts/test_decadal.R
index 12daa540012d05d876c1da2fe1f142c11b58d66a..de9eae6a8bea924f35fcdf477b64fe0a09efe96f 100644
--- a/example_scripts/test_decadal.R
+++ b/example_scripts/test_decadal.R
@@ -9,7 +9,7 @@ source("modules/Calibration/Calibration.R")
 source("modules/Skill/Skill.R")
 source("modules/Saving/Saving.R")
 source("modules/Visualization/Visualization.R")
-
+source("modules/Aggregation/Aggregation.R")
 recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml"
 recipe <- prepare_outputs(recipe_file)
 # archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive
@@ -17,16 +17,19 @@ recipe <- prepare_outputs(recipe_file)
 
 # Load datasets
 data <- Loading(recipe)
+data <- Aggregation(recipe = recipe, data = data)
 # Calibrate datasets
 calibrated_data <- Calibration(recipe, data)
 # Compute skill metrics
-skill_metrics <- Skill(recipe, calibrated_data)
+skill_metrics <- Skill(recipe, data)
 # Compute percentiles and probability bins
-probabilities <- Probabilities(recipe, calibrated_data)
+probabilities <- Probabilities(recipe, data)
 # Plot data
-Visualization(recipe, calibrated_data, skill_metrics, probabilities,
+Visualization(recipe = recipe, data = data,
+              skill_metrics = skill_metrics,
+              probabilities = probabilities,
               significance = T)
diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R
new file mode 100644
index 0000000000000000000000000000000000000000..d5c09faca9f7a44d8a650fee2bc286f88fabb69f
--- /dev/null
+++ b/modules/Aggregation/Aggregation.R
@@ -0,0 +1,63 @@
+# recipe
+## Time_aggregation$execute YES/NO
+## Time_aggregation$method accumulated or average
+## Time_aggregation$user_def for decadal
+## Time_aggregation$ini based on forecast time
+## Time_aggregation$end based on forecast time
+# For date definition of period aggregation consider using Indicators module
+## ini = 2, end = 3 with monthly freq would be a 2-month aggregation
+source("modules/Aggregation/R/agg_ini_end.R")
+
+Aggregation <- function(recipe, data) {
+
+  ## TODO: Move checks to recipe checker
+  ncores <- recipe$Analysis$ncores # is it already checked? NULL or number
+  na.rm <- ifelse(is.null(recipe$Analysis$remove_NAs),
+                  TRUE, recipe$Analysis$remove_NAs) # is it already checked? TRUE/FALSE
+  # Fallback: return the data unchanged if no aggregation is performed
+  res <- data
+  if (recipe$Analysis$Workflow$Time_aggregation$execute) {
+    # original freq
+    raw_freq <- tolower(recipe$Analysis$Variable$freq)
+    custom <- recipe$Analysis$Workflow$Time_aggregation$user_def
+    ini <- recipe$Analysis$Workflow$Time_aggregation$ini
+    end <- recipe$Analysis$Workflow$Time_aggregation$end
+    method <- tolower(recipe$Analysis$Workflow$Time_aggregation$method)
+    ## Instead of raw freq may be better to use ini/end
+    ## if (raw_freq %in% c('daily', 'daily_mean', 'weekly', 'weekly_mean')) {
+    if (!is.null(ini) && !is.null(end)) {
+      # calculate aggregations from fday1 to fday2
+      # calculate aggregations from fweek1 to fweek2
+      # other cases could be indicators or other original freq
+      if (length(ini) == length(end)) {
+        res <- sapply(1:length(data), function(x) {
+          if (!is.null(data[[x]])) {
+            result <- agg_ini_end(data[x], method = method,
+                                  na.rm = na.rm, ncores = ncores,
+                                  ini = ini, end = end, indices = NULL)
+          } else {
+            result <- data[x]
+          }}, simplify = TRUE)
+      }
+    } else if (!is.null(custom)) {
+      # calculate aggregations from fmonth1 to fmonth2
+      # for decadal, it makes sense to get all seasons and annual means too
+      ## Ex: January, February, March for decadal simulations aggregation
+      ## custom <- sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14)))
+      ## TODO: Remove this check? Already in check_recipe()
+      if (is.list(custom)) {
+        res <- sapply(1:length(data), function(x) {
+          if (!is.null(data[[x]])) {
+            result <- agg_ini_end(data[x], method, na.rm, ncores,
+                                  indices = custom,
+                                  ini = NULL, end = NULL)
+          } else {
+            result <- data[x]
+          }}, simplify = TRUE)
+      } else {
+        stop("Time_aggregation$user_def is not a list")
+      }
+    }
+  }
+  info(recipe$Run$logger,
+       "##### TIME AGGREGATION COMPLETE #####")
+  return(res)
+}
diff --git a/modules/Aggregation/R/agg_ini_end.R b/modules/Aggregation/R/agg_ini_end.R
new file mode 100644
index 0000000000000000000000000000000000000000..17c6940a7a77ccf65f42c1fc110856d171f9bee1
--- /dev/null
+++ b/modules/Aggregation/R/agg_ini_end.R
@@ -0,0 +1,56 @@
+agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm, ncores) {
+  # to make the code work with both options:
+  if (!is.null(ini) && is.null(indices)) {
+    # list of vectors for the indices from ini to end pairs
+    indices <- lapply(1:length(ini), function(i) {
+                        ini[i]:end[i]})
+    plotting_attr <- list(ini_ftime = ini, end_ftime = end)
+  } else {
+    # take the first and last element of each indices list for time_bounds saving
+    ini <- unlist(lapply(indices, function(ind) {ind[1]}))
+    end <- unlist(lapply(indices, function(ind) {ind[length(ind)]}))
+    plotting_attr <- list(names(indices))
+  }
+  original_dims <- names(dim(x[[1]]$data))
+  if (method == 'average') {
+    x[[1]]$data <- Apply(x[[1]]$data,
+                         target_dim = 'time',
+                         function(y, ind) {
+                           sapply(1:length(indices),
+                                  function(z) {mean(y[indices[[z]]], na.rm = na.rm)})
+                         },
+                         ind = indices,
+                         output_dims = 'time',
+                         ncores = ncores)$output1
+  } else if (method == 'accumulated') {
+    x[[1]]$data <- Apply(x[[1]]$data,
+                         target_dim = 'time',
+                         function(y, ind) {
+                           sapply(1:length(indices),
+                                  function(z) {sum(y[indices[[z]]], na.rm = na.rm)})
+                         },
+                         ind = indices,
+                         output_dims = 'time',
+                         ncores = ncores)$output1
+  } else {
+    stop("Unknown method")
+  }
+  # If only one aggregation is requested, add the 'time' dim back:
+  if (!('time' %in% names(dim(x[[1]]$data)))) {
+    dim(x[[1]]$data) <- c(dim(x[[1]]$data), time = 1)
+  }
+  x[[1]]$data <- Reorder(x[[1]]$data, original_dims)
+  x[[1]]$dims <- dim(x[[1]]$data)
+  x[[1]]$coords$time <- 1:length(ini)
+  attr(x[[1]]$coords$time, "indices") <- TRUE
+  tmp_dates <- Subset(x[[1]]$attrs$Dates,
+                      along = 'time',
+                      indices = ini)
+  x[[1]]$attrs$time_bounds$start <- tmp_dates
+  x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates,
+                                         along = 'time',
+                                         indices = end)
+  attributes(x[[1]]$attrs$time_bounds)$plotting_attr <- plotting_attr
+  x[[1]]$attrs$Dates <- tmp_dates
+  return(x)
+}
diff --git a/modules/Saving/R/get_time.R b/modules/Saving/R/get_time.R
deleted file mode 100644
index 5426c177f1bd31e14b27ca9e831eb880da183f04..0000000000000000000000000000000000000000
--- a/modules/Saving/R/get_time.R
+++ /dev/null
@@ -1,30 +0,0 @@
-get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) {
-  # Generates time dimensions and the corresponding metadata.
-  ## TODO: Subseasonal
-
-  switch(fcst.horizon,
-         "seasonal" = {time <- leadtimes; ref <- 'hours since ';
-                       stdname <- paste(strtoi(leadtimes), collapse=", ")},
-         "subseasonal" = {len <- 4; ref <- 'hours since ';
-                          stdname <- ''},
-         "decadal" = {time <- leadtimes; ref <- 'hours since ';
-                      stdname <- paste(strtoi(leadtimes), collapse=", ")})
-
-  dim(time) <- length(time)
-  sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting
-  metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'),
-                               calendar = calendar))
-  attr(time, 'variables') <- metadata
-  names(dim(time)) <- 'time'
-
-  sdate <- 1:length(sdate)
-  dim(sdate) <- length(sdate)
-  metadata <- list(sdate = list(standard_name = paste(strtoi(sdate),
-                                                      collapse=", "),
-                                units = paste0('Init date')))
-  attr(sdate, 'variables') <- metadata
-  names(dim(sdate)) <- 'sdate'
-
-  return(list(time=time))
-}
-
diff --git a/modules/Saving/R/get_times.R b/modules/Saving/R/get_times.R
new file mode 100644
index 0000000000000000000000000000000000000000..ec36b1028889e54f43cd47932f591173677e69dc
--- /dev/null
+++ b/modules/Saving/R/get_times.R
@@ -0,0 +1,56 @@
+# recipe: the loaded recipe (list), used to identify the forecast horizon
+# data_cube: s2dv_cube containing the Dates and, if a time aggregation was
+#            performed, the time_bounds attributes
+# sdate: start date in 'YYYYMMDD' string format
+# calendar: name of the calendar in string format
+# init_date: initialization date in POSIXt/PCICt format
+
+.get_times <- function(recipe, data_cube, sdate, calendar, init_date) {
+
+  # Define list to contain metadata for time and time_bnds
+  times <- list()
+  # Identify the forecast horizon
+  fcst.horizon <- tolower(recipe$Analysis$Horizon)
+  # Generate time dimensions and the corresponding metadata.
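+  # The leadtimes are the hours elapsed between each valid date of the
+  # first start date and the initialization date, so that the time
+  # coordinate can be stored as 'hours since <start date>'.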
+  dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1),
+                    cal = calendar)
+  leadtimes <- as.numeric(dates - init_date)/3600
+
+  switch(fcst.horizon,
+         "seasonal" = {time <- leadtimes; ref <- 'hours since ';
+                       stdname <- paste(strtoi(leadtimes), collapse=", ")},
+         "subseasonal" = {time <- leadtimes; ref <- 'hours since ';
+                          stdname <- ''},
+         "decadal" = {time <- leadtimes; ref <- 'hours since ';
+                      stdname <- paste(strtoi(leadtimes), collapse=", ")})
+  dim(time) <- length(time)
+  sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting
+  metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'),
+                               calendar = calendar))
+  attr(time, 'variables') <- metadata
+  names(dim(time)) <- 'time'
+  times$time <- time
+  # Generate time_bnds dimensions and the corresponding metadata.
+  if (!is.null(data_cube$attrs$time_bounds)) {
+    time_bounds <- lapply(data_cube$attrs$time_bounds,
+                          function(x) {
+                            y <- as.PCICt(ClimProjDiags::Subset(x,
+                                                                along = 'syear',
+                                                                indices = 1,
+                                                                drop = 'non-selected'),
+                                          cal = calendar)
+                            y <- as.numeric(y - init_date)/3600
+                            return(y)
+                          })
+    time_bnds <- abind(time_bounds, along = 0)
+    names(dim(time_bnds)) <- c("bnds", "time")
+    metadata <- list(time_bnds = list(units = paste0(ref, sdate, 'T00:00:00'),
+                                      calendar = calendar,
+                                      long_name = "time bounds"))
+    attr(time_bnds, 'variables') <- metadata
+    # Point the 'bounds' attribute of time at the time_bnds variable; set it
+    # on times$time directly so the change is not lost on a local copy
+    attr(times$time, 'variables')$bounds <- "time_bnds"
+    times$time_bnds <- time_bnds
+  }
+  return(times)
+}
+
diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R
index 1eb58b937b966909957f365c60f8666feaa57cc3..47310bf0e5b80d3aa67c064a654192bfe603f0ec 100644
--- a/modules/Saving/R/save_corr.R
+++ b/modules/Saving/R/save_corr.R
@@ -31,9 +31,6 @@ save_corr <- function(recipe,
     calendar <- archive$System[[global_attributes$system]]$calendar
   }
 
-  # Generate vector containing leadtimes
-  dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1),
-                    cal = calendar)
   if (fcst.horizon == 'decadal') {
     init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month
     init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-',
@@ -45,9 +42,6 @@ save_corr <- function(recipe,
                           format = '%Y%m%d', cal = calendar)
   }
 
-  # Get time difference in hours
-  leadtimes <- as.numeric(dates - init_date)/3600
-
   # Select start date
   # If a fcst is provided, use that as the ref. year. Otherwise use 1970.
if (fcst.horizon == 'decadal') { @@ -67,8 +61,7 @@ save_corr <- function(recipe, } } - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) # Loop over variable dimension for (var in 1:data_cube$dims[['var']]) { @@ -115,14 +108,13 @@ save_corr <- function(recipe, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- .get_countries(grid) - ArrayToNc(append(country, time, subset_skill), outfile) + ArrayToNc(append(country, times, subset_skill), outfile) } else { latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, subset_skill) + vars <- c(latlon, times, subset_skill) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R index f7afa73cbbb4a8b1b47c086a8ba7c38e3c2a32fb..6e3bc906d5490bf32ad9631370c37e205c4dc4fd 100644 --- a/modules/Saving/R/save_forecast.R +++ b/modules/Saving/R/save_forecast.R @@ -25,43 +25,28 @@ save_forecast <- function(recipe, } else { calendar <- archive$System[[global_attributes$system]]$calendar } - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + + # Compute initial date if (fcst.horizon == 'decadal') { - ## Method 1: Use the first date as init_date. But it may be better to use - ## the real initialized date (ask users) - # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month if (type == 'hcst') { - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start) + 1, '-01-01'), cal = calendar) } else if (type == 'fcst') { - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$fcst_year)+1, + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$fcst_year) + 1, '-01-01'), cal = calendar) } } else { if (type == 'hcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), + recipe$Analysis$Time$sdate), format = '%Y%m%d', cal = calendar) } else if (type == 'fcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate), + recipe$Analysis$Time$sdate), format = '%Y%m%d', cal = calendar) } } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) # expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) @@ -132,8 +117,7 @@ save_forecast <- function(recipe, } # Get time dimension values and metadata - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) # Generate name of output file outfile <- get_filename(outdir, recipe, 
variable, fcst.sdate, @@ -142,14 +126,13 @@ save_forecast <- function(recipe, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) + ArrayToNc(append(country, times, fcst), outfile) } else { latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) + vars <- c(latlon, times, fcst) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 800004d7d99908bd9c741948af704188ff5e87f3..db7ceecacd0f6e8af750fae45147e6dcbf07f506 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -13,6 +13,7 @@ save_metrics <- function(recipe, dictionary <- read_yaml("conf/variable-dictionary.yml") global_attributes <- .get_global_attributes(recipe, archive) + time_bounds <- NULL ## TODO: Sort out the logic once default behavior is decided if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { @@ -25,7 +26,7 @@ save_metrics <- function(recipe, # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - if (global_attributes$system == 'Multimodel'){ + if (global_attributes$system == 'Multimodel') { if (fcst.horizon == 'decadal'){ calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar } else { @@ -34,34 +35,23 @@ save_metrics <- function(recipe, } else { calendar <- archive$System[[global_attributes$system]]$calendar } - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - - if (fcst.horizon == 'decadal') { + if (fcst.horizon == 'decadal') { # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', # sprintf('%02d', init_month), '-01'), # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, '-01-01'), cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), format = '%Y%m%d', cal = calendar) } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - # Select start date # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
  if (fcst.horizon == 'decadal') {
    if (!is.null(recipe$Analysis$Time$fcst_year)) {
      #PROBLEM: May be more than one fcst_year
-      # fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1],
-      #                      sprintf('%02d', init_month), '01')
      fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1])
    } else {
      fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01')
@@ -75,15 +65,14 @@ save_metrics <- function(recipe,
     }
   }
 
-  times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar)
-  time <- times$time
+  times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date)
 
   # Loop over variable dimension
   for (var in 1:data_cube$dims[['var']]) {
     # Subset skill arrays
     subset_metric <- lapply(metrics, function(x) {
                             ClimProjDiags::Subset(x, along = 'var',
-                                                  indices = var,
-                                                  drop = 'selected')})
+                                                  indices = var,
+                                                  drop = 'selected')})
     # Generate name of output file
     variable <- data_cube$attrs$Variable$varName[[var]]
     outdir <- get_dir(recipe = recipe, variable = variable)
@@ -95,9 +84,16 @@ save_metrics <- function(recipe,
       if (any('syear' %in% names(dim(subset_metric[[i]])))) {
         sdate_dim_save = 'syear'
         dates <- data_cube$attrs$Dates
+        time_bounds <- data_cube$attrs$time_bounds
       } else {
         sdate_dim_save = NULL
         dates <- Subset(data_cube$attrs$Dates, along = 'syear', indices = 1)
+        if (!is.null(data_cube$attrs$time_bounds)) {
+          time_bounds <- lapply(data_cube$attrs$time_bounds,
+                                FUN = function (x) {
+                                  Subset(x, along = 'syear', indices = 1)
+                                })
+        }
       }
       ## TODO: Maybe 'scorecards' condition could go here to further simplify
       ## the code
@@ -106,7 +102,8 @@ save_metrics <- function(recipe,
         SaveExp(data = subset_metric[[i]], destination = outdir,
                 Dates = dates,
                 coords = c(data_cube$coords['longitude'],
-                           data_cube$coords['latitude']),
+                           data_cube$coords['latitude']),
+                time_bounds = time_bounds,
                 varname = names(subset_metric)[[i]],
                 metadata = data_cube$attrs$Variable$metadata, Datasets = NULL,
                 startdates = NULL, dat_dim = NULL, sdate_dim = sdate_dim_save,
@@ -149,12 +146,13 @@ save_metrics <- function(recipe,
       # Get grid data and metadata and export to netCDF
       if (tolower(agg) == "country") {
         country <- get_countries(grid)
-        ArrayToNc(append(country, time, subset_metric), outfile)
+        ArrayToNc(append(country, times$time, subset_metric), outfile)
       } else if (tolower(agg) == "region") {
-        region <- array(1:dim(metrics[[1]])['region'], c(dim(metrics[[1]])['region']))
+        region <- list(region = array(1:dim(metrics[[1]])['region'],
+                                      c(dim(metrics[[1]])['region'])))
         ## TODO: check metadata when more than 1 region is stored in the data array
         attr(region, 'variables') <- data_cube$attrs$Variable$metadata['region']
-        vars <- list(region, time)
+        vars <- c(region, times)
         vars <- c(vars, subset_metric)
         ArrayToNc(vars, outfile)
       } else {
@@ -162,13 +160,12 @@ save_metrics <- function(recipe,
         latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
         longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
         latlon <- .get_latlon(latitude, longitude)
         # Compile variables into a list and export to netCDF
-        vars <- list(latlon$lat, latlon$lon, time)
-        vars <- c(vars, subset_metric)
+        vars <- c(latlon, times, subset_metric)
         ArrayToNc(vars, outfile)
       }
     }
   }
   info(recipe$Run$logger,
-       paste("#####", toupper(module), " METRICS SAVED TO NETCDF FILE #####"))
+       paste("#####", toupper(module), "METRICS SAVED TO NETCDF FILE #####"))
 }
diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R
index 0f4d70a17a6eaf4e56070f1484dddbbb9f6d274c..794697190903fdd584867e6a5695a522f5a3bdc1 100644
--- a/modules/Saving/R/save_observations.R
+++ b/modules/Saving/R/save_observations.R
@@ -17,10 +17,7 @@ save_observations <- function(recipe,
   store.freq <- recipe$Analysis$Variables$freq
   calendar <- archive$Reference[[global_attributes$reference]]$calendar
 
-  # Generate vector containing leadtimes
   ## TODO: Move to a separate function?
-  dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1),
-                    cal = calendar)
   if (fcst.horizon == 'decadal') {
     ## Method 1: Use the first date as init_date. But it may be better to use
     ## the real initialized date (ask users)
@@ -37,8 +34,6 @@ save_observations <- function(recipe,
                           recipe$Analysis$Time$sdate),
                           format = '%Y%m%d', cal = calendar)
   }
-  # Get time difference in hours
-  leadtimes <- as.numeric(dates - init_date)/3600
 
   syears <- seq(1:dim(data_cube$data)['syear'][[1]])
   ## expect dim = [sday = 1, sweek = 1, syear, time]
@@ -120,8 +115,7 @@ save_observations <- function(recipe,
     fcst.sdate <- format(fcst.sdate, format = '%Y%m%d')
 
     # Get time dimension values and metadata
-    times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar)
-    time <- times$time
+    times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date)
 
     # Generate name of output file
     outfile <- get_filename(outdir, recipe, variable,
@@ -130,14 +124,13 @@ save_observations <- function(recipe,
     # Get grid data and metadata and export to netCDF
     if (tolower(agg) == "country") {
       country <- get_countries(grid)
-      ArrayToNc(append(country, time, fcst), outfile)
+      ArrayToNc(append(country, times$time, fcst), outfile)
     } else {
       latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
       longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
       latlon <- .get_latlon(latitude, longitude)
       # Compile variables into a list and export to netCDF
-      vars <- list(latlon$lat, latlon$lon, time)
-      vars <- c(vars, fcst)
+      vars <- c(latlon, times, fcst)
      ArrayToNc(vars, outfile)
     }
   }
diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R
index 7b68b2c27e14b4181f3af6b031ad72d51aa83e83..d5dfae16f16346242d8bf1b744d536164fb3e30d 100644
--- a/modules/Saving/R/save_percentiles.R
+++ b/modules/Saving/R/save_percentiles.R
@@ -34,8 +34,6 @@ save_percentiles <- function(recipe,
   }
 
-  # Generate vector containing leadtimes
-  dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1),
-                    cal = calendar)
   if (fcst.horizon == 'decadal') {
     # if (global_attributes$system == 'Multimodel') {
     #   init_month <- 11 #TODO: put as if init_month is January
@@ -53,9 +51,6 @@ save_percentiles <- function(recipe,
                           format = '%Y%m%d', cal = calendar)
   }
 
-  # Get time difference in hours
-  leadtimes <- as.numeric(dates - init_date)/3600
-
   # Select start date
   # If a fcst is provided, use that as the ref. year. Otherwise use 1970.
if (fcst.horizon == 'decadal') { @@ -75,8 +70,7 @@ save_percentiles <- function(recipe, fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) } } - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) for (var in 1:data_cube$dims[['var']]) { # Subset arrays @@ -124,8 +118,7 @@ save_percentiles <- function(recipe, longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, subset_percentiles) + vars <- c(latlon, times, subset_percentiles) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index 76a5a54c397e63ae3a367402006d4016dbef5301..a9ddc977e231896de23dc7a49dd7dac4386c45f2 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -39,10 +39,6 @@ save_probabilities <- function(recipe, calendar <- archive$System[[global_attributes$system]]$calendar } - # Generate vector containing leadtimes - ## TODO: Move to a separate function? - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) if (fcst.horizon == 'decadal') { # if (global_attributes$system == 'Multimodel') { # init_month <- 11 #TODO: put as if init_month is January @@ -60,9 +56,6 @@ save_probabilities <- function(recipe, format = '%Y%m%d', cal = calendar) } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) @@ -119,8 +112,7 @@ save_probabilities <- function(recipe, } # Get time dimension values and metadata - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) # Generate name of output file outfile <- get_filename(outdir, recipe, variable, fcst.sdate, agg, @@ -129,14 +121,13 @@ save_probabilities <- function(recipe, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, probs_syear), outfile) + ArrayToNc(append(country, times, probs_syear), outfile) } else { latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, probs_syear) + vars <- c(latlon, times, probs_syear) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 5ce9857524800d84c45ff5a41a2d4c24b47335c1..e9e4a5b408cacf3638953c021dd557c69f54e81a 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -10,7 +10,7 @@ source("modules/Saving/R/save_metrics.R") source("modules/Saving/R/save_corr.R") source("modules/Saving/R/save_probabilities.R") source("modules/Saving/R/save_percentiles.R") -source("modules/Saving/R/get_time.R") +source("modules/Saving/R/get_times.R") source("modules/Saving/R/get_latlon.R") source("modules/Saving/R/get_global_attributes.R") source("modules/Saving/R/drop_dims.R") diff --git a/modules/Visualization/R/plot_ensemble_mean.R 
b/modules/Visualization/R/plot_ensemble_mean.R
index 5988186458fe9a469e86e1c73c887e684f107358..ff91adf4cef2d5b453f5a6fa61ca06d7310883e8 100644
--- a/modules/Visualization/R/plot_ensemble_mean.R
+++ b/modules/Visualization/R/plot_ensemble_mean.R
@@ -118,8 +118,30 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
     }
     toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
                        "\n", "Forecast Ensemble Mean / ", "Init.: ", i_syear)
-    months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ],
-                               label = T, abb = F)
+    if (is.null(attributes(fcst$attrs$time_bounds))) {
+      months <- lubridate::month(
+                  fcst$attrs$Dates[1, 1, which(start_date == i_syear), ],
+                  label = T, abb = F)
+    } else {
+      if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) {
+        months <- unlist(
+          lapply(1:length(fcst$attrs$time_bounds$start), function(i) {
+            ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i]
+            ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i]
+            # labels for file name:
+            ftime <- paste0(ftime_ini, "-", ftime_end)
+            # title names:
+            ftime_ini <- init_month + ftime_ini - 1
+            ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini)
+            ftime_ini <- month.name[ftime_ini]
+            ftime_end <- init_month + ftime_end - 1
+            ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end)
+            ftime_end <- month.name[ftime_end]
+            paste(ftime_ini, "to", ftime_end)}))
+      } else {
+        months <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]]
+      }
+    }
     years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ])
 
     if (recipe$Analysis$Workflow$Visualization$multi_panel) {
@@ -174,11 +196,15 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
       # Loop over forecast times
      for (i in 1:length(months)) {
         # Get forecast time label
-        forecast_time <- match(months[i], month.name) - init_month + 1
-        if (forecast_time < 1) {
-          forecast_time <- forecast_time + 12
+        if (is.null(attributes(fcst$attrs$time_bounds))) {
+          forecast_time <- match(months[i], month.name) - init_month + 1
+          if (forecast_time < 1) {
+            forecast_time <- forecast_time + 12
+          }
+          forecast_time <- sprintf("%02d", forecast_time)
+        } else {
+          forecast_time <- months[i]
         }
-        forecast_time <- sprintf("%02d", forecast_time)
         # Get mask subset
         if (!is.null(mask)) {
           mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE)
@@ -191,14 +217,23 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
         } else {
           dots_i <- NULL
         }
-        # Define plot title
-        toptitle <- paste0(system_name, " / ",
-                           str_to_title(var_long_name),
-                           "\n", "Ensemble Mean / ",
-                           months[i], " ", years[i],
-                           " / Start date: ",
-                           format(as.Date(i_syear, format="%Y%m%d"),
-                                  "%d-%m-%Y"))
+        # Define plot title
+        if (tolower(recipe$Analysis$Horizon) == 'seasonal') {
+          toptitle <- paste0(system_name, " / ",
+                             str_to_title(var_long_name),
+                             "\n", "Ensemble Mean / ",
+                             months[i], " ", years[i],
+                             " / Start date: ",
+                             format(as.Date(i_syear, format="%Y%m%d"),
+                                    "%d-%m-%Y"))
+        } else {
+          toptitle <- paste0(system_name, " / ",
+                             str_to_title(var_long_name),
+                             "\n", "Ensemble Mean / ",
+                             months[i],
+                             " / Start date: ",
+                             i_syear)
+        }
         # Define caption
         if (identical(fun, PlotRobinson)) {
           ## TODO: Customize technical details
@@ -209,7 +244,17 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
         }
         # Modify base arguments
         base_args[[1]] <- i_var_ens_mean[i, , ]
-        fileout <- paste0(outfile, "_ft",
sprintf("%02d", i), ".pdf") + if (is.null(attributes(fcst$attrs$time_bounds))) { + fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") + } else { + if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { + fileout <- paste0(outfile, "_ft", + attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", + attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + } else { + fileout <- paste0(outfile, "_ft", months[i], ".pdf") + } + } base_args$mask <- mask_i base_args$dots <- dots_i # Plot diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 92b707fa9da431075e906c9f8ba6e7941a2db235..2df06e8bdf87027e3a42da24b78821c5bdc1de82 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -172,7 +172,6 @@ plot_metrics <- function(recipe, data_cube, metrics, col_sup <- NULL } - # Reorder dimensions metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, @@ -206,7 +205,23 @@ plot_metrics <- function(recipe, data_cube, metrics, # Define titles toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", display_name, " / ", hcst_period) - titles <- as.vector(months) + ## time_bounds in data_cube to know if Time_aggregation was applied + if (!is.null(attributes(data_cube$attrs$time_bounds))) { + info(recipe$Run$logger, "Using plotting attrs from time_bounds.") + if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) { + titles <- unlist( + lapply(1:length(attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime), + function(x) { + paste("Forecast time", + attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime[x], + "to", + attributes(data_cube$attrs$time_bounds)$plotting_attr$end_ftime[x])})) + } else { + titles <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]] + } + } else { + titles <- as.vector(months) + } ## TODO: Combine PlotLayout with PlotRobinson? 
suppressWarnings(
      PlotLayout(PlotEquiMap, c('longitude', 'latitude'),
@@ -266,17 +281,46 @@ plot_metrics <- function(recipe, data_cube, metrics,
       # Loop over forecast times
       for (i in 1:dim(metric)[['time']]) {
         # Get forecast time label
-        forecast_time <- match(months[i], month.name) - init_month + 1
+        # Case without time aggregation:
+        if (is.null(attributes(data_cube$attrs$time_bounds))) {
+          forecast_time <- match(months[i], month.name) - init_month + 1
 
-        if (forecast_time < 1) {
-          forecast_time <- forecast_time + 12
+          if (forecast_time < 1) {
+            forecast_time <- forecast_time + 12
+          }
+          forecast_time <- sprintf("%02d", forecast_time)
+          # Define plot title
+          toptitle <- paste(system_name, "/",
+                            str_to_title(var_long_name),
+                            "\n", display_name, "/", months[i], "/",
+                            hcst_period)
+        } else {
+          if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) {
+            forecast_time_ini <- attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i]
+            forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i]
+            # labels for file name:
+            forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end)
+            # title names:
+            forecast_time_ini <- init_month + forecast_time_ini - 1
+            forecast_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini)
+            forecast_time_ini <- month.name[forecast_time_ini]
+            forecast_time_end <- init_month + forecast_time_end - 1
+            forecast_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end)
+            forecast_time_end <- month.name[forecast_time_end]
+            toptitle <- paste(system_name, "/",
+                              str_to_title(var_long_name),
+                              "\n", display_name, "/", forecast_time_ini, "to",
+                              forecast_time_end, "/",
+                              hcst_period)
+          } else {
+            forecast_time <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]][i]
+            toptitle <- paste(system_name, "/",
+                              str_to_title(var_long_name),
+                              "\n", display_name, "/",
+                              forecast_time, "/",
+                              hcst_period)
+          }
         }
-        forecast_time <- sprintf("%02d", forecast_time)
-        # Define plot title
-        toptitle <- paste(system_name, "/",
-                          str_to_title(var_long_name),
-                          "\n", display_name, "/", months[i], "/",
-                          hcst_period)
         # Modify base arguments
         base_args[[1]] <- metric[i, , ]
         if (!is.null(metric_significance)) {
diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R
index c9ce70f23030a577165359c1ec6a26b7e7eab725..739f2d10535191d39175abe11c78016526e1bf6e 100644
--- a/modules/Visualization/R/plot_most_likely_terciles_map.R
+++ b/modules/Visualization/R/plot_most_likely_terciles_map.R
@@ -42,7 +42,6 @@ plot_most_likely_terciles <- function(recipe,
     ## TODO: Sort out decadal initial month (is it always January?)
    init_month <- 1
   }
-
   # Retrieve and rearrange probability bins for the forecast
   if (is.null(probabilities$probs_fcst$prob_b33) ||
       is.null(probabilities$probs_fcst$prob_33_to_66) ||
@@ -113,8 +112,31 @@ plot_most_likely_terciles <- function(recipe,
     toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
                        "\n", "Most Likely Tercile / Initialization: ", i_syear)
-    months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ],
-                               label = T, abb = F,locale = "en_GB")
+    if (is.null(attributes(fcst$attrs$time_bounds))) {
+      months <- lubridate::month(
+                  fcst$attrs$Dates[1, 1, which(start_date == i_syear), ],
+                  label = T, abb = F)
+    } else {
+      if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) {
+        months <- unlist(
+          lapply(1:length(fcst$attrs$time_bounds$start), function(i) {
+            ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i]
+            ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i]
+            # labels for file name:
+            ftime <- paste0(ftime_ini, "-", ftime_end)
+            # title names:
+            ftime_ini <- init_month + ftime_ini - 1
+            ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini)
+            ftime_ini <- month.name[ftime_ini]
+            ftime_end <- init_month + ftime_end - 1
+            ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end)
+            ftime_end <- month.name[ftime_end]
+            paste(ftime_ini, "to", ftime_end)}))
+      } else {
+        months <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]]
+      }
+    }
+
+    years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ])
     if (recipe$Analysis$Workflow$Visualization$multi_panel) {
       ## TODO: Ensure this works for daily and sub-daily cases
@@ -170,11 +192,16 @@ plot_most_likely_terciles <- function(recipe,
       base_args[names(output_configuration)] <- output_configuration
       for (i in 1:length(months)) {
         # Get forecast time label
-        forecast_time <- match(months[i], month.name) - init_month + 1
-        if (forecast_time < 1) {
-          forecast_time <- forecast_time + 12
+        if (is.null(attributes(fcst$attrs$time_bounds))) {
+          forecast_time <- match(months[i], month.name) - init_month + 1
+          if (forecast_time < 1) {
+            forecast_time <- forecast_time + 12
+          }
+          forecast_time <- sprintf("%02d", forecast_time)
+        } else {
+          forecast_time <- months[i]
         }
-        forecast_time <- sprintf("%02d", forecast_time)
+
         # Get mask subset
         if (!is.null(mask)) {
           mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE)
@@ -188,22 +215,42 @@ plot_most_likely_terciles <- function(recipe,
           dots_i <- NULL
         }
         # Define plot title
-        toptitle <- paste0(system_name, " / ",
-                           str_to_title(var_long_name),
-                           "\n", "Most Likely Tercile / ",
-                           months[i], " ", years[i],
-                           " / Start date: ",
-                           format(as.Date(i_syear, format="%Y%m%d"),
-                                  "%d-%m-%Y"))
+        if (tolower(recipe$Analysis$Horizon) == 'seasonal') {
+          toptitle <- paste0(system_name, " / ",
+                             str_to_title(var_long_name),
+                             "\n", "Most Likely Tercile / ",
+                             months[i], " ", years[i],
+                             " / Start date: ",
+                             format(as.Date(i_syear, format="%Y%m%d"),
+                                    "%d-%m-%Y"))
+        } else {
+          toptitle <- paste0(system_name, " / ",
+                             str_to_title(var_long_name),
+                             "\n", "Most Likely Tercile / ",
+                             months[i],
+                             " / Start date: ",
+                             i_syear)
+        }
         # Plot
-        fileout <- paste0(outfile, "_ft", forecast_time, ".pdf")
+        if (is.null(attributes(fcst$attrs$time_bounds))) {
+          fileout <- paste0(outfile, "_ft", forecast_time, ".pdf")
+        } else {
+          if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) {
+            fileout <- paste0(outfile, "_ft",
+                              attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-",
+                              attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf")
+          } else {
+            fileout <- paste0(outfile, "_ft", months[i], ".pdf")
+          }
+        }
+
+        base_args$probs <- i_var_probs[i, , , ]
         base_args$mask <- mask_i
         base_args$dots <- dots_i
         cb_info <- do.call(PlotMostLikelyQuantileMap,
-                          args = c(base_args,
-                                   list(toptitle = toptitle,
-                                        fileout = fileout)))
+                           args = c(base_args,
+                                    list(toptitle = toptitle,
+                                         fileout = fileout)))
         # Add color bars with 1 range for normal category:
         for (i_bar in 1:cb_info$nmap) {
           tmp <- cb_info
diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml
index 26312b34d1127af7585fb51c9348763e47643a79..2028fc468766a74bdc32b2ea70e60f89b637bcaa 100644
--- a/recipes/atomic_recipes/recipe_decadal.yml
+++ b/recipes/atomic_recipes/recipe_decadal.yml
@@ -18,7 +18,7 @@ Analysis:
     hcst_start: 1990
     hcst_end: 1993
 #    season: 'Annual'
-    ftime_min: 2
+    ftime_min: 1
     ftime_max: 24
   Region:
     latmin: 10 #-90
@@ -29,6 +29,15 @@ Analysis:
     method: bilinear
     type: to_system #to_reference
   Workflow:
+    Time_aggregation:
+      execute: yes
+      method: 'average'
+      user_def:
+        JJA: !expr sort(c(seq(6, 24, 12), seq(7, 24, 12), seq(8, 24, 12)))
+        MAM: !expr sort(c(seq(3, 24, 12), seq(4, 24, 12), seq(5, 24, 12)))
+        SON: !expr sort(c(seq(9, 24, 12), seq(10, 24, 12), seq(11, 24, 12)))
+        DJF: !expr sort(c(seq(12, 24, 12), seq(13, 24, 12), seq(2, 24, 12)))
+        Y1: !expr 1:12
     Anomalies:
       compute: no
       cross_validation:
@@ -37,7 +46,7 @@ Analysis:
       method: bias
       save: 'all'
     Skill:
-      metric: RPSS Corr
+      metric: RPSS EnsCorr Mean_Bias
       save: 'all'
     Probabilities:
       percentiles: [[1/3, 2/3]]
@@ -46,12 +55,13 @@ Analysis:
       index: FALSE
     Visualization:
       plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles
-  ncores: # Optional, int: number of cores, defaults to 1
-  remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE
+      file_format: PNG
+  ncores: 4 # Optional, int: number of cores, defaults to 1
+  remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE
   Output_format: S2S4E
 Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/
-  code_dir: /esarchive/scratch/aho/git/auto-s2s/
+  output_dir: /esarchive/scratch/nperez/
+  code_dir: /esarchive/scratch/nperez/git4/sunset/
 
diff --git a/recipes/examples/recipe_prlr_seasonal_timeagg.yml b/recipes/examples/recipe_prlr_seasonal_timeagg.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e03428ac44147c68af42d3bdfa0c120fe07d8c75
--- /dev/null
+++ b/recipes/examples/recipe_prlr_seasonal_timeagg.yml
@@ -0,0 +1,63 @@
+Description:
+  Author: nperez
+  Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, prlr)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal
+  Variables:
+    name: prlr
+    freq: monthly_mean
+    units: mm
+  Datasets:
+    System:
+      name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s
+    Multimodel: no # Mandatory, bool: Either yes/true or no/false
+    Reference:
+      name: ERA5 # Mandatory, str: Reference codename. See docu.
+ Time: + sdate: '0601' ## MMDD + fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '2014' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 30 # Mandatory, int: minimum latitude + latmax: 50 # Mandatory, int: maximum latitude + lonmin: -10 # Mandatory, int: minimum longitude + lonmax: 10 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss bss10 bss90 + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/nperez/cs_oper/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ diff --git a/recipes/examples/recipe_tas_seasonal_timeagg.yml b/recipes/examples/recipe_tas_seasonal_timeagg.yml new file mode 100644 index 0000000000000000000000000000000000000000..b62ac79af1dda1461998bfb98972a2fa69423831 --- /dev/null +++ b/recipes/examples/recipe_tas_seasonal_timeagg.yml @@ -0,0 +1,70 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: K + Datasets: + System: + name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0601' ## MMDD + fcst_year: '2023' # Optional, int: Forecast year 'YYYY' + hcst_start: '2009' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 30 # Mandatory, int: minimum latitude + latmax: 50 # Mandatory, int: maximum latitude + lonmin: -10 # Mandatory, int: minimum longitude + lonmax: 10 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 3, 2] + end: [3, 6, 4] + Calibration: + execute: yes + method: evmos # Mandatory, str: Calibration method. 
See docu.
+    cross_validation: yes
+    save: none
+  Skill:
+    metric: mean_bias EnsCorr rpss crpss bss10 bss90
+    save: 'all'
+    cross_validation: yes
+  Probabilities:
+    percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds.
+    save: 'all'
+  Indicators:
+    index: no
+  Visualization:
+    plots: skill_metrics forecast_ensemble_mean most_likely_terciles
+    multi_panel: no
+    mask_terciles: both
+    file_format: PNG
+  ncores: 4 # Optional, int: number of cores, defaults to 1
+  remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: scorecards
+  logo: yes
+Run:
+  Loglevel: INFO
+  Terminal: TRUE
+  output_dir: /esarchive/scratch/nperez/cs_oper/
+  code_dir: /esarchive/scratch/nperez/git/s2s-suite/
diff --git a/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml b/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0cd5746f5d0a7ce454e710bda3467aa31aacd8f4
--- /dev/null
+++ b/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml
@@ -0,0 +1,61 @@
+Description:
+  Author: V. Agudetse
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: Meteo-France-System7
+    Multimodel: False
+    Reference:
+      name: ERA5
+  Time:
+    sdate: '1101'
+    fcst_year: '2020'
+    hcst_start: '1993'
+    hcst_end: '1996'
+    ftime_min: 1
+    ftime_max: 4
+  Region:
+    latmin: 17
+    latmax: 20
+    lonmin: 12
+    lonmax: 15
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    # Anomalies:
+    #   compute: no
+    #   cross_validation:
+    #   save: 'none'
+    Time_aggregation:
+      execute: yes # do we need this?
+      method: average # or 'accumulated'
+      ini: [1, 2] # aggregate from ftime 1 to 3 and from ftime 2 to 4
+      end: [3, 4]
+
+    Calibration:
+      method: mse_min
+      save: 'all'
+    Skill:
+      metric: RPSS CRPSS EnsCorr Corr_individual_members Enscorr_specs
+      save: 'all'
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10]]
+      save: 'all'
+    Indicators:
+      index: no
+    Visualization:
+      plots: skill_metrics most_likely_terciles forecast_ensemble_mean
+      multi_panel: yes
+      projection: Robinson
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: ./tests/out-logs/
+  code_dir: /esarchive/scratch/nperez/git4/sunset/
diff --git a/tests/testthat/test-seasonal_monthly_timeagg.R b/tests/testthat/test-seasonal_monthly_timeagg.R
new file mode 100644
index 0000000000000000000000000000000000000000..21977d9c131e709b2d7423b9890e5869e5f7384d
--- /dev/null
+++ b/tests/testthat/test-seasonal_monthly_timeagg.R
@@ -0,0 +1,209 @@
+context("Seasonal monthly data with time aggregation")
+
+source("modules/Loading/Loading.R")
+source("modules/Calibration/Calibration.R")
+source("modules/Skill/Skill.R")
+source("modules/Saving/Saving.R")
+source("modules/Visualization/Visualization.R")
+source("modules/Aggregation/Aggregation.R")
+
+recipe_file <- "tests/recipes/recipe-seasonal_monthly_1_timeagg.yml"
+recipe <- prepare_outputs(recipe_file, disable_checks = F)
+archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive
+
+# Load datasets
+suppressWarnings({invisible(capture.output(
+data <- Loading(recipe)
+))})
+
+# Aggregated data
+suppressWarnings({invisible(capture.output(
+aggregated_data <- Aggregation(recipe, data)
+))})
+
+# Calibrate data
+suppressWarnings({invisible(capture.output(
+calibrated_data <- Calibration(recipe, aggregated_data)
+))})
+
+# Compute skill metrics
+suppressWarnings({invisible(capture.output(
+skill_metrics <- Skill(recipe, calibrated_data)
+))})
+
+suppressWarnings({invisible(capture.output(
+probs <- Probabilities(recipe, calibrated_data) +))}) + +# Saving +suppressWarnings({invisible(capture.output( +Saving(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +Visualization(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T) +))}) +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) + +# ------- TESTS -------- + +test_that("1. Time Aggregation", { +expect_equal( +is.list(aggregated_data), +TRUE +) +expect_equal( +"time_bounds" %in% names(aggregated_data$hcst$attrs), +TRUE +) +expect_equal( +is.list(aggregated_data$hcst$attrs$time_bounds), +TRUE +) +expect_equal( +names(aggregated_data$hcst$attrs$time_bounds), +c("start", "end") +) +expect_equal( +dim(aggregated_data$hcst$attrs$time_bounds$start), +dim(aggregated_data$hcst$attrs$time_bounds$end) +) +expect_equal( +dim(aggregated_data$hcst$attrs$time_bounds$start), +dim(aggregated_data$hcst$attrs$Dates) +) +expect_equal( +"plotting_attr" %in% names(attributes(aggregated_data$hcst$attrs$time_bounds)), +TRUE +) +expect_equal( +attributes(aggregated_data$hcst$attrs$time_bounds)$plotting_attr$ini_ftime, +c(1,2) +) +expect_equal( +attributes(aggregated_data$hcst$attrs$time_bounds)$plotting_attr$end_ftime, +c(3,4) +) +expect_equal( +names(aggregated_data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(aggregated_data$hcst), +"s2dv_cube" +) +expect_equal( +class(aggregated_data$fcst), +"s2dv_cube" +) +expect_equal( +dim(aggregated_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 2, latitude = 3, longitude = 3, ensemble = 25) +) +expect_equal( +dim(aggregated_data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 2, latitude = 3, longitude = 3, ensemble = 51) +) +expect_equal( +mean(aggregated_data$fcst$data), +291.6194, +tolerance = 0.0001 +) +expect_equal( +mean(aggregated_data$hcst$data), +290.6241, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(aggregated_data$hcst$data)[1, , 2, 3, 4]), +c(290.5897, 290.2554), +tolerance = 0.0001 +) + +expect_equal( +range(aggregated_data$fcst$data), +c(287.4032, 296.3530), +tolerance = 0.0001 +) + +}) + + +#====================================== +test_that("3. Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", + "enscorr_significance", "corr_individual_members", "corr_individual_members_significance", + "enscorr_specs") +) +expect_equal( +class(skill_metrics$rpss), +"array" +) +expect_equal( +dim(skill_metrics$rpss), +c(var = 1, time = 2, latitude = 3, longitude = 3) +) +expect_equal( +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) +) +expect_equal( +as.vector(skill_metrics$rpss[, , 2, 3]), +c(-1.384229, -1.147657), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$rpss_significance[, , 2, 3]), +rep(FALSE, 2) +) + +}) + +test_that("4. 
Saving", { +outputs <- paste0(recipe$Run$output_dir, "/outputs/") +expect_equal( +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", + "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", + "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", + "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", + "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", + "tas-probs_20201101.nc", "tas-skill_month11.nc")), +TRUE +) +expect_equal( +length(list.files(outputs, recursive = T)), +17 +) + +}) + +test_that("5. Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") +expect_equal( +all(basename(list.files(plots, recursive = T)) %in% +c("crpss-november.pdf", "enscorr_specs-november.pdf", "enscorr-november.pdf", + "forecast_ensemble_mean-20201101.pdf", "forecast_most_likely_tercile-20201101.pdf", + "rpss-november.pdf")), +TRUE +) +expect_equal( +length(list.files(plots, recursive = T)), +6 +) + +}) + +# Delete files +unlink(recipe$Run$output_dir, recursive = T) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index be38ae7187dc4e1b5b2fb6ec0a2b189f82ae1503..dabd56335b66cc75877f66d3f1481955da806e5f 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -301,7 +301,91 @@ check_recipe <- function(recipe) { # --------------------------------------------------------------------- # WORKFLOW CHECKS # --------------------------------------------------------------------- - + # Time_Aggregation + if ("Time_aggregation" %in% names(recipe$Analysis$Workflow)) { + TIME_AGG_METHODS <- c("average", "accumulation") + if (!("execute" %in% names(recipe$Analysis$Workflow$Time_aggregation))) { + recipe$Analysis$Workflow$Time_aggregation$execute <- FALSE + } else { + if (recipe$Analysis$Workflow$Time_aggregation$execute) { + if (!("method" %in% names(recipe$Analysis$Workflow$Time_aggregation))) { + error(recipe$Run$logger, + "'Workflow:Time_aggregation:method' is not defined in the recipe.") + error_status <- TRUE + } else { + if (!tolower(recipe$Analysis$Workflow$Time_aggregation$method) %in% + TIME_AGG_METHODS) { + error(recipe$Run$logger, + paste0("Time_aggregation method should be one of: ", + paste(TIME_AGG_METHODS, collapse = ", "), ".")) + error_status <- TRUE + } + } + if ((any(!c('ini', 'end') %in% names(recipe$Analysis$Workflow$Time_aggregation)) && + !'user_def' %in% names(recipe$Analysis$Workflow$Time_aggregation))) { + error(recipe$Run$logger, + paste("Missing parameters in Time_aggregation section; please", + "add 'ini' and 'end' or 'user_def'.")) + error_status <- TRUE + } + if (all(c('ini', 'end') %in% + tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) { + if (!is.numeric(recipe$Analysis$Workflow$Time_aggregation$ini) || + !is.numeric(recipe$Analysis$Workflow$Time_aggregation$end)) { + error(recipe$Run$logger, + "ini and end in Time aggregation need to be numeric") + error_status <- TRUE + } else { + if (length(recipe$Analysis$Workflow$Time_aggregation$ini) != + length(recipe$Analysis$Workflow$Time_aggregation$end)) { + error(recipe$Run$logger, + paste("The numeber of ini and end parameters need to be", + "the same for Time aggregation")) + error_status <- TRUE + } + max_ind <- max(c(recipe$Analysis$Workflow$Time_aggregation$ini, + recipe$Analysis$Workflow$Time_aggregation$end)) + if (recipe$Analysis$Time$ftime_max < max_ind) { + error(recipe$Run$logger, + paste("The requested max forecast time is smaller than indices", + "requested for 
time aggregation")) + error_status <- TRUE + } + } + } + if ('user_def' %in% tolower(names(recipe$Analysis$Workflow$Time_aggregation))) { + if (all(c('ini', 'end') %in% + tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) { + recipe$Analysis$Workflow$Time_aggregation$user_def <- NULL + warn(recipe$Run$logger, + paste("Both 'ini' and 'end' and 'user_def' are defined under", + "'Time_aggregation'. Only 'ini' and 'end' indices will", + "be used.")) + } + if (is.list(recipe$Analysis$Workflow$Time_aggregation$user_def)) { + if (is.character(names(recipe$Analysis$Workflow$Time_aggregation$user_def))) { + if (!all(sapply(recipe$Analysis$Workflow$Time_aggregation$user_def, + is.numeric))) { + error(recipe$Run$logger, + paste0("'Time_aggregation:user_def' elements are ", + "expected to be numeric.")) + error_status <- TRUE + } + } else { + warn(recipe$Run$logger, + paste("No names provided to the list of 'Time_aggregation:user_def'", + "indices. Review plots and file names to make sure they", + "are correct.")) + } + } else { + error(recipe$Run$logger, + "'Time_aggregation:user_def' should be a named list.") + error_status <- TRUE + } + } + } + } + } # Calibration # If 'method' is FALSE/no/'none' or NULL, set to 'raw' ## TODO: Review this check diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 6c77e5cf7220ba20f9cdb0fe26277714f8b7af0e..16a34d321959d2bf0c3b3c20e23aea8f6ac35829 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -28,7 +28,7 @@ prepare_outputs <- function(recipe_file, # recipe_file: path to recipe YAML file # disable_checks: If TRUE, does not perform checks on recipe # disable_uniqueID: If TRUE, does not add a unique ID to output dir - recipe <- read_yaml(recipe_file) + recipe <- read_yaml(recipe_file, eval.exp = TRUE) recipe$recipe_path <- recipe_file recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) diff --git a/tools/read_atomic_recipe.R b/tools/read_atomic_recipe.R index fb26cb11a4e71b4b7059a48acccd0ec4c85d235d..3cb11e7ed6b1d3cb91a765debeaeebd2076ddedc 100644 --- a/tools/read_atomic_recipe.R +++ b/tools/read_atomic_recipe.R @@ -23,7 +23,7 @@ read_atomic_recipe <- function(recipe_file) { # recipe_file: path to recipe YAML file - recipe <- read_yaml(recipe_file) + recipe <- read_yaml(recipe_file, eval.exp = TRUE) recipe$recipe_path <- recipe_file recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) # Create log file for atomic recipe