diff --git a/.gitignore b/.gitignore
index d17d76340a88f111f22792959ec31a0e53d8a4bf..e11ba7d322dd439b07d98ef244a871c11ae75d9e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 out-logs/
 *.swp
 *.swo
-/modules/Calibration/test_victoria.R
 modules/Loading/testing_recipes/recipe_decadal_calendartest.yml
 modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml
 conf/vitigeoss-vars-dict.yml
diff --git a/OperationalCS.R b/OperationalCS.R
index 1e662d1bb0e6bc1b59634f7cd1b2b67d47636658..ec01a30ec482167ee7dbc07d31389086f7548721 100644
--- a/OperationalCS.R
+++ b/OperationalCS.R
@@ -22,7 +22,7 @@ log_file <- logger$logname
 logger <- logger$logger
 
 # Checks:
-verifications <- check_recipe(recipe, logger)
+verifications <- check_recipe(recipe, file = args[2], conf, logger)
 # Divide recipe into single verifications recipes:
 total_recipes <- divide_recipe(recipe, verifications, folder, logger)
 # Go to verification code:
diff --git a/conf/vars-dict.yml-OLD b/conf/vars-dict.yml-OLD
new file mode 100644
index 0000000000000000000000000000000000000000..04549d36001c848521f53fd704b752878b2eb862
--- /dev/null
+++ b/conf/vars-dict.yml-OLD
@@ -0,0 +1,114 @@
+
+vars:
+# ECVs
+  tas:
+    units: "°C"
+    longname: "Daily mean temperature at surface"
+    outname: ~
+  tasmin:
+    units: "°C"
+    longname: "Minimum daily temperature at surface"
+    outname: ~
+  tasmax:
+    units: "°C"
+    longname: "Maximum daily temperature at surface"
+    outname: ~
+  sfcwind:
+    units: "m/s"
+    longname: "Surface wind speed module"
+    outname: ~
+  rsds:
+    units: "W/m2"
+    longname: "Surface solar radiation downwards"
+    outname: ~
+  psl:
+    units: "hPa"
+    longname: "Mean sea level pressure"
+    outname: ~
+  prlr:
+    units: "mm"
+    longname: "Total precipitation"
+    outname: ~
+# CFs
+  cfwnd1:
+    units: "%"
+    longname: "Wind Capacity factor IEC1"
+    outname: ~
+  cfwnd2:
+    units: "%"
+    longname: "Wind Capacity factor IEC2"
+    outname: ~
+  cfwnd3:
+    units: "%"
+    longname: "Wind Capacity factor IEC3"
+    outname: ~
+  cfslr:
+    units: "%"
+    longname: "Solar Capacity factor"
+    outname: ~
+# Energy
+  edmnd:
+    units: "GW"
+    longname: "Electricity Demand"
+    outname: ~
+  wndpwo:
+    units: "GW"
+    longname: "Wind Power"
+    outname: ~
+  dmndnetwnd:
+    units: "GW"
+    longname: "Demand-net-Wind"
+    outname: ~
+# Indices
+  Spr32:
+    units: "days"
+    longname: >
+      Total count of days when daily maximum temp exceeded 32°C
+      from April 21st to June 21st
+    outname: ~
+  SU35:
+    units: "days"
+    longname: >
+      Total count of days when daily maximum temp exceeded 35°C
+      from June 21st to September 21st
+    outname: ~
+  SU36:
+    units: "days"
+    longname: >
+      Total count of days when daily maximum temp exceeded 36°C
+      from June 21st to September 21st
+    outname: ~
+  SU40:
+    units: "days"
+    longname: >
+      Total count of days when daily maximum temp exceeded 40°C
+      from June 21st to September 21st
+    outname: ~
+  GDD:
+    units: "days"
+    longname: >
+      The sum of the daily differences between daily mean
+      temperature and 10°C from April 1st to October 31st
+    outname: ~
+  GST:
+    units: "°C"
+    longname: "The average temperature from April 1st to October 31st"
+    outname: ~
+  SprTX:
+    units: "°C"
+    longname: "The average daily maximum temperature from April 1st to October 31st"
+    outname: ~
+  WSDI:
+    units: ""
+    longname: >
+      The total count of days with at least 6 consecutive days
+      when the daily temperature maximum exceeds its 90th percentile
+    outname: ~
+  SprR:
+    units: "mm"
+    longname: 'Total precipitation from April 21st to June 21st'
+    outname: ~
+  HarR:
+    units: "mm"
+    longname: 'Total precipitation from August 21st to September 21st'
+    outname: ~
diff --git a/modules/Loading/testing_recipes/wrong_recipe_example.yml b/modules/Loading/testing_recipes/wrong_recipe_example.yml
new file mode 100644
index 0000000000000000000000000000000000000000..12e2fc06ea95065fecdbbb8543057b5f3bd18f46
--- /dev/null
+++ b/modules/Loading/testing_recipes/wrong_recipe_example.yml
@@ -0,0 +1,44 @@
+Description:
+  Author: V. Agudetse
+  Info: Incomplete recipe with incorrect fields to test the recipe checker.
+
+Analysis:
+  Horizon: Seasoning
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Petaflops:
+    System:
+      name: system7c3s
+    Multimodel: False
+    Reference:
+      name: era5
+  Time:
+    sdate: '1101'
+    fcst_syear: '2020'
+    hcst_start: '1993'
+    hcst_end: '2016'
+    ftime_max: 6
+  Region:
+    latmax: 10
+    lonmin: 0
+    lonmax: 20
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    Calibration:
+      method:
+    Skill:
+      metric: RPS RPSS
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]]
+    Indicators:
+      index: no
+  ncores: 7
+  remove_NAs: yes
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/
diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R
index 2d6353fe67c78a78531ba80e3b953be064c81959..2d5a0a4e05956a3eb00c6179bd57bc9a34f69016 100644
--- a/modules/Saving/paths2save.R
+++ b/modules/Saving/paths2save.R
@@ -1,4 +1,6 @@
 ## TODO: Separate by time aggregation
+## TODO: Build a default path that accounts for:
+## variable, system, reference, start date and region name
 
 get_filename <- function(dir, recipe, var, date, agg, file.type) {
   # This function builds the path of the output file based on directory,
diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R
index b8541488c8540e61c694c42a0f36be60595699a9..b22eb07099732b08ab37abfc311e2c4a79eeff26 100644
--- a/modules/test_seasonal.R
+++ b/modules/test_seasonal.R
@@ -5,21 +5,25 @@ source("modules/Skill/Skill.R")
 source("modules/Saving/Saving.R")
 source("modules/Visualization/Visualization.R")
 
-recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml"
-recipe <- prepare_outputs(recipe_file)
+recipe_file <- "recipes/tests/recipe_seasonal_two-variables.yml"
+recipe <- prepare_outputs(recipe_file, disable_checks = T)
+atomic_recipes <- divide_recipe(recipe)
+## archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive
 
-# Load datasets
-data <- load_datasets(recipe)
-# Calibrate datasets
-calibrated_data <- calibrate_datasets(recipe, data)
-# Compute anomalies
-calibrated_data <- compute_anomalies(recipe, calibrated_data)
-# Compute skill metrics
-skill_metrics <- compute_skill_metrics(recipe, calibrated_data)
-# Compute percentiles and probability bins
-probabilities <- compute_probabilities(recipe, calibrated_data)
-# Export all data to netCDF
-save_data(recipe, calibrated_data, skill_metrics, probabilities)
-# Plot data
-plot_data(recipe, calibrated_data, skill_metrics, probabilities,
-          significance = T)
+for (atomic_recipe in atomic_recipes) {
+  # Load datasets
+  data <- load_datasets(atomic_recipe)
+  # Calibrate datasets
+  calibrated_data <- calibrate_datasets(atomic_recipe, data)
+  # Compute anomalies
+  calibrated_data <- compute_anomalies(atomic_recipe, calibrated_data)
+  # Compute skill metrics
skill_metrics <- compute_skill_metrics(atomic_recipe, calibrated_data) + # Compute percentiles and probability bins + probabilities <- compute_probabilities(atomic_recipe, calibrated_data) + # Export all data to netCDF + save_data(atomic_recipe, calibrated_data, skill_metrics, probabilities) + # Plot data + plot_data(atomic_recipe, calibrated_data, skill_metrics, probabilities, + significance = T) +} diff --git a/recipes/recipe_splitting_example.yml b/recipes/recipe_splitting_example.yml new file mode 100644 index 0000000000000000000000000000000000000000..e62611ab592a10dcadd1ef3d7660b567e7ae485f --- /dev/null +++ b/recipes/recipe_splitting_example.yml @@ -0,0 +1,60 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. Agudetse + Info: Test for recipe splitting + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? + - {name: tas, freq: monthly_mean} + - {name: prlr, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: system7c3s} + - {name: system5c3s} + Multimodel: False # single option + Reference: + - {name: era5} # multiple references for single model? + Time: + sdate: # list, split + - '1101' + - '1201' + fcst_year: '2020' # list, don't split, handled internally + hcst_start: '1993' # single option + hcst_end: '2016' # single option + ftime_min: 1 # single option + ftime_max: 6 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "global", latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + - {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: bilinear ## TODO: allow multiple methods? + type: to_system + Workflow: + Calibration: + method: mse_min ## TODO: list, split? + Skill: + metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + Indicators: + index: no # ? 
+ ncores: 7 + remove_NAs: yes # bool, don't split + Output_format: S2S4E # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/recipes/tests/execute_tests.R b/recipes/tests/old_tests/execute_tests.R similarity index 100% rename from recipes/tests/execute_tests.R rename to recipes/tests/old_tests/execute_tests.R diff --git a/recipes/tests/seasonal_testWorkflow1.yml b/recipes/tests/old_tests/seasonal_testWorkflow1.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow1.yml rename to recipes/tests/old_tests/seasonal_testWorkflow1.yml diff --git a/recipes/tests/seasonal_testWorkflow2.yml b/recipes/tests/old_tests/seasonal_testWorkflow2.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow2.yml rename to recipes/tests/old_tests/seasonal_testWorkflow2.yml diff --git a/recipes/tests/seasonal_testWorkflow3.yml b/recipes/tests/old_tests/seasonal_testWorkflow3.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow3.yml rename to recipes/tests/old_tests/seasonal_testWorkflow3.yml diff --git a/recipes/tests/seasonal_testWorkflow4.yml b/recipes/tests/old_tests/seasonal_testWorkflow4.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow4.yml rename to recipes/tests/old_tests/seasonal_testWorkflow4.yml diff --git a/recipes/tests/seasonal_testWorkflow5.yml b/recipes/tests/old_tests/seasonal_testWorkflow5.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow5.yml rename to recipes/tests/old_tests/seasonal_testWorkflow5.yml diff --git a/recipes/tests/seasonal_testWorkflow6.yml b/recipes/tests/old_tests/seasonal_testWorkflow6.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow6.yml rename to recipes/tests/old_tests/seasonal_testWorkflow6.yml diff --git a/recipes/tests/seasonal_testWorkflow7.yml b/recipes/tests/old_tests/seasonal_testWorkflow7.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow7.yml rename to recipes/tests/old_tests/seasonal_testWorkflow7.yml diff --git a/recipes/tests/seasonal_testWorkflow8.yml b/recipes/tests/old_tests/seasonal_testWorkflow8.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow8.yml rename to recipes/tests/old_tests/seasonal_testWorkflow8.yml diff --git a/recipes/tests/recipe_seasonal_two-variables.yml b/recipes/tests/recipe_seasonal_two-variables.yml new file mode 100644 index 0000000000000000000000000000000000000000..89406ecef1250ea6d5835a44c3b053a1b8f51a01 --- /dev/null +++ b/recipes/tests/recipe_seasonal_two-variables.yml @@ -0,0 +1,60 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. Agudetse + Info: Test Independent verification of two variables + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? 
+ - {name: tas, freq: monthly_mean} + - {name: prlr, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: system5c3s} + Multimodel: False # single option + Reference: + - {name: era5} # multiple references for single model? + Time: + sdate: # list, split + - '0101' + fcst_year: '2020' # list, don't split, handled internally + hcst_start: '2000' # single option + hcst_end: '2016' # single option + ftime_min: 1 # single option + ftime_max: 3 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: bilinear ## TODO: allow multiple methods? + type: to_system + Workflow: + Anomalies: + compute: yes + cross_validation: yes + Calibration: + method: mse_min ## TODO: list, split? + Skill: + metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + Indicators: + index: no # ? + ncores: 7 + remove_NAs: yes # bool, don't split + Output_format: S2S4E # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 25536335eb6d65f4bcf3bb41717a6973660e7684..541319f513b4fc264cbdbef2701c1a2fe4e2d967 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,93 +1,162 @@ -check_recipe <- function(recipe, logger) { +check_recipe <- function(recipe) { # recipe: yaml recipe already read it - # output: errors or the total number of workflow (vars x regions) to compute + ## TODO: Adapt to decadal case - info(logger, paste("Checking recipe", recipe$filename)) + info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) # --------------------------------------------------------------------- # ANALYSIS CHECKS # --------------------------------------------------------------------- - TIME_SETTINGS = c('sdate','leadtimemin','leadtimemax','hcst_start','hcst_end') - PARAMS = c('Horizon','Time','Variables','Region','Regrid','Workflow','Datasets') - HORIZONS <- c('Subseasonal','Seasonal','Decadal') + TIME_SETTINGS_SEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", + "hcst_end") + TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") + PARAMS <- c("Horizon", "Time", "Variables", "Region", "Regrid", "Workflow", + "Datasets") + HORIZONS <- c("subseasonal", "seasonal", "decadal") + # Define error status variable + error_status <- F - # create output dirs: - if (!any(names(recipe) %in% "Analysis")) { - error(logger, "The recipe should contain an element called 'Analysis'.") + # Check basic elements in recipe:Analysis: + if (!("Analysis" %in% names(recipe))) { + error(recipe$Run$logger, + "The recipe must contain an element called 'Analysis'.") + error_status <- T } if (!all(PARAMS %in% names(recipe$Analysis))) { - error(logger, - paste("The element 'Analysis' in the recipe should contain these", - "elements:", paste(PARAMS, collapse = " "))) + error(recipe$Run$logger, + paste0("The element 'Analysis' in the recipe must contain all of ", + "the following: ", paste(PARAMS, collapse = ", "), ".")) 
+ error_status <- T } - if (!any(HORIZONS %in% recipe$Analysis$Horizon)) { - error(logger, - "The element 'Horizon' in the recipe should be one of the followings:", - paste(HORIZONS, collapse = " ")) - } - # Check temporal settings and - # count the number of verifications - if (!all(TIME_SETTINGS %in% names(recipe$Analysis$Time))) { - error(logger, - paste("The element 'Time' in the recipe should contain these elements:", - paste(TIME_SETTINGS, collapse = " "))) - } - if (is.null(recipe$Analysis$Time$sdate$fcst_year) || - recipe$Analysis$Time$sdate$fcst_year == 'None') { + if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { + error(recipe$Run$logger, + paste0("The element 'Horizon' in the recipe must be one of the ", + "following: ", paste(HORIZONS, collapse = ", "), ".")) + error_status <- T + } + # Check time settings + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { + error(recipe$Run$logger, + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_SEASONAL, + collapse = ", "), ".")) + error_status <- T + } + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { + error(recipe$Run$logger, + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_DECADAL, + collapse = ", "), ".")) + error_status <- T + } + } + # Check ftime_min and ftime_max + if ((!(recipe$Analysis$Time$ftime_min > 0)) || + (!is.integer(recipe$Analysis$Time$ftime_min))) { + error(recipe$Run$logger, + "The element 'ftime_min' must be an integer larger than 0.") + error_status <- T + } + if ((!(recipe$Analysis$Time$ftime_max > 0)) || + (!is.integer(recipe$Analysis$Time$ftime_max))) { + error(recipe$Run$logger, + "The element 'ftime_max' must be an integer larger than 0.") + error_status <- T + } + if ((is.numeric(recipe$Analysis$Time$ftime_max)) && + (is.numeric(recipe$Analysis$Time$ftime_min))) { + if (recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { + error(recipe$Run$logger, + "'ftime_max' cannot be smaller than 'ftime_min'.") + error_status <- T + } + } + # Check consistency of hindcast years + if (!(as.numeric(recipe$Analysis$Time$hcst_start) %% 1 == 0) || + (!(recipe$Analysis$Time$hcst_start > 0))) { + error(recipe$Run$logger, + "The element 'hcst_start' must be a valid year.") + error_status <- T + } + if (!(as.numeric(recipe$Analysis$Time$hcst_end) %% 1 == 0) || + (!(recipe$Analysis$Time$hcst_end > 0))) { + error(recipe$Run$logger, + "The element 'hcst_end' must be a valid year.") + error_status <- T + } + if (recipe$Analysis$Time$hcst_end < recipe$Analysis$Time$hcst_start) { + error(recipe$Run$logger, + "'hcst_end' cannot be smaller than 'hcst_start'.") + error_status <- T + } + ## TODO: Is this needed? 
+ if (is.null(recipe$Analysis$Time$fcst_year) || + tolower(recipe$Analysis$Time$fcst_year) == 'none') { stream <- "hindcast" - recipe$Analysis$Time$sdate$fcst_year <- 'YYYY' + # recipe$Analysis$Time$fcst_year <- 'YYYY' } else { stream <- "fcst" } - if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && - tolower(recipe$Analysis$Horizon) != "subseasonal") { - warn(logger, - paste("Only subseasonal verification allows multiple forecast days."), - "Element fcst_day in recipe set as 1.") - recipe$Analysis$Time$sdate$fcst_day <- '01' - } - if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) { - error(logger, - paste("The element 'fcst_sday' in the recipe should be defined.")) - } - if (is.null(recipe$Analysis$Time$sdate$fcst_syear)) { - error(logger, - paste("The element 'fcst_syear' in the recipe should be defined.")) + + ## TODO: To be implemented in the future + # if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && + # tolower(recipe$Analysis$Horizon) != "subseasonal") { + # warn(recipe$Run$logger, + # paste("Only subseasonal verification allows multiple forecast days."), + # "Element fcst_day in recipe set as 1.") + # recipe$Analysis$Time$sdate$fcst_day <- '01' + # } + ## TODO: Delete, this parameter was deprecated + # if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) { + # error(recipe$Run$logger, + # paste("The element 'fcst_sday' in the recipe should be defined.")) + # } + + if (is.null(recipe$Analysis$Time$fcst_year)) { + warn(recipe$Run$logger, + paste("The element 'fcst_year' is not defined in the recipe.", + "No forecast year will be used.")) } + ## TODO: Adapt and move this inside 'if'? + # fcst.sdate <- NULL + # for (syear in recipe$Analysis$Time$fcst_year) { + # for (sday in recipe$Analysis$Time$sdate) { + # fcst.sdate <- c(fcst.sdate, + # paste0(syear, + # sprintf("%04d", as.numeric(sday)))) + # } + # } + # fcst.sdate <- list(stream = stream, fcst.sdate = fcst.sdate) - - fcst.sdate <- NULL - for (syear in recipe$Analysis$Time$sdate$fcst_syear) { - for (sday in recipe$Analysis$Time$sdate$fcst_sday) { - fcst.sdate <- c(fcst.sdate, - paste0(syear, - sprintf("%04d", as.numeric(sday)))) - } - } - fcst.sdate <- list(stream = stream, fcst.sdate = fcst.sdate) # Regrid checks: if (length(recipe$Analysis$Regrid) != 2) { - error(logger, - "The 'Regrid' element should specified the 'method' and 'type'.") - stop("EXECUTION FAILED") + error(recipe$Run$logger, + "The 'Regrid' element must specify the 'method' and 'type'.") + error_status <- T } -# more checks + # TODO: Add Workflow checks? # ... 
- # calculate number of workflows to create for each variable and + # calculate number of workflows to create for each variable and if (length(recipe$Analysis$Horizon) > 1) { - error(logger, "Only 1 Horizon can be specified in the recipe") - stop("EXECUTION FAILED") - } - nvar <- length(recipe$Analysis$Variables) - if (nvar > 2) { - error(logger, - "Only two type of Variables can be listed: ECVs and Indicators.") - stop("EXECUTION FAILED") + error(recipe$Run$logger, + "Only one single Horizon can be specified in the recipe") + error_status <- T } + + ## TODO: Refine this + # nvar <- length(recipe$Analysis$Variables) + # if (nvar > 2) { + # error(recipe$Run$logger, + # "Only two type of Variables can be listed: ECVs and Indicators.") + # stop("EXECUTION FAILED") + # } + # remove NULL or None Indicators or ECVs from the recipe: if (!is.null(recipe$Analysis$Variables$Indicators) && !is.list(recipe$Analysis$Variables$Indicators)) { @@ -99,82 +168,138 @@ check_recipe <- function(recipe, logger) { recipe$Analysis$Variables <- recipe$Analysis$Variables[ -which(names(recipe$Analysis$Variables) == 'ECVs')] } + + # Region checks: + LIMITS <- c('latmin', 'latmax', 'lonmin', 'lonmax') + if (!all(LIMITS %in% names(recipe$Analysis$Region))) { + error(recipe$Run$logger, + paste0("There must be 4 elements in 'Region': ", + paste(LIMITS, collapse = ", "), ".")) + error_status <- T + } + ## TODO: Implement multiple regions + # nregions <- length(recipe$Analysis$Region) + # for (i in 1:length(recipe$Analysis$Region)) { + # if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { + # limits <- paste(limits, collapse = " ") + # error(recipe$Run$logger, + # paste0("Each region defined in element 'Region' ", + # "should have 4 elements: ", + # paste(limits, collapse = ", "), ".")) + # error_status <- T + # } + # if (length(recipe$Analysis$Region) > 1) { + # if (!("name" %in% names(recipe$Analysis$Region[[i]]))) { + # error(recipe$Run$logger, + # paste("If multiple regions are requested, each region must", + # "have a 'name'".) + # # are numeric? 
class list mode list + # } + # --------------------------------------------------------------------- + # WORKFLOW CHECKS + # --------------------------------------------------------------------- + # Only one Calibration method allowed: - if ((is.logical(recipe$Analysis$Workflow$Calibration[[1]]) && - recipe$Analysis$Workflow$Calibration[[1]] == FALSE) || - recipe$Analysis$Workflow$Calibration[[1]] == 'None' || - is.null(recipe$Analysis$Workflow$Calibration[[1]])) { - warn(logger, - "There is no Calibration method selected, raw data verification.") - recipe$Analysis$Workflow$Calibration[[1]] <- FALSE + if ((is.logical(recipe$Analysis$Workflow$Calibration$method) && + recipe$Analysis$Workflow$Calibration$method == FALSE) || + tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || + is.null(recipe$Analysis$Workflow$Calibration$method)) { + warn(recipe$Run$logger, + "No Calibration method was specified, raw data verification.") + recipe$Analysis$Workflow$Calibration$method <- 'raw' } else { - # remove multiple calibration methods - if (is.null(names(recipe$Analysis$Workflow$Calibration))) { - error(logger, - "The 'Calibration' element should specified at least the 'method'.") - stop("EXECUTION FAILED") + if (is.null(recipe$Analysis$Workflow$Calibration$method)) { + error(recipe$Run$logger, + "The 'Calibration' element 'method' must be specified.") + error_status <- T } - } - - if ("Region" %in% names(recipe$Analysis)) { - nregions <- length(recipe$Analysis$Region$Regional) - limits <- c('latmin', 'latmax', 'lonmin', 'lonmax') - for (i in 1:length(recipe$Analysis$Region)) { - if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { - limits <- paste(limits, collapse = " ") - error(logger, - paste("Each region defined in element 'Regional'", - "should have 4 elements:", - limits)) - stop("EXECUTION FAILED") - } - # are numeric? 
class list mode list + } + # Anomalies + if ("Anomalies" %in% names(recipe$Analysis$Workflow)) { + if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { + error(recipe$Run$logger, + "Parameter 'compute' must be defined under 'Anomalies'.") + error_status <- T + } else if (!(is.logical(recipe$Analysis$Workflow$Anomalies$compute))) { + error(recipe$Run$logger, + paste("Parameter 'Anomalies:compute' must be a logical value", + "(True/False or yes/no).")) + error_status <- T + } else if ((recipe$Analysis$Workflow$Anomalies$compute) && + (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation))) { + error(recipe$Run$logger, + paste("If anomaly computation is requested, parameter", + "'cross_validation' must be defined under 'Anomalies', + and it must be a logical value (True/False or yes/no).")) + error_status <- T } - } else { - error(logger, - paste("'Region'", - "should be defined", - limits)) - stop("EXECUTION FAILED") } - + # Skill + if (("Skill" %in% names(recipe$Analysis$Workflow)) && + (is.null(recipe$Analysis$Workflow$Skill$metric))) { + error(recipe$Run$logger, + "Parameter 'metric' must be defined under 'Skill'.") + error_status <- T + } + # Probabilities + if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { + if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { + error(recipe$Run$logger, + "Parameter 'percentiles' must be defined under 'Probabilities'.") + error_status <- T + } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { + error(recipe$Run$logger, + paste("Parameter 'Probabilities:percentiles' expects a list.", + "See documentation in the wiki for examples.")) + error_status <- T + } + } + # --------------------------------------------------------------------- # RUN CHECKS # --------------------------------------------------------------------- - RUN_FIELDS = c("Loglevel","Terminal","output_dir","code_dir") - LOG_LEVELS = c("INFO","DEBUG","WARNING","ERROR") + RUN_FIELDS = c("Loglevel", "Terminal", "output_dir", "code_dir") + LOG_LEVELS = c("INFO", "DEBUG", "WARN", "ERROR", "FATAL") - if (!any(names(recipe) %in% "Run")) { - error(logger, "The recipe should contain an element called 'Run'.") + if (!("Run" %in% names(recipe))) { + stop("The recipe must contain an element named 'Run'.") } if (!all(RUN_FIELDS %in% names(recipe$Run))) { - error(logger, paste0("Run should contain the fields: ", - paste(RUN_FIELDS,collapse=", "), ".")) + error(recipe$Run$logger, paste("Recipe element 'Run' must contain", + "all of the following fields:", + paste(RUN_FIELDS, collapse=", "), ".")) + error_status <- T } if (!is.character(recipe$Run$output_dir)) { - error(logger, - paste("The Run element 'output_dir' in", recipe$filename,"file ", - "should be a character string indicating the path ", - "where to save the outputs.")) + error(recipe$Run$logger, + paste("The Run element 'output_dir' in", recipe$name, "file", + "should be a character string indicating the path where", + "the outputs should be saved.")) + error_status <- T } if (!is.character(recipe$Run$code_dir)) { - error(logger, - paste("The Run element 'code_dir' in", recipe$filename,"file ", - "should be a character string indicating the path ", + error(recipe$Run$logger, + paste("The Run element 'code_dir' in", recipe$name, "file ", + "should be a character string indicating the path", "where the code is.")) + error_status <- T } if (!is.logical(recipe$Run$Terminal)) { - error(logger, - paste("The Run element 'Terminal' in", recipe$filename,"file ", - "should be a boolean value 
indicating wether to print or not the log",
-          "in the terminal."))
+    error(recipe$Run$logger,
+          paste("The Run element 'Terminal' in", recipe$name, "file ",
+                "should be a boolean value indicating whether or not to",
+                "print the logs in the terminal."))
+    error_status <- T
   }
-  if (!is.character(recipe$Run$Loglevel) || !any(recipe$Run$Loglevel %in% LOG_LEVELS)) {
+  ## TODO: Review this case, since default value is allowed
+  if (!is.character(recipe$Run$Loglevel) ||
+      !any(recipe$Run$Loglevel %in% LOG_LEVELS)) {
-    error(logger,
-          paste("The Run element 'Loglevel' in", recipe$filename,"file ",
-                "should be a character string indicating one of the levels available: ",
-                paste0(LOG_LEVELS,collapse='/')))
+    error(recipe$Run$logger,
+          paste("The Run element 'Loglevel' in", recipe$name, "file",
+                "should be a character string specifying one of the levels available:",
+                paste0(LOG_LEVELS, collapse='/')))
+    error_status <- T
   }
  # ---------------------------------------------------------------------
@@ -182,10 +307,19 @@ check_recipe <- function(recipe, logger) {
   # ---------------------------------------------------------------------
   # Check workflow: need to define restrictions?
   # e.g. only one calibration method
-  nverifications <- check_number_of_dependent_verifications(recipe)
-  info(logger, paste("Start Dates", paste(fcst.sdate, collapse = " ")))
-  info(logger, "Recipe checked succsessfully.")
-  return(append(nverifications, fcst.sdate))
+  ## TODO: Implement number of dependent verifications
+  #nverifications <- check_number_of_dependent_verifications(recipe)
+  # info(recipe$Run$logger, paste("Start Dates:",
+  #      paste(fcst.sdate, collapse = " ")))
+
+  # Return error if any check has failed
+  if (error_status) {
+    error(recipe$Run$logger, "RECIPE CHECK FAILED.")
+    stop("The recipe contains some errors. The full list is in the logs.")
+  } else {
+    info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFUL #####")
+    # return(append(nverifications, fcst.sdate))
+  }
 }
 
 check_number_of_dependent_verifications <- function(recipe) {
diff --git a/tools/data_summary.R b/tools/data_summary.R
index 8dcc9910101dc40a172e7bbbeba62a69c292a18b..f437d43157311fbfac142fe03a552498bd60bb1b 100644
--- a/tools/data_summary.R
+++ b/tools/data_summary.R
@@ -7,15 +7,15 @@ data_summary <- function(data_cube, recipe) {
   # Get name, leadtime months and date range
   object_name <- deparse(substitute(data_cube))
   if (recipe$Analysis$Variables$freq == "monthly_mean") {
-    date_format <- '%b %Y'
+    date_format <- "%b %Y"
   } else if (recipe$Analysis$Variables$freq == "daily_mean") {
-    date_format <- '%b %d %Y'
+    date_format <- "%b %d %Y"
   }
-  months <- unique(format(as.Date(data_cube$Dates[[1]]), format = '%B'))
-  months <- paste(as.character(months), collapse=", ")
+  months <- unique(format(as.Date(data_cube$Dates[[1]]), format = "%B"))
+  months <- paste(as.character(months), collapse = ", ")
   sdate_min <- format(min(as.Date(data_cube$Dates[[1]])), format = date_format)
   sdate_max <- format(max(as.Date(data_cube$Dates[[1]])), format = date_format)
-  
+
   # Create log instance and sink output to logfile and terminal
   info(recipe$Run$logger, "DATA SUMMARY:")
   info(recipe$Run$logger, paste(object_name, "months:", months))
@@ -35,4 +35,3 @@ data_summary <- function(data_cube, recipe) {
   }
   info(recipe$Run$logger, "---------------------------------------------")
 }
-
diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R
index dafc8704aa8a686fa5885b684acbaa23e49c8f7e..18962f93010bae7fe6693beb48a5f485075954f5 100644
--- a/tools/divide_recipe.R
+++ b/tools/divide_recipe.R
@@ -1,11 +1,12 @@
-# recipe: the content of the recipe
-# verifications: the output from check_recipe
-# folder: the name of the output folder for this run
-# logger: the log file obtain from prepare_outputs
-divide_recipe <- function(recipe, verifications, folder, logger) {
-  info(logger, "Spliting recipe in single verifications.")
+# recipe: the recipe as returned by prepare_outputs()
+divide_recipe <- function(recipe) {
+
+  ## TODO: Implement dependent vs independent verifications?
+  info(recipe$Run$logger, "Splitting recipe into single verifications.")
   beta_recipe <- list(Description = append(recipe$Description,
-                                           "split version"),
+                                           list(Origin = paste("Atomic recipe,",
+                                                               "split from:",
+                                                               recipe$name))),
                       Analysis = list(Horizon = recipe$Analysis$Horizon,
                                       Variables = NULL,
                                       Datasets = NULL,
@@ -14,38 +15,38 @@ divide_recipe <- function(recipe, verifications, folder, logger) {
                                       Regrid = recipe$Analysis$Regrid,
                                       Workflow = recipe$Analysis$Workflow,
                                       Output_format =
-                                        recipe$Analysis$Output_format),
-                      Run = recipe$Run)
-  # duplicate recipe by Variables considering dep and indep:
-  all_recipes <- list(beta_recipe)
-  i <- 1 # to get track of the recipe number
-  for (indep in verifications$independent) {
-    all_recipes[[i]]$Analysis$Variables <- indep
-    i = i + 1
-    all_recipes <- append(all_recipes, list(beta_recipe))
+                                        recipe$Analysis$Output_format),
+                      Run = recipe$Run[c("Loglevel", "output_dir", "Terminal",
+                                         "code_dir", "logfile")])
+
+  # duplicate recipe by independent variables:
+  all_recipes <- rep(list(beta_recipe), length(recipe$Analysis$Variables))
+  for (var in 1:length(recipe$Analysis$Variables)) {
+    all_recipes[[var]]$Analysis$Variables <- recipe$Analysis$Variables[[var]]
   }
-  for (dep in verifications$dependent) {
-    all_recipes[[i]]$Analysis$Variables <- dep
-    i = i + 1
-    all_recipes <- append(all_recipes, list(beta_recipe))
-  }
-  all_recipes <- all_recipes[-length(all_recipes)]
+  # for (dep in verifications$dependent) {
+  #   all_recipes[[i]]$Analysis$Variables <- dep
+  #   i = i + 1
+  #   all_recipes <- append(all_recipes, list(beta_recipe))
+  # }
+  # all_recipes <- all_recipes[-length(all_recipes)]  # TODO: check why the last element is removed
+
+  # duplicate recipe by Datasets:
   # check Systems
   if (recipe$Analysis$Datasets$Multimodel) {
     for (reci in 1:length(all_recipes)) {
-      all_recipes[[reci]]$Analysis$Datasets <- list(
-        System = recipe$Analysis$Datasets$System,
-        Multimodel = recipe$Analysis$Datasets$Multimodel,
-        Reference = NULL)
+      all_recipes[[reci]]$Analysis$Datasets <-
+        list(System = recipe$Analysis$Datasets$System,
+             Multimodel = recipe$Analysis$Datasets$Multimodel,
+             Reference = NULL)
     }
   } else {
     for (sys in 1:length(recipe$Analysis$Datasets$System)) {
      for (reci in 1:length(all_recipes)) {
-        all_recipes[[reci]]$Analysis$Datasets <- list(
-          System = recipe$Analysis$Datasets$System[[sys]],
-          Multimodel = recipe$Analysis$Datasets$Multimodel,
-          Reference = NULL)
+        all_recipes[[reci]]$Analysis$Datasets <-
+          list(System = recipe$Analysis$Datasets$System[[sys]],
+               Multimodel = recipe$Analysis$Datasets$Multimodel,
+               Reference = NULL)
      }
      if (sys == 1) {
        recipes <- all_recipes
@@ -72,28 +73,28 @@ divide_recipe <- function(recipe, verifications, folder, logger) {
   # Duplicate recipe by Region
   recipes <- list()
   for (reg in 1:length(recipe$Analysis$Region)) {
-    if (length(recipe$Analysis$Region[[reg]]) == 4) { ##TODO: THIS SHOULD BE ONLY CHECK IN THE RECIPE CHECKER?
+    # if (length(recipe$Analysis$Region[[reg]]) == 4) { ##TODO: THIS SHOULD ONLY BE CHECKED IN THE RECIPE CHECKER?
     for (reci in 1:length(all_recipes)) {
-      all_recipes[[reci]]$Analysis$Region <-
-        recipe$Analysis$Region[[reg]]
+      all_recipes[[reci]]$Analysis$Region <- recipe$Analysis$Region[[reg]]
     }
     recipes <- append(recipes, all_recipes)
-    }
+    # }
   }
   all_recipes <- recipes
   rm(list = 'recipes')
+  # Duplicate recipe by start date
   if (tolower(recipe$Analysis$Horizon) == 'seasonal') {
-    for (sday in 1:length(recipe$Analysis$Time$sdate$fcst_sday)) {
+    for (sdate in 1:length(recipe$Analysis$Time$sdate)) {
      for (reci in 1:length(all_recipes)) {
-        all_recipes[[reci]]$Analysis$Time <- list(sdate = list(
-          fcst_syear = recipe$Analysis$Time$sdate$fcst_syear,
-          fcst_sday = recipe$Analysis$Time$sdate$fcst_sday[[sday]]),
-          hcst_start = recipe$Analysis$Time$hcst_start,
-          hcst_end = recipe$Analysis$Time$hcst_end,
-          leadtimemin = recipe$Analysis$Time$leadtimemin,
-          leadtimemax = recipe$Analysis$Time$leadtimemax)
+        all_recipes[[reci]]$Analysis$Time <-
+          list(sdate = recipe$Analysis$Time$sdate[[sdate]],
+               fcst_year = recipe$Analysis$Time$fcst_year,
+               hcst_start = recipe$Analysis$Time$hcst_start,
+               hcst_end = recipe$Analysis$Time$hcst_end,
+               ftime_min = recipe$Analysis$Time$ftime_min,
+               ftime_max = recipe$Analysis$Time$ftime_max)
      }
-      if (sday == 1) {
+      if (sdate == 1) {
        recipes <- all_recipes
      } else {
        recipes <- append(recipes, all_recipes)
@@ -102,12 +103,24 @@ divide_recipe <- function(recipe, verifications, folder, logger) {
    all_recipes <- recipes
    rm(list = 'recipes')
   }  # Rest of horizons
-  # Finally, save all recipes in saparated yaml files
+  # Save all recipes in separate YAML files
+  ## TODO: Re-add recipe$Run$logger
   for (reci in 1:length(all_recipes)) {
+    if (reci < 10) {
+      recipe_number <- paste0("0", reci)
+    } else {
+      recipe_number <- reci
+    }
    write_yaml(all_recipes[[reci]],
-               paste0(folder, "/logs/recipes/recipe_", reci, ".yml"))
+               paste0(recipe$Run$output_dir, "/logs/recipes/recipe_",
+                      recipe_number, ".yml"))
+    all_recipes[[reci]]$Run$logger <- recipe$Run$logger
   }
-  text <- paste0("See folder ",folder,"/logs/recipes/ to see the individual recipes.")
-  info(logger, text)
+  info(recipe$Run$logger,
+       paste("The main recipe has been divided into", length(all_recipes),
+             "atomic recipes."))
+  text <- paste0("See output directory ", recipe$Run$output_dir,
+                 "/logs/recipes/ to see all the individual atomic recipes.")
+  info(recipe$Run$logger, text)
   return(all_recipes)
 }
diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R
index a89e5e7b7d14b15a56a4e5a0bf7db3deb26f0259..1972aef0cb38f7e845ebd0fdfc1ca19a28950939 100644
--- a/tools/prepare_outputs.R
+++ b/tools/prepare_outputs.R
@@ -1,12 +1,12 @@
 #'Read recipe YAML file and create and store logfile info
 #'
 #'The purpose of this function is to read the recipe configuration for Auto-S2S
-#'workflows and create logfiles stores in an the output directory specified in
+#'workflows and create logfiles stored in the output directory specified in
 #'the recipe. It returns an object of class logger that stores information on
 #'the recipe configuration and errors.
#' #'@param recipe_file path to a YAML file with Auto-S2S configuration recipe -#' +#'@param disable_checks whether to disable the recipe checks #'@return list contaning recipe with logger, log file name and log dir name #' #'@import log4r @@ -20,10 +20,11 @@ #' #'@export -prepare_outputs <- function(recipe_file) { +prepare_outputs <- function(recipe_file, + disable_checks = FALSE) { -# recipe: the content of the readed recipe -# file: the recipe file name +# recipe_file: path to recipe YAML file +# disable_checks: If TRUE, does not perform checks on recipe recipe <- read_yaml(recipe_file) recipe$recipe_path <- recipe_file @@ -33,32 +34,30 @@ prepare_outputs <- function(recipe_file) { # Create output folders: folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)), "_", gsub(" ", "", gsub(":", "", gsub("-", "", Sys.time())))) - print("Saving all outputs to:") print(output_dir) print(folder_name) - dir.create(file.path(output_dir, folder_name, 'outputs'), recursive = TRUE) dir.create(file.path(output_dir, folder_name, 'logs')) dir.create(file.path(output_dir, folder_name, 'logs', 'recipes')) - + # Copy recipe to output folder file.copy(recipe$recipe_path, file.path(output_dir, folder_name, 'logs', 'recipes')) - + # Create log output file logfile <- file.path(output_dir, folder_name, 'logs', 'log.txt') file.create(logfile) - # Set default behaviour of log output file: + # Set default behaviour of logger if (is.null(recipe$Run)) { recipe$Run <- list(Loglevel = 'INFO', Terminal = TRUE) } if (is.null(recipe$Run$Loglevel)) { recipe$Run$Loglevel <- 'INFO' } - if (!is.logical(recipe$Run$Terminal)) { recipe$Run$Terminal <- TRUE } + # logger set-up if (recipe$Run$Terminal) { logger <- log4r::logger(threshold = recipe$Run$Loglevel, appenders = list(console_appender(layout = default_log_layout()), @@ -69,13 +68,15 @@ prepare_outputs <- function(recipe_file) { appenders = list(file_appender(logfile, append = TRUE, layout = default_log_layout()))) } - recipe$Run$output_dir <- file.path(output_dir, folder_name) recipe$Run$logger <- logger recipe$Run$logfile <- logfile - - info(recipe$Run$logger, - "##### LOGGER SET UP AND OUTPUT DIRECTORY PREPARED #####") - + # Run recipe checker + if (disable_checks) { + warn(recipe$Run$logger, + "Recipe checks disabled. The recipe will not be checked for errors.") + } else { + check_recipe(recipe) + } return(recipe) }
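
Reviewer note: the intended end-to-end flow after this refactor is prepare_outputs() (which now runs check_recipe() unless checks are disabled), divide_recipe() to split the main recipe into atomic recipes, and the per-recipe module calls. The R sketch below is illustrative only and mirrors modules/test_seasonal.R from this patch; the source() paths for the Loading, Calibration and Anomalies modules are assumed from the usual repository layout and are not part of this diff.

# Illustrative driver sketch (not part of the patch); mirrors modules/test_seasonal.R.
source("modules/Loading/Loading.R")          # assumed path, not shown in this diff
source("modules/Calibration/Calibration.R")  # assumed path, not shown in this diff
source("modules/Anomalies/Anomalies.R")      # assumed path, not shown in this diff
source("modules/Skill/Skill.R")
source("modules/Saving/Saving.R")
source("modules/Visualization/Visualization.R")
source("tools/prepare_outputs.R")
source("tools/divide_recipe.R")
source("tools/check_recipe.R")

# prepare_outputs() reads the YAML recipe, creates the output and log folders,
# sets up the log4r logger and, unless disable_checks = TRUE, runs check_recipe(),
# which stops execution if any check fails. Checks are disabled here because the
# checker does not yet handle multi-region/multi-variable main recipes (see the
# TODOs in tools/check_recipe.R above).
recipe <- prepare_outputs("recipes/tests/recipe_seasonal_two-variables.yml",
                          disable_checks = TRUE)

# divide_recipe() returns one atomic recipe per variable x system x region x
# start date combination and writes each one to <output_dir>/logs/recipes/.
atomic_recipes <- divide_recipe(recipe)

# Each atomic recipe is then verified independently.
for (atomic_recipe in atomic_recipes) {
  data <- load_datasets(atomic_recipe)
  calibrated_data <- calibrate_datasets(atomic_recipe, data)
  calibrated_data <- compute_anomalies(atomic_recipe, calibrated_data)
  skill_metrics <- compute_skill_metrics(atomic_recipe, calibrated_data)
  probabilities <- compute_probabilities(atomic_recipe, calibrated_data)
  save_data(atomic_recipe, calibrated_data, skill_metrics, probabilities)
  plot_data(atomic_recipe, calibrated_data, skill_metrics, probabilities,
            significance = TRUE)
}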