diff --git a/MODULES b/MODULES index 42b06963a98e289c879fe0ced4c252172b03c773..a37e52d667da53f97f084b6a8c2932f54b6aca26 100644 --- a/MODULES +++ b/MODULES @@ -9,11 +9,6 @@ if [ $BSC_MACHINE == "power" ]; then module use /gpfs/projects/bsc32/software/rhel/7.4/ppc64le/POWER9/modules/all/ module load CDO/1.9.4-foss-2018b - module load pynco/0.0.2-foss-2018b-Python-2.7.15 - module load netcdf4-python/1.2.2-foss-2018b-Python-2.7.15 - module load grib_api/1.27.0-foss-2018b - module load NCO/4.7.2-foss-2018b - module load Python/2.7.15-foss-2018b module load R/3.6.1-foss-2018b elif [ $BSC_MACHINE == "nord3" ]; then @@ -25,26 +20,11 @@ elif [ $BSC_MACHINE == "nord3" ]; then module unuse /apps/modules/PRACE module load R/3.6.2-foss-2019b - - ## TODO: to be tested module load CDO/1.9.8-foss-2019b - module load pycdo/1.5.3-foss-2019b-Python-3.7.4 - module load netcdf4-python/1.5.3-foss-2019b-Python-3.7.4 - module load pynco/0.0.3-foss-2019b-Python-3.7.4 - module load Python/3.7.4-GCCcore-8.3.0 - module load NCO/4.9.2-foss-2019b - module load grib_api/1.24.0-foss-2019b - - else - module load grib_api/1.18.0-foss-2015a #netCDF - module load NCO/4.7.3-foss-2015a ncview/2.1.5-foss-2015a pynco/0.0.2-foss-2015a-Python-2.7.9 - module load ECAC/4.0.2 Python/2.7.9-foss-2015a module load CDO/1.9.8-foss-2015a - module load grib_api/1.13.1-foss-2015a - module load netCDF/4.3.3.1-foss-2015a module load R/3.6.1-foss-2015a-bare fi diff --git a/OperationalCS.R b/OperationalCS.R index bdb960c8aca7240188c64b2f742948da91fc1247..c04361dadce7673c434e880cb93438fb5b232790 100644 --- a/OperationalCS.R +++ b/OperationalCS.R @@ -8,27 +8,26 @@ print(args) library(yaml) # To test: -# args <- NULL; args[1] <- "recipes/conf_user.yml"; args[2] <- "recipes/tests/seasonal_testWorkflow1.yml" -# args <- NULL; args[1] <- "recipes/conf_user.yml"; args[2] <- "recipes/seasonal_oper.yml" -# args <- NULL; args[1] <- "recipes/conf_user.yml"; args[2] <- "recipes/seasonal_complex.yml" +# args <- NULL; args[1] <- "recipes/seasonal_oper.yml" +# args <- NULL; args[1] <- "recipes/seasonal_complex.yml" -conf <- read_yaml(args[1]) -recipe <- read_yaml(args[2]) +recipe <- read_yaml(args[1]) +recipe$filename <- args[1] # Load required libraries source("tools/libs.R") # Create output folder and log: -logger <- prepare_outputs(recipe = recipe, file = args[2], conf = conf) +logger <- prepare_outputs(recipe = recipe) log_file <- logger$logname logger <- logger$logger # Checks: -check_conf(conf, file = args[1], logger) -verifications <- check_recipe(recipe, file = args[2], conf, logger) +verifications <- check_recipe(recipe, logger) # Go to verification code: capture.output(source("modules/verifications.R"), file = log_file, type ='message', append = TRUE) + diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R new file mode 100644 index 0000000000000000000000000000000000000000..19b81380ffec4e082143aa933febe33009b9dd5d --- /dev/null +++ b/modules/data_load/dates2load.R @@ -0,0 +1,42 @@ + +# Taking the recipe returns the array of sdates to be loaded +# both for the hcst and fcst. +dates2load <- function(recipe, logger){ + + recipe <- recipe$Analysis$Time + + # hcst dates + file_dates <- outer(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), + recipe$sdate$fcst_sday,paste,sep="") + file_dates <- add_dims(file_dates, "hcst") + + # fcst dates (if fcst_year empty it creates an empty object) + if (! is.null(recipe$sdate$fcst_year)){ + file_dates.fcst <- outer(recipe$sdate$fcst_year,recipe$sdate$fcst_sday,paste,sep="") + file_dates.fcst <- add_dims(file_dates.fcst, "fcst") + } else { + file_dates.fcst <- NULL + info(logger, + paste("fcst_year empty in the recipe, creating empty fcst object...")) + } + + return(list(hcst=file_dates,fcst=file_dates.fcst)) + +} + +# adds the correspondent dims to each sdate array +add_dims <- function(data, type){ + + if (type == "hcst"){ + default_dims <- c(sday = 1, sweek = 1, + syear = dim(data)[1], srun = dim(data)[2]) + } else { + default_dims <- c(fcst_year = dim(data)[1], srun = dim(data)[2]) + } + + default_dims[names(dim(data))] <- dim(data) + dim(data) <- default_dims + return(data) + + } + diff --git a/modules/data_load/seas5.load.R b/modules/data_load/fcst_seas.load.R similarity index 100% rename from modules/data_load/seas5.load.R rename to modules/data_load/fcst_seas.load.R diff --git a/modules/data_load/hcst_seas.load.R b/modules/data_load/hcst_seas.load.R new file mode 100644 index 0000000000000000000000000000000000000000..c0d6ac697c9fcf068919f38529227e6f3811c2d9 --- /dev/null +++ b/modules/data_load/hcst_seas.load.R @@ -0,0 +1,204 @@ + +source("modules/data_load/dates2load.R") + +# Set params ----------------------------------------- +hcst.inityear <- recipe$Analysis$Time$hcst_start +hcst.endyear <- recipe$Analysis$Time$hcst_end +ltmin <- recipe$Analysis$Time$leadtimemin +ltmax <- recipe$Analysis$Time$leadtimemax +lats.min <- recipe$Analysis$Region$Regional[[1]]$latmin +lats.max <- recipe$Analysis$Region$Regional[[1]]$latmax +lons.min <- recipe$Analysis$Region$Regional[[1]]$lonmin +lons.max <- recipe$Analysis$Region$Regional[[1]]$lonmax +ref.name <- recipe$Analysis$Datasets$Reference$name +systems <- recipe$Analysis$Datasets$System + +stream <- verifications$stream +sdates <- verifications$fcst.sdate + +## TODO: define fcst.name +##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name + +# get sdates array +sdates <- dates2load(recipe, logger) + +# get esarchive datasets dict: +archive <- read_yaml(paste0(recipe$Run$code_dir,"conf/archive.yml"))$archive +dataset_descrip <- archive[[fcst.name]] + +freq.hcst <- unlist(dataset_descrip[[store.freq]][variable]) +reference_descrip <- archive[[ref.name]] +freq.obs <- unlist(reference_descrip[[store.freq]][variable]) +obs.dir <- reference_descrip$src +fcst.dir <- dataset_descrip$src +hcst.dir <- dataset_descrip$src +fcst.nmember <- dataset_descrip$nmember$fcst +hcst.nmember <- dataset_descrip$nmember$hcst + +if ("accum" %in% names(reference_descrip)) { + accum <- unlist(reference_descrip$accum[store.freq][[1]]) +} else { + accum <- FALSE +} + +# ----------- +obs.path <- paste0("/esarchive/", + obs.dir, store.freq, "/$var$", + freq.obs,"$var$_$file_date$.nc") + +hcst.path <- paste0("/esarchive/", + hcst.dir, store.freq, "/$var$", + freq.hcst,"$var$_$file_date$01", + ".nc") + +# Regrid: +#------------------------------------------------------------------- +regrid_params <- get_regrid_params(recipe) + +# Timeseries load +#------------------------------------------------------------------- + +hcst <- Start(dat = hcst.path, + var = variable, + file_date = file_dates, + time = indices(ltmin:ltmax), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = + dataset_descrip$lat_decreasing_sort), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort( + dataset_descrip$lon_circular_sort$ini, + dataset_descrip$lon_circular_sort$end), + transform = fcst.transform, + transform_params = list(grid = fcst.gridtype, + method = fcst.gridmethod, + crop = c(lons.min, lons.max, + lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude'), + member = c('ensemble')), + member = indices(1:hcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + +hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) +hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] +try(hcst.NA_files <- hcst.NA_files[order(hcst.NA_files)]) + +dates <- attr(hcst, 'Variables')$common$time +dates_file <- sapply(dates, format, '%Y%m%d') +dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") +dim(dates_file) <- dim(Subset(hcst, + along=c('dat','var', + 'latitude', 'longitude', 'member'), + list(1,1,1,1,1), drop="selected")) + +obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = + reference_descrip$lat_decreasing_sort), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort( + reference_descrip$lon_circular_sort$ini, + reference_descrip$lon_circular_sort$end), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + transform = obs.transform, + transform_params = list(grid = obs.gridtype, + method = obs.gridmethod, + crop = c(lons.min, lons.max, + lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + +dates_file <- paste0(dates_file, '01') +dim(dates_file) <- dim(Subset(hcst, + along=c('dat','var', + 'latitude', 'longitude', 'member'), + list(1,1,1,1,1), drop="selected")) + +file_dates <- paste0(file_dates, '01') +dim(file_dates) <- dim(Subset(hcst, + along=c('dat','var', + 'latitude', 'longitude', 'member', 'time'), + list(1,1,1,1,1,1), drop="selected")) + + + +obs.NA_dates.ind <- Apply(obs, + fun=(function(x){ all(is.na(x))}), + target_dims=c('time', 'latitude', 'longitude'))[[1]] +obs.NA_dates <- dates_file[obs.NA_dates.ind] +obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] +obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", + freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") + +if (any(is.na(hcst))){ + fatal(logger, + paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", + " ################################################# ", + " ###### MISSING FILES #### ", + " ################################################# ", + "hcst files:", + hcst.NA_files, + " ################################################# ", + " ################################################# ", + sep="\n")) + quit(status = 1) +} + +if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ + fatal(logger, + paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", + " ################################################# ", + " ###### MISSING FILES #### ", + " ################################################# ", + "obs files:", + obs.NA_files, + " ################################################# ", + " ################################################# ", + sep="\n")) + quit(status=1) +} + +info(logger, + "######### DATA LOADING COMPLETED SUCCESFULLY ##############") + +default_dims <- c(dat = 1, var = 1, sweek = 1, + sday = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, member = 1) + +default_dims[names(dim(obs))] <- dim(obs) +dim(obs) <- default_dims + +lon <- attr(obs, 'Variables')$dat1$longitude +lat <- attr(obs, 'Variables')$dat1$latitude +hcst.times <- attr(hcst, 'Variables')$common$time +hcst.times <- sort(unique(sapply(as.character(hcst.times), + substr, 1, 10))) + +#obs<-Subset(obs,c('dat','var'),list(1,1),drop='selected') +#hcst<-Subset(hcst,c('dat','var'),list(1,1),drop='selected') +#if (stream == "fcst"){ +# fcst<-Subset(fcst,c('dat','var'),list(1,1),drop='selected') +#} + +#filters negative values in accum vars +if (accum){ + obs[obs < 0 ] <- 0 + hcst[hcst < 0 ] <- 0 +} + +sdates.hcst <- file_dates +leadtimes.hcst <- dates_file + diff --git a/modules/data_load/regrid.R b/modules/data_load/regrid.R new file mode 100644 index 0000000000000000000000000000000000000000..b16738166fe6e6cd2185e527b79f588a5ec6cc99 --- /dev/null +++ b/modules/data_load/regrid.R @@ -0,0 +1,36 @@ + +get_regrid_params <- function(recipe){ + + if (tolower(recipe$Analysis$Regrid$type) == 'reference') { + + regrid_params <- list(fcst.gridtype=reference_descrip$regrid, + fcst.gridmethod=recipe$Analysis$Regrid$method, + fcst.tranform=CDORemapper, + obs.gridtype=NULL, + obs.gridmethod=NULL, + obs.tranform=NULL) + + } else if (tolower(recipe$Analysis$Regrid$type) == 'system') { + + regrid_params <- list(fcst.gridtype=NULL, + fcst.gridmethod=NULL, + fcst.tranform=NULL, + obs.gridtype=dataset_descrip$regrid, + obs.gridmethod=recipe$Analysis$Regrid$method, + obs.tranform=CDORemapper) + + } else { + + regrid_params <- list(fcst.gridtype=reference_descrip$regrid, + fcst.gridmethod=recipe$Analysis$Regrid$method, + fcst.tranform=CDORemapper, + obs.gridtype=dataset_descrip$regrid, + obs.gridmethod=recipe$Analysis$Regrid$method, + obs.tranform=CDORemapper) + } + + return(regrid_params) + +} + + diff --git a/modules/verifications.R b/modules/verifications.R index 2451b7d7f7dc57f636cacd0933d7afe6f03aacd1..df1026f43a25dfbfe51b5eb00fe3252c370772f8 100644 --- a/modules/verifications.R +++ b/modules/verifications.R @@ -1,11 +1,13 @@ for (indep in verifications$independent) { + + ## TODO: re-write this condition if (length(indep) == 1) { info(logger, paste(" #*****************************************#", " # Starting Independent verification of Indicator ", indep, sep = "\n")) - ind_table <- read_yaml(paste0(conf$code_dir, + ind_table <- read_yaml(paste0(recipe$Run$code_dir, "conf/indicators_table.yml")) variable <- ind_table[indep[[1]]][[1]]$ECVs store.freq <- ind_table[indep[[1]]][[1]]$freq @@ -20,91 +22,78 @@ for (indep in verifications$independent) { store.freq <- indep$freq ind.fun <- NULL } - hcst.inityear <- recipe$Analysis$Time$hcst_start - hcst.endyear <- recipe$Analysis$Time$hcst_end - ltmin <- recipe$Analysis$Time$leadtimemin - ltmax <- recipe$Analysis$Time$leadtimemax - lats.min <- recipe$Analysis$Region$Regional[[1]]$latmin - lats.max <- recipe$Analysis$Region$Regional[[1]]$latmax - lons.min <- recipe$Analysis$Region$Regional[[1]]$lonmin - lons.max <- recipe$Analysis$Region$Regional[[1]]$lonmax - stream <- verifications$stream - # Is multimodel FALSE - if (recipe$Analysis$Datasets$Multimodel) { - # is there a multimodel load step in S2S4E backend? - } else { - for (sys in 1:length(recipe$Analysis$Datasets$System)) { - fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name - for (ref in 1:length(recipe$Analysis$Datasets$Reference)) { - ref.name <- recipe$Analysis$Datasets$Reference[[ref]]$name - for (sdate in verifications$fcst.sdate) { - fcst.sdate <- sdate - source("modules/data_load/seas5.load.R") - # Translate $Workflow to call modules: - ## 1) Clean step of the workflow set as FALSE or NULL or None: - modules <- names(recipe$Analysis$Workflow) - for (mod in modules) { - if ((is.logical(recipe$Analysis$Workflow[[mod]][[1]]) && - recipe$Analysis$Workflow[[mod]][[1]] == FALSE) || - recipe$Analysis$Workflow[[mod]][[1]] == 'None' || - is.null(recipe$Analysis$Workflow[[mod]][[1]])) { - info(logger, - paste("The module", mod, "won't be executed.")) - recipe$Analysis$Workflow <- recipe$Analysis$Workflow[ - -which(names(recipe$Analysis$Workflow) == mod)] - } - } - modules <- names(recipe$Analysis$Workflow) - ## 2) Create a common format for all modules - tmp_modules <- list() - for (mod in modules) { - if (length(recipe$Analysis$Workflow[[mod]]) > 1) { - names(recipe$Analysis$Workflow[[mod]]) <- - rep(mod, length(recipe$Analysis$Workflow[[mod]])) - tmp_modules <- append(tmp_modules, - recipe$Analysis$Workflow[[mod]]) - } else { - tmp_modules <- append(tmp_modules, - recipe$Analysis$Workflow[mod]) - } - } - modules <- tmp_modules - ## 3) Call each module and pass arguments: - for (mod in 1:length(modules)) { - # In case multiple calls to a module e.g.: Skill_1 --> Skill - if (any(strsplit(names(modules)[mod], "")[[1]] == "_")) { - module_name <- substr(names(modules)[mod], start = 1, - stop = which(strsplit(names(modules)[mod], "")[[1]] == "_") - 1) - } else { - module_name <- names(modules)[mod] - } - info(logger, paste("Start running module", module_name)) - module_code <- file.path(conf$code_dir, "modules", - module_name, - paste0(module_name, ".R")) - # Define variables setup in the recipe - for (param in names(modules[[mod]])) { - if (length(modules[[mod]][[param]])) { - tmp <- paste(modules[[mod]][[param]], - collapse = ",") - } else { - tmp <- modules[[mod]][[param]] - } - info(logger, paste("Variable *", param, "* set as", tmp)) - assign(as.character(param), - modules[[mod]][[param]]) - } - source(module_code) - # TO DO: - # Check the arguments of each module can be an option here: - #... - info(logger, paste(module_name, "module run finished.")) - } - info(logger, paste(sdate, "start date finished.")) - } - info(logger, paste(ref, "reference dataset finished.")) + + + # ========================================================= + # DATA LOADING -------------------------------------------- + # ========================================================= + + source(recipe$Analysis$Data_load$module) + + # ========================================================= + # WORKFLOW MODULES RUN ------------------------------------ + # ========================================================= + + # Translate $Workflow to call modules: + ## 1) Clean step of the workflow set as FALSE or NULL or None: + modules <- names(recipe$Analysis$Workflow) + for (mod in modules) { + if ((is.logical(recipe$Analysis$Workflow[[mod]][[1]]) && + recipe$Analysis$Workflow[[mod]][[1]] == FALSE) || + recipe$Analysis$Workflow[[mod]][[1]] == 'None' || + is.null(recipe$Analysis$Workflow[[mod]][[1]])) { + info(logger, + paste("The module", mod, "won't be executed.")) + recipe$Analysis$Workflow <- recipe$Analysis$Workflow[ + -which(names(recipe$Analysis$Workflow) == mod)] + } + } + + modules <- names(recipe$Analysis$Workflow) + ## 2) Create a common format for all modules + tmp_modules <- list() + for (mod in modules) { + if (length(recipe$Analysis$Workflow[[mod]]) > 1) { + names(recipe$Analysis$Workflow[[mod]]) <- + rep(mod, length(recipe$Analysis$Workflow[[mod]])) + tmp_modules <- append(tmp_modules, + recipe$Analysis$Workflow[[mod]]) + } else { + tmp_modules <- append(tmp_modules, + recipe$Analysis$Workflow[mod]) + } + + modules <- tmp_modules + ## 3) Call each module and pass arguments: + for (mod in 1:length(modules)) { + # In case multiple calls to a module e.g.: Skill_1 --> Skill + if (any(strsplit(names(modules)[mod], "")[[1]] == "_")) { + module_name <- substr(names(modules)[mod], start = 1, + stop = which(strsplit(names(modules)[mod], "")[[1]] == "_") - 1) + } else { + module_name <- names(modules)[mod] + } + info(logger, paste("Start running module", module_name)) + module_code <- file.path(conf$code_dir, "modules", + module_name, + paste0(module_name, ".R")) + # Define variables setup in the recipe + for (param in names(modules[[mod]])) { + if (length(modules[[mod]][[param]])) { + tmp <- paste(modules[[mod]][[param]], + collapse = ",") + } else { + tmp <- modules[[mod]][[param]] + } + info(logger, paste("Variable *", param, "* set as", tmp)) + assign(as.character(param), + modules[[mod]][[param]]) } - info(logger, paste(sys, "systemn finished.")) + source(module_code) + # TO DO: + # Check the arguments of each module can be an option here: + #... + info(logger, paste(module_name, "module run finished.")) } } } diff --git a/out-logs/ini b/out-logs/ini old mode 100644 new mode 100755 diff --git a/recipes/conf_user.yml b/recipes/conf_user.yml deleted file mode 100644 index c394d368088c9675730504795eb634f0e15778a9..0000000000000000000000000000000000000000 --- a/recipes/conf_user.yml +++ /dev/null @@ -1,5 +0,0 @@ -# Users configuration to point correct directories - -output_dir: /esarchive/scratch/nperez/git/auto-s2s/out-logs/ -code_dir: /esarchive/scratch/nperez/git/auto-s2s/ - diff --git a/recipes/seasonal_oper.yml b/recipes/seasonal_oper.yml index 9598f4495ff4cc95219895bea6e315cc55df984c..e2d379d2b16c528eeb963887950f2bff24a128e4 100644 --- a/recipes/seasonal_oper.yml +++ b/recipes/seasonal_oper.yml @@ -1,47 +1,71 @@ +# +# ___ ___ _ _ _ +# / __| / __| ___ _ __ ___ _ _ __ _ | |_ (_) ___ _ _ __ _ | | +# | (__ \__ \ / _ \ | '_ \ / -_) | '_| / _` | | _| | | / _ \ | ' \ / _` | | | +# \___| |___/ \___/ | .__/ \___| |_| \__,_| \__| |_| \___/ |_||_| \__,_| |_| +# |_| + +################################################################################# +# RECIPE DESCRIPTION +################################################################################# + Description: - Author: N.Pérez-Zanón - Info: This is a test to transform s2s4e data-analysis for SEAS5 + Author: N.Pérez-Zanón # [Optional?/str] +Info: This is a test to transform s2s4e data-analysis for SEAS5 # [Optional?/str] + +################################################################################# +# ANALYSIS CONF +################################################################################# Analysis: - Horizon: Seasonal + Horizon: Seasonal # [Mandatory/str (either seasonal, subseasonal, decadal)] Variables: - ECVs: + ECVs: # [Mandatory/list of dicts {name: , freq: } or None] - {name: tas, freq: monthly_mean} - Indicators: + Indicators: # list of strs? - None Datasets: System: - - name: system5c3s - Multimodel: False - Reference: - - {name: era5} + - name: system5c3s # list of strs + Multimodel: False # boolean, if true system above are aggregated into single multi-model + Reference: # single dict? in the future multiple ref can be an asset + - {name: era5} # str Time: sdate: - fcst_year: 2021 - fcst_month: 07 - fcst_day: 01 - hcst_start: 2000 - hcst_end: 2002 - leadtimemin: 2 - leadtimemax: 4 + fcst_year: ["2020", "2021"] # list of ints or None (case where only hcst is verfied) + fcst_sday: ["0701", "0601"] # int or list of ints with MMDD format + hcst_start: "1993" # int mandatory + hcst_end: "2016" # int mandatory + leadtimemin: 2 # int mandatory + leadtimemax: 4 # int mandatory [units????] Region: - Global: TRUE - Aggregation: False - Regional: + Global: TRUE # bool mandatory + Aggregation: False # bool mandatory + Regional: # list of dicts mandatory - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} Regrid: - method: bilinear - type: system + method: bilinear # str mandatory + type: system # str either system or reference mandatory + Data_load: + module: "modules/data_load/seas5.load.R" Workflow: Calibration: - method: SBC + method: SBC # str Skill: - metric: RPSS + metric: RPSS # str Indicators: - index: FALSE - Output_format: S2S4E + index: FALSE # bool + Output_format: S2S4E # str + +################################################################################# +# Run CONF +################################################################################# Run: - Loglevel: INFO - Terminal: TRUE + Loglevel: INFO # str + Terminal: TRUE # bool + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + + diff --git a/recipes/tests/execute_tests.R b/recipes/tests/execute_tests.R index 44292f1600a2e317c5f3d18a35db9e73d3b44bed..2fa6a1373a22b95c5572e6e5aaf9372fe83188ef 100644 --- a/recipes/tests/execute_tests.R +++ b/recipes/tests/execute_tests.R @@ -1,7 +1,6 @@ library(yaml) args <- NULL; -args[1] <- "recipes/conf_user.yml"; # Function to run tests: # source_lines("/esarchive/scratch/nperez/git/startR/inst/doc/usecase/ex2_1_timedim.R", @@ -15,31 +14,31 @@ source_lines <- function(file, start, end, ...) { # ------------------------------------------ # Section to check recipes that should work: -args[2] <- "recipes/tests/seasonal_testWorkflow1.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow1.yml" source_lines("OperationalCS.R", start = 14, end = 50) # Calibration method None --> raw data verification -args[2] <- "recipes/tests/seasonal_testWorkflow4.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow4.yml" source_lines("OperationalCS.R", start = 14, end = 50) # Calibration: None --> raw data verification -args[2] <- "recipes/tests/seasonal_testWorkflow5.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow5.yml" source_lines("OperationalCS.R", start = 14, end = 50) # Case Skill_1 and Skill_2 when multiple times needed -args[2] <- "recipes/tests/seasonal_testWorkflow7.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow7.yml" source_lines("OperationalCS.R", start = 14, end = 50) # Indicator -args[2] <- "recipes/tests/seasonal_testWorkflow8.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow8.yml" source_lines("OperationalCS.R", start = 14, end = 50) # ------------------------------------------ # Section to check recipes that should fail: ## This should fail because there is no Horizon: -args[2] <- "recipes/tests/seasonal_testWorkflow2.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow2.yml" source_lines("OperationalCS.R", start = 14, end = 50) ## This should fail because there are 2 Calibration options: -args[2] <- "recipes/tests/seasonal_testWorkflow3.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow3.yml" source_lines("OperationalCS.R", start = 14, end = 50) ## This fails because it is not allow repeating the name Skill -args[2] <- "recipes/tests/seasonal_testWorkflow6.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow6.yml" source_lines("OperationalCS.R", start = 14, end = 50) diff --git a/recipes/tests/seasonal_testWorkflow1.yml b/recipes/tests/seasonal_testWorkflow1.yml index c3b876132ced9a482d78a14dd30f5f4e5b147abe..3c9e55f67a6e05e7ba971cf34972f728a0187a7e 100644 --- a/recipes/tests/seasonal_testWorkflow1.yml +++ b/recipes/tests/seasonal_testWorkflow1.yml @@ -49,3 +49,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow2.yml b/recipes/tests/seasonal_testWorkflow2.yml index e31e4e5ac3d84f0fa13974acdbad3d02fd748efc..4b05eb8cf52eb8aa65763a5101a1bcc44f030f98 100644 --- a/recipes/tests/seasonal_testWorkflow2.yml +++ b/recipes/tests/seasonal_testWorkflow2.yml @@ -48,3 +48,7 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + + diff --git a/recipes/tests/seasonal_testWorkflow3.yml b/recipes/tests/seasonal_testWorkflow3.yml index 26754f23cab73cbc4ec27a06cb2fae5b3c8f6c0f..2b544fa6b5df9d27e1cb3ed00c5a9c6682418757 100644 --- a/recipes/tests/seasonal_testWorkflow3.yml +++ b/recipes/tests/seasonal_testWorkflow3.yml @@ -48,3 +48,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow4.yml b/recipes/tests/seasonal_testWorkflow4.yml index 2b7992eec82d89aa509f6020bf79993e8e28a456..e3f9499c498336c50f760ead2d56795bda2d7346 100644 --- a/recipes/tests/seasonal_testWorkflow4.yml +++ b/recipes/tests/seasonal_testWorkflow4.yml @@ -49,3 +49,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow5.yml b/recipes/tests/seasonal_testWorkflow5.yml index 04700ed9ac133998360e59a73f78aa01d9a4efed..7029db3c77d4e704a65d2827fb951a8b6f326e52 100644 --- a/recipes/tests/seasonal_testWorkflow5.yml +++ b/recipes/tests/seasonal_testWorkflow5.yml @@ -47,3 +47,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow6.yml b/recipes/tests/seasonal_testWorkflow6.yml index 45033b6f1ea992eae1123597249749f1bd72aec5..9441758207b5903d7b5079f125bbcef6b68badf0 100644 --- a/recipes/tests/seasonal_testWorkflow6.yml +++ b/recipes/tests/seasonal_testWorkflow6.yml @@ -49,3 +49,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow7.yml b/recipes/tests/seasonal_testWorkflow7.yml index 093f697cf5a1b503670856cbb5b020d2d79a1350..595b677bb250555170ca6ccb1d400ade906a5042 100644 --- a/recipes/tests/seasonal_testWorkflow7.yml +++ b/recipes/tests/seasonal_testWorkflow7.yml @@ -49,3 +49,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow8.yml b/recipes/tests/seasonal_testWorkflow8.yml index b4e57216310637fdafebc7effd92fdf29b0e0549..b6d0c66247a01c955aefdae1adebb5c0f7364307 100644 --- a/recipes/tests/seasonal_testWorkflow8.yml +++ b/recipes/tests/seasonal_testWorkflow8.yml @@ -47,3 +47,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ \ No newline at end of file diff --git a/tools/add_dims.R b/tools/add_dims.R index aa770eb81825b94e57c3fe04c53c6443ef8fa6bd..ce9445c8db022b132d18b51bc231c7964376a2a8 100644 --- a/tools/add_dims.R +++ b/tools/add_dims.R @@ -1,7 +1,7 @@ # Function created in S2S$E-backend. Copied from: # https://earth.bsc.es/gitlab/es/S2S4E-backend-BSC/-/blob/master/data-analysis/SEAS5/seas5.load.R add_dims <- function(data){ - default_dims <- c(sweek = 1, sday = 1, syear = length(data)) + default_dims <- c(sdate_hcst_1 = 1, sdate_hcst_2 = 1, sdate_hcst_year = length(data)) default_dims[names(dim(data))] <- dim(data) dim(data) <- default_dims return(data) diff --git a/tools/check_conf.R b/tools/check_conf.R deleted file mode 100644 index 70ffa9f0dcf5f06355e3b9d1030dde97273a7bd4..0000000000000000000000000000000000000000 --- a/tools/check_conf.R +++ /dev/null @@ -1,21 +0,0 @@ - - - -check_conf <- function(conf, file, logger) { - # conf: yaml user config already read it - # file: name of the file to clarify messages - info(logger, paste("Checking user configuration", file)) - if (!('output_dir' %in% names(conf))) { - error(logger, - paste("The user configuration file should have element 'output_dir'.")) - } - if (!is.character(conf$output_dir)) { - error(logger, - paste("The element 'output_dir' in", file,"file ", - "should be a character string indicating the path ", - "where to save the outputs.")) - } - info(logger, "User configuration checked succsessfully.") -} - - diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 2b7479d65cc9d993ab8d04587ff9a8d4e557d08e..34d046056ae4fef796013136748254cefd714d81 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,41 +1,40 @@ -check_recipe <- function(recipe, file, conf, logger) { +check_recipe <- function(recipe, logger) { + # recipe: yaml recipe already read it - # file: name of the file to clarify messages - # conf: configuration file read to load the tables of definitions # output: errors or the total number of workflow (vars x regions) to compute - info(logger, paste("Checking recipe", file)) + + info(logger, paste("Checking recipe", recipe$filename)) + + # --------------------------------------------------------------------- + # ANALYSIS CHECKS + # --------------------------------------------------------------------- + + TIME_SETTINGS = c('sdate','leadtimemin','leadtimemax','hcst_start','hcst_end') + PARAMS = c('Horizon','Time','Variables','Region','Regrid','Workflow','Datasets') + HORIZONS <- c('Subseasonal','Seasonal','Decadal') + # create output dirs: if (!any(names(recipe) %in% "Analysis")) { - error(logger, "The recipe should has a element 'Analysis'.") + error(logger, "The recipe should contain an element called 'Analysis'.") } - params <- c('Horizon', 'Time', 'Variables', 'Region', - 'Regrid', 'Workflow', 'Datasets') - if (!all(params %in% names(recipe$Analysis))) { - params <- paste(params, collapse = ' ') + + if (!all(PARAMS %in% names(recipe$Analysis))) { error(logger, paste("The element 'Analysis' in the recipe should contain these", - "elements:", params)) - stop("EXECUTION FAILED") + "elements:", paste(PARAMS, collapse = " "))) } - horizons <- c('Subseasonal', 'Seasonal', 'Decadal') - if (!any(horizons %in% recipe$Analysis$Horizon)) { - horizons <- paste(horizons, collapse = " ") + + if (!any(HORIZONS %in% recipe$Analysis$Horizon)) { error(logger, - paste("The element 'Horizon' in the recipe", - "should be one of the followings:", - horizons)) - stop("EXECUTION FAILED") + "The element 'Horizon' in the recipe should be one of the followings:", + paste(HORIZONS, collapse = " ")) } # Check temporal settings and # count the number of verifications - time_settings <- c('sdate', 'leadtimemin', 'leadtimemax', - 'hcst_start', 'hcst_end') - if (!all(time_settings %in% names(recipe$Analysis$Time))) { - time_settings <- paste(time_settings, collapse = " ") + if (!all(TIME_SETTINGS %in% names(recipe$Analysis$Time))) { error(logger, paste("The element 'Time' in the recipe should contain these elements:", - time_settings)) - stop("EXECUTION FAILED") + paste(TIME_SETTINGS, collapse = " "))) } if (is.null(recipe$Analysis$Time$sdate$fcst_year) || recipe$Analysis$Time$sdate$fcst_year == 'None') { @@ -55,6 +54,8 @@ check_recipe <- function(recipe, file, conf, logger) { error(logger, paste("The element 'fcst_month' in the recipe should be defined.")) } + + fcst.sdate <- NULL for (year in recipe$Analysis$Time$sdate$fcst_year) { for (month in recipe$Analysis$Time$sdate$fcst_month) { @@ -144,15 +145,58 @@ check_recipe <- function(recipe, file, conf, logger) { # are numeric? class list mode list } } + + # --------------------------------------------------------------------- + # RUN CHECKS + # --------------------------------------------------------------------- + + RUN_FIELDS = c("Loglevel","Terminal","output_dir","code_dir") + LOG_LEVELS = c("INFO","DEBUG","WARNING","ERROR") + + if (!any(names(recipe) %in% "Run")) { + error(logger, "The recipe should contain an element called 'Run'.") + } + if (!all(RUN_FIELDS %in% names(recipe$Run))) { + error(logger, paste0("Run should contain the fields: ", + paste(RUN_FIELDS,collapse=", "), ".")) + } + if (!is.character(recipe$Run$output_dir)) { + error(logger, + paste("The Run element 'output_dir' in", recipe$filename,"file ", + "should be a character string indicating the path ", + "where to save the outputs.")) + } + if (!is.character(recipe$Run$code_dir)) { + error(logger, + paste("The Run element 'code_dir' in", recipe$filename,"file ", + "should be a character string indicating the path ", + "where the code is.")) + } + if (!is.logical(recipe$Run$Terminal)) { + error(logger, + paste("The Run element 'Terminal' in", recipe$filename,"file ", + "should be a boolean value indicating wether to print or not the log", + "in the terminal.")) + } + if (!is.character(recipe$Run$Loglevel) || !any(recipe$Run$Loglevel %in% LOG_LEVELS)) { + error(logger, + paste("The Run element 'Loglevel' in", recipe$filename,"file ", + "should be a character string indicating one of the levels available: ", + paste0(LOG_LEVELS,collapse='/'))) + } + + # --------------------------------------------------------------------- + # WORKFLOW CHECKS + # --------------------------------------------------------------------- # Check workflow: need to define restrictions? # e.g. only one calibration method - nverifications <- check_number_of_dependent_verifications(recipe, conf) + nverifications <- check_number_of_dependent_verifications(recipe) info(logger, paste("Start Dates", paste(fcst.sdate, collapse = " "))) info(logger, "Recipe checked succsessfully.") return(append(nverifications, fcst.sdate)) } -check_number_of_dependent_verifications <- function(recipe, conf) { +check_number_of_dependent_verifications <- function(recipe) { # Number of verifications depends on the variables and indicators requested # and the order of the workflow: # workflow: correction + indicator --> only 1 variable is calibrated @@ -182,7 +226,7 @@ check_number_of_dependent_verifications <- function(recipe, conf) { } else { ecvs <- recipe$Analysi$Variables$ECVs inds <- recipe$Analysi$Variables$Indicators - ind_table <- read_yaml(paste0(conf$code_dir, + ind_table <- read_yaml(paste0(recipe$Run$code_dir, "conf/indicators_table.yml")) # first, loop on ecvs if any and compare to indicators done <- NULL # to gather the indicators reviewed diff --git a/tools/libs.R b/tools/libs.R index 8d9856d7697de1c89031bcd98a1dd8afb2a32163..16a5c28cd7532c9ec46cb0694b5cf187416f7003 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -20,9 +20,8 @@ library(multiApply) # source("Calibration_fcst4.R") # source("R_Reorder.R") # source("R_CST_MergeDims.R") -#setwd(conf$code_dir) +#setwd(recipe$Run$code_dir) # # To be removed when new package is done by library(CSOperational) - source(paste0(conf$code_dir, "tools/check_recipe.R")) - source(paste0(conf$code_dir, "tools/check_conf.R")) - source(paste0(conf$code_dir, "tools/prepare_outputs.R")) - source(paste0(conf$code_dir, "tools/add_dims.R")) # Not sure if necessary yet +source(paste0(recipe$Run$code_dir, "tools/check_recipe.R")) +source(paste0(recipe$Run$code_dir, "tools/prepare_outputs.R")) +source(paste0(recipe$Run$code_dir, "tools/add_dims.R")) # Not sure if necessary yet diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 192956588f41bc4cb36bc0503c4382edbc898b7b..08a49510005941c89ded4878e32c6699c91ec3a9 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -1,15 +1,19 @@ -prepare_outputs <- function(recipe, file, conf) { + +prepare_outputs <- function(recipe) { + # recipe: the content of the readed recipe # file: the recipe file name -# conf: the content of the readed config user file + + output_dir = recipe$Run$output_dir # Create output folders: - folder_name <- paste0(gsub(".yml", "", gsub("/", "_", file)), "_", + folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$filename)), "_", gsub(" ", "", gsub(":", "", gsub("-", "", Sys.time())))) - dir.create(file.path(conf$output_dir, folder_name, 'plots'), recursive = TRUE) - dir.create(file.path(conf$output_dir, folder_name, 'outputs')) - dir.create(file.path(conf$output_dir, folder_name, 'logs')) - file.copy(file, file.path(conf$output_dir, folder_name, 'logs')) - logfile <- file.path(conf$output_dir, folder_name, 'logs', 'log.txt') + dir.create(file.path(output_dir, folder_name, 'plots'), recursive = TRUE) + dir.create(file.path(output_dir, folder_name, 'outputs')) + dir.create(file.path(output_dir, folder_name, 'logs')) + file.copy(recipe$filename, file.path(output_dir, folder_name, 'logs')) + logfile <- file.path(output_dir, folder_name, 'logs', 'log.txt') + # Set default behaviour of log output file: if (is.null(recipe$Run)) { recipe$Run <- list(Loglevel = 'INFO', Terminal = TRUE) @@ -31,4 +35,4 @@ prepare_outputs <- function(recipe, file, conf) { layout = default_log_layout()))) } return(list(logger = logger, logname = logfile)) -} +} \ No newline at end of file