From 682d21fb425199fdf6b7d6ba960d374125f6d591 Mon Sep 17 00:00:00 2001 From: lpalma Date: Tue, 31 Aug 2021 11:20:09 +0200 Subject: [PATCH 1/3] merge conf and recipe .yml --- OperationalCS.R | 15 +- modules/data_load/seas5.load.R | 249 --------------------------------- modules/verifications.R | 2 +- recipes/conf_user.yml | 4 +- recipes/seasonal_oper.yml | 73 ++++++---- tools/check_recipe.R | 88 +++++++++--- tools/libs.R | 9 +- tools/prepare_outputs.R | 22 +-- 8 files changed, 146 insertions(+), 316 deletions(-) delete mode 100644 modules/data_load/seas5.load.R diff --git a/OperationalCS.R b/OperationalCS.R index 2f034dbc..342d5252 100644 --- a/OperationalCS.R +++ b/OperationalCS.R @@ -8,24 +8,25 @@ print(args) library(yaml) # To test: -# args <- NULL; args[1] <- "recipes/conf_user.yml"; args[2] <- "recipes/seasonal_oper.yml" -# args <- NULL; args[1] <- "recipes/conf_user.yml"; args[2] <- "recipes/seasonal_complex.yml" +# args <- NULL; args[1] <- "recipes/seasonal_oper.yml" +# args <- NULL; args[1] <- "recipes/seasonal_complex.yml" -conf <- read_yaml(args[1]) -recipe <- read_yaml(args[2]) +recipe <- read_yaml(args[1]) +recipe$filename <- args[1] # Load required libraries source("tools/libs.R") # Create output folder and log: -logger <- prepare_outputs(recipe = recipe, file = args[2], conf = conf) +logger <- prepare_outputs(recipe = recipe) log_file <- logger$logname logger <- logger$logger + # Checks: -check_conf(conf, file = args[1], logger) -verifications <- check_recipe(recipe, file = args[2], conf, logger) +verifications <- check_recipe(recipe, logger) # Go to verification code: capture.output(source("modules/verifications.R"), file = log_file, type ='message', append = TRUE) + diff --git a/modules/data_load/seas5.load.R b/modules/data_load/seas5.load.R deleted file mode 100644 index 4ebd922b..00000000 --- a/modules/data_load/seas5.load.R +++ /dev/null @@ -1,249 +0,0 @@ - - - fcst.month <- substr(fcst.sdate,5,6) - fcst.year <- substr(fcst.sdate,1,4) - - file_dates.fcst <- paste(fcst.year, fcst.month, sep = "") - file_dates <- paste(strtoi(hcst.inityear):strtoi(hcst.endyear), - fcst.month, sep = "") - - - file_dates <- add_dims(file_dates) - file_dates.fcst <- add_dims(file_dates.fcst) -# Take parameters from conf/archive for datasets: -table <- read_yaml(paste0(conf$code_dir, "conf/archive.yml")) -dataset_descrip <- table$archive[which(names(table$archive) == fcst.name)][[1]] -freq.hcst <- unlist(dataset_descrip[store.freq][[1]][variable]) -reference_descrip <- table$archive[which(names(table$archive) == - tolower(ref.name))][[1]] -freq.obs <- unlist(reference_descrip[store.freq][[1]][variable]) -obs.dir <- reference_descrip$src -fcst.dir <- dataset_descrip$src -hcst.dir <- dataset_descrip$src -fcst.nmember <- dataset_descrip$nmember$fcst -hcst.nmember <- dataset_descrip$nmember$hcst - -if ("accum" %in% names(reference_descrip)) { - accum <- unlist(reference_descrip$accum[store.freq][[1]]) -} else { - accum <- FALSE -} -# ----------- - obs.path <- paste0("/esarchive/", - obs.dir, store.freq, "/$var$", - freq.obs,"$var$_$file_date$.nc") - - hcst.path <- paste0("/esarchive/", - hcst.dir, store.freq, "/$var$", - freq.hcst,"$var$_$file_date$01", - ".nc") - - fcst.path <- paste0("/esarchive/", - fcst.dir, store.freq, "/$var$", - freq.hcst,"$var$_$file_date$01", - ".nc") -#------------------------------------------------------------------- -# Regrid: -if (tolower(recipe$Analysis$Regrid$type) == 'reference') { - fcst.gridtype <- reference_descrip$regrid - fcst.gridmethod <- recipe$Analysis$Regrid$method - fcst.tranform <- CDORemapper - obs.gridtype <- NULL - obs.gridmethod <- NULL - obs.tranform <- NULL -} else if (tolower(recipe$Analysis$Regrid$type) == 'system') { - fcst.gridtype <- NULL - fcst.gridmethod <- NULL - fcst.transform <- NULL - obs.gridtype <- dataset_descrip$regrid - obs.gridmethod <- recipe$Analysis$Regrid$method - obs.transform <- CDORemapper -} else { - fcst.gridtype <- recipe$Analysis$Regrid$type - fcst.gridmethod <- recipe$Analysis$Regrid$method - fcst.transform <- CDORemapper - obs.gridtype <- recipe$Analysis$Regrid$type - obs.gridmethod <- recipe$Analysis$Regrid$method - obs.transform <- CDORemapper -} - - - # Timeseries load - #------------------------------------------------------------------- - - if (tolower(stream) == "fcst"){ - - fcst <- Start(dat = fcst.path, - var = variable, - file_date = file_dates.fcst, - time = indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - dataset_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - dataset_descrip$lon_circular_sort$ini, - dataset_descrip$lon_circular_sort$end), - transform = fcst.transform, - transform_params = list(grid = fcst.gridtype, - method = fcst.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude'), - member = c('ensemble')), - member = indices(1:fcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - } - - hcst <- Start(dat = hcst.path, - var = variable, - file_date = file_dates, - time = indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - dataset_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - dataset_descrip$lon_circular_sort$ini, - dataset_descrip$lon_circular_sort$end), - transform = fcst.transform, - transform_params = list(grid = fcst.gridtype, - method = fcst.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude'), - member = c('ensemble')), - member = indices(1:hcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) - hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] - try(hcst.NA_files <- hcst.NA_files[order(hcst.NA_files)]) - - dates <- attr(hcst, 'Variables')$common$time - dates_file <- sapply(dates, format, '%Y%m%d') - dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") - dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member'), - list(1,1,1,1,1), drop="selected")) - - obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - reference_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - reference_descrip$lon_circular_sort$ini, - reference_descrip$lon_circular_sort$end), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - transform = obs.transform, - transform_params = list(grid = obs.gridtype, - method = obs.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - dates_file <- paste0(dates_file, '01') - dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member'), - list(1,1,1,1,1), drop="selected")) - - file_dates <- paste0(file_dates, '01') - dim(file_dates) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member', 'time'), - list(1,1,1,1,1,1), drop="selected")) - - - - obs.NA_dates.ind <- Apply(obs, - fun=(function(x){ all(is.na(x))}), - target_dims=c('time', 'latitude', 'longitude'))[[1]] - obs.NA_dates <- dates_file[obs.NA_dates.ind] - obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] - obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", - freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") - - if (any(is.na(hcst))){ - fatal(logger, - paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", - " ################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "hcst files:", - hcst.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status = 1) - } - - if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ - fatal(logger, - paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", - " ################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "obs files:", - obs.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status=1) - } - - info(logger, - "######### DATA LOADING COMPLETED SUCCESFULLY ##############") - - default_dims <- c(dat = 1, var = 1, sweek = 1, - sday = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, member = 1) - - default_dims[names(dim(obs))] <- dim(obs) - dim(obs) <- default_dims - - lon <- attr(obs, 'Variables')$dat1$longitude - lat <- attr(obs, 'Variables')$dat1$latitude - hcst.times <- attr(hcst, 'Variables')$common$time - hcst.times <- sort(unique(sapply(as.character(hcst.times), - substr, 1, 10))) - - #obs<-Subset(obs,c('dat','var'),list(1,1),drop='selected') - #hcst<-Subset(hcst,c('dat','var'),list(1,1),drop='selected') - #if (stream == "fcst"){ - # fcst<-Subset(fcst,c('dat','var'),list(1,1),drop='selected') - #} - - #filters negative values in accum vars - if (accum){ - obs[obs < 0 ] <- 0 - hcst[hcst < 0 ] <- 0 - if (stream == "fcst"){ fcst[fcst < 0 ] <- 0 } - } - - sdates.hcst <- file_dates - sdates.fcst <- file_dates.fcst - leadtimes.hcst <- dates_file - diff --git a/modules/verifications.R b/modules/verifications.R index 8078762c..63ad32ed 100644 --- a/modules/verifications.R +++ b/modules/verifications.R @@ -6,7 +6,7 @@ print(indep) " # Starting Independent verification of Indicator ", indep, sep = "\n")) - ind_table <- read_yaml(paste0(conf$code_dir, + ind_table <- read_yaml(paste0(recipe$Run$code_dir, "conf/indicators_table.yml")) variable <- ind_table[indep[[1]]][[1]]$ECVs store.freq <- ind_table[indep[[1]]][[1]]$freq diff --git a/recipes/conf_user.yml b/recipes/conf_user.yml index c394d368..a25767bb 100644 --- a/recipes/conf_user.yml +++ b/recipes/conf_user.yml @@ -1,5 +1,5 @@ # Users configuration to point correct directories -output_dir: /esarchive/scratch/nperez/git/auto-s2s/out-logs/ -code_dir: /esarchive/scratch/nperez/git/auto-s2s/ +output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ +code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/seasonal_oper.yml b/recipes/seasonal_oper.yml index 9598f449..886a955d 100644 --- a/recipes/seasonal_oper.yml +++ b/recipes/seasonal_oper.yml @@ -1,47 +1,70 @@ +# +# ___ ___ _ _ _ +# / __| / __| ___ _ __ ___ _ _ __ _ | |_ (_) ___ _ _ __ _ | | +# | (__ \__ \ / _ \ | '_ \ / -_) | '_| / _` | | _| | | / _ \ | ' \ / _` | | | +# \___| |___/ \___/ | .__/ \___| |_| \__,_| \__| |_| \___/ |_||_| \__,_| |_| +# |_| + +################################################################################# +# RECIPE DESCRIPTION +################################################################################# + Description: - Author: N.Pérez-Zanón - Info: This is a test to transform s2s4e data-analysis for SEAS5 + Author: N.Pérez-Zanón # [Optional?/str] +Info: This is a test to transform s2s4e data-analysis for SEAS5 # [Optional?/str] + +################################################################################# +# ANALYSIS CONF +################################################################################# Analysis: - Horizon: Seasonal + Horizon: Seasonal # [Mandatory/str (either seasonal, subseasonal, decadal)] Variables: - ECVs: + ECVs: # [Mandatory/list of dicts {name: , freq: } or None] - {name: tas, freq: monthly_mean} - Indicators: + Indicators: # list of strs? - None Datasets: System: - - name: system5c3s + - name: system5c3s # list of strs Multimodel: False Reference: - - {name: era5} + - {name: era5} # str Time: sdate: - fcst_year: 2021 - fcst_month: 07 - fcst_day: 01 - hcst_start: 2000 - hcst_end: 2002 - leadtimemin: 2 - leadtimemax: 4 + fcst_year: 2021 # list of ints + fcst_month: 07 # list of ints + fcst_day: 01 # list of ints + hcst_start: 2000 # int + hcst_end: 2002 # int + leadtimemin: 2 # int + leadtimemax: 4 # int Region: - Global: TRUE - Aggregation: False - Regional: + Global: TRUE # bool + Aggregation: False # bool + Regional: # list of dicts - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} Regrid: - method: bilinear - type: system + method: bilinear # str + type: system # str Workflow: Calibration: - method: SBC + method: SBC # str Skill: - metric: RPSS + metric: RPSS # str Indicators: - index: FALSE - Output_format: S2S4E + index: FALSE # bool + Output_format: S2S4E # str + +################################################################################# +# Run CONF +################################################################################# Run: - Loglevel: INFO - Terminal: TRUE + Loglevel: INFO # str + Terminal: TRUE # bool + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + + diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 3fea5594..ef452bc8 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,33 +1,40 @@ -check_recipe <- function(recipe, file, conf, logger) { +check_recipe <- function(recipe, logger) { + # recipe: yaml recipe already read it - # file: name of the file to clarify messages - # conf: configuration file read to load the tables of definitions # output: errors or the total number of workflow (vars x regions) to compute - info(logger, paste("Checking recipe", file)) + + info(logger, paste("Checking recipe", recipe$filename)) + + # --------------------------------------------------------------------- + # ANALYSIS CHECKS + # --------------------------------------------------------------------- + + TIME_SETTINGS = c('sdate','leadtimemin','leadtimemax','hcst_start','hcst_end') + PARAMS = c('Horizon','Time','Variables','Region','Regrid','Workflow','Datasets') + HORIZONS <- c('Subseasonal','Seasonal','Decadal') + # create output dirs: if (!any(names(recipe) %in% "Analysis")) { - error(logger, "The recipe should has a element 'Analysis'.") + error(logger, "The recipe should contain an element called 'Analysis'.") } - params <- c('Horizon', 'Time', 'Variables', 'Region', - 'Regrid', 'Workflow', 'Datasets') - if (!all(params %in% names(recipe$Analysis))) { + + if (!all(PARAMS %in% names(recipe$Analysis))) { error(logger, paste("The element 'Analysis' in the recipe should contain these", - "elements:", paste(params, collapse = " "))) + "elements:", paste(PARAMS, collapse = " "))) } - horizons <- c('Subseasonal', 'Seasonal', 'Decadal') - if (!any(horizons %in% recipe$Analysis$Horizon)) { + + if (!any(HORIZONS %in% recipe$Analysis$Horizon)) { error(logger, "The element 'Horizon' in the recipe should be one of the followings:", - paste(horizons, collapse = " ")) + paste(HORIZONS, collapse = " ")) } # Check temporal settings and # count the number of verifications - time_settings <- c('sdate', 'leadtimemin', 'leadtimemax', 'hcst_start', 'hcst_end') - if (!all(time_settings %in% names(recipe$Analysis$Time))) { + if (!all(TIME_SETTINGS %in% names(recipe$Analysis$Time))) { error(logger, paste("The element 'Time' in the recipe should contain these elements:", - paste(time_settings, collapse = " "))) + paste(TIME_SETTINGS, collapse = " "))) } if (is.null(recipe$Analysis$Time$sdate$fcst_year) || recipe$Analysis$Time$sdate$fcst_year == 'None') { @@ -47,6 +54,8 @@ check_recipe <- function(recipe, file, conf, logger) { error(logger, paste("The element 'fcst_month' in the recipe should be defined.")) } + + fcst.sdate <- NULL for (year in recipe$Analysis$Time$sdate$fcst_year) { for (month in recipe$Analysis$Time$sdate$fcst_month) { @@ -110,15 +119,58 @@ check_recipe <- function(recipe, file, conf, logger) { # are numeric? class list mode list } } + + # --------------------------------------------------------------------- + # RUN CHECKS + # --------------------------------------------------------------------- + + RUN_FIELDS = c("Loglevel","Terminal","output_dir","code_dir") + LOG_LEVELS = c("INFO","DEBUG","WARNING","ERROR") + + if (!any(names(recipe) %in% "Run")) { + error(logger, "The recipe should contain an element called 'Run'.") + } + if (!all(RUN_FIELDS %in% names(recipe$Run))) { + error(logger, paste0("Run should contain the fields: ", + paste(RUN_FIELDS,collapse=", "), ".")) + } + if (!is.character(recipe$Run$output_dir)) { + error(logger, + paste("The Run element 'output_dir' in", recipe$filename,"file ", + "should be a character string indicating the path ", + "where to save the outputs.")) + } + if (!is.character(recipe$Run$code_dir)) { + error(logger, + paste("The Run element 'code_dir' in", recipe$filename,"file ", + "should be a character string indicating the path ", + "where the code is.")) + } + if (!is.logical(recipe$Run$Terminal)) { + error(logger, + paste("The Run element 'Terminal' in", recipe$filename,"file ", + "should be a boolean value indicating wether to print or not the log", + "in the terminal.")) + } + if (!is.character(recipe$Run$Loglevel) || !any(recipe$Run$Loglevel %in% LOG_LEVELS)) { + error(logger, + paste("The Run element 'Loglevel' in", recipe$filename,"file ", + "should be a character string indicating one of the levels available: ", + paste0(LOG_LEVELS,collapse='/'))) + } + + # --------------------------------------------------------------------- + # WORKFLOW CHECKS + # --------------------------------------------------------------------- # Check workflow: need to define restrictions? # e.g. only one calibration method - nverifications <- check_number_of_dependent_verifications(recipe, conf) + nverifications <- check_number_of_dependent_verifications(recipe) info(logger, paste("Start Dates", paste(fcst.sdate, collapse = " "))) info(logger, "Recipe checked succsessfully.") return(append(nverifications, fcst.sdate)) } -check_number_of_dependent_verifications <- function(recipe, conf) { +check_number_of_dependent_verifications <- function(recipe) { # Number of verifications depends on the variables and indicators requested # and the order of the workflow: # workflow: correction + indicator --> only 1 variable is calibrated @@ -148,7 +200,7 @@ check_number_of_dependent_verifications <- function(recipe, conf) { } else { ecvs <- recipe$Analysi$Variables$ECVs inds <- recipe$Analysi$Variables$Indicators - ind_table <- read_yaml(paste0(conf$code_dir, + ind_table <- read_yaml(paste0(recipe$Run$code_dir, "conf/indicators_table.yml")) # first, loop on ecvs if any and compare to indicators done <- NULL # to gather the indicators reviewed diff --git a/tools/libs.R b/tools/libs.R index 8d9856d7..16a5c28c 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -20,9 +20,8 @@ library(multiApply) # source("Calibration_fcst4.R") # source("R_Reorder.R") # source("R_CST_MergeDims.R") -#setwd(conf$code_dir) +#setwd(recipe$Run$code_dir) # # To be removed when new package is done by library(CSOperational) - source(paste0(conf$code_dir, "tools/check_recipe.R")) - source(paste0(conf$code_dir, "tools/check_conf.R")) - source(paste0(conf$code_dir, "tools/prepare_outputs.R")) - source(paste0(conf$code_dir, "tools/add_dims.R")) # Not sure if necessary yet +source(paste0(recipe$Run$code_dir, "tools/check_recipe.R")) +source(paste0(recipe$Run$code_dir, "tools/prepare_outputs.R")) +source(paste0(recipe$Run$code_dir, "tools/add_dims.R")) # Not sure if necessary yet diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 19295658..08a49510 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -1,15 +1,19 @@ -prepare_outputs <- function(recipe, file, conf) { + +prepare_outputs <- function(recipe) { + # recipe: the content of the readed recipe # file: the recipe file name -# conf: the content of the readed config user file + + output_dir = recipe$Run$output_dir # Create output folders: - folder_name <- paste0(gsub(".yml", "", gsub("/", "_", file)), "_", + folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$filename)), "_", gsub(" ", "", gsub(":", "", gsub("-", "", Sys.time())))) - dir.create(file.path(conf$output_dir, folder_name, 'plots'), recursive = TRUE) - dir.create(file.path(conf$output_dir, folder_name, 'outputs')) - dir.create(file.path(conf$output_dir, folder_name, 'logs')) - file.copy(file, file.path(conf$output_dir, folder_name, 'logs')) - logfile <- file.path(conf$output_dir, folder_name, 'logs', 'log.txt') + dir.create(file.path(output_dir, folder_name, 'plots'), recursive = TRUE) + dir.create(file.path(output_dir, folder_name, 'outputs')) + dir.create(file.path(output_dir, folder_name, 'logs')) + file.copy(recipe$filename, file.path(output_dir, folder_name, 'logs')) + logfile <- file.path(output_dir, folder_name, 'logs', 'log.txt') + # Set default behaviour of log output file: if (is.null(recipe$Run)) { recipe$Run <- list(Loglevel = 'INFO', Terminal = TRUE) @@ -31,4 +35,4 @@ prepare_outputs <- function(recipe, file, conf) { layout = default_log_layout()))) } return(list(logger = logger, logname = logfile)) -} +} \ No newline at end of file -- GitLab From ab2cf8f323026881526597e8f40e73d5e6499934 Mon Sep 17 00:00:00 2001 From: lpalma Date: Tue, 31 Aug 2021 11:58:43 +0200 Subject: [PATCH 2/3] adapted execute_tests to merged conf and recipe --- recipes/tests/execute_tests.R | 17 ++++++++--------- recipes/tests/seasonal_testWorkflow1.yml | 2 ++ recipes/tests/seasonal_testWorkflow2.yml | 4 ++++ recipes/tests/seasonal_testWorkflow3.yml | 2 ++ recipes/tests/seasonal_testWorkflow4.yml | 2 ++ recipes/tests/seasonal_testWorkflow5.yml | 2 ++ recipes/tests/seasonal_testWorkflow6.yml | 2 ++ recipes/tests/seasonal_testWorkflow7.yml | 2 ++ recipes/tests/seasonal_testWorkflow8.yml | 2 ++ 9 files changed, 26 insertions(+), 9 deletions(-) diff --git a/recipes/tests/execute_tests.R b/recipes/tests/execute_tests.R index 44292f16..2fa6a137 100644 --- a/recipes/tests/execute_tests.R +++ b/recipes/tests/execute_tests.R @@ -1,7 +1,6 @@ library(yaml) args <- NULL; -args[1] <- "recipes/conf_user.yml"; # Function to run tests: # source_lines("/esarchive/scratch/nperez/git/startR/inst/doc/usecase/ex2_1_timedim.R", @@ -15,31 +14,31 @@ source_lines <- function(file, start, end, ...) { # ------------------------------------------ # Section to check recipes that should work: -args[2] <- "recipes/tests/seasonal_testWorkflow1.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow1.yml" source_lines("OperationalCS.R", start = 14, end = 50) # Calibration method None --> raw data verification -args[2] <- "recipes/tests/seasonal_testWorkflow4.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow4.yml" source_lines("OperationalCS.R", start = 14, end = 50) # Calibration: None --> raw data verification -args[2] <- "recipes/tests/seasonal_testWorkflow5.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow5.yml" source_lines("OperationalCS.R", start = 14, end = 50) # Case Skill_1 and Skill_2 when multiple times needed -args[2] <- "recipes/tests/seasonal_testWorkflow7.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow7.yml" source_lines("OperationalCS.R", start = 14, end = 50) # Indicator -args[2] <- "recipes/tests/seasonal_testWorkflow8.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow8.yml" source_lines("OperationalCS.R", start = 14, end = 50) # ------------------------------------------ # Section to check recipes that should fail: ## This should fail because there is no Horizon: -args[2] <- "recipes/tests/seasonal_testWorkflow2.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow2.yml" source_lines("OperationalCS.R", start = 14, end = 50) ## This should fail because there are 2 Calibration options: -args[2] <- "recipes/tests/seasonal_testWorkflow3.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow3.yml" source_lines("OperationalCS.R", start = 14, end = 50) ## This fails because it is not allow repeating the name Skill -args[2] <- "recipes/tests/seasonal_testWorkflow6.yml" +args[1] <- "recipes/tests/seasonal_testWorkflow6.yml" source_lines("OperationalCS.R", start = 14, end = 50) diff --git a/recipes/tests/seasonal_testWorkflow1.yml b/recipes/tests/seasonal_testWorkflow1.yml index c3b87613..3c9e55f6 100644 --- a/recipes/tests/seasonal_testWorkflow1.yml +++ b/recipes/tests/seasonal_testWorkflow1.yml @@ -49,3 +49,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow2.yml b/recipes/tests/seasonal_testWorkflow2.yml index e31e4e5a..4b05eb8c 100644 --- a/recipes/tests/seasonal_testWorkflow2.yml +++ b/recipes/tests/seasonal_testWorkflow2.yml @@ -48,3 +48,7 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + + diff --git a/recipes/tests/seasonal_testWorkflow3.yml b/recipes/tests/seasonal_testWorkflow3.yml index 26754f23..2b544fa6 100644 --- a/recipes/tests/seasonal_testWorkflow3.yml +++ b/recipes/tests/seasonal_testWorkflow3.yml @@ -48,3 +48,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow4.yml b/recipes/tests/seasonal_testWorkflow4.yml index 2b7992ee..e3f9499c 100644 --- a/recipes/tests/seasonal_testWorkflow4.yml +++ b/recipes/tests/seasonal_testWorkflow4.yml @@ -49,3 +49,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow5.yml b/recipes/tests/seasonal_testWorkflow5.yml index 04700ed9..7029db3c 100644 --- a/recipes/tests/seasonal_testWorkflow5.yml +++ b/recipes/tests/seasonal_testWorkflow5.yml @@ -47,3 +47,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow6.yml b/recipes/tests/seasonal_testWorkflow6.yml index 45033b6f..94417582 100644 --- a/recipes/tests/seasonal_testWorkflow6.yml +++ b/recipes/tests/seasonal_testWorkflow6.yml @@ -49,3 +49,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow7.yml b/recipes/tests/seasonal_testWorkflow7.yml index 093f697c..595b677b 100644 --- a/recipes/tests/seasonal_testWorkflow7.yml +++ b/recipes/tests/seasonal_testWorkflow7.yml @@ -49,3 +49,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow8.yml b/recipes/tests/seasonal_testWorkflow8.yml index b4e57216..b6d0c662 100644 --- a/recipes/tests/seasonal_testWorkflow8.yml +++ b/recipes/tests/seasonal_testWorkflow8.yml @@ -47,3 +47,5 @@ Analysis: Run: Loglevel: INFO Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ \ No newline at end of file -- GitLab From 7e69984e92d99a5aa1d14711c14c306a7f3617e0 Mon Sep 17 00:00:00 2001 From: lpalma Date: Wed, 3 Nov 2021 13:00:37 +0100 Subject: [PATCH 3/3] data load non-working version --- MODULES | 20 --- modules/data_load/dates2load.R | 42 +++++ modules/data_load/fcst_seas.load.R | 249 +++++++++++++++++++++++++++++ modules/data_load/hcst_seas.load.R | 204 +++++++++++++++++++++++ modules/data_load/regrid.R | 36 +++++ modules/verifications.R | 157 +++++++++--------- out-logs/ini | 0 recipes/conf_user.yml | 5 - recipes/seasonal_oper.yml | 29 ++-- tools/add_dims.R | 2 +- tools/check_conf.R | 21 --- 11 files changed, 620 insertions(+), 145 deletions(-) create mode 100644 modules/data_load/dates2load.R create mode 100644 modules/data_load/fcst_seas.load.R create mode 100644 modules/data_load/hcst_seas.load.R create mode 100644 modules/data_load/regrid.R mode change 100644 => 100755 out-logs/ini delete mode 100644 recipes/conf_user.yml delete mode 100644 tools/check_conf.R diff --git a/MODULES b/MODULES index 42b06963..a37e52d6 100644 --- a/MODULES +++ b/MODULES @@ -9,11 +9,6 @@ if [ $BSC_MACHINE == "power" ]; then module use /gpfs/projects/bsc32/software/rhel/7.4/ppc64le/POWER9/modules/all/ module load CDO/1.9.4-foss-2018b - module load pynco/0.0.2-foss-2018b-Python-2.7.15 - module load netcdf4-python/1.2.2-foss-2018b-Python-2.7.15 - module load grib_api/1.27.0-foss-2018b - module load NCO/4.7.2-foss-2018b - module load Python/2.7.15-foss-2018b module load R/3.6.1-foss-2018b elif [ $BSC_MACHINE == "nord3" ]; then @@ -25,26 +20,11 @@ elif [ $BSC_MACHINE == "nord3" ]; then module unuse /apps/modules/PRACE module load R/3.6.2-foss-2019b - - ## TODO: to be tested module load CDO/1.9.8-foss-2019b - module load pycdo/1.5.3-foss-2019b-Python-3.7.4 - module load netcdf4-python/1.5.3-foss-2019b-Python-3.7.4 - module load pynco/0.0.3-foss-2019b-Python-3.7.4 - module load Python/3.7.4-GCCcore-8.3.0 - module load NCO/4.9.2-foss-2019b - module load grib_api/1.24.0-foss-2019b - - else - module load grib_api/1.18.0-foss-2015a #netCDF - module load NCO/4.7.3-foss-2015a ncview/2.1.5-foss-2015a pynco/0.0.2-foss-2015a-Python-2.7.9 - module load ECAC/4.0.2 Python/2.7.9-foss-2015a module load CDO/1.9.8-foss-2015a - module load grib_api/1.13.1-foss-2015a - module load netCDF/4.3.3.1-foss-2015a module load R/3.6.1-foss-2015a-bare fi diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R new file mode 100644 index 00000000..19b81380 --- /dev/null +++ b/modules/data_load/dates2load.R @@ -0,0 +1,42 @@ + +# Taking the recipe returns the array of sdates to be loaded +# both for the hcst and fcst. +dates2load <- function(recipe, logger){ + + recipe <- recipe$Analysis$Time + + # hcst dates + file_dates <- outer(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), + recipe$sdate$fcst_sday,paste,sep="") + file_dates <- add_dims(file_dates, "hcst") + + # fcst dates (if fcst_year empty it creates an empty object) + if (! is.null(recipe$sdate$fcst_year)){ + file_dates.fcst <- outer(recipe$sdate$fcst_year,recipe$sdate$fcst_sday,paste,sep="") + file_dates.fcst <- add_dims(file_dates.fcst, "fcst") + } else { + file_dates.fcst <- NULL + info(logger, + paste("fcst_year empty in the recipe, creating empty fcst object...")) + } + + return(list(hcst=file_dates,fcst=file_dates.fcst)) + +} + +# adds the correspondent dims to each sdate array +add_dims <- function(data, type){ + + if (type == "hcst"){ + default_dims <- c(sday = 1, sweek = 1, + syear = dim(data)[1], srun = dim(data)[2]) + } else { + default_dims <- c(fcst_year = dim(data)[1], srun = dim(data)[2]) + } + + default_dims[names(dim(data))] <- dim(data) + dim(data) <- default_dims + return(data) + + } + diff --git a/modules/data_load/fcst_seas.load.R b/modules/data_load/fcst_seas.load.R new file mode 100644 index 00000000..4ebd922b --- /dev/null +++ b/modules/data_load/fcst_seas.load.R @@ -0,0 +1,249 @@ + + + fcst.month <- substr(fcst.sdate,5,6) + fcst.year <- substr(fcst.sdate,1,4) + + file_dates.fcst <- paste(fcst.year, fcst.month, sep = "") + file_dates <- paste(strtoi(hcst.inityear):strtoi(hcst.endyear), + fcst.month, sep = "") + + + file_dates <- add_dims(file_dates) + file_dates.fcst <- add_dims(file_dates.fcst) +# Take parameters from conf/archive for datasets: +table <- read_yaml(paste0(conf$code_dir, "conf/archive.yml")) +dataset_descrip <- table$archive[which(names(table$archive) == fcst.name)][[1]] +freq.hcst <- unlist(dataset_descrip[store.freq][[1]][variable]) +reference_descrip <- table$archive[which(names(table$archive) == + tolower(ref.name))][[1]] +freq.obs <- unlist(reference_descrip[store.freq][[1]][variable]) +obs.dir <- reference_descrip$src +fcst.dir <- dataset_descrip$src +hcst.dir <- dataset_descrip$src +fcst.nmember <- dataset_descrip$nmember$fcst +hcst.nmember <- dataset_descrip$nmember$hcst + +if ("accum" %in% names(reference_descrip)) { + accum <- unlist(reference_descrip$accum[store.freq][[1]]) +} else { + accum <- FALSE +} +# ----------- + obs.path <- paste0("/esarchive/", + obs.dir, store.freq, "/$var$", + freq.obs,"$var$_$file_date$.nc") + + hcst.path <- paste0("/esarchive/", + hcst.dir, store.freq, "/$var$", + freq.hcst,"$var$_$file_date$01", + ".nc") + + fcst.path <- paste0("/esarchive/", + fcst.dir, store.freq, "/$var$", + freq.hcst,"$var$_$file_date$01", + ".nc") +#------------------------------------------------------------------- +# Regrid: +if (tolower(recipe$Analysis$Regrid$type) == 'reference') { + fcst.gridtype <- reference_descrip$regrid + fcst.gridmethod <- recipe$Analysis$Regrid$method + fcst.tranform <- CDORemapper + obs.gridtype <- NULL + obs.gridmethod <- NULL + obs.tranform <- NULL +} else if (tolower(recipe$Analysis$Regrid$type) == 'system') { + fcst.gridtype <- NULL + fcst.gridmethod <- NULL + fcst.transform <- NULL + obs.gridtype <- dataset_descrip$regrid + obs.gridmethod <- recipe$Analysis$Regrid$method + obs.transform <- CDORemapper +} else { + fcst.gridtype <- recipe$Analysis$Regrid$type + fcst.gridmethod <- recipe$Analysis$Regrid$method + fcst.transform <- CDORemapper + obs.gridtype <- recipe$Analysis$Regrid$type + obs.gridmethod <- recipe$Analysis$Regrid$method + obs.transform <- CDORemapper +} + + + # Timeseries load + #------------------------------------------------------------------- + + if (tolower(stream) == "fcst"){ + + fcst <- Start(dat = fcst.path, + var = variable, + file_date = file_dates.fcst, + time = indices(ltmin:ltmax), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = + dataset_descrip$lat_decreasing_sort), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort( + dataset_descrip$lon_circular_sort$ini, + dataset_descrip$lon_circular_sort$end), + transform = fcst.transform, + transform_params = list(grid = fcst.gridtype, + method = fcst.gridmethod, + crop = c(lons.min, lons.max, + lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude'), + member = c('ensemble')), + member = indices(1:fcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + } + + hcst <- Start(dat = hcst.path, + var = variable, + file_date = file_dates, + time = indices(ltmin:ltmax), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = + dataset_descrip$lat_decreasing_sort), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort( + dataset_descrip$lon_circular_sort$ini, + dataset_descrip$lon_circular_sort$end), + transform = fcst.transform, + transform_params = list(grid = fcst.gridtype, + method = fcst.gridmethod, + crop = c(lons.min, lons.max, + lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude'), + member = c('ensemble')), + member = indices(1:hcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) + hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] + try(hcst.NA_files <- hcst.NA_files[order(hcst.NA_files)]) + + dates <- attr(hcst, 'Variables')$common$time + dates_file <- sapply(dates, format, '%Y%m%d') + dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") + dim(dates_file) <- dim(Subset(hcst, + along=c('dat','var', + 'latitude', 'longitude', 'member'), + list(1,1,1,1,1), drop="selected")) + + obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = + reference_descrip$lat_decreasing_sort), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort( + reference_descrip$lon_circular_sort$ini, + reference_descrip$lon_circular_sort$end), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + transform = obs.transform, + transform_params = list(grid = obs.gridtype, + method = obs.gridmethod, + crop = c(lons.min, lons.max, + lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + dates_file <- paste0(dates_file, '01') + dim(dates_file) <- dim(Subset(hcst, + along=c('dat','var', + 'latitude', 'longitude', 'member'), + list(1,1,1,1,1), drop="selected")) + + file_dates <- paste0(file_dates, '01') + dim(file_dates) <- dim(Subset(hcst, + along=c('dat','var', + 'latitude', 'longitude', 'member', 'time'), + list(1,1,1,1,1,1), drop="selected")) + + + + obs.NA_dates.ind <- Apply(obs, + fun=(function(x){ all(is.na(x))}), + target_dims=c('time', 'latitude', 'longitude'))[[1]] + obs.NA_dates <- dates_file[obs.NA_dates.ind] + obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] + obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", + freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") + + if (any(is.na(hcst))){ + fatal(logger, + paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", + " ################################################# ", + " ###### MISSING FILES #### ", + " ################################################# ", + "hcst files:", + hcst.NA_files, + " ################################################# ", + " ################################################# ", + sep="\n")) + quit(status = 1) + } + + if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ + fatal(logger, + paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", + " ################################################# ", + " ###### MISSING FILES #### ", + " ################################################# ", + "obs files:", + obs.NA_files, + " ################################################# ", + " ################################################# ", + sep="\n")) + quit(status=1) + } + + info(logger, + "######### DATA LOADING COMPLETED SUCCESFULLY ##############") + + default_dims <- c(dat = 1, var = 1, sweek = 1, + sday = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, member = 1) + + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + lon <- attr(obs, 'Variables')$dat1$longitude + lat <- attr(obs, 'Variables')$dat1$latitude + hcst.times <- attr(hcst, 'Variables')$common$time + hcst.times <- sort(unique(sapply(as.character(hcst.times), + substr, 1, 10))) + + #obs<-Subset(obs,c('dat','var'),list(1,1),drop='selected') + #hcst<-Subset(hcst,c('dat','var'),list(1,1),drop='selected') + #if (stream == "fcst"){ + # fcst<-Subset(fcst,c('dat','var'),list(1,1),drop='selected') + #} + + #filters negative values in accum vars + if (accum){ + obs[obs < 0 ] <- 0 + hcst[hcst < 0 ] <- 0 + if (stream == "fcst"){ fcst[fcst < 0 ] <- 0 } + } + + sdates.hcst <- file_dates + sdates.fcst <- file_dates.fcst + leadtimes.hcst <- dates_file + diff --git a/modules/data_load/hcst_seas.load.R b/modules/data_load/hcst_seas.load.R new file mode 100644 index 00000000..c0d6ac69 --- /dev/null +++ b/modules/data_load/hcst_seas.load.R @@ -0,0 +1,204 @@ + +source("modules/data_load/dates2load.R") + +# Set params ----------------------------------------- +hcst.inityear <- recipe$Analysis$Time$hcst_start +hcst.endyear <- recipe$Analysis$Time$hcst_end +ltmin <- recipe$Analysis$Time$leadtimemin +ltmax <- recipe$Analysis$Time$leadtimemax +lats.min <- recipe$Analysis$Region$Regional[[1]]$latmin +lats.max <- recipe$Analysis$Region$Regional[[1]]$latmax +lons.min <- recipe$Analysis$Region$Regional[[1]]$lonmin +lons.max <- recipe$Analysis$Region$Regional[[1]]$lonmax +ref.name <- recipe$Analysis$Datasets$Reference$name +systems <- recipe$Analysis$Datasets$System + +stream <- verifications$stream +sdates <- verifications$fcst.sdate + +## TODO: define fcst.name +##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name + +# get sdates array +sdates <- dates2load(recipe, logger) + +# get esarchive datasets dict: +archive <- read_yaml(paste0(recipe$Run$code_dir,"conf/archive.yml"))$archive +dataset_descrip <- archive[[fcst.name]] + +freq.hcst <- unlist(dataset_descrip[[store.freq]][variable]) +reference_descrip <- archive[[ref.name]] +freq.obs <- unlist(reference_descrip[[store.freq]][variable]) +obs.dir <- reference_descrip$src +fcst.dir <- dataset_descrip$src +hcst.dir <- dataset_descrip$src +fcst.nmember <- dataset_descrip$nmember$fcst +hcst.nmember <- dataset_descrip$nmember$hcst + +if ("accum" %in% names(reference_descrip)) { + accum <- unlist(reference_descrip$accum[store.freq][[1]]) +} else { + accum <- FALSE +} + +# ----------- +obs.path <- paste0("/esarchive/", + obs.dir, store.freq, "/$var$", + freq.obs,"$var$_$file_date$.nc") + +hcst.path <- paste0("/esarchive/", + hcst.dir, store.freq, "/$var$", + freq.hcst,"$var$_$file_date$01", + ".nc") + +# Regrid: +#------------------------------------------------------------------- +regrid_params <- get_regrid_params(recipe) + +# Timeseries load +#------------------------------------------------------------------- + +hcst <- Start(dat = hcst.path, + var = variable, + file_date = file_dates, + time = indices(ltmin:ltmax), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = + dataset_descrip$lat_decreasing_sort), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort( + dataset_descrip$lon_circular_sort$ini, + dataset_descrip$lon_circular_sort$end), + transform = fcst.transform, + transform_params = list(grid = fcst.gridtype, + method = fcst.gridmethod, + crop = c(lons.min, lons.max, + lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude'), + member = c('ensemble')), + member = indices(1:hcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + +hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) +hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] +try(hcst.NA_files <- hcst.NA_files[order(hcst.NA_files)]) + +dates <- attr(hcst, 'Variables')$common$time +dates_file <- sapply(dates, format, '%Y%m%d') +dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") +dim(dates_file) <- dim(Subset(hcst, + along=c('dat','var', + 'latitude', 'longitude', 'member'), + list(1,1,1,1,1), drop="selected")) + +obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = + reference_descrip$lat_decreasing_sort), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort( + reference_descrip$lon_circular_sort$ini, + reference_descrip$lon_circular_sort$end), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + transform = obs.transform, + transform_params = list(grid = obs.gridtype, + method = obs.gridmethod, + crop = c(lons.min, lons.max, + lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + +dates_file <- paste0(dates_file, '01') +dim(dates_file) <- dim(Subset(hcst, + along=c('dat','var', + 'latitude', 'longitude', 'member'), + list(1,1,1,1,1), drop="selected")) + +file_dates <- paste0(file_dates, '01') +dim(file_dates) <- dim(Subset(hcst, + along=c('dat','var', + 'latitude', 'longitude', 'member', 'time'), + list(1,1,1,1,1,1), drop="selected")) + + + +obs.NA_dates.ind <- Apply(obs, + fun=(function(x){ all(is.na(x))}), + target_dims=c('time', 'latitude', 'longitude'))[[1]] +obs.NA_dates <- dates_file[obs.NA_dates.ind] +obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] +obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", + freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") + +if (any(is.na(hcst))){ + fatal(logger, + paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", + " ################################################# ", + " ###### MISSING FILES #### ", + " ################################################# ", + "hcst files:", + hcst.NA_files, + " ################################################# ", + " ################################################# ", + sep="\n")) + quit(status = 1) +} + +if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ + fatal(logger, + paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", + " ################################################# ", + " ###### MISSING FILES #### ", + " ################################################# ", + "obs files:", + obs.NA_files, + " ################################################# ", + " ################################################# ", + sep="\n")) + quit(status=1) +} + +info(logger, + "######### DATA LOADING COMPLETED SUCCESFULLY ##############") + +default_dims <- c(dat = 1, var = 1, sweek = 1, + sday = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, member = 1) + +default_dims[names(dim(obs))] <- dim(obs) +dim(obs) <- default_dims + +lon <- attr(obs, 'Variables')$dat1$longitude +lat <- attr(obs, 'Variables')$dat1$latitude +hcst.times <- attr(hcst, 'Variables')$common$time +hcst.times <- sort(unique(sapply(as.character(hcst.times), + substr, 1, 10))) + +#obs<-Subset(obs,c('dat','var'),list(1,1),drop='selected') +#hcst<-Subset(hcst,c('dat','var'),list(1,1),drop='selected') +#if (stream == "fcst"){ +# fcst<-Subset(fcst,c('dat','var'),list(1,1),drop='selected') +#} + +#filters negative values in accum vars +if (accum){ + obs[obs < 0 ] <- 0 + hcst[hcst < 0 ] <- 0 +} + +sdates.hcst <- file_dates +leadtimes.hcst <- dates_file + diff --git a/modules/data_load/regrid.R b/modules/data_load/regrid.R new file mode 100644 index 00000000..b1673816 --- /dev/null +++ b/modules/data_load/regrid.R @@ -0,0 +1,36 @@ + +get_regrid_params <- function(recipe){ + + if (tolower(recipe$Analysis$Regrid$type) == 'reference') { + + regrid_params <- list(fcst.gridtype=reference_descrip$regrid, + fcst.gridmethod=recipe$Analysis$Regrid$method, + fcst.tranform=CDORemapper, + obs.gridtype=NULL, + obs.gridmethod=NULL, + obs.tranform=NULL) + + } else if (tolower(recipe$Analysis$Regrid$type) == 'system') { + + regrid_params <- list(fcst.gridtype=NULL, + fcst.gridmethod=NULL, + fcst.tranform=NULL, + obs.gridtype=dataset_descrip$regrid, + obs.gridmethod=recipe$Analysis$Regrid$method, + obs.tranform=CDORemapper) + + } else { + + regrid_params <- list(fcst.gridtype=reference_descrip$regrid, + fcst.gridmethod=recipe$Analysis$Regrid$method, + fcst.tranform=CDORemapper, + obs.gridtype=dataset_descrip$regrid, + obs.gridmethod=recipe$Analysis$Regrid$method, + obs.tranform=CDORemapper) + } + + return(regrid_params) + +} + + diff --git a/modules/verifications.R b/modules/verifications.R index 067a6844..df1026f4 100644 --- a/modules/verifications.R +++ b/modules/verifications.R @@ -1,4 +1,6 @@ for (indep in verifications$independent) { + + ## TODO: re-write this condition if (length(indep) == 1) { info(logger, paste(" #*****************************************#", @@ -20,91 +22,78 @@ for (indep in verifications$independent) { store.freq <- indep$freq ind.fun <- NULL } - hcst.inityear <- recipe$Analysis$Time$hcst_start - hcst.endyear <- recipe$Analysis$Time$hcst_end - ltmin <- recipe$Analysis$Time$leadtimemin - ltmax <- recipe$Analysis$Time$leadtimemax - lats.min <- recipe$Analysis$Region$Regional[[1]]$latmin - lats.max <- recipe$Analysis$Region$Regional[[1]]$latmax - lons.min <- recipe$Analysis$Region$Regional[[1]]$lonmin - lons.max <- recipe$Analysis$Region$Regional[[1]]$lonmax - stream <- verifications$stream - # Is multimodel FALSE - if (recipe$Analysis$Datasets$Multimodel) { - # is there a multimodel load step in S2S4E backend? - } else { - for (sys in 1:length(recipe$Analysis$Datasets$System)) { - fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name - for (ref in 1:length(recipe$Analysis$Datasets$Reference)) { - ref.name <- recipe$Analysis$Datasets$Reference[[ref]]$name - for (sdate in verifications$fcst.sdate) { - fcst.sdate <- sdate - source("modules/data_load/seas5.load.R") - # Translate $Workflow to call modules: - ## 1) Clean step of the workflow set as FALSE or NULL or None: - modules <- names(recipe$Analysis$Workflow) - for (mod in modules) { - if ((is.logical(recipe$Analysis$Workflow[[mod]][[1]]) && - recipe$Analysis$Workflow[[mod]][[1]] == FALSE) || - recipe$Analysis$Workflow[[mod]][[1]] == 'None' || - is.null(recipe$Analysis$Workflow[[mod]][[1]])) { - info(logger, - paste("The module", mod, "won't be executed.")) - recipe$Analysis$Workflow <- recipe$Analysis$Workflow[ - -which(names(recipe$Analysis$Workflow) == mod)] - } - } - modules <- names(recipe$Analysis$Workflow) - ## 2) Create a common format for all modules - tmp_modules <- list() - for (mod in modules) { - if (length(recipe$Analysis$Workflow[[mod]]) > 1) { - names(recipe$Analysis$Workflow[[mod]]) <- - rep(mod, length(recipe$Analysis$Workflow[[mod]])) - tmp_modules <- append(tmp_modules, - recipe$Analysis$Workflow[[mod]]) - } else { - tmp_modules <- append(tmp_modules, - recipe$Analysis$Workflow[mod]) - } - } - modules <- tmp_modules - ## 3) Call each module and pass arguments: - for (mod in 1:length(modules)) { - # In case multiple calls to a module e.g.: Skill_1 --> Skill - if (any(strsplit(names(modules)[mod], "")[[1]] == "_")) { - module_name <- substr(names(modules)[mod], start = 1, - stop = which(strsplit(names(modules)[mod], "")[[1]] == "_") - 1) - } else { - module_name <- names(modules)[mod] - } - info(logger, paste("Start running module", module_name)) - module_code <- file.path(conf$code_dir, "modules", - module_name, - paste0(module_name, ".R")) - # Define variables setup in the recipe - for (param in names(modules[[mod]])) { - if (length(modules[[mod]][[param]])) { - tmp <- paste(modules[[mod]][[param]], - collapse = ",") - } else { - tmp <- modules[[mod]][[param]] - } - info(logger, paste("Variable *", param, "* set as", tmp)) - assign(as.character(param), - modules[[mod]][[param]]) - } - source(module_code) - # TO DO: - # Check the arguments of each module can be an option here: - #... - info(logger, paste(module_name, "module run finished.")) - } - info(logger, paste(sdate, "start date finished.")) - } - info(logger, paste(ref, "reference dataset finished.")) + + + # ========================================================= + # DATA LOADING -------------------------------------------- + # ========================================================= + + source(recipe$Analysis$Data_load$module) + + # ========================================================= + # WORKFLOW MODULES RUN ------------------------------------ + # ========================================================= + + # Translate $Workflow to call modules: + ## 1) Clean step of the workflow set as FALSE or NULL or None: + modules <- names(recipe$Analysis$Workflow) + for (mod in modules) { + if ((is.logical(recipe$Analysis$Workflow[[mod]][[1]]) && + recipe$Analysis$Workflow[[mod]][[1]] == FALSE) || + recipe$Analysis$Workflow[[mod]][[1]] == 'None' || + is.null(recipe$Analysis$Workflow[[mod]][[1]])) { + info(logger, + paste("The module", mod, "won't be executed.")) + recipe$Analysis$Workflow <- recipe$Analysis$Workflow[ + -which(names(recipe$Analysis$Workflow) == mod)] + } + } + + modules <- names(recipe$Analysis$Workflow) + ## 2) Create a common format for all modules + tmp_modules <- list() + for (mod in modules) { + if (length(recipe$Analysis$Workflow[[mod]]) > 1) { + names(recipe$Analysis$Workflow[[mod]]) <- + rep(mod, length(recipe$Analysis$Workflow[[mod]])) + tmp_modules <- append(tmp_modules, + recipe$Analysis$Workflow[[mod]]) + } else { + tmp_modules <- append(tmp_modules, + recipe$Analysis$Workflow[mod]) + } + + modules <- tmp_modules + ## 3) Call each module and pass arguments: + for (mod in 1:length(modules)) { + # In case multiple calls to a module e.g.: Skill_1 --> Skill + if (any(strsplit(names(modules)[mod], "")[[1]] == "_")) { + module_name <- substr(names(modules)[mod], start = 1, + stop = which(strsplit(names(modules)[mod], "")[[1]] == "_") - 1) + } else { + module_name <- names(modules)[mod] + } + info(logger, paste("Start running module", module_name)) + module_code <- file.path(conf$code_dir, "modules", + module_name, + paste0(module_name, ".R")) + # Define variables setup in the recipe + for (param in names(modules[[mod]])) { + if (length(modules[[mod]][[param]])) { + tmp <- paste(modules[[mod]][[param]], + collapse = ",") + } else { + tmp <- modules[[mod]][[param]] + } + info(logger, paste("Variable *", param, "* set as", tmp)) + assign(as.character(param), + modules[[mod]][[param]]) } - info(logger, paste(sys, "systemn finished.")) + source(module_code) + # TO DO: + # Check the arguments of each module can be an option here: + #... + info(logger, paste(module_name, "module run finished.")) } } } diff --git a/out-logs/ini b/out-logs/ini old mode 100644 new mode 100755 diff --git a/recipes/conf_user.yml b/recipes/conf_user.yml deleted file mode 100644 index a25767bb..00000000 --- a/recipes/conf_user.yml +++ /dev/null @@ -1,5 +0,0 @@ -# Users configuration to point correct directories - -output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ -code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ - diff --git a/recipes/seasonal_oper.yml b/recipes/seasonal_oper.yml index 886a955d..e2d379d2 100644 --- a/recipes/seasonal_oper.yml +++ b/recipes/seasonal_oper.yml @@ -27,27 +27,28 @@ Analysis: Datasets: System: - name: system5c3s # list of strs - Multimodel: False - Reference: + Multimodel: False # boolean, if true system above are aggregated into single multi-model + Reference: # single dict? in the future multiple ref can be an asset - {name: era5} # str Time: sdate: - fcst_year: 2021 # list of ints - fcst_month: 07 # list of ints - fcst_day: 01 # list of ints - hcst_start: 2000 # int - hcst_end: 2002 # int - leadtimemin: 2 # int - leadtimemax: 4 # int + fcst_year: ["2020", "2021"] # list of ints or None (case where only hcst is verfied) + fcst_sday: ["0701", "0601"] # int or list of ints with MMDD format + hcst_start: "1993" # int mandatory + hcst_end: "2016" # int mandatory + leadtimemin: 2 # int mandatory + leadtimemax: 4 # int mandatory [units????] Region: - Global: TRUE # bool - Aggregation: False # bool - Regional: # list of dicts + Global: TRUE # bool mandatory + Aggregation: False # bool mandatory + Regional: # list of dicts mandatory - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} Regrid: - method: bilinear # str - type: system # str + method: bilinear # str mandatory + type: system # str either system or reference mandatory + Data_load: + module: "modules/data_load/seas5.load.R" Workflow: Calibration: method: SBC # str diff --git a/tools/add_dims.R b/tools/add_dims.R index aa770eb8..ce9445c8 100644 --- a/tools/add_dims.R +++ b/tools/add_dims.R @@ -1,7 +1,7 @@ # Function created in S2S$E-backend. Copied from: # https://earth.bsc.es/gitlab/es/S2S4E-backend-BSC/-/blob/master/data-analysis/SEAS5/seas5.load.R add_dims <- function(data){ - default_dims <- c(sweek = 1, sday = 1, syear = length(data)) + default_dims <- c(sdate_hcst_1 = 1, sdate_hcst_2 = 1, sdate_hcst_year = length(data)) default_dims[names(dim(data))] <- dim(data) dim(data) <- default_dims return(data) diff --git a/tools/check_conf.R b/tools/check_conf.R deleted file mode 100644 index 70ffa9f0..00000000 --- a/tools/check_conf.R +++ /dev/null @@ -1,21 +0,0 @@ - - - -check_conf <- function(conf, file, logger) { - # conf: yaml user config already read it - # file: name of the file to clarify messages - info(logger, paste("Checking user configuration", file)) - if (!('output_dir' %in% names(conf))) { - error(logger, - paste("The user configuration file should have element 'output_dir'.")) - } - if (!is.character(conf$output_dir)) { - error(logger, - paste("The element 'output_dir' in", file,"file ", - "should be a character string indicating the path ", - "where to save the outputs.")) - } - info(logger, "User configuration checked succsessfully.") -} - - -- GitLab