From 8a86388be7340849facf1272e8592815cdb61099 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 3 Jul 2024 19:05:16 +0200 Subject: [PATCH 01/20] load subseasonal --- conf/archive_subseasonal.yml | 118 ++++++++ example_scripts/subseasonal_load.R | 19 ++ modules/Loading/Loading.R | 3 + modules/Loading/R/dates2load.R | 56 +++- modules/Loading/R/load_subseasonal.R | 381 +++++++++++++++++++++++++ modules/Loading/R/subseas_file_dates.R | 67 +++++ recipe_subseasonal.yml | 177 ++++++++++++ tools/check_recipe.R | 52 +++- tools/data_summary.R | 19 +- tools/divide_recipe.R | 35 +++ 10 files changed, 907 insertions(+), 20 deletions(-) create mode 100644 conf/archive_subseasonal.yml create mode 100644 example_scripts/subseasonal_load.R create mode 100644 modules/Loading/R/load_subseasonal.R create mode 100644 modules/Loading/R/subseas_file_dates.R create mode 100644 recipe_subseasonal.yml diff --git a/conf/archive_subseasonal.yml b/conf/archive_subseasonal.yml new file mode 100644 index 00000000..52e767ce --- /dev/null +++ b/conf/archive_subseasonal.yml @@ -0,0 +1,118 @@ +esarchive: + src: "/esarchive/" + System: + NCEP-CFSv2: + name: "NCEP CFSv2" + institution: "NOAA NCEP" #? + src: "exp/ncep/cfs-v2/" + weekly_mean: {"tas":"weekly_mean/s2s/tas_f24h/", + "prlr":"weekly_mean/s2s/prlr_f24h/", + "tasmax":"weekly_mean/s2s/tasmax_f24h/", + "tasmin":"weekly_mean/s2s/tasmin_f24h/", + "sfcWind":"weekly_mean/s2s/sfcWind_f24h/", + "rsds":"weekly_mean/s2s/rsds_f24h/"} + daily_mean: {"tas":"daily_mean/s2s/tas_f6h"} + nmember: + fcst: 48 + hcst: 12 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" # Do we need it for subseasonal? + reference_grid: "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20040624.nc" # is it the same as seasonal? + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5/" + weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", + "prlr":"weekly_mean/prlr_f1h-r1440x721cds/"} + daily_mean: {"tas":"daily_mean/tas_f1h-r1440x721cds/", + "rsds":"daily_mean/rsds_f1h-r1440x721cds/", + "prlr":"daily_mean/prlr_f1h-r1440x721cds/", + "g300":"daily_mean/g300_f1h-r1440x721cds/", + "g500":"daily_mean/g500_f1h-r1440x721cds/", + "g850":"daily_mean/g850_f1h-r1440x721cds/", + "sfcWind":"daily_mean/sfcWind_f1h-r1440x721cds/", + "tasmax":"daily/tasmax_f1h-r1440x721cds/", + "tasmin":"daily/tasmin_f1h-r1440x721cds/", + "ta300":"daily_mean/ta300_f1h-r1440x721cds/", + "ta500":"daily_mean/ta500_f1h-r1440x721cds/", + "ta850":"daily_mean/ta850_f1h-r1440x721cds/", + "hurs":"daily_mean/hurs_f1h-r1440x721cds/"} + monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + "rsds":"monthly_mean/rsds_f1h-r1440x721cds/", + "g300":"monthly_mean/g300_f1h-r1440x721cds/", + "g500":"monthly_mean/g500_f1h-r1440x721cds/", + "g850":"monthly_mean/g850_f1h-r1440x721cds/", + "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/", + "tasmax":"monthly_mean/tasmax_f1h-r1440x721cds/", + "tasmin":"monthly_mean/tasmin_f1h-r1440x721cds/", + "ta300":"montly_mean/ta300_f1h-r1440x721cds/", + "ta500":"monthly_mean/ta500_f1h-r1440x721cds/", + "ta850":"monthly_mean/ta850_f1h-r1440x721cds/", + "tos":"monthly_mean/tos_f1h-r1440x721cds/", + "sic":"monthly_mean/sic_f1h-r1440x721cds/"} + calendar: "standard" + reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + ERA5-Land: + name: "ERA5-Land" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5land/" + daily_mean: {"tas":"daily_mean/tas_f1h/", "rsds":"daily_mean/rsds_f1h/", + "prlr":"daily_mean/prlr_f1h/", "sfcWind":"daily_mean/sfcWind_f1h/", + "tasmin":"daily/tasmin/", "tasmax":"daily/tasmax/"} + monthly_mean: {"tas":"monthly_mean/tas_f1h/","tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f1h/", + "sfcWind":"monthly_mean/sfcWind_f1h/", "rsds":"monthly_mean/rsds_f1h/", + "tdps":"monthly_mean/tdps_f1h/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" + UERRA: + name: "ECMWF UERRA" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/uerra_mescan/" + daily_mean: {"tas":"daily_mean/tas_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/uerra_mescan/daily_mean/tas_f6h/tas_201805.nc" + CERRA: + name: "ECMWF CERRA" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/cerra/" + daily_mean: {"hurs":"daily_mean/hurs_f3h-r2631x1113/", "ps":"daily_mean/ps_f3h-r2631x1113/", + "sfcWind":"daily_mean/sfcWind_f3h-r2631x1113/", + "tas":"daily_mean/tas_f3h-r2631x1113/", "winddir":"daily_mean/tas_f3h-r2631x1113/"} + monthly_mean: {"hurs":"monthly_mean/hurs_f3h-r2631x1113/", "ps":"monthly_mean/ps_f3h-r2631x1113/", + "sfcWind":"monthly_mean/sfcWind_f3h-r2631x1113/", + "tas":"monthly_mean/tas_f3h-r2631x1113/", + "winddir":"monthly_mean/winddir_f3h-r2631x1113/", + "tasmin":"monthly_mean/tasmin_f24h-r2631x1113/", + "tasmax":"monthly_mean/tasmax_f24h-r2631x1113/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/cerra/monthly_mean/tas_f3h-r2631x1113/tas_200506.nc" + CERRA-Land: + name: "ECMWF CERRA-Land" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/cerraland/" + daily_mean: {"prlr":"daily_mean/prlr_f6h-r2631x1113/"} + monthly_mean: {"prlr":"monthly_mean/prlr_f6h-r2631x1113/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/cerraland/monthly_mean/prlr_f6h-r2631x1113/prlr_200412.nc" + HadCRUT5: + name: "HadCRUT5" + institution: "Met Office" + src: "obs/ukmo/hadcrut_v5.0_analysis/" + monthly_mean: {"tasanomaly":"monthly_mean/tasanomaly/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/obs/ukmo/hadcrut_v5.0_analysis/monthly_mean/tasanomaly/tasanomaly_202001.nc" + BEST: + name: "BEST" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "obs/berkeleyearth/berkeleyearth/" + daily_mean: {"tas":"daily_mean/tas/"} + monthly_mean: {"tas":"monthly_mean/tas/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/obs/berkeleyearth/berkeleyearth/monthly_mean/tas/tas_201805.nc" + diff --git a/example_scripts/subseasonal_load.R b/example_scripts/subseasonal_load.R new file mode 100644 index 00000000..7418850a --- /dev/null +++ b/example_scripts/subseasonal_load.R @@ -0,0 +1,19 @@ + +source("modules/Loading/Loading.R") + +recipe_file <- "recipe_subseasonal.yml" + +# To test atomic recipes: +#recipe <- prepare_outputs(recipe_file) + +# To check the split +#source("tools/divide_recipe.R") +#divide_recipe(recipe) + +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) + +data <- Loading(recipe) + + diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 6e219bf1..8e322ab1 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -23,6 +23,9 @@ Loading <- function(recipe) { } else if (time_horizon == "decadal") { source("modules/Loading/R/load_decadal.R") data <- load_decadal(recipe) + } else if (time_horizon == "subseasonal") { + source("modules/Loading/R/load_subseasonal.R") + data <- load_subseasonal(recipe) } else { stop("Incorrect time horizon.") } diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index f084ce62..ffe4a053 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -14,28 +14,58 @@ #'@export library(lubridate) +source("modules/Loading/R/subseas_file_dates.R") dates2load <- function(recipe, logger) { temp_freq <- recipe$Analysis$Variables$freq recipe <- recipe$Analysis$Time - # hcst dates - file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), - recipe$sdate) - if (temp_freq == "monthly_mean") { + # hcst dates + file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), + recipe$sdate) file_dates <- .add_dims(file_dates) } - # fcst dates (if fcst_year empty it creates an empty object) - if (! is.null(recipe$fcst_year)) { - file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) - if (temp_freq == "monthly_mean") { - file_dates.fcst <- .add_dims(file_dates.fcst) + if (temp_freq == "weekly_mean") { + sday <- recipe$sday_window + if (is.null(sday)) { + sday <- 3 + } + n.skill.weeks <- recipe$sweek_window + if (n.skill.weeks %% 2 != 0) { + fcst_sweek_ind <- (n.skill.weeks + 1)/2 + } else { + fcst_sweek_ind <- (n.skill.weeks)/2 } - } else { - file_dates.fcst <- NULL - info(logger, - paste("fcst_year empty in the recipe, creating empty fcst object...")) + fcst.sdate <- recipe$sdate + if (fcst_sweek_ind != 1) { + if (fcst_sweek_ind %% 2 == 0) { + fcst.sdate_to_start <- as.character(format(as.Date(as.character(fcst.sdate), + format = "%Y%m%d") + 4 + 7 * ((fcst_sweek_ind/2) - 1), "%Y%m%d")) + } else { + fcst.sdate_to_start <- as.character(format(as.Date(as.character(fcst.sdate), + format = "%Y%m%d") + 7 *((fcst_sweek_ind - 1)/2), "%Y%m%d")) + } + } else { + fcst.sdate_to_start <- startdate + } + file_dates <- subseas_file_dates(startdate = fcst.sdate_to_start, + n.skill.weeks = n.skill.weeks, + n.days = sday, + hcst.start = as.numeric(recipe$hcst_start), + hcst.end = as.numeric(recipe$hcst_end), + ftime_min = recipe$ftime_min, + ftime_max = recipe$ftime_max, out = 'hcst') + } + # fcst dates (if fcst_year empty it creates an empty object) + if (! is.null(recipe$fcst_year)) { + file_dates.fcst <- subseas_file_dates(startdate = fcst.sdate_to_start, + n.skill.weeks = n.skill.weeks, + n.days = sday, + hcst.start = as.numeric(recipe$hcst_start), + hcst.end = as.numeric(recipe$hcst_end), + ftime_min = recipe$ftime_min, + ftime_max = recipe$ftime_max, out = 'fcst') } return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R new file mode 100644 index 00000000..7893ee16 --- /dev/null +++ b/modules/Loading/R/load_subseasonal.R @@ -0,0 +1,381 @@ +# Load required libraries/funs +source("modules/Loading/R/get_regrid_params.R") +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Loading/R/compare_exp_obs_grids.R") + +load_subseasonal <- function(recipe) { + + # ------------------------------------------- + # Set params ----------------------------------------- + + hcst.inityear <- recipe$Analysis$Time$hcst_start + hcst.endyear <- recipe$Analysis$Time$hcst_end + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + store.freq <- recipe$Analysis$Variables$freq + + # get sdates array + ## LOGGER: Change dates2load to extract logger from recipe? + sdates <- dates2load(recipe, recipe$Run$logger) + + ## TODO: Examine this verifications part, verify if it's necessary + # stream <- verifications$stream + # sdates <- verifications$fcst.sdate + + ## TODO: define fcst.name + ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name + + # get datasets dict: + archive <- read_yaml("conf/archive_subseasonal.yml")[[recipe$Run$filesystem]] + exp_descrip <- archive$System[[exp.name]] + + freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]]) + reference_descrip <- archive$Reference[[ref.name]] + freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]]) + obs.dir <- reference_descrip$src + fcst.dir <- exp_descrip$src + hcst.dir <- exp_descrip$src + fcst.nmember <- exp_descrip$nmember$fcst + hcst.nmember <- exp_descrip$nmember$hcst + + sweek_window <- recipe$Analysis$Time$sweek + sday_window <- recipe$Analysis$Time$swday + + ## TODO: it is necessary? + ##if ("accum" %in% names(reference_descrip)) { + ## accum <- unlist(reference_descrip$accum[store.freq][[1]]) + ##} else { + ## accum <- FALSE + ##} + + if (store.freq %in% c("daily", "daily_mean")) { + frequency <- "daily_mean" + } else if (store.freq == "weekly_mean") { + frequency <- "weekly_mean" + } + var_dir_obs <- reference_descrip[[frequency]][variable] + var_dir_exp <- exp_descrip[[frequency]][variable] + + # ----------- + obs.path <- paste0(archive$src, obs.dir, "$var_dir$", + "$var$_$file_date$.nc") + + hcst.path <- paste0(archive$src, hcst.dir, "$var_dir$", + "$var$_$file_date$.nc") + + fcst.path <- paste0(archive$src, hcst.dir, "$var_dir$", + "$var$_$file_date$.nc") + + # Define regrid parameters: + #------------------------------------------------------------------- + regrid_params <- get_regrid_params(recipe, archive) + + # Longitude circular sort and latitude check + #------------------------------------------------------------------- + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "weekly_mean"){ + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Load hindcast + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = variable, + var_dir = var_dir_exp, + file_date = sdates$hcst, + time = recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max, + var_dir_depends = 'var', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble', 'lev')), + ensemble = indices(1:hcst.nmember), + metadata_dims = 'var', # change to just 'var'? + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(hcst))) { + hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + + # Convert hcst to s2dv_cube object + ## TODO: Give correct dimensions to $Dates + ## (sday, sweek, syear instead of file_date) + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + # the call uses file_date instead of fcst_syear so that it can work + # with the daily case and the current version of startR not allowing + # multiple dims split + + fcst <- Start(dat = fcst.path, + var = variable, + var_dir = var_dir_exp, + var_dir_depends = 'var', + file_date = sdates$fcst, + time = recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max, + + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble', 'lev')), + ensemble = indices(1:fcst.nmember), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + if ("var_dir" %in% names(dim(fcst))) { + fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + + # Convert fcst to s2dv_cube + fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$attrs$Dates[] <- + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. + dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Separate Start() call for monthly vs daily data + if (store.freq == "weekly_mean") { + dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m%d") + dim(dates_file) <- dim(dates) + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq %in% c("daily_mean", "daily")) { + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m%d') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + } + + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(obs))) { + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") + } + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!(recipe$Analysis$Regrid$type == 'none')) { + compare_exp_obs_grids(hcst, obs) + } + + # Remove negative values in accumulative variables + dictionary <- read_yaml("conf/variable-dictionary.yml") + for (var_idx in 1:length(variable)) { + var_name <- variable[var_idx] + if (dictionary$vars[[var_name]]$accum) { + info(recipe$Run$logger, + paste0("Accumulated variable ", var_name, + ": setting negative values to zero.")) + # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, + # along = "var", + # indices = var_idx, F), 0) + obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 + hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 + } + } + } + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + ############################################################################ + # + # CHECKS ON MISSING FILES + # + ############################################################################ + + #obs.NA_dates.ind <- Apply(obs, + # fun=(function(x){ all(is.na(x))}), + # target_dims=c('time', 'latitude', 'longitude'))[[1]] + #obs.NA_dates <- dates_file[obs.NA_dates.ind] + #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] + #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", + # freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") + # + #if (any(is.na(hcst))){ + # fatal(recipe$Run$logger, + # paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "hcst files:", + # hcst.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status = 1) + #} + # + #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ + # fatal(recipe$logger, + # paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "obs files:", + # obs.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status=1) + #} + # + #info(recipe$logger, + # "######### DATA LOADING COMPLETED SUCCESFULLY ##############") + + ############################################################################ + ############################################################################ + .log_memory_usage(recipe$Run$logger, when = "After loading") + return(list(hcst = hcst, fcst = fcst, obs = obs)) + +} + diff --git a/modules/Loading/R/subseas_file_dates.R b/modules/Loading/R/subseas_file_dates.R new file mode 100644 index 00000000..318df765 --- /dev/null +++ b/modules/Loading/R/subseas_file_dates.R @@ -0,0 +1,67 @@ +subseas_file_dates <- function(startdate, n.skill.weeks, n.days, + hcst.start, hcst.end, + ftime_min, ftime_max, out) { + ftime_min <- as.numeric(substr(as.character(startdate), 1, 4)) - hcst.end + ftime_max <- as.numeric(substr(as.character(startdate), 1, 4)) - hcst.start + startdate <- as.Date(toString(startdate), "%Y%m%d") + sdates <- numeric(0) + while (length(sdates) < n.skill.weeks){ + if (format(startdate, "%a") == "Thu" || format(startdate, "%a") == "Mon") { + sdates <- c(sdates, format(startdate, "%Y%m%d")) + } + startdate <- startdate - 1 + } + + file_dates <- array(numeric(), c(0, n.days, (ftime_max -ftime_min) + 1)) + file_dates.fcst <- array(numeric(), c(0, n.days, (ftime_max - ftime_min) + 1)) + for (sdate in sdates) { + ## TODO: take into account year of the fcst to select hcst years + fcst.day <- substr(sdate, 7, 8) + fcst.month <- substr(sdate, 5, 6) + fcst.year <- substr(sdate, 1, 4) + + startdate <- as.Date(toString(sdate), "%Y%m%d") + + # makes sure of giving a: [M T M || T M T] combination + if(format(startdate, "%a") == "Thu") { + hcst.sdays <- c(startdate - 3, startdate, startdate + 4, recursive = T) + } else { + hcst.sdays <- c(startdate - 4, startdate, startdate + 3, recursive = T) + } + + hcst.sdays <- format(as.Date(hcst.sdays,"%Y%m%d"), "%Y%m%d") + + sdays.file_dates <- array(numeric(),c(0, (ftime_max - ftime_min) + 1)) + sdays.file_dates.fcst <- array(hcst.sdays, c(n.days, + (ftime_max - ftime_min) + 1)) + for (sday.sdate in hcst.sdays) { + sday.year <- substr(sday.sdate, 1, 4) + sday.mday <- substr(sday.sdate, 5, 8) + sday.years <- paste((strtoi(sday.year)- ftime_max): + (strtoi(sday.year) - ftime_min), sep = "") + sday.dates <- apply(expand.grid(sday.years, sday.mday), + 1, paste, collapse = "") + sdays.file_dates <- abind(sdays.file_dates, sday.dates, along = 1) + } + + names(dim(sdays.file_dates)) <- c('sday','syear') + + file_dates <- abind(file_dates, sdays.file_dates, along = 1) + file_dates.fcst <- abind(file_dates.fcst, sdays.file_dates.fcst, along = 1) + + } + names(dim(file_dates)) <- c('sweek','sday','syear') + names(dim(file_dates.fcst)) <- c('sweek','sday','syear') + +# dir_dates <- paste0(file_dates.fcst,"/",var,"_",file_dates) +# dim(dir_dates) <- dim(file_dates) +# names(dim(dir_dates)) <- c('sweek','sday','syear') +# dir_dates <- substr(dir_dates,10,24) +# return(dir_dates) + if (out == 'fcst') { + central_day <- (n.days + 1)/2 + central_week <- (n.skill.weeks + 1)/2 + file_dates <- file_dates.fcst[central_week,central_day, 1] + } + return(file_dates) +} diff --git a/recipe_subseasonal.yml b/recipe_subseasonal.yml new file mode 100644 index 00000000..61d08d28 --- /dev/null +++ b/recipe_subseasonal.yml @@ -0,0 +1,177 @@ +# IMPORTANT: This is recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. +Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables. +# - {name: 'tas', freq: 'weekly_mean', units: 'C'} + name: 'tas' + freq: 'weekly_mean' + units: 'C' + # To request more variables to be divided in atomic recipes, add them this way: + # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'NCEP-CFSv2', member: 'all'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: 2024 #20240101 #%Y%m%d + #- '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: '2024' # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2006' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 4 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + #- {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + # To request more regions to be divided in atomic recipes, add them this way: + {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: bilinear # Interpolation method (Mandatory, str) + type: to_system # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Calibration: + method: mse_min # Calibration method. (Mandatory, str) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Anomalies: + compute: yes # Either yes/true or no/false (Mandatory, bool) + cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool) + save: 'fcst_only' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + int_method: conservative # regridding method accepted by CDO. (Mandatory, str) + bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn' + log_reg_method: # If type=logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20000202.nc # nc file or grid accepted by CDO + nanalogs: # If type analgs. Number of analogs to be searched + save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str) + Time_aggregation: + execute: yes # # Either yes/true or no/false. Defaults to false. (Mandatory, bool) + method: average # Aggregation method. Available methods: 'average, 'accumulated'. (Mandatory, string) + # ini and end: list, pairs initial and final time steps to aggregate. + # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3 + ini: [1, 2, 1] + end: [2, 3, 3] + # user_def: List of lists, Custom user-defined forecast times to aggregate. + # Elements should be named, named can be chosen by the user. + # An R expression can be entered using '!expr"; it will be evaluated by the code. + # If both ini/end and user_def are defined, ini/end takes preference. + #user_def: + # DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times + # DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,... + Indices: + ## Indices available: - NAO (for psl and/or z500); + # - Nino1+2, Nino3, Nino3.4, Nino4 (for tos) + ## Each index can only be computed if its area is within the selected region. + # obsproj: NAO computation method (see s2dv::NAO()) Default is yes/true. (Optional, bool) + # save: What to save. Options: 'all'/'none'. Default is 'all'. + # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool) + # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool) + # alpha: Significance threshold. Default value is 0.05 (Optional, numeric) + #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + # Also available if variable is psl and/or z500: + # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes} + Skill: + metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str) + multi_panel: yes # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool) + projection: 'cylindrical_equidistant' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 10 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'S2S4E' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /esarchive/scratch/nperez/git3/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git3/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: victoria.agudetse@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 245e4e3d..c711b5a9 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -9,12 +9,15 @@ check_recipe <- function(recipe) { TIME_SETTINGS_SEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", "hcst_end") - TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") + TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") + + TIME_SETTINGS_SUBSEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", "hcst_end", "week_day", "sweek_window", "sday_window") PARAMS <- c("Horizon", "Time", "Variables", "Region", "Regrid", "Workflow", "Datasets") HORIZONS <- c("subseasonal", "seasonal", "decadal") ARCHIVE_SEASONAL <- "conf/archive.yml" ARCHIVE_DECADAL <- "conf/archive_decadal.yml" + ARCHIVE_SUBSEASONAL <- "conf/archive_subseasonal.yml" # Define error status variable error_status <- F @@ -38,9 +41,12 @@ check_recipe <- function(recipe) { paste0("The element 'Horizon' in the recipe must be one of the ", "following: ", paste(HORIZONS, collapse = ", "), ".")) error_status <- T + } else { + # avoid tolower() in the rest of the checks + recipe$Analysis$Horizon <- tolower(recipe$Analysis$Horizon) } # Check time settings - if (tolower(recipe$Analysis$Horizon) == "seasonal") { + if (recipe$Analysis$Horizon == "seasonal") { ## TODO: Specify filesystem archive <- read_yaml(ARCHIVE_SEASONAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { @@ -50,7 +56,7 @@ check_recipe <- function(recipe) { collapse = ", "), ".")) error_status <- TRUE } - } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + } else if (recipe$Analysis$Horizon == "decadal") { archive <- read_yaml(ARCHIVE_DECADAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { error(recipe$Run$logger, @@ -59,6 +65,15 @@ check_recipe <- function(recipe) { collapse = ", "), ".")) error_status <- TRUE } + } else if (recipe$Analysis$Horizon == "subseasonal") { + archive <- read_yaml(ARCHIVE_SUBSEASONAL)[[recipe$Run$filesystem]] + if (!all(TIME_SETTINGS_SUBSEASONAL %in% names(recipe$Analysis$Time))) { + error(recipe$Run$logger, + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_SUBSEASONAL, + collapse = ", "), ".")) + error_status <- TRUE + } } else { archive <- NULL } @@ -171,6 +186,37 @@ check_recipe <- function(recipe) { "'hcst_end' cannot be smaller than 'hcst_start'.") error_status <- TRUE } + if (recipe$Analysis$Horizon == "subseasonal") { + if (is.null(recipe$Analysis$Time$week_day)) { + recipe$Analysis$Time$week_day <- "Thursday" + info(recipe$Run$logger, + "Initialization day set to Thursdays") + } + sww <- recipe$Analysis$Time$sweek_window + if (is.null(sww) || !is.numeric(sww) || (sww %% 2) != 1) { + recipe$Analysis$Time$sweek_window <- 9 + info(recipe$Run$logger, + "Number of initialisation 'weeks' for skill assessment set to 9.") + } + sdw <- recipe$Analysis$Time$sday_window + if (is.null(sdw) || !is.numeric(sdw) || (sdw %% 2) != 1) { + # Calibration + # If 'method' is FALSE/no/'none' or NULL, set to 'raw' + ## TODO: Review this check + if ((is.logical(recipe$Analysis$Workflow$Calibration$method) && + recipe$Analysis$Workflow$Calibration$method == FALSE) || + tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || + is.null(recipe$Analysis$Workflow$Calibration$method)) { + recipe$Analysis$Time$sday_window <- 1 + info(recipe$Run$logger, + "Number of initialisation 'days' for calibration set to 1.") + } else { + recipe$Analysis$Time$sday_window <- 3 + info(recipe$Run$logger, + "Number of initialisation 'days' for calibration set to 3.") + } + } + } ## TODO: Is this needed? if (is.null(recipe$Analysis$Time$fcst_year) || identical(tolower(recipe$Analysis$Time$fcst_year), 'none')) { diff --git a/tools/data_summary.R b/tools/data_summary.R index b76101ba..6e1ce902 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -10,18 +10,28 @@ data_summary <- function(data_cube, recipe) { object_name <- deparse(substitute(data_cube)) if (recipe$Analysis$Variables$freq == "monthly_mean") { date_format <- "%b %Y" - } else if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { + } else if (recipe$Analysis$Variables$freq %in% + c("daily", "daily_mean", "weekly_mean")) { date_format <- "%b %d %Y" } months <- unique(format(as.Date(data_cube$attrs$Dates), format = '%B')) months <- paste(as.character(months), collapse=", ") - sdate_min <- format(min(as.Date(data_cube$attrs$Dates)), + sdate_min <- format(min(as.Date(data_cube$attrs$Dates), na.rm = TRUE), format = date_format) - sdate_max <- format(max(as.Date(data_cube$attrs$Dates)), + sdate_max <- format(max(as.Date(data_cube$attrs$Dates), na.rm = TRUE), format = date_format) # Log the summary info(recipe$Run$logger, "DATA SUMMARY:") - info(recipe$Run$logger, paste(object_name, "months:", months)) + if (recipe$Analysis$Variables$freq == "monthly_mean") { + info(recipe$Run$logger, paste(object_name, "months:", months)) + } else if (recipe$Analysis$Variables$freq == "weekly_mean") { + sdate <- recipe$Analysis$Time$sdate + ftime_min <- recipe$Analysis$Time$ftime_min + ftime_max <- recipe$Analysis$Time$ftime_max + week_valid_ini <- (ftime_min:ftime_max - 1)*7 + ymd(sdate) + 4 #4 if sdate Thursday + info(recipe$Run$logger, paste(object_name, "corresponding forecast times:", + paste(week_valid_ini, collapse = ' '))) + } # Should we include daily summaries? info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to", sdate_max)) info(recipe$Run$logger, paste(object_name, "dimensions:")) @@ -49,3 +59,4 @@ data_summary <- function(data_cube, recipe) { info(recipe$Run$logger, "---------------------------------------------") invisible(gc()) } + diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 3b2e6eee..9d4f00d4 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -169,6 +169,41 @@ divide_recipe <- function(recipe) { ftime_min = recipe$Analysis$Time$ftime_min, ftime_max = recipe$Analysis$Time$ftime_max) } + } else if (tolower(recipe$Analysis$Horizon) == "subseasonal") { + for (sdate in 1:length(recipe$Analysis$Time$sdate)) { + #sdate: "2024" Calculate all Thursdays in the year + # TODO: consider other initialisation days in the week + if (nchar(recipe$Analysis$Time$sdate[sdate]) == 4) { + days <- as.POSIXlt(paste(recipe$Analysis$Time$sdate[sdate], + 1:366, sep="-"), format="%Y-%j") + day_of_the_week_ini <- days[days$wday == 4] # Thursdays + } else if (nchar(recipe$Analysis$Time$sdate[sdate]) == 8) { + day_of_the_week_ini <- recipe$Analysis$Time$sdate[sdate] + } else { + stop("what other cases can exist in subaseasonal?") + } + } + for (sdate in 1:length(day_of_the_week_ini)) { + for (reci in 1:length(all_recipes)) { + all_recipes[[reci]]$Analysis$Time <- + list(sdate = format(day_of_the_week_ini[sdate], "%Y%m%d"), + fcst_year = recipe$Analysis$Time$fcst_year, + hcst_start = recipe$Analysis$Time$hcst_start, + hcst_end = recipe$Analysis$Time$hcst_end, + ftime_min = recipe$Analysis$Time$ftime_min, + ftime_max = recipe$Analysis$Time$ftime_max, + week_day = recipe$Analysis$Time$week_day, + sweek_window = recipe$Analysis$Time$sweek_window, + sday_window =recipe$Analysis$Time$sday_window) + } + if (sdate == 1) { + recipes <- all_recipes + } else { + recipes <- append(recipes, all_recipes) + } + } + all_recipes <- recipes + rm(list = 'recipes') } # Rest of horizons # Save all recipes in separate YAML files chunk <- 1 -- GitLab From 7cc6cc3f46444ec991d98439007e8adb2ca0eacf Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 4 Jul 2024 11:08:43 +0200 Subject: [PATCH 02/20] recover monthly freq fcst file dates --- modules/Loading/R/dates2load.R | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index ffe4a053..eb0ba57b 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -59,13 +59,18 @@ dates2load <- function(recipe, logger) { } # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$fcst_year)) { - file_dates.fcst <- subseas_file_dates(startdate = fcst.sdate_to_start, - n.skill.weeks = n.skill.weeks, - n.days = sday, - hcst.start = as.numeric(recipe$hcst_start), - hcst.end = as.numeric(recipe$hcst_end), - ftime_min = recipe$ftime_min, - ftime_max = recipe$ftime_max, out = 'fcst') + if (temp_freq == "monthly_mean") { + file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) + file_dates.fcst <- .add_dims(file_dates.fcst) + } else if (temp_freq == "weekly_mean") { + file_dates.fcst <- subseas_file_dates(startdate = fcst.sdate_to_start, + n.skill.weeks = n.skill.weeks, + n.days = sday, + hcst.start = as.numeric(recipe$hcst_start), + hcst.end = as.numeric(recipe$hcst_end), + ftime_min = recipe$ftime_min, + ftime_max = recipe$ftime_max, out = 'fcst') + } } return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun -- GitLab From b9e7680b715086f8e6f0397b29fa49077ac18108 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 4 Jul 2024 11:20:39 +0200 Subject: [PATCH 03/20] recover daily freq case --- modules/Loading/R/dates2load.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index eb0ba57b..952b6f92 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -25,8 +25,7 @@ dates2load <- function(recipe, logger) { file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) file_dates <- .add_dims(file_dates) - } - if (temp_freq == "weekly_mean") { + } else if (temp_freq == "weekly_mean") { sday <- recipe$sday_window if (is.null(sday)) { sday <- 3 @@ -56,6 +55,9 @@ dates2load <- function(recipe, logger) { hcst.end = as.numeric(recipe$hcst_end), ftime_min = recipe$ftime_min, ftime_max = recipe$ftime_max, out = 'hcst') + } else { + file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), + recipe$sdate) } # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$fcst_year)) { @@ -70,6 +72,8 @@ dates2load <- function(recipe, logger) { hcst.end = as.numeric(recipe$hcst_end), ftime_min = recipe$ftime_min, ftime_max = recipe$ftime_max, out = 'fcst') + } else { + file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) } } return(list(hcst = file_dates, fcst = file_dates.fcst)) -- GitLab From d5fad3db3ed0f975b5a06abdd39235363e1bfcc4 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 4 Jul 2024 11:32:42 +0200 Subject: [PATCH 04/20] recover empty fcst year result --- modules/Loading/R/dates2load.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index 952b6f92..717fd854 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -75,6 +75,9 @@ dates2load <- function(recipe, logger) { } else { file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) } + } else { + # if no fcst year is requested: + file_dates.fcst <- NULL } return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun -- GitLab From e550e5fdee3e48e9392633b34838726a664072de Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 4 Jul 2024 11:40:05 +0200 Subject: [PATCH 05/20] add some documentation to auxiliary functions --- modules/Loading/R/dates2load.R | 4 +++- modules/Loading/R/subseas_file_dates.R | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index 717fd854..3f12ce98 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -17,7 +17,9 @@ library(lubridate) source("modules/Loading/R/subseas_file_dates.R") dates2load <- function(recipe, logger) { - +# from recipe it uses: +# temporal frequency of the variable requested +# the details in $Time request: hcst period, forecast times ... temp_freq <- recipe$Analysis$Variables$freq recipe <- recipe$Analysis$Time if (temp_freq == "monthly_mean") { diff --git a/modules/Loading/R/subseas_file_dates.R b/modules/Loading/R/subseas_file_dates.R index 318df765..ddcd4a45 100644 --- a/modules/Loading/R/subseas_file_dates.R +++ b/modules/Loading/R/subseas_file_dates.R @@ -1,3 +1,10 @@ +# Given an startdate e.g. 20240104 +# The number of weeks that will be used for metrics calculation +# The number of days that will be used for calibration +# the hindcast period and the forecast times +# It returns an array with dimension sday, sweek, syear +# for the hcst is out = 'hcst' or for the forecast if out = 'fcst' + subseas_file_dates <- function(startdate, n.skill.weeks, n.days, hcst.start, hcst.end, ftime_min, ftime_max, out) { -- GitLab From b930251afafd61a64d89165ab5bd166a060345a7 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 4 Jul 2024 17:05:09 +0200 Subject: [PATCH 06/20] unit test subseasonal --- .gitlab-ci.yml | 14 ++ modules/Loading/R/load_subseasonal.R | 6 +- modules/Loading/R/subseas_file_dates.R | 2 + tests/recipes/recipe-subs_weekly.yml | 177 +++++++++++++++++++++++++ tests/test_subseasonal.R | 9 ++ tests/testthat/test-subs_weekly.R | 112 ++++++++++++++++ 6 files changed, 318 insertions(+), 2 deletions(-) create mode 100644 tests/recipes/recipe-subs_weekly.yml create mode 100644 tests/test_subseasonal.R create mode 100644 tests/testthat/test-subs_weekly.R diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 997870c8..ccaf0809 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,3 +26,17 @@ unit-test-decadal: # This job runs in the test stage. - module list - echo "Running decadal unit tests..." - Rscript ./tests/test_decadal.R + +unit-test-subseasonal: # This job runs in the test stage. + stage: test + script: + - echo "Loading modules..." + - module load R/4.1.2-foss-2015a-bare + - module load CDO/1.9.8-foss-2015a + - module load GEOS/3.7.2-foss-2015a-Python-3.7.3 + - module load GDAL/2.2.1-foss-2015a + - module load PROJ/4.8.0-foss-2015a + - module list + - echo "Running subseasonal unit tests..." + - Rscript ./tests/test_subseasonal.R + diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R index 7893ee16..10e3c019 100644 --- a/modules/Loading/R/load_subseasonal.R +++ b/modules/Loading/R/load_subseasonal.R @@ -218,9 +218,11 @@ load_subseasonal <- function(recipe) { # Obtain dates and date dimensions from the loaded hcst data to make sure # the corresponding observations are loaded correctly. - dates <- hcst$attrs$Dates + dates <- as.POSIXlt(hcst$attrs$Dates) + # Hcst files metatdata are 3 days ahead of Obs files names: + dates$mday <- dates$mday - 3 + dates <- as.POSIXct(dates) dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] - # Separate Start() call for monthly vs daily data if (store.freq == "weekly_mean") { dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m%d") diff --git a/modules/Loading/R/subseas_file_dates.R b/modules/Loading/R/subseas_file_dates.R index ddcd4a45..519b32e0 100644 --- a/modules/Loading/R/subseas_file_dates.R +++ b/modules/Loading/R/subseas_file_dates.R @@ -69,6 +69,8 @@ subseas_file_dates <- function(startdate, n.skill.weeks, n.days, central_day <- (n.days + 1)/2 central_week <- (n.skill.weeks + 1)/2 file_dates <- file_dates.fcst[central_week,central_day, 1] + dim(file_dates) <- c(sday = 1, sweek = 1, syear = 1) } + file_dates <- Reorder(file_dates, c('sday', 'sweek', 'syear')) return(file_dates) } diff --git a/tests/recipes/recipe-subs_weekly.yml b/tests/recipes/recipe-subs_weekly.yml new file mode 100644 index 00000000..545c3570 --- /dev/null +++ b/tests/recipes/recipe-subs_weekly.yml @@ -0,0 +1,177 @@ +# IMPORTANT: This is recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. +Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables. +# - {name: 'tas', freq: 'weekly_mean', units: 'C'} + name: 'tas' + freq: 'weekly_mean' + units: 'C' + # To request more variables to be divided in atomic recipes, add them this way: + # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'NCEP-CFSv2', member: 'all'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: 20240104 #%Y%m%d + #- '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: '2024' # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2003' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 4 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + week_day: Thursday + sweek_window: 5 + sday_window: 3 + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + #- {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + # To request more regions to be divided in atomic recipes, add them this way: + {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 10} + Regrid: + method: bilinear # Interpolation method (Mandatory, str) + type: to_system # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Calibration: + method: mse_min # Calibration method. (Mandatory, str) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Anomalies: + compute: yes # Either yes/true or no/false (Mandatory, bool) + cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool) + save: 'fcst_only' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + int_method: conservative # regridding method accepted by CDO. (Mandatory, str) + bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn' + log_reg_method: # If type=logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20000202.nc # nc file or grid accepted by CDO + nanalogs: # If type analgs. Number of analogs to be searched + save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str) + Time_aggregation: + execute: yes # # Either yes/true or no/false. Defaults to false. (Mandatory, bool) + method: average # Aggregation method. Available methods: 'average, 'accumulated'. (Mandatory, string) + # ini and end: list, pairs initial and final time steps to aggregate. + # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3 + ini: [1, 2, 1] + end: [2, 3, 3] + # user_def: List of lists, Custom user-defined forecast times to aggregate. + # Elements should be named, named can be chosen by the user. + # An R expression can be entered using '!expr"; it will be evaluated by the code. + # If both ini/end and user_def are defined, ini/end takes preference. + #user_def: + # DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times + # DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,... + Indices: + ## Indices available: - NAO (for psl and/or z500); + # - Nino1+2, Nino3, Nino3.4, Nino4 (for tos) + ## Each index can only be computed if its area is within the selected region. + # obsproj: NAO computation method (see s2dv::NAO()) Default is yes/true. (Optional, bool) + # save: What to save. Options: 'all'/'none'. Default is 'all'. + # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool) + # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool) + # alpha: Significance threshold. Default value is 0.05 (Optional, numeric) + #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + # Also available if variable is psl and/or z500: + # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes} + Skill: + metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str) + multi_panel: yes # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool) + projection: 'cylindrical_equidistant' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 10 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'S2S4E' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /esarchive/scratch/nperez/git3/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git3/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: victoria.agudetse@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/tests/test_subseasonal.R b/tests/test_subseasonal.R new file mode 100644 index 00000000..5ed1f291 --- /dev/null +++ b/tests/test_subseasonal.R @@ -0,0 +1,9 @@ +library(testthat) + +path_testthat <- file.path('./tests/testthat/') +files_testthat <- list.files('./tests/testthat/', pattern = 'subs') + +for (i_file in 1:length(files_testthat)) { + source(paste0('./tests/testthat/', files_testthat[i_file])) +} + diff --git a/tests/testthat/test-subs_weekly.R b/tests/testthat/test-subs_weekly.R new file mode 100644 index 00000000..7c992f75 --- /dev/null +++ b/tests/testthat/test-subs_weekly.R @@ -0,0 +1,112 @@ +context("Subseasonal weekly data") + +source("modules/Loading/Loading.R") +source(" modules/Saving/R/get_dir.R") + +recipe_file <- "tests/recipes/recipe-subs_weekly.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_subseasonal.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) + +# ------- TESTS -------- + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 3, sweek = 5, syear = 5, time = 4, latitude = 10, longitude = 21, ensemble = 12) +) +expect_equal( +dim(data$obs$data), +c(dat = 1, var = 1, sday = 3, sweek = 5, syear = 5, time = 4, latitude = 10, longitude = 21, ensemble = 1) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 4, latitude = 10, longitude = 21, ensemble = 48) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 3, sweek = 5, syear = 5, time = 4) +) +# The metadata in files are the sdate + 7 days +expect_equal( +format(data$hcst$attrs$Dates[2,3,,1], "%m%d"), +rep("0111", 5) +) +expect_equal( +format(data$hcst$attrs$Dates[,3,3,1], "%m%d"), +c("0108", "0111", "0115"), +) +expect_equal( +format(data$hcst$attrs$Dates[2,,3,1], "%m%d"), +c("0118", "0115", "0111", "0108", "0104") +) +expect_equal( +format(data$hcst$attrs$Dates[2,3,3,], "%Y%m%d"), +c("20010111", "20010118", "20010125", "20010201") +) +# Obs metadata +expect_equal( +dim(data$obs$attrs$Dates), +c(sday = 3, sweek = 5, syear = 5, time = 4) +) +expect_equal( +format(data$obs$attrs$Dates[2,3,,1], "%m%d"), +rep("0114", 5) +) +expect_equal( +dim(data$obs$attrs$Dates), +dim(data$obs$attrs$load_parameters$dat1$file_date[[1]]) +) +expect_equal( +data$obs$attrs$load_parameters$dat1$file_date[[1]][2,3,,1], +c("19990108", "20000108", "20010108", "20020108", "20030108") +) +expect_equal( +data$hcst$data[1,1,2,3,,1,1,1,1], +c(298.9691, 299.0720, 299.0578, 299.3024, 299.5754), +tolerance = 0.0001 +) +}) + + +# Delete files +unlink(recipe$Run$output_dir, recursive = T) -- GitLab From 2c1f3aa98c03bfa134b85e1b1da9c172b654a931 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 4 Jul 2024 17:27:16 +0200 Subject: [PATCH 07/20] add description to template and fix typo in unit test --- recipe_template.yml | 12 +++++++++++- tests/testthat/test-subs_weekly.R | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/recipe_template.yml b/recipe_template.yml index 7f9432c7..4dd5bb20 100644 --- a/recipe_template.yml +++ b/recipe_template.yml @@ -30,17 +30,27 @@ Analysis: # To request more References to be divided into atomic recipes, add them this way: # - {name: 'ERA5Land'} Time: - sdate: + sdate: - '1201' # Start date, 'mmdd' (Mandatory, int) # To request more startdates to be divided into atomic recipes, add them this way: # - '0101' # - '0201' # ... + # For subseasonal case there are two options: + #sdate: 20240104 # A specific date of the year + #sdate: 2024 # A year to evaluate all 52 weeks initialized on Thursday fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int) + # For subseasonal, a specific date should be requested and it should be + # the same as the one defined as sdate (to be coherent between + # forecast provision and assessment. hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int) hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) ftime_max: 6 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + # For subseasonal, there are three extra paramenters: + #week_day: Thursday # only works for Thursdaays right now + #sday_window: 3 # The number of days use for calibration + #sweek_window: 9 # The numeber of weeks to use for assessment Region: # latmin: minimum latitude (Mandatory, int) # latmax: maximum latitude (Mandatory, int) diff --git a/tests/testthat/test-subs_weekly.R b/tests/testthat/test-subs_weekly.R index 7c992f75..ffd5640b 100644 --- a/tests/testthat/test-subs_weekly.R +++ b/tests/testthat/test-subs_weekly.R @@ -1,7 +1,7 @@ context("Subseasonal weekly data") source("modules/Loading/Loading.R") -source(" modules/Saving/R/get_dir.R") +source("modules/Saving/R/get_dir.R") recipe_file <- "tests/recipes/recipe-subs_weekly.yml" recipe <- prepare_outputs(recipe_file, disable_checks = F) -- GitLab From 9f6404aed1d8710b569bd6158f1bfcbd44e1a622 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 4 Jul 2024 17:54:47 +0200 Subject: [PATCH 08/20] change output dir --- tests/recipes/recipe-subs_weekly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/recipes/recipe-subs_weekly.yml b/tests/recipes/recipe-subs_weekly.yml index 545c3570..56e08b95 100644 --- a/tests/recipes/recipe-subs_weekly.yml +++ b/tests/recipes/recipe-subs_weekly.yml @@ -155,7 +155,7 @@ Run: # Default value is 'INFO'. (Optional, str) Terminal: yes # Optional, bool: Whether to display log messages in the terminal. # Default is yes/true. - output_dir: /esarchive/scratch/nperez/git3/ # Output directory. Must have write permissions. (Mandatory, str) + output_dir: ./tests/out-logs/ # Output directory. Must have write permissions. (Mandatory, str) code_dir: /esarchive/scratch/nperez/git3/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) # fill only if using autosubmit -- GitLab From a45f7b47c83a835886ef1735ac008f69dbf862a7 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 10 Jul 2024 14:53:22 +0200 Subject: [PATCH 09/20] comments and formatting --- modules/Loading/R/load_subseasonal.R | 6 +++--- tools/check_recipe.R | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R index 10e3c019..fa3bbfd1 100644 --- a/modules/Loading/R/load_subseasonal.R +++ b/modules/Loading/R/load_subseasonal.R @@ -81,7 +81,7 @@ load_subseasonal <- function(recipe) { #------------------------------------------------------------------- circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) - if (recipe$Analysis$Variables$freq == "weekly_mean"){ + if (recipe$Analysis$Variables$freq == "weekly_mean") { split_multiselected_dims = TRUE } else { split_multiselected_dims = FALSE @@ -219,7 +219,8 @@ load_subseasonal <- function(recipe) { # Obtain dates and date dimensions from the loaded hcst data to make sure # the corresponding observations are loaded correctly. dates <- as.POSIXlt(hcst$attrs$Dates) - # Hcst files metatdata are 3 days ahead of Obs files names: + # Hcst files metadata are 3 days ahead of Obs files names: + ## TODO: Is this true in general, or only for a specific case (NCEPv2 + ERA5)? dates$mday <- dates$mday - 3 dates <- as.POSIXct(dates) dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] @@ -288,7 +289,6 @@ load_subseasonal <- function(recipe) { retrieve = TRUE) } - # Remove var_dir dimension if ("var_dir" %in% names(dim(obs))) { obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") diff --git a/tools/check_recipe.R b/tools/check_recipe.R index c711b5a9..836ffada 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -186,6 +186,7 @@ check_recipe <- function(recipe) { "'hcst_end' cannot be smaller than 'hcst_start'.") error_status <- TRUE } + # Subseasonal-specific checks if (recipe$Analysis$Horizon == "subseasonal") { if (is.null(recipe$Analysis$Time$week_day)) { recipe$Analysis$Time$week_day <- "Thursday" @@ -198,7 +199,7 @@ check_recipe <- function(recipe) { info(recipe$Run$logger, "Number of initialisation 'weeks' for skill assessment set to 9.") } - sdw <- recipe$Analysis$Time$sday_window + sdw <- recipe$Analysis$Time$sday_window if (is.null(sdw) || !is.numeric(sdw) || (sdw %% 2) != 1) { # Calibration # If 'method' is FALSE/no/'none' or NULL, set to 'raw' -- GitLab From bf7fe3192f596aaeb317096ea600420214d0fc63 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 10 Jul 2024 16:10:30 +0200 Subject: [PATCH 10/20] Formatting --- ...est-subs_weekly.R => test-subseasonal_weekly.R} | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) rename tests/testthat/{test-subs_weekly.R => test-subseasonal_weekly.R} (85%) diff --git a/tests/testthat/test-subs_weekly.R b/tests/testthat/test-subseasonal_weekly.R similarity index 85% rename from tests/testthat/test-subs_weekly.R rename to tests/testthat/test-subseasonal_weekly.R index ffd5640b..104397f2 100644 --- a/tests/testthat/test-subs_weekly.R +++ b/tests/testthat/test-subseasonal_weekly.R @@ -68,19 +68,19 @@ c(sday = 3, sweek = 5, syear = 5, time = 4) ) # The metadata in files are the sdate + 7 days expect_equal( -format(data$hcst$attrs$Dates[2,3,,1], "%m%d"), +format(data$hcst$attrs$Dates[2, 3, , 1], "%m%d"), rep("0111", 5) ) expect_equal( -format(data$hcst$attrs$Dates[,3,3,1], "%m%d"), +format(data$hcst$attrs$Dates[, 3, 3, 1], "%m%d"), c("0108", "0111", "0115"), ) expect_equal( -format(data$hcst$attrs$Dates[2,,3,1], "%m%d"), +format(data$hcst$attrs$Dates[2, , 3, 1], "%m%d"), c("0118", "0115", "0111", "0108", "0104") ) expect_equal( -format(data$hcst$attrs$Dates[2,3,3,], "%Y%m%d"), +format(data$hcst$attrs$Dates[2, 3, 3, ], "%Y%m%d"), c("20010111", "20010118", "20010125", "20010201") ) # Obs metadata @@ -89,7 +89,7 @@ dim(data$obs$attrs$Dates), c(sday = 3, sweek = 5, syear = 5, time = 4) ) expect_equal( -format(data$obs$attrs$Dates[2,3,,1], "%m%d"), +format(data$obs$attrs$Dates[2, 3, , 1], "%m%d"), rep("0114", 5) ) expect_equal( @@ -97,11 +97,11 @@ dim(data$obs$attrs$Dates), dim(data$obs$attrs$load_parameters$dat1$file_date[[1]]) ) expect_equal( -data$obs$attrs$load_parameters$dat1$file_date[[1]][2,3,,1], +data$obs$attrs$load_parameters$dat1$file_date[[1]][2, 3, , 1], c("19990108", "20000108", "20010108", "20020108", "20030108") ) expect_equal( -data$hcst$data[1,1,2,3,,1,1,1,1], +data$hcst$data[1, 1, 2, 3, , 1, 1, 1, 1], c(298.9691, 299.0720, 299.0578, 299.3024, 299.5754), tolerance = 0.0001 ) -- GitLab From 83a750f0bd735c8215e6cc6336044afbe550a635 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 10 Jul 2024 16:16:52 +0200 Subject: [PATCH 11/20] Change subseasonal test pattern from 'subs' to 'subseasonal' --- tests/test_subseasonal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_subseasonal.R b/tests/test_subseasonal.R index 5ed1f291..1de54f2a 100644 --- a/tests/test_subseasonal.R +++ b/tests/test_subseasonal.R @@ -1,7 +1,7 @@ library(testthat) path_testthat <- file.path('./tests/testthat/') -files_testthat <- list.files('./tests/testthat/', pattern = 'subs') +files_testthat <- list.files('./tests/testthat/', pattern = 'subseasonal') for (i_file in 1:length(files_testthat)) { source(paste0('./tests/testthat/', files_testthat[i_file])) -- GitLab From 63d7370bd9fbf8b894d023edabbb8ba23ae143e9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 10 Jul 2024 16:37:16 +0200 Subject: [PATCH 12/20] Fix typos, formatting --- modules/Loading/R/dates2load.R | 4 ++-- modules/Loading/R/load_subseasonal.R | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index 3f12ce98..2c128495 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -25,9 +25,9 @@ dates2load <- function(recipe, logger) { if (temp_freq == "monthly_mean") { # hcst dates file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), - recipe$sdate) + recipe$sdate) file_dates <- .add_dims(file_dates) - } else if (temp_freq == "weekly_mean") { + } else if (temp_freq == "weekly_mean") { sday <- recipe$sday_window if (is.null(sday)) { sday <- 3 diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R index fa3bbfd1..0b616330 100644 --- a/modules/Loading/R/load_subseasonal.R +++ b/modules/Loading/R/load_subseasonal.R @@ -45,9 +45,9 @@ load_subseasonal <- function(recipe) { fcst.nmember <- exp_descrip$nmember$fcst hcst.nmember <- exp_descrip$nmember$hcst - sweek_window <- recipe$Analysis$Time$sweek - sday_window <- recipe$Analysis$Time$swday - + sweek_window <- recipe$Analysis$Time$sweek_window + sday_window <- recipe$Analysis$Time$sday_window + ## TODO: it is necessary? ##if ("accum" %in% names(reference_descrip)) { ## accum <- unlist(reference_descrip$accum[store.freq][[1]]) -- GitLab From 553268099cccfa8fc71fbe1bb9312a459438b366 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 10 Jul 2024 16:51:17 +0200 Subject: [PATCH 13/20] Pipeline fix: make test file pattern more specific --- .../{recipe-subs_weekly.yml => recipe-subseasonal_weekly.yml} | 0 tests/test_decadal.R | 2 +- tests/test_seasonal.R | 2 +- tests/test_subseasonal.R | 2 +- tests/testthat/test-subseasonal_weekly.R | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename tests/recipes/{recipe-subs_weekly.yml => recipe-subseasonal_weekly.yml} (100%) diff --git a/tests/recipes/recipe-subs_weekly.yml b/tests/recipes/recipe-subseasonal_weekly.yml similarity index 100% rename from tests/recipes/recipe-subs_weekly.yml rename to tests/recipes/recipe-subseasonal_weekly.yml diff --git a/tests/test_decadal.R b/tests/test_decadal.R index 75b7fca7..97cd4216 100644 --- a/tests/test_decadal.R +++ b/tests/test_decadal.R @@ -1,7 +1,7 @@ library(testthat) path_testthat <- file.path('./tests/testthat/') -files_testthat <- list.files('./tests/testthat/', pattern = 'decadal') +files_testthat <- list.files('./tests/testthat/', pattern = 'test-decadal') for (i_file in 1:length(files_testthat)) { source(paste0('./tests/testthat/', files_testthat[i_file])) diff --git a/tests/test_seasonal.R b/tests/test_seasonal.R index 4718e3d4..c1f64eb2 100644 --- a/tests/test_seasonal.R +++ b/tests/test_seasonal.R @@ -1,7 +1,7 @@ library(testthat) path_testthat <- file.path('./tests/testthat/') -files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal') +files_testthat <- list.files('./tests/testthat/', pattern = 'test-seasonal') for (i_file in 1:length(files_testthat)) { source(paste0('./tests/testthat/', files_testthat[i_file])) diff --git a/tests/test_subseasonal.R b/tests/test_subseasonal.R index 1de54f2a..3b3d25e2 100644 --- a/tests/test_subseasonal.R +++ b/tests/test_subseasonal.R @@ -1,7 +1,7 @@ library(testthat) path_testthat <- file.path('./tests/testthat/') -files_testthat <- list.files('./tests/testthat/', pattern = 'subseasonal') +files_testthat <- list.files('./tests/testthat/', pattern = 'test-subseasonal') for (i_file in 1:length(files_testthat)) { source(paste0('./tests/testthat/', files_testthat[i_file])) diff --git a/tests/testthat/test-subseasonal_weekly.R b/tests/testthat/test-subseasonal_weekly.R index 104397f2..815ba226 100644 --- a/tests/testthat/test-subseasonal_weekly.R +++ b/tests/testthat/test-subseasonal_weekly.R @@ -3,7 +3,7 @@ context("Subseasonal weekly data") source("modules/Loading/Loading.R") source("modules/Saving/R/get_dir.R") -recipe_file <- "tests/recipes/recipe-subs_weekly.yml" +recipe_file <- "tests/recipes/recipe-subseasonal_weekly.yml" recipe <- prepare_outputs(recipe_file, disable_checks = F) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_subseasonal.yml"))$archive -- GitLab From 0c558a8e7e0dfe4b07ebfad60e11e9d46b0912e0 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 11 Jul 2024 15:55:11 +0200 Subject: [PATCH 14/20] Add general tolower() statement at the beginning of the checks --- tools/check_recipe.R | 92 ++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 51 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 836ffada..ffba7a52 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -3,6 +3,21 @@ check_recipe <- function(recipe) { ## TODO: set up logger-less case info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) + # Pre-processing: convert all strings in Analysis section to lowercase, + # except for system and reference names + system <- recipe$Analysis$Datasets$System + reference <- recipe$Analysis$Datasets$Reference + recipe$Analysis <- rapply(recipe$Analysis, + function (x) { + if (is.character(x)) { + tolower(x) + } else { + x + } + }, + how = "list") + recipe$Analysis$Datasets$System <- system + recipe$Analysis$Datasets$Reference <- reference # --------------------------------------------------------------------- # ANALYSIS CHECKS # --------------------------------------------------------------------- @@ -11,7 +26,9 @@ check_recipe <- function(recipe) { "hcst_end") TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") - TIME_SETTINGS_SUBSEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", "hcst_end", "week_day", "sweek_window", "sday_window") + TIME_SETTINGS_SUBSEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", + "hcst_end", "week_day", "sweek_window", + "sday_window") PARAMS <- c("Horizon", "Time", "Variables", "Region", "Regrid", "Workflow", "Datasets") HORIZONS <- c("subseasonal", "seasonal", "decadal") @@ -36,14 +53,11 @@ check_recipe <- function(recipe) { error_status <- TRUE } - if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { + if (!any(HORIZONS %in% recipe$Analysis$Horizon)) { error(recipe$Run$logger, paste0("The element 'Horizon' in the recipe must be one of the ", "following: ", paste(HORIZONS, collapse = ", "), ".")) error_status <- T - } else { - # avoid tolower() in the rest of the checks - recipe$Analysis$Horizon <- tolower(recipe$Analysis$Horizon) } # Check time settings if (recipe$Analysis$Horizon == "seasonal") { @@ -116,7 +130,7 @@ check_recipe <- function(recipe) { isFALSE(recipe$Analysis$Datasets$Multimodel)) { recipe$Analysis$Datasets$Multimodel <- list(execute = FALSE) } - if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'false') { + if (!isTRUE(recipe$Analysis$Datasets$Multimodel$execute)) { multimodel <- FALSE } else { multimodel <- TRUE @@ -132,14 +146,14 @@ check_recipe <- function(recipe) { "Please specify yes/true, no/false or 'both'.")) error_status <- TRUE } - if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% + if (recipe$Analysis$Datasets$Multimodel$approach %in% MULTIMODEL_METHODS) { error(recipe$Run$logger, paste("The specified approach for the multimodel is not valid.", "Please specify pooled.")) #, mean or median.")) error_status <- TRUE } - if (!tolower(recipe$Analysis$Datasets$Multimodel$createFrom) %in% + if (recipe$Analysis$Datasets$Multimodel$createFrom %in% MULTIMODEL_CREATEFROM) { error(recipe$Run$logger, paste("The specified 'createFrom' for the multimodel is not valid.", @@ -148,7 +162,7 @@ check_recipe <- function(recipe) { } } } else { - recipe$Analysis$Datasets$Multimodel$execute <- FALSE + recipe$Analysis$Datasets$Multimodel$execute <- FALSE } # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || @@ -189,7 +203,7 @@ check_recipe <- function(recipe) { # Subseasonal-specific checks if (recipe$Analysis$Horizon == "subseasonal") { if (is.null(recipe$Analysis$Time$week_day)) { - recipe$Analysis$Time$week_day <- "Thursday" + recipe$Analysis$Time$week_day <- "thursday" info(recipe$Run$logger, "Initialization day set to Thursdays") } @@ -206,7 +220,7 @@ check_recipe <- function(recipe) { ## TODO: Review this check if ((is.logical(recipe$Analysis$Workflow$Calibration$method) && recipe$Analysis$Workflow$Calibration$method == FALSE) || - tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || + recipe$Analysis$Workflow$Calibration$method == 'none' || is.null(recipe$Analysis$Workflow$Calibration$method)) { recipe$Analysis$Time$sday_window <- 1 info(recipe$Run$logger, @@ -218,14 +232,6 @@ check_recipe <- function(recipe) { } } } - ## TODO: Is this needed? - if (is.null(recipe$Analysis$Time$fcst_year) || - identical(tolower(recipe$Analysis$Time$fcst_year), 'none')) { - stream <- "hindcast" - # recipe$Analysis$Time$fcst_year <- 'YYYY' - } else { - stream <- "fcst" - } ## TODO: To be implemented in the future # if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && @@ -235,27 +241,12 @@ check_recipe <- function(recipe) { # "Element fcst_day in recipe set as 1.") # recipe$Analysis$Time$sdate$fcst_day <- '01' # } - ## TODO: Delete, this parameter was deprecated - # if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) { - # error(recipe$Run$logger, - # paste("The element 'fcst_sday' in the recipe should be defined.")) - # } if (is.null(recipe$Analysis$Time$fcst_year)) { warn(recipe$Run$logger, paste("The element 'fcst_year' is not defined in the recipe.", "No forecast year will be used.")) } - ## TODO: Adapt and move this inside 'if'? - # fcst.sdate <- NULL - # for (syear in recipe$Analysis$Time$fcst_year) { - # for (sday in recipe$Analysis$Time$sdate) { - # fcst.sdate <- c(fcst.sdate, - # paste0(syear, - # sprintf("%04d", as.numeric(sday)))) - # } - # } - # fcst.sdate <- list(stream = stream, fcst.sdate = fcst.sdate) # Regrid checks: if (length(recipe$Analysis$Regrid) != 2) { @@ -360,8 +351,8 @@ check_recipe <- function(recipe) { "'Workflow:Time_aggregation:method' is not defined in the recipe.") error_status <- TRUE } else { - if (!tolower(recipe$Analysis$Workflow$Time_aggregation$method) %in% - TIME_AGG_METHODS) { + if (!(recipe$Analysis$Workflow$Time_aggregation$method %in% + TIME_AGG_METHODS)) { error(recipe$Run$logger, paste0("Time_aggregation method should be one of: ", paste(TIME_AGG_METHODS, collapse = ", "), ".")) @@ -375,8 +366,7 @@ check_recipe <- function(recipe) { "add 'ini' and 'end' or 'user_def'.")) error_status <- TRUE } - if (all(c('ini', 'end') %in% - tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) { + if (all(c('ini', 'end') %in% names(recipe$Analysis$Workflow$Time_aggregation))) { if (!is.numeric(recipe$Analysis$Workflow$Time_aggregation$ini) || !is.numeric(recipe$Analysis$Workflow$Time_aggregation$end)) { error(recipe$Run$logger, @@ -400,9 +390,9 @@ check_recipe <- function(recipe) { } } } - if ('user_def' %in% tolower(names(recipe$Analysis$Workflow$Time_aggregation))) { + if ('user_def' %in% names(recipe$Analysis$Workflow$Time_aggregation)) { if (all(c('ini', 'end') %in% - tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) { + names(recipe$Analysis$Workflow$Time_aggregation))) { recipe$Analysis$Workflow$Time_aggregation$user_def <- NULL warn(recipe$Run$logger, paste("Both 'ini' and 'end' and 'user_def' are defined under", @@ -438,7 +428,7 @@ check_recipe <- function(recipe) { ## TODO: Review this check if ((is.logical(recipe$Analysis$Workflow$Calibration$method) && recipe$Analysis$Workflow$Calibration$method == FALSE) || - tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || + recipe$Analysis$Workflow$Calibration$method == 'none' || is.null(recipe$Analysis$Workflow$Calibration$method)) { warn(recipe$Run$logger, "No Calibration method was specified, raw data verification.") @@ -495,7 +485,7 @@ check_recipe <- function(recipe) { # Downscaling if ("Downscaling" %in% names(recipe$Analysis$Workflow)) { - downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower) + downscal_params <- recipe$Analysis$Workflow$Downscaling # Define accepted entries DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", @@ -598,7 +588,7 @@ check_recipe <- function(recipe) { if ("Indices" %in% names(recipe$Analysis$Workflow)) { nino_indices <- paste0("nino", c("1+2", "3", "3.4", "4")) indices <- c("nao", nino_indices) - if (!("anomalies" %in% tolower(names(recipe$Analysis$Workflow)))) { + if (!("Anomalies" %in% names(recipe$Analysis$Workflow))) { error(recipe$Run$logger, paste0("Indices uses Anomalies as input, but Anomalies are missing", "in the recipe.")) @@ -652,14 +642,14 @@ check_recipe <- function(recipe) { } else { requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] - if (!all(tolower(requested_metrics) %in% AVAILABLE_METRICS)) { + if (!all(requested_metrics %in% AVAILABLE_METRICS)) { error(recipe$Run$logger, paste0("Some of the metrics requested under 'Skill' are not ", "available in SUNSET. Check the documentation to see the ", "full list of accepted skill metrics.")) error_status <- TRUE } - if (tolower(recipe$Analysis$Output_format) != 'scorecards') { + if (recipe$Analysis$Output_format != 'scorecards') { if (any(grepl('_syear', requested_metrics))) { recipe$Analysis$Output_format <- 'scorecards' warn(recipe$Run$logger, @@ -769,7 +759,7 @@ check_recipe <- function(recipe) { # Check user-defined file format for plots PLOT_FILE_FORMATS <- c("pdf", "png", "jpg", "jpeg", "eps") if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && - !(tolower(recipe$Analysis$Workflow$Visualization$file_format)) %in% + !(recipe$Analysis$Workflow$Visualization$file_format) %in% PLOT_FILE_FORMATS) { error(recipe$Run$logger, paste0("Visualization:file_format must be one of the following: ", @@ -794,7 +784,7 @@ check_recipe <- function(recipe) { sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, ", | |,")[[1]] if (recipe$Analysis$Workflow$Scorecards$metric_aggregation == 'score') { - if ('rpss' %in% tolower(sc_metrics)) { + if ('rpss' %in% sc_metrics) { if (!('rps_clim_syear' %in% requested_metrics)) { requested_metrics <- c(requested_metrics, 'rps_clim_syear') } @@ -802,7 +792,7 @@ check_recipe <- function(recipe) { requested_metrics <- c(requested_metrics, 'rps_syear') } } - if ('crpss' %in% tolower(sc_metrics)) { + if ('crpss' %in% sc_metrics) { if (!('crps_clim_syear' %in% requested_metrics)) { requested_metrics <- c(requested_metrics, 'crps_clim_syear') } @@ -810,7 +800,7 @@ check_recipe <- function(recipe) { requested_metrics <- c(requested_metrics, 'crps_syear') } } - if ('enscorr' %in% tolower(sc_metrics) && + if (('enscorr' %in% sc_metrics) && !all(c('std', 'cov', 'n_eff') %in% statistics)) { error(recipe$Run$logger, paste("For 'enscorr' to be plotted in the Scorecards with", @@ -822,12 +812,12 @@ check_recipe <- function(recipe) { recipe$Analysis$Workflow$Skill$metric <- paste0(requested_metrics, collapse = " ") } - if (tolower(recipe$Analysis$Output_format) != 'scorecards') { + if (recipe$Analysis$Output_format != 'scorecards') { warn(recipe$Run$logger, "Scorecards requested: setting output format as 'Scorecards'") recipe$Analysis$Output_format <- 'scorecards' } - if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) { + if (!all(sc_metrics %in% requested_metrics)) { error(recipe$Run$logger, paste0("All of the metrics requested under 'Scorecards' must ", "be requested in the 'Skill' section.")) -- GitLab From 353ed80c530a8f675f4fff11081a76932f19a4a3 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 11 Jul 2024 15:55:38 +0200 Subject: [PATCH 15/20] Generalize subseasonal code to look for the day of the week provided in the recipe --- tools/divide_recipe.R | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 9d4f00d4..ceb696f4 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -33,13 +33,6 @@ divide_recipe <- function(recipe) { all_recipes[[var]]$Analysis$Variables <- recipe$Analysis$Variables[[var]] } - # for (dep in verifications$dependent) { - # all_recipes[[i]]$Analysis$Variables <- dep - # i = i + 1 - # all_recipes <- append(all_recipes, list(beta_recipe)) - # } - # all_recipes <- all_recipes[-length(all_recipes)] # wth does this do - # duplicate recipe by Datasets: # check Systems # If a single system is not given inside a list, rebuild structure @@ -61,7 +54,7 @@ divide_recipe <- function(recipe) { } for (sys in 1:n_models) { for (reci in 1:length(all_recipes)) { - if (sys == length(recipe$Analysis$Datasets$System)+1){ + if (sys == length(recipe$Analysis$Datasets$System)+1) { # seasonal only needs the model name; decadal also needs the members if (tolower(recipe$Analysis$Horizon) == 'seasonal') { m <- unlist(recipe$Analysis$Datasets$System) @@ -171,12 +164,15 @@ divide_recipe <- function(recipe) { } } else if (tolower(recipe$Analysis$Horizon) == "subseasonal") { for (sdate in 1:length(recipe$Analysis$Time$sdate)) { - #sdate: "2024" Calculate all Thursdays in the year - # TODO: consider other initialisation days in the week + # If 'sdate' is a year, calculate all days in that year that are the same as + # the day specify in 'week_day' (by default, Thursday) + DAYS_OF_WEEK <- list(monday = 1, tuesday = 2, wednesday = 3, thursday = 4, + friday = 5, saturday = 6, sunday = 0) if (nchar(recipe$Analysis$Time$sdate[sdate]) == 4) { - days <- as.POSIXlt(paste(recipe$Analysis$Time$sdate[sdate], + sdate_weekday <- DAYS_OF_WEEK[[tolower(recipe$Analysis$Time$week_day)]] + days <- as.POSIXlt(paste(recipe$Analysis$Time$sdate[sdate], 1:366, sep="-"), format="%Y-%j") - day_of_the_week_ini <- days[days$wday == 4] # Thursdays + day_of_the_week_ini <- days[days$wday == sdate_weekday] } else if (nchar(recipe$Analysis$Time$sdate[sdate]) == 8) { day_of_the_week_ini <- recipe$Analysis$Time$sdate[sdate] } else { -- GitLab From 94375f0e55fac1d137d0ed77c41a90368b5c42da Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 11 Jul 2024 16:51:52 +0200 Subject: [PATCH 16/20] Fix tolower() call with rapply() --- tools/check_recipe.R | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index ffba7a52..c11707ce 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -7,15 +7,9 @@ check_recipe <- function(recipe) { # except for system and reference names system <- recipe$Analysis$Datasets$System reference <- recipe$Analysis$Datasets$Reference - recipe$Analysis <- rapply(recipe$Analysis, - function (x) { - if (is.character(x)) { - tolower(x) - } else { - x - } - }, - how = "list") + recipe$Analysis <- rapply(recipe$Analysis, tolower, + how = "replace", + classes = "character") recipe$Analysis$Datasets$System <- system recipe$Analysis$Datasets$Reference <- reference # --------------------------------------------------------------------- -- GitLab From 2864443a7cf175fe16195e25591fa3791bb3fcb1 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 17 Jul 2024 12:33:20 +0200 Subject: [PATCH 17/20] fix sday window size --- modules/Loading/R/dates2load.R | 2 +- modules/Loading/R/subseas_file_dates.R | 44 +++++++++++++++++--- recipe_subseasonal.yml | 4 +- tests/testthat/test-subseasonal_weekly.R | 51 ++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 9 deletions(-) diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index 2c128495..7cf80b8f 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -48,7 +48,7 @@ dates2load <- function(recipe, logger) { format = "%Y%m%d") + 7 *((fcst_sweek_ind - 1)/2), "%Y%m%d")) } } else { - fcst.sdate_to_start <- startdate + fcst.sdate_to_start <- recipe$sdate } file_dates <- subseas_file_dates(startdate = fcst.sdate_to_start, n.skill.weeks = n.skill.weeks, diff --git a/modules/Loading/R/subseas_file_dates.R b/modules/Loading/R/subseas_file_dates.R index 519b32e0..eca66c55 100644 --- a/modules/Loading/R/subseas_file_dates.R +++ b/modules/Loading/R/subseas_file_dates.R @@ -8,6 +8,38 @@ subseas_file_dates <- function(startdate, n.skill.weeks, n.days, hcst.start, hcst.end, ftime_min, ftime_max, out) { + # Generate the sday_window vectors: + ## Only for Thursdays and Mondays + ## Create a diagonal matrix of 3 and 4 days matching Mondays/Thursdays + ## To be substracted to startdate in the loop below + prev_sday <- t(sapply(1:((n.days + 1)/2 - 1), function(x) { + if (x %% 2 == 0) { + res <- rep(4, (n.days + 1)/2 - 1) + } else { + res <- rep(3, (n.days + 1)/2 - 1) + } + return(res)})) + prev_sday[lower.tri(prev_sday)] <- 0 + + next_sday <- t(sapply(1:((n.days + 1)/2 - 1), function(x) { + if (x %% 2 == 0) { + res <- rep(3, (n.days + 1)/2 - 1) + } else { + res <- rep(4, (n.days + 1)/2 - 1) + } + return(res)})) + next_sday[lower.tri(next_sday)] <- 0 + + if (n.days > 1) { + prev_sday <- colSums(prev_sday) + next_sday <- colSums(next_sday) + thrusday_win <- c(rev(prev_sday) * -1, 0, next_sday) + monday_win <- c(rev(next_sday) * -1, 0, prev_sday) + } else { + thrusday_win <- monday_win <- 0 + } + #### END Generation sdya_window vectors + ftime_min <- as.numeric(substr(as.character(startdate), 1, 4)) - hcst.end ftime_max <- as.numeric(substr(as.character(startdate), 1, 4)) - hcst.start startdate <- as.Date(toString(startdate), "%Y%m%d") @@ -31,11 +63,12 @@ subseas_file_dates <- function(startdate, n.skill.weeks, n.days, # makes sure of giving a: [M T M || T M T] combination if(format(startdate, "%a") == "Thu") { - hcst.sdays <- c(startdate - 3, startdate, startdate + 4, recursive = T) - } else { - hcst.sdays <- c(startdate - 4, startdate, startdate + 3, recursive = T) - } - + hcst.sdays <- startdate + thrusday_win +# hcst.sdays <- c(startdate - 3, startdate, startdate + 4, recursive = T) + } else { + hcst.sdays <- startdate + monday_win +# hcst.sdays <- c(startdate - 4, startdate, startdate + 3, recursive = T) + } hcst.sdays <- format(as.Date(hcst.sdays,"%Y%m%d"), "%Y%m%d") sdays.file_dates <- array(numeric(),c(0, (ftime_max - ftime_min) + 1)) @@ -52,7 +85,6 @@ subseas_file_dates <- function(startdate, n.skill.weeks, n.days, } names(dim(sdays.file_dates)) <- c('sday','syear') - file_dates <- abind(file_dates, sdays.file_dates, along = 1) file_dates.fcst <- abind(file_dates.fcst, sdays.file_dates.fcst, along = 1) diff --git a/recipe_subseasonal.yml b/recipe_subseasonal.yml index 61d08d28..d817c69a 100644 --- a/recipe_subseasonal.yml +++ b/recipe_subseasonal.yml @@ -33,7 +33,7 @@ Analysis: # To request more References to be divided into atomic recipes, add them this way: # - {name: 'ERA5Land'} Time: - sdate: 2024 #20240101 #%Y%m%d + sdate: 20240711 #%Y%m%d #- '1201' # Start date, 'mmdd' (Mandatory, int) # To request more startdates to be divided into atomic recipes, add them this way: # - '0101' @@ -45,7 +45,7 @@ Analysis: ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) ftime_max: 4 # Last forecast time step in months. Starts at “1”. (Mandatory, int) week_day: Thursday - sweek_window: 9 + sweek_window: 3 sday_window: 3 Region: # latmin: minimum latitude (Mandatory, int) diff --git a/tests/testthat/test-subseasonal_weekly.R b/tests/testthat/test-subseasonal_weekly.R index 815ba226..da99e4ee 100644 --- a/tests/testthat/test-subseasonal_weekly.R +++ b/tests/testthat/test-subseasonal_weekly.R @@ -107,6 +107,57 @@ tolerance = 0.0001 ) }) +recipe$Analysis$Time$sweek_window <- 1 +recipe$Analysis$Time$sday_window <- 1 +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +test_that("2. Loading", { +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 5, time = 4, latitude = 10, longitude = 21, ensemble = 12) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 1, sweek = 1, syear = 5, time = 4) +) +expect_equal( +format(data$hcst$attrs$Dates[1, 1, , 1], "%m%d"), +rep("0111", 5) +) +expect_equal( +format(data$hcst$attrs$Dates[1, 1, , 1], "%Y"), +as.character(1999:2003), +) +expect_equal( +format(data$hcst$attrs$Dates[1, 1, 3, ], "%Y%m%d"), +c("20010111", "20010118", "20010125", "20010201") +) +# Obs metadata +expect_equal( +dim(data$obs$attrs$Dates), +c(sday = 1, sweek = 1, syear = 5, time = 4) +) +expect_equal( +format(data$obs$attrs$Dates[1, 1, , 1], "%m%d"), +rep("0114", 5) +) +expect_equal( +dim(data$obs$attrs$Dates), +dim(data$obs$attrs$load_parameters$dat1$file_date[[1]]) +) +expect_equal( +data$obs$attrs$load_parameters$dat1$file_date[[1]][1, 1, , 1], +c("19990108", "20000108", "20010108", "20020108", "20030108") +) +expect_equal( +data$hcst$data[1, 1, 1, 1, , 1, 1, 1, 1], +c(298.9691, 299.0720, 299.0578, 299.3024, 299.5754), +tolerance = 0.0001 +) +}) # Delete files unlink(recipe$Run$output_dir, recursive = T) -- GitLab From 46173c4f8cb49573b16102264131157afb5660d3 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 23 Jul 2024 16:51:42 +0200 Subject: [PATCH 18/20] Do not change region name to lowercase --- tools/check_recipe.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index c11707ce..020cda9b 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -7,11 +7,13 @@ check_recipe <- function(recipe) { # except for system and reference names system <- recipe$Analysis$Datasets$System reference <- recipe$Analysis$Datasets$Reference + region <- recipe$Analysis$Region recipe$Analysis <- rapply(recipe$Analysis, tolower, how = "replace", classes = "character") recipe$Analysis$Datasets$System <- system recipe$Analysis$Datasets$Reference <- reference + recipe$Analysis$Region <- region # --------------------------------------------------------------------- # ANALYSIS CHECKS # --------------------------------------------------------------------- -- GitLab From d9c6b61a337a34f1a039b76dbaaf536905331b75 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 25 Jul 2024 15:41:27 +0200 Subject: [PATCH 19/20] Make all elements in the recipe lowercase in prepare_outputs() --- modules/Multimodel/dims_multimodel.R | 4 ++-- modules/Multimodel/load_multimodel.R | 6 +++--- tools/check_recipe.R | 11 ----------- tools/prepare_outputs.R | 12 ++++++++++++ 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/modules/Multimodel/dims_multimodel.R b/modules/Multimodel/dims_multimodel.R index f9076153..b132f1b4 100644 --- a/modules/Multimodel/dims_multimodel.R +++ b/modules/Multimodel/dims_multimodel.R @@ -16,6 +16,7 @@ dims_multimodel <- function(recipe) { variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] exp_descrip <- archive$System[[exp.name[1]]] reference_descrip <- archive$Reference[[ref.name]] + createFrom <- str_to_title(recipe$Analysis$Datasets$Multimodel$createFrom) sdates <- dates2load(recipe, recipe$Run$logger) lats.min <- recipe$Analysis$Region$latmin @@ -31,8 +32,7 @@ dims_multimodel <- function(recipe) { } # Find the saved data directory - recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", - recipe$Analysis$Datasets$Multimodel$createFrom) + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", createFrom) if (tolower(recipe$Analysis$Output_format) == "scorecards") { hcst_start <- recipe$Analysis$Time$hcst_start hcst_end <- recipe$Analysis$Time$hcst_end diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index c6bd4585..529d2e5d 100644 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -15,8 +15,9 @@ load_multimodel <- function(recipe) { store.freq <- recipe$Analysis$Variables$freq variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] exp_descrip <- archive$System[[exp.name[1]]] - reference_descrip <- archive$Reference[[ref.name]] + reference_descrip <- archive$Reference[[ref.name] sdates <- dates2load(recipe, recipe$Run$logger) + createFrom <- str_to_title(recipe$Analysis$Datasets$Multimodel$createFrom) lats.min <- recipe$Analysis$Region$latmin lats.max <- recipe$Analysis$Region$latmax @@ -31,8 +32,7 @@ load_multimodel <- function(recipe) { } # Find the saved data directory - recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", - recipe$Analysis$Datasets$Multimodel$createFrom) + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", createFrom) if (tolower(recipe$Analysis$Output_format) == "scorecards") { hcst_start <- recipe$Analysis$Time$hcst_start hcst_end <- recipe$Analysis$Time$hcst_end diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 020cda9b..01134476 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -3,17 +3,6 @@ check_recipe <- function(recipe) { ## TODO: set up logger-less case info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) - # Pre-processing: convert all strings in Analysis section to lowercase, - # except for system and reference names - system <- recipe$Analysis$Datasets$System - reference <- recipe$Analysis$Datasets$Reference - region <- recipe$Analysis$Region - recipe$Analysis <- rapply(recipe$Analysis, tolower, - how = "replace", - classes = "character") - recipe$Analysis$Datasets$System <- system - recipe$Analysis$Datasets$Reference <- reference - recipe$Analysis$Region <- region # --------------------------------------------------------------------- # ANALYSIS CHECKS # --------------------------------------------------------------------- diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 16a34d32..eab9c4fe 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -108,6 +108,18 @@ prepare_outputs <- function(recipe_file, if (restructure) { recipe <- restructure_recipe(recipe) } + # Pre-processing: convert all strings in Analysis section to lowercase, + # except for system, reference and region names + system <- recipe$Analysis$Datasets$System + reference <- recipe$Analysis$Datasets$Reference + region <- recipe$Analysis$Region + recipe$Analysis <- rapply(recipe$Analysis, tolower, + how = "replace", + classes = "character") + recipe$Analysis$Datasets$System <- system + recipe$Analysis$Datasets$Reference <- reference + recipe$Analysis$Region <- region + # Run recipe checker if (disable_checks) { warn(recipe$Run$logger, -- GitLab From 2b5952e8ca3f28ae97b8bd57323dd2ef70733e26 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 26 Jul 2024 16:18:48 +0200 Subject: [PATCH 20/20] Set 'file_date' as metadata_dims to load metadata if the first file is missing --- modules/Loading/R/load_subseasonal.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R index 0b616330..793b46ec 100644 --- a/modules/Loading/R/load_subseasonal.R +++ b/modules/Loading/R/load_subseasonal.R @@ -89,6 +89,9 @@ load_subseasonal <- function(recipe) { # Load hindcast #------------------------------------------------------------------- + ## NOTE: metadata_dims has to be specified as 'file_date' to be able to get + ## the metadata when the first file is missing. However, when retrieving two + ## variables, it must be 'var'. Start() does not admit both. hcst <- Start(dat = hcst.path, var = variable, var_dir = var_dir_exp, @@ -107,7 +110,7 @@ load_subseasonal <- function(recipe) { longitude = c('lon', 'longitude'), ensemble = c('member', 'ensemble', 'lev')), ensemble = indices(1:hcst.nmember), - metadata_dims = 'var', # change to just 'var'? + metadata_dims = c('file_date', 'var'), # change? return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), @@ -172,7 +175,7 @@ load_subseasonal <- function(recipe) { longitude = c('lon', 'longitude'), ensemble = c('member', 'ensemble', 'lev')), ensemble = indices(1:fcst.nmember), - metadata_dims = 'var', + metadata_dims = c('var', 'file_date'), return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), @@ -243,7 +246,7 @@ load_subseasonal <- function(recipe) { transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat','latitude'), longitude = c('lon','longitude')), - metadata_dims = 'var', + metadata_dims = c('file_date', 'var'), return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), @@ -281,7 +284,7 @@ load_subseasonal <- function(recipe) { transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat','latitude'), longitude = c('lon','longitude')), - metadata_dims = 'var', + metadata_dims = c('file_date', 'var'), return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), -- GitLab