diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 997870c821fec43485f1923b6802f853991c5633..ccaf0809ed7097283f5eaa8c02955282f902a5e7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,3 +26,17 @@ unit-test-decadal: # This job runs in the test stage. - module list - echo "Running decadal unit tests..." - Rscript ./tests/test_decadal.R + +unit-test-subseasonal: # This job runs in the test stage. + stage: test + script: + - echo "Loading modules..." + - module load R/4.1.2-foss-2015a-bare + - module load CDO/1.9.8-foss-2015a + - module load GEOS/3.7.2-foss-2015a-Python-3.7.3 + - module load GDAL/2.2.1-foss-2015a + - module load PROJ/4.8.0-foss-2015a + - module list + - echo "Running subseasonal unit tests..." + - Rscript ./tests/test_subseasonal.R + diff --git a/conf/archive_subseasonal.yml b/conf/archive_subseasonal.yml index ed7027da15e020d6585da4f08cc2aeb4f5ac7309..c142851dcdef7ae21de59168a88f804e4534fb77 100644 --- a/conf/archive_subseasonal.yml +++ b/conf/archive_subseasonal.yml @@ -1,5 +1,3 @@ -# TO BE REVIEWED - esarchive: src: "/esarchive/" System: @@ -7,19 +5,26 @@ esarchive: name: "NCEP CFSv2" institution: "NOAA NCEP" #? src: "exp/ncep/cfs-v2/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", - "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} + weekly_mean: {"tas":"weekly_mean/s2s/tas_f24h/", + "prlr":"weekly_mean/s2s/prlr_f24h/", + "tasmax":"weekly_mean/s2s/tasmax_f24h/", + "tasmin":"weekly_mean/s2s/tasmin_f24h/", + "sfcWind":"weekly_mean/s2s/sfcWind_f24h/", + "rsds":"weekly_mean/s2s/rsds_f24h/"} + daily_mean: {"tas":"daily_mean/s2s/tas_f6h"} nmember: fcst: 48 hcst: 12 - calendar: "gregorian" - time_stamp_lag: "0" - reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + calendar: "proleptic_gregorian" + time_stamp_lag: "0" # Do we need it for subseasonal? + reference_grid: "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20040624.nc" # is it the same as seasonal? 
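+      # Illustrative example (an assumption, following how load_subseasonal.R
+      # composes paths as src + <var_dir> + "$var$_$file_date$.nc"): a
+      # weekly_mean tas request for file_date 20240104 would resolve to
+      # /esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20240104.nc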
 Reference:
   ERA5:
     name: "ERA5"
     institution: "European Centre for Medium-Range Weather Forecasts"
     src: "recon/ecmwf/era5/"
+    weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/",
+                  "prlr":"weekly_mean/prlr_f1h-r1440x721cds/"}
     daily_mean: {"tas":"daily_mean/tas_f1h-r1440x721cds/",
                  "rsds":"daily_mean/rsds_f1h-r1440x721cds/",
                  "prlr":"daily_mean/prlr_f1h-r1440x721cds/",
diff --git a/example_scripts/subseasonal_load.R b/example_scripts/subseasonal_load.R
new file mode 100644
index 0000000000000000000000000000000000000000..7418850ad574d2537f1cf90d3da8c4540a1317a9
--- /dev/null
+++ b/example_scripts/subseasonal_load.R
@@ -0,0 +1,19 @@
+
+source("modules/Loading/Loading.R")
+
+recipe_file <- "recipe_subseasonal.yml"
+
+# To test atomic recipes:
+#recipe <- prepare_outputs(recipe_file)
+
+# To check the split:
+#source("tools/divide_recipe.R")
+#divide_recipe(recipe)
+
+args <- commandArgs(trailingOnly = TRUE)
+recipe_file <- args[1]
+recipe <- read_atomic_recipe(recipe_file)
+
+data <- Loading(recipe)
+
+
diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 6e219bf13b09a6d2888b8edb4095b1f78b875ee0..8e322ab13280f20161a26642af5f620e1eb3c87d 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -23,6 +23,9 @@ Loading <- function(recipe) {
   } else if (time_horizon == "decadal") {
     source("modules/Loading/R/load_decadal.R")
     data <- load_decadal(recipe)
+  } else if (time_horizon == "subseasonal") {
+    source("modules/Loading/R/load_subseasonal.R")
+    data <- load_subseasonal(recipe)
   } else {
     stop("Incorrect time horizon.")
   }
diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R
index f084ce62fb1e4798e5dc3948fe0edd3b2c3bfdd6..7cf80b8f080a097eca647c6d349cdfb15cedf50d 100644
--- a/modules/Loading/R/dates2load.R
+++ b/modules/Loading/R/dates2load.R
@@ -14,28 +14,72 @@
 #'@export
 
 library(lubridate)
+source("modules/Loading/R/subseas_file_dates.R")
 
 dates2load <- function(recipe, logger) {
-
+# From the recipe it uses:
+# - the temporal frequency of the requested variable
+# - the details of the $Time request: hcst period, forecast times, ...
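+#
+# Illustrative sketch of the returned structure for the weekly case (an
+# assumption based on the weekly tests below, with sday_window = 3,
+# sweek_window = 5 and a 1999-2003 hindcast):
+#   $hcst: character array [sday = 3, sweek = 5, syear = 5] of "YYYYMMDD" dates
+#   $fcst: character array [sday = 1, sweek = 1, syear = 1] (central start date)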
temp_freq <- recipe$Analysis$Variables$freq recipe <- recipe$Analysis$Time - # hcst dates - file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), - recipe$sdate) - if (temp_freq == "monthly_mean") { + # hcst dates + file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), + recipe$sdate) file_dates <- .add_dims(file_dates) + } else if (temp_freq == "weekly_mean") { + sday <- recipe$sday_window + if (is.null(sday)) { + sday <- 3 + } + n.skill.weeks <- recipe$sweek_window + if (n.skill.weeks %% 2 != 0) { + fcst_sweek_ind <- (n.skill.weeks + 1)/2 + } else { + fcst_sweek_ind <- (n.skill.weeks)/2 + } + fcst.sdate <- recipe$sdate + if (fcst_sweek_ind != 1) { + if (fcst_sweek_ind %% 2 == 0) { + fcst.sdate_to_start <- as.character(format(as.Date(as.character(fcst.sdate), + format = "%Y%m%d") + 4 + 7 * ((fcst_sweek_ind/2) - 1), "%Y%m%d")) + } else { + fcst.sdate_to_start <- as.character(format(as.Date(as.character(fcst.sdate), + format = "%Y%m%d") + 7 *((fcst_sweek_ind - 1)/2), "%Y%m%d")) + } + } else { + fcst.sdate_to_start <- recipe$sdate + } + file_dates <- subseas_file_dates(startdate = fcst.sdate_to_start, + n.skill.weeks = n.skill.weeks, + n.days = sday, + hcst.start = as.numeric(recipe$hcst_start), + hcst.end = as.numeric(recipe$hcst_end), + ftime_min = recipe$ftime_min, + ftime_max = recipe$ftime_max, out = 'hcst') + } else { + file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), + recipe$sdate) } - # fcst dates (if fcst_year empty it creates an empty object) + # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$fcst_year)) { - file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) if (temp_freq == "monthly_mean") { + file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) file_dates.fcst <- .add_dims(file_dates.fcst) + } else if (temp_freq == "weekly_mean") { + file_dates.fcst <- subseas_file_dates(startdate = fcst.sdate_to_start, + n.skill.weeks = n.skill.weeks, + n.days = sday, + hcst.start = as.numeric(recipe$hcst_start), + hcst.end = as.numeric(recipe$hcst_end), + ftime_min = recipe$ftime_min, + ftime_max = recipe$ftime_max, out = 'fcst') + } else { + file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) } } else { + # if no fcst year is requested: file_dates.fcst <- NULL - info(logger, - paste("fcst_year empty in the recipe, creating empty fcst object...")) } return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R new file mode 100644 index 0000000000000000000000000000000000000000..793b46ecd98f5c3d32762687c263f85c96c7aecd --- /dev/null +++ b/modules/Loading/R/load_subseasonal.R @@ -0,0 +1,386 @@ +# Load required libraries/funs +source("modules/Loading/R/get_regrid_params.R") +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Loading/R/compare_exp_obs_grids.R") + +load_subseasonal <- function(recipe) { + + # ------------------------------------------- + # Set params ----------------------------------------- + + hcst.inityear <- recipe$Analysis$Time$hcst_start + hcst.endyear <- recipe$Analysis$Time$hcst_end + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + + variable <- 
strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
+  store.freq <- recipe$Analysis$Variables$freq
+
+  # get sdates array
+  ## LOGGER: Change dates2load to extract logger from recipe?
+  sdates <- dates2load(recipe, recipe$Run$logger)
+
+  ## TODO: Examine this verification part and check whether it is necessary
+  # stream <- verifications$stream
+  # sdates <- verifications$fcst.sdate
+
+  ## TODO: define fcst.name
+  ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name
+
+  # get datasets dict:
+  archive <- read_yaml("conf/archive_subseasonal.yml")[[recipe$Run$filesystem]]
+  exp_descrip <- archive$System[[exp.name]]
+
+  freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]])
+  reference_descrip <- archive$Reference[[ref.name]]
+  freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]])
+  obs.dir <- reference_descrip$src
+  fcst.dir <- exp_descrip$src
+  hcst.dir <- exp_descrip$src
+  fcst.nmember <- exp_descrip$nmember$fcst
+  hcst.nmember <- exp_descrip$nmember$hcst
+
+  sweek_window <- recipe$Analysis$Time$sweek_window
+  sday_window <- recipe$Analysis$Time$sday_window
+
+  ## TODO: is it necessary?
+  ##if ("accum" %in% names(reference_descrip)) {
+  ##  accum <- unlist(reference_descrip$accum[store.freq][[1]])
+  ##} else {
+  ##  accum <- FALSE
+  ##}
+
+  if (store.freq %in% c("daily", "daily_mean")) {
+    frequency <- "daily_mean"
+  } else if (store.freq == "weekly_mean") {
+    frequency <- "weekly_mean"
+  }
+  var_dir_obs <- reference_descrip[[frequency]][variable]
+  var_dir_exp <- exp_descrip[[frequency]][variable]
+
+  # -----------
+  obs.path <- paste0(archive$src, obs.dir, "$var_dir$",
+                     "$var$_$file_date$.nc")
+
+  hcst.path <- paste0(archive$src, hcst.dir, "$var_dir$",
+                      "$var$_$file_date$.nc")
+
+  fcst.path <- paste0(archive$src, hcst.dir, "$var_dir$",
+                      "$var$_$file_date$.nc")
+
+  # Define regrid parameters:
+  #-------------------------------------------------------------------
+  regrid_params <- get_regrid_params(recipe, archive)
+
+  # Longitude circular sort and latitude check
+  #-------------------------------------------------------------------
+  circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max)
+
+  if (recipe$Analysis$Variables$freq == "weekly_mean") {
+    split_multiselected_dims = TRUE
+  } else {
+    split_multiselected_dims = FALSE
+  }
+
+  # Load hindcast
+  #-------------------------------------------------------------------
+  ## NOTE: metadata_dims has to be specified as 'file_date' to be able to get
+  ## the metadata when the first file is missing. However, when retrieving two
+  ## variables, it must be 'var'. Start() does not admit both.
+  hcst <- Start(dat = hcst.path,
+                var = variable,
+                var_dir = var_dir_exp,
+                file_date = sdates$hcst,
+                time = recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max,
+                var_dir_depends = 'var',
+                latitude = values(list(lats.min, lats.max)),
+                latitude_reorder = Sort(),
+                longitude = values(list(lons.min, lons.max)),
+                longitude_reorder = circularsort,
+                transform = regrid_params$fcst.transform,
+                transform_params = list(grid = regrid_params$fcst.gridtype,
+                                        method = regrid_params$fcst.gridmethod),
+                transform_vars = c('latitude', 'longitude'),
+                synonims = list(latitude = c('lat', 'latitude'),
+                                longitude = c('lon', 'longitude'),
+                                ensemble = c('member', 'ensemble', 'lev')),
+                ensemble = indices(1:hcst.nmember),
+                metadata_dims = c('file_date', 'var'), # change?
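+                # (Added note) file_date here is a multidimensional selector
+                # with dimensions (sday, sweek, syear) built by dates2load();
+                # with split_multiselected_dims = TRUE (the weekly case),
+                # Start() splits it back into those dimensions in the output.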
+ return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(hcst))) { + hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + + # Convert hcst to s2dv_cube object + ## TODO: Give correct dimensions to $Dates + ## (sday, sweek, syear instead of file_date) + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + # the call uses file_date instead of fcst_syear so that it can work + # with the daily case and the current version of startR not allowing + # multiple dims split + + fcst <- Start(dat = fcst.path, + var = variable, + var_dir = var_dir_exp, + var_dir_depends = 'var', + file_date = sdates$fcst, + time = recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max, + + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble', 'lev')), + ensemble = indices(1:fcst.nmember), + metadata_dims = c('var', 'file_date'), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + if ("var_dir" %in% names(dim(fcst))) { + fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + 
default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <-
+        dim(attr(fcst, "Variables")$common$time)
+      dim(attr(fcst, "Variables")$common$time) <- default_time_dims
+    }
+
+    # Convert fcst to s2dv_cube
+    fcst <- as.s2dv_cube(fcst)
+    # Adjust dates for models where the time stamp goes into the next month
+    if (recipe$Analysis$Variables$freq == "monthly_mean") {
+      fcst$attrs$Dates[] <-
+        fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag)
+    }
+
+  } else {
+    fcst <- NULL
+  }
+
+  # Load reference
+  #-------------------------------------------------------------------
+
+  # Obtain dates and date dimensions from the loaded hcst data to make sure
+  # the corresponding observations are loaded correctly.
+  dates <- as.POSIXlt(hcst$attrs$Dates)
+  # The hcst file metadata is 3 days ahead of the obs file names:
+  ## TODO: Is this true in general, or only for a specific case (NCEPv2 + ERA5)?
+  dates$mday <- dates$mday - 3
+  dates <- as.POSIXct(dates)
+  dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")]
+  # Separate Start() call for weekly vs daily data
+  if (store.freq == "weekly_mean") {
+    dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m%d")
+    dim(dates_file) <- dim(dates)
+    obs <- Start(dat = obs.path,
+                 var = variable,
+                 var_dir = var_dir_obs,
+                 var_dir_depends = 'var',
+                 file_date = dates_file,
+                 latitude = values(list(lats.min, lats.max)),
+                 latitude_reorder = Sort(),
+                 longitude = values(list(lons.min, lons.max)),
+                 longitude_reorder = circularsort,
+                 transform = regrid_params$obs.transform,
+                 transform_params = list(grid = regrid_params$obs.gridtype,
+                                         method = regrid_params$obs.gridmethod),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = c('file_date', 'var'),
+                 return_vars = list(latitude = 'dat',
+                                    longitude = 'dat',
+                                    time = 'file_date'),
+                 split_multiselected_dims = TRUE,
+                 retrieve = TRUE)
+
+  } else if (store.freq %in% c("daily_mean", "daily")) {
+
+    # Get the full dates for file_date
+    dates_file <- sapply(dates, format, '%Y%m%d')
+    dim(dates_file) <- dim(dates)
+    # Set hour to 12:00 to ensure correct date retrieval for daily data
+    lubridate::hour(dates) <- 12
+    lubridate::minute(dates) <- 00
+    # Restore correct dimensions
+    dim(dates) <- dim(dates_file)
+
+    obs <- Start(dat = obs.path,
+                 var = variable,
+                 var_dir = var_dir_obs,
+                 var_dir_depends = 'var',
+                 file_date = sort(unique(dates_file)),
+                 time = dates,
+                 time_var = 'time',
+                 time_across = 'file_date',
+                 merge_across_dims = TRUE,
+                 merge_across_dims_narm = TRUE,
+                 latitude = values(list(lats.min, lats.max)),
+                 latitude_reorder = Sort(),
+                 longitude = values(list(lons.min, lons.max)),
+                 longitude_reorder = circularsort,
+                 transform = regrid_params$obs.transform,
+                 transform_params = list(grid = regrid_params$obs.gridtype,
+                                         method = regrid_params$obs.gridmethod),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = c('file_date', 'var'),
+                 return_vars = list(latitude = 'dat',
+                                    longitude = 'dat',
+                                    time = 'file_date'),
+                 split_multiselected_dims = TRUE,
+                 retrieve = TRUE)
+  }
+
+  # Remove var_dir dimension
+  if ("var_dir" %in% names(dim(obs))) {
+    obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected")
+  }
+  # Adds ensemble dim to obs (for consistency with hcst/fcst)
+  default_dims <- c(dat = 1, var = 1, sday = 1,
+                    sweek = 1, syear = 1, time = 1,
+                    latitude = 1, longitude = 1, ensemble = 1)
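+  # (Added note) Dimension-padding trick: start from all-singleton dims and
+  # overwrite with the dims obs actually has, so any dimension missing from
+  # obs (e.g. ensemble) is kept with length 1.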
default_dims[names(dim(obs))] <- dim(obs)
+  dim(obs) <- default_dims
+
+  # Convert obs to s2dv_cube
+  obs <- as.s2dv_cube(obs)
+
+  # Check for consistency between hcst and obs grid
+  if (!(recipe$Analysis$Regrid$type == 'none')) {
+    compare_exp_obs_grids(hcst, obs)
+  }
+
+  # Remove negative values in accumulated variables
+  dictionary <- read_yaml("conf/variable-dictionary.yml")
+  for (var_idx in 1:length(variable)) {
+    var_name <- variable[var_idx]
+    if (dictionary$vars[[var_name]]$accum) {
+      info(recipe$Run$logger,
+           paste0("Accumulated variable ", var_name,
+                  ": setting negative values to zero."))
+      # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data,
+      #                                                  along = "var",
+      #                                                  indices = var_idx, F), 0)
+      obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0
+      hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0
+      if (!is.null(fcst)) {
+        fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0
+      }
+    }
+  }
+  info(recipe$Run$logger,
+       "##### DATA LOADING COMPLETED SUCCESSFULLY #####")
+
+  ############################################################################
+  #
+  # CHECKS ON MISSING FILES
+  #
+  ############################################################################
+
+  #obs.NA_dates.ind <- Apply(obs,
+  #                          fun=(function(x){ all(is.na(x))}),
+  #                          target_dims=c('time', 'latitude', 'longitude'))[[1]]
+  #obs.NA_dates <- dates_file[obs.NA_dates.ind]
+  #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)]
+  #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_",
+  #                       freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc")
+  #
+  #if (any(is.na(hcst))){
+  #  fatal(recipe$Run$logger,
+  #        paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ",
+  #              " ################################################# ",
+  #              " ###### MISSING FILES #### ",
+  #              " ################################################# ",
+  #              "hcst files:",
+  #              hcst.NA_files,
+  #              " ################################################# ",
+  #              " ################################################# ",
+  #              sep="\n"))
+  #  quit(status = 1)
+  #}
+  #
+  #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){
+  #  fatal(recipe$logger,
+  #        paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ",
+  #              " ################################################# ",
+  #              " ###### MISSING FILES #### ",
+  #              " ################################################# ",
+  #              "obs files:",
+  #              obs.NA_files,
+  #              " ################################################# ",
+  #              " ################################################# ",
+  #              sep="\n"))
+  #  quit(status=1)
+  #}
+  #
+  #info(recipe$logger,
+  #     "######### DATA LOADING COMPLETED SUCCESSFULLY ##############")
+
+  ############################################################################
+  ############################################################################
+  .log_memory_usage(recipe$Run$logger, when = "After loading")
+  return(list(hcst = hcst, fcst = fcst, obs = obs))
+
+}
+
diff --git a/modules/Loading/R/subseas_file_dates.R b/modules/Loading/R/subseas_file_dates.R
new file mode 100644
index 0000000000000000000000000000000000000000..eca66c5572807abef6cc5496f5154accca380055
--- /dev/null
+++ b/modules/Loading/R/subseas_file_dates.R
@@ -0,0 +1,108 @@
+# Given a start date (e.g. 20240104),
+# the number of weeks that will be used for the metrics calculation,
+# the number of days that will be used for calibration,
+# and the hindcast period and the forecast times,
+# it returns an array of file dates with dimensions (sday, sweek, syear):
+# for the hindcast if out = 'hcst', or for the forecast if out = 'fcst'.
+
+subseas_file_dates <- function(startdate, n.skill.weeks, n.days,
+                               hcst.start, hcst.end,
+                               ftime_min, ftime_max, out) {
+  # Generate the sday_window vectors:
+  ## Only for Thursdays and Mondays
+  ## Create a diagonal matrix of 3 and 4 days matching Mondays/Thursdays
+  ## To be subtracted from startdate in the loop below
+  prev_sday <- t(sapply(1:((n.days + 1)/2 - 1), function(x) {
+    if (x %% 2 == 0) {
+      res <- rep(4, (n.days + 1)/2 - 1)
+    } else {
+      res <- rep(3, (n.days + 1)/2 - 1)
+    }
+    return(res)}))
+  prev_sday[lower.tri(prev_sday)] <- 0
+
+  next_sday <- t(sapply(1:((n.days + 1)/2 - 1), function(x) {
+    if (x %% 2 == 0) {
+      res <- rep(3, (n.days + 1)/2 - 1)
+    } else {
+      res <- rep(4, (n.days + 1)/2 - 1)
+    }
+    return(res)}))
+  next_sday[lower.tri(next_sday)] <- 0
+
+  if (n.days > 1) {
+    prev_sday <- colSums(prev_sday)
+    next_sday <- colSums(next_sday)
+    thursday_win <- c(rev(prev_sday) * -1, 0, next_sday)
+    monday_win <- c(rev(next_sday) * -1, 0, prev_sday)
+  } else {
+    thursday_win <- monday_win <- 0
+  }
+  #### END generation of sday_window vectors
+
+  # NOTE: from here on, ftime_min/ftime_max are reused as year offsets
+  # (start-date year minus hcst.end/hcst.start), not as forecast times.
+  ftime_min <- as.numeric(substr(as.character(startdate), 1, 4)) - hcst.end
+  ftime_max <- as.numeric(substr(as.character(startdate), 1, 4)) - hcst.start
+  startdate <- as.Date(toString(startdate), "%Y%m%d")
+  sdates <- numeric(0)
+  while (length(sdates) < n.skill.weeks) {
+    if (format(startdate, "%a") == "Thu" || format(startdate, "%a") == "Mon") {
+      sdates <- c(sdates, format(startdate, "%Y%m%d"))
+    }
+    startdate <- startdate - 1
+  }
+
+  file_dates <- array(numeric(), c(0, n.days, (ftime_max - ftime_min) + 1))
+  file_dates.fcst <- array(numeric(), c(0, n.days, (ftime_max - ftime_min) + 1))
+  for (sdate in sdates) {
+    ## TODO: take into account year of the fcst to select hcst years
+    fcst.day <- substr(sdate, 7, 8)
+    fcst.month <- substr(sdate, 5, 6)
+    fcst.year <- substr(sdate, 1, 4)
+
+    startdate <- as.Date(toString(sdate), "%Y%m%d")
+
+    # makes sure of giving a [M T M || T M T] combination
+    if (format(startdate, "%a") == "Thu") {
+      hcst.sdays <- startdate + thursday_win
+#      hcst.sdays <- c(startdate - 3, startdate, startdate + 4, recursive = T)
+    } else {
+      hcst.sdays <- startdate + monday_win
+#      hcst.sdays <- c(startdate - 4, startdate, startdate + 3, recursive = T)
+    }
+    hcst.sdays <- format(as.Date(hcst.sdays, "%Y%m%d"), "%Y%m%d")
+
+    sdays.file_dates <- array(numeric(), c(0, (ftime_max - ftime_min) + 1))
+    sdays.file_dates.fcst <- array(hcst.sdays, c(n.days,
+                                                 (ftime_max - ftime_min) + 1))
+    for (sday.sdate in hcst.sdays) {
+      sday.year <- substr(sday.sdate, 1, 4)
+      sday.mday <- substr(sday.sdate, 5, 8)
+      sday.years <- paste((strtoi(sday.year) - ftime_max):
+                          (strtoi(sday.year) - ftime_min), sep = "")
+      sday.dates <- apply(expand.grid(sday.years, sday.mday),
+                          1, paste, collapse = "")
+      sdays.file_dates <- abind(sdays.file_dates, sday.dates, along = 1)
+    }
+
+    names(dim(sdays.file_dates)) <- c('sday','syear')
+    file_dates <- abind(file_dates, sdays.file_dates, along = 1)
+    file_dates.fcst <- abind(file_dates.fcst, sdays.file_dates.fcst, along = 1)
+
+  }
+  names(dim(file_dates)) <- c('sweek','sday','syear')
+  names(dim(file_dates.fcst)) <- c('sweek','sday','syear')
+
+#  dir_dates <- paste0(file_dates.fcst,"/",var,"_",file_dates)
+#  dim(dir_dates) <- dim(file_dates)
+#  names(dim(dir_dates)) <- c('sweek','sday','syear')
+#  dir_dates <- substr(dir_dates,10,24)
+#  return(dir_dates)
+  if (out == 'fcst') {
+    central_day <- (n.days + 1)/2
+    central_week <- (n.skill.weeks + 1)/2
+    file_dates <- file_dates.fcst[central_week, central_day, 1]
+    dim(file_dates) <- c(sday = 1, sweek = 1, syear = 1)
+  }
+  file_dates <- Reorder(file_dates, c('sday', 'sweek', 'syear'))
+  return(file_dates)
+}
diff --git a/modules/Multimodel/dims_multimodel.R b/modules/Multimodel/dims_multimodel.R
index f9076153081bf34d421161e2178e0c93480accd7..b132f1b4bb444ead67c23244ddf81fbaf40b0d47 100644
--- a/modules/Multimodel/dims_multimodel.R
+++ b/modules/Multimodel/dims_multimodel.R
@@ -16,6 +16,7 @@ dims_multimodel <- function(recipe) {
   variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
   exp_descrip <- archive$System[[exp.name[1]]]
   reference_descrip <- archive$Reference[[ref.name]]
+  createFrom <- str_to_title(recipe$Analysis$Datasets$Multimodel$createFrom)
   sdates <- dates2load(recipe, recipe$Run$logger)
 
   lats.min <- recipe$Analysis$Region$latmin
@@ -31,8 +32,7 @@ dims_multimodel <- function(recipe) {
   }
 
   # Find the saved data directory
-  recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs",
-                                     recipe$Analysis$Datasets$Multimodel$createFrom)
+  recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", createFrom)
   if (tolower(recipe$Analysis$Output_format) == "scorecards") {
     hcst_start <- recipe$Analysis$Time$hcst_start
     hcst_end <- recipe$Analysis$Time$hcst_end
diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R
index c6bd4585a57367b97eee66fd4ff5d2a46237380b..529d2e5d4c725132c4836fcfeb090478e8b6589c 100644
--- a/modules/Multimodel/load_multimodel.R
+++ b/modules/Multimodel/load_multimodel.R
@@ -15,8 +15,9 @@ load_multimodel <- function(recipe) {
   store.freq <- recipe$Analysis$Variables$freq
   variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
   exp_descrip <- archive$System[[exp.name[1]]]
   reference_descrip <- archive$Reference[[ref.name]]
   sdates <- dates2load(recipe, recipe$Run$logger)
+  createFrom <- str_to_title(recipe$Analysis$Datasets$Multimodel$createFrom)
 
   lats.min <- recipe$Analysis$Region$latmin
   lats.max <- recipe$Analysis$Region$latmax
@@ -31,8 +32,7 @@ load_multimodel <- function(recipe) {
   }
 
   # Find the saved data directory
-  recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs",
-                                     recipe$Analysis$Datasets$Multimodel$createFrom)
+  recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", createFrom)
   if (tolower(recipe$Analysis$Output_format) == "scorecards") {
     hcst_start <- recipe$Analysis$Time$hcst_start
     hcst_end <- recipe$Analysis$Time$hcst_end
diff --git a/recipe_subseasonal.yml b/recipe_subseasonal.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d817c69a4317247aa6d2df401ee3dc72990b180a
--- /dev/null
+++ b/recipe_subseasonal.yml
@@ -0,0 +1,177 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N.Pérez-Zanón
+  Info: # Complete recipe containing all possible fields.
+Analysis:
+  Horizon: subseasonal # Mandatory, str: 'subseasonal', 'seasonal' or 'decadal'.
Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'weekly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
+#    - {name: 'tas', freq: 'weekly_mean', units: 'C'}
+    name: 'tas'
+    freq: 'weekly_mean'
+    units: 'C'
+    # To request more variables to be divided in atomic recipes, add them this way:
+    # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}}
+  Datasets:
+    System:
+      # name: System name (Mandatory, str)
+      # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+      - {name: 'NCEP-CFSv2', member: 'all'}
+      # To request more Systems to be divided in atomic recipes, add them this way:
+      # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies # Which module output the multimodel should be created from (str)
+    Reference:
+      - {name: 'ERA5'} # Reference name (Mandatory, str)
+      # To request more References to be divided into atomic recipes, add them this way:
+      # - {name: 'ERA5Land'}
+  Time:
+    sdate: 20240711 #%Y%m%d
+    #- '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: '2024' # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '2006' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step in weeks for subseasonal. Starts at “1”. (Mandatory, int)
+    ftime_max: 4 # Last forecast time step in weeks for subseasonal. Starts at “1”. (Mandatory, int)
+    week_day: Thursday # Day of the week of the start dates; only Thursdays work right now
+    sweek_window: 3 # Number of weeks to use for the assessment
+    sday_window: 3 # Number of days to use for calibration
+  Region:
+    # latmin: minimum latitude (Mandatory, int)
+    # latmax: maximum latitude (Mandatory, int)
+    # lonmin: # minimum longitude (Mandatory, int)
+    # lonmax: # maximum longitude (Mandatory, int)
+    #- {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+    # To request more regions to be divided in atomic recipes, add them this way:
+    {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
+  Regrid:
+    method: bilinear # Interpolation method (Mandatory, str)
+    type: to_system # Interpolate to: 'to_system', 'to_reference', 'none',
+                    # or CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Calibration:
+      method: mse_min # Calibration method. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Anomalies:
+      compute: yes # Either yes/true or no/false (Mandatory, bool)
+      cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool)
+      save: 'fcst_only' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Downscaling:
+      # Assumption 1: leave-one-out cross-validation is always applied
+      # Assumption 2: for analogs, we select the best analog (minimum distance)
+      type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.
+      int_method: conservative # regridding method accepted by CDO.
(Mandatory, str)
+      bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'.
+      lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn'
+      log_reg_method: # If type=logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+      target_grid: /esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20000202.nc # nc file or grid accepted by CDO
+      nanalogs: # If type=analogs. Number of analogs to be searched
+      save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str)
+    Time_aggregation:
+      execute: yes # Either yes/true or no/false. Defaults to false. (Mandatory, bool)
+      method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, string)
+      # ini and end: list, pairs of initial and final time steps to aggregate.
+      # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3
+      ini: [1, 2, 1]
+      end: [2, 3, 3]
+      # user_def: List of lists, custom user-defined forecast times to aggregate.
+      # Elements should be named; names can be chosen by the user.
+      # An R expression can be entered using '!expr'; it will be evaluated by the code.
+      # If both ini/end and user_def are defined, ini/end takes preference.
+      #user_def:
+      #  DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times
+      #  DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,...
+    Indices:
+      ## Indices available: - NAO (for psl and/or z500);
+      #                     - Nino1+2, Nino3, Nino3.4, Nino4 (for tos)
+      ## Each index can only be computed if its area is within the selected region.
+      # obsproj: NAO computation method (see s2dv::NAO()) Default is yes/true. (Optional, bool)
+      # save: What to save. Options: 'all'/'none'. Default is 'all'.
+      # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool)
+      # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool)
+      # alpha: Significance threshold. Default value is 0.05 (Optional, numeric)
+      #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      # Also available if variable is psl and/or z500:
+      # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes}
+    Skill:
+      metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds
+      # for quantiles and probability categories. Each set of thresholds should be
+      # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional)
+      save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str)
+    Visualization:
+      plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str)
+      multi_panel: yes # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool)
+      projection: 'cylindrical_equidistant' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant.
(Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 10 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'S2S4E' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /esarchive/scratch/nperez/git3/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git3/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. 
If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: victoria.agudetse@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_template.yml b/recipe_template.yml index 7f9432c7299b907a054c230dc8d95f92b272e801..4dd5bb208d912fbaaaae209fa8034e09ea0d4228 100644 --- a/recipe_template.yml +++ b/recipe_template.yml @@ -30,17 +30,27 @@ Analysis: # To request more References to be divided into atomic recipes, add them this way: # - {name: 'ERA5Land'} Time: - sdate: + sdate: - '1201' # Start date, 'mmdd' (Mandatory, int) # To request more startdates to be divided into atomic recipes, add them this way: # - '0101' # - '0201' # ... + # For subseasonal case there are two options: + #sdate: 20240104 # A specific date of the year + #sdate: 2024 # A year to evaluate all 52 weeks initialized on Thursday fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int) + # For subseasonal, a specific date should be requested and it should be + # the same as the one defined as sdate (to be coherent between + # forecast provision and assessment. hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int) hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) ftime_max: 6 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + # For subseasonal, there are three extra paramenters: + #week_day: Thursday # only works for Thursdaays right now + #sday_window: 3 # The number of days use for calibration + #sweek_window: 9 # The numeber of weeks to use for assessment Region: # latmin: minimum latitude (Mandatory, int) # latmax: maximum latitude (Mandatory, int) diff --git a/tests/recipes/recipe-subseasonal_weekly.yml b/tests/recipes/recipe-subseasonal_weekly.yml new file mode 100644 index 0000000000000000000000000000000000000000..56e08b95b94dece5cc94bdf3fc8770abf12f1d25 --- /dev/null +++ b/tests/recipes/recipe-subseasonal_weekly.yml @@ -0,0 +1,177 @@ +# IMPORTANT: This is recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. +Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables. 
+#    - {name: 'tas', freq: 'weekly_mean', units: 'C'}
+    name: 'tas'
+    freq: 'weekly_mean'
+    units: 'C'
+    # To request more variables to be divided in atomic recipes, add them this way:
+    # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}}
+  Datasets:
+    System:
+      # name: System name (Mandatory, str)
+      # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+      - {name: 'NCEP-CFSv2', member: 'all'}
+      # To request more Systems to be divided in atomic recipes, add them this way:
+      # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies # Which module output the multimodel should be created from (str)
+    Reference:
+      - {name: 'ERA5'} # Reference name (Mandatory, str)
+      # To request more References to be divided into atomic recipes, add them this way:
+      # - {name: 'ERA5Land'}
+  Time:
+    sdate: 20240104 #%Y%m%d
+    #- '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: '2024' # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '2003' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step in weeks for subseasonal. Starts at “1”. (Mandatory, int)
+    ftime_max: 4 # Last forecast time step in weeks for subseasonal. Starts at “1”. (Mandatory, int)
+    week_day: Thursday # Day of the week of the start dates; only Thursdays work right now
+    sweek_window: 5 # Number of weeks to use for the assessment
+    sday_window: 3 # Number of days to use for calibration
+  Region:
+    # latmin: minimum latitude (Mandatory, int)
+    # latmax: maximum latitude (Mandatory, int)
+    # lonmin: # minimum longitude (Mandatory, int)
+    # lonmax: # maximum longitude (Mandatory, int)
+    #- {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+    # To request more regions to be divided in atomic recipes, add them this way:
+    {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 10}
+  Regrid:
+    method: bilinear # Interpolation method (Mandatory, str)
+    type: to_system # Interpolate to: 'to_system', 'to_reference', 'none',
+                    # or CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Calibration:
+      method: mse_min # Calibration method. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Anomalies:
+      compute: yes # Either yes/true or no/false (Mandatory, bool)
+      cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool)
+      save: 'fcst_only' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Downscaling:
+      # Assumption 1: leave-one-out cross-validation is always applied
+      # Assumption 2: for analogs, we select the best analog (minimum distance)
+      type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.
+      int_method: conservative # regridding method accepted by CDO. (Mandatory, str)
+      bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'.
+      lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn'
+      log_reg_method: # If type=logreg.
Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+      target_grid: /esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20000202.nc # nc file or grid accepted by CDO
+      nanalogs: # If type=analogs. Number of analogs to be searched
+      save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str)
+    Time_aggregation:
+      execute: yes # Either yes/true or no/false. Defaults to false. (Mandatory, bool)
+      method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, string)
+      # ini and end: list, pairs of initial and final time steps to aggregate.
+      # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3
+      ini: [1, 2, 1]
+      end: [2, 3, 3]
+      # user_def: List of lists, custom user-defined forecast times to aggregate.
+      # Elements should be named; names can be chosen by the user.
+      # An R expression can be entered using '!expr'; it will be evaluated by the code.
+      # If both ini/end and user_def are defined, ini/end takes preference.
+      #user_def:
+      #  DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times
+      #  DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,...
+    Indices:
+      ## Indices available: - NAO (for psl and/or z500);
+      #                     - Nino1+2, Nino3, Nino3.4, Nino4 (for tos)
+      ## Each index can only be computed if its area is within the selected region.
+      # obsproj: NAO computation method (see s2dv::NAO()) Default is yes/true. (Optional, bool)
+      # save: What to save. Options: 'all'/'none'. Default is 'all'.
+      # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool)
+      # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool)
+      # alpha: Significance threshold. Default value is 0.05 (Optional, numeric)
+      #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      # Also available if variable is psl and/or z500:
+      # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes}
+    Skill:
+      metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds
+      # for quantiles and probability categories. Each set of thresholds should be
+      # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional)
+      save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str)
+    Visualization:
+      plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str)
+      multi_panel: yes # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool)
+      projection: 'cylindrical_equidistant' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str)
+      mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      dots_terciles: yes # Whether to dot the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false.
(Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 10 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'S2S4E' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: ./tests/out-logs/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git3/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used. 
platform: nord3v2 # platform (for now, only nord3v2 is available)
+    email_notifications: yes # enable/disable email notifications. Change it if you want to.
+    email_address: victoria.agudetse@bsc.es # replace with your email address
+    notify_completed: yes # notify me by email when a job finishes
+    notify_failed: yes # notify me by email when a job fails
+
diff --git a/tests/test_decadal.R b/tests/test_decadal.R
index 75b7fca77734301354e0b3a0d117b9d6e2021cf4..97cd421630d55c5f2dc0d75b352f453d0166ed75 100644
--- a/tests/test_decadal.R
+++ b/tests/test_decadal.R
@@ -1,7 +1,7 @@
 library(testthat)
 path_testthat <- file.path('./tests/testthat/')
 
-files_testthat <- list.files('./tests/testthat/', pattern = 'decadal')
+files_testthat <- list.files('./tests/testthat/', pattern = 'test-decadal')
 
 for (i_file in 1:length(files_testthat)) {
   source(paste0('./tests/testthat/', files_testthat[i_file]))
diff --git a/tests/test_seasonal.R b/tests/test_seasonal.R
index 4718e3d4d1b224ba390c3a7767519ca42492c4d2..c1f64eb2c5bb7ff0f60918a9bd651130051557ab 100644
--- a/tests/test_seasonal.R
+++ b/tests/test_seasonal.R
@@ -1,7 +1,7 @@
 library(testthat)
 path_testthat <- file.path('./tests/testthat/')
 
-files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal')
+files_testthat <- list.files('./tests/testthat/', pattern = 'test-seasonal')
 
 for (i_file in 1:length(files_testthat)) {
   source(paste0('./tests/testthat/', files_testthat[i_file]))
diff --git a/tests/test_subseasonal.R b/tests/test_subseasonal.R
new file mode 100644
index 0000000000000000000000000000000000000000..3b3d25e21cd7eddc0be093b93bdd60d00a9eaae6
--- /dev/null
+++ b/tests/test_subseasonal.R
@@ -0,0 +1,9 @@
+library(testthat)
+
+path_testthat <- file.path('./tests/testthat/')
+files_testthat <- list.files('./tests/testthat/', pattern = 'test-subseasonal')
+
+for (i_file in 1:length(files_testthat)) {
+  source(paste0('./tests/testthat/', files_testthat[i_file]))
+}
+
diff --git a/tests/testthat/test-subseasonal_weekly.R b/tests/testthat/test-subseasonal_weekly.R
new file mode 100644
index 0000000000000000000000000000000000000000..da99e4ee820dad48e31618146d566922f5018468
--- /dev/null
+++ b/tests/testthat/test-subseasonal_weekly.R
@@ -0,0 +1,163 @@
+context("Subseasonal weekly data")
+
+source("modules/Loading/Loading.R")
+source("modules/Saving/R/get_dir.R")
+
+recipe_file <- "tests/recipes/recipe-subseasonal_weekly.yml"
+recipe <- prepare_outputs(recipe_file, disable_checks = F)
+archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_subseasonal.yml"))[[recipe$Run$filesystem]]
+
+# Load datasets
+suppressWarnings({invisible(capture.output(
+data <- Loading(recipe)
+))})
+
+outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName)
+
+# ------- TESTS --------
+
+test_that("1.
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 3, sweek = 5, syear = 5, time = 4, latitude = 10, longitude = 21, ensemble = 12) +) +expect_equal( +dim(data$obs$data), +c(dat = 1, var = 1, sday = 3, sweek = 5, syear = 5, time = 4, latitude = 10, longitude = 21, ensemble = 1) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 4, latitude = 10, longitude = 21, ensemble = 48) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 3, sweek = 5, syear = 5, time = 4) +) +# The metadata in files are the sdate + 7 days +expect_equal( +format(data$hcst$attrs$Dates[2, 3, , 1], "%m%d"), +rep("0111", 5) +) +expect_equal( +format(data$hcst$attrs$Dates[, 3, 3, 1], "%m%d"), +c("0108", "0111", "0115"), +) +expect_equal( +format(data$hcst$attrs$Dates[2, , 3, 1], "%m%d"), +c("0118", "0115", "0111", "0108", "0104") +) +expect_equal( +format(data$hcst$attrs$Dates[2, 3, 3, ], "%Y%m%d"), +c("20010111", "20010118", "20010125", "20010201") +) +# Obs metadata +expect_equal( +dim(data$obs$attrs$Dates), +c(sday = 3, sweek = 5, syear = 5, time = 4) +) +expect_equal( +format(data$obs$attrs$Dates[2, 3, , 1], "%m%d"), +rep("0114", 5) +) +expect_equal( +dim(data$obs$attrs$Dates), +dim(data$obs$attrs$load_parameters$dat1$file_date[[1]]) +) +expect_equal( +data$obs$attrs$load_parameters$dat1$file_date[[1]][2, 3, , 1], +c("19990108", "20000108", "20010108", "20020108", "20030108") +) +expect_equal( +data$hcst$data[1, 1, 2, 3, , 1, 1, 1, 1], +c(298.9691, 299.0720, 299.0578, 299.3024, 299.5754), +tolerance = 0.0001 +) +}) + +recipe$Analysis$Time$sweek_window <- 1 +recipe$Analysis$Time$sday_window <- 1 +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +test_that("2. 
Loading", { +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 5, time = 4, latitude = 10, longitude = 21, ensemble = 12) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 1, sweek = 1, syear = 5, time = 4) +) +expect_equal( +format(data$hcst$attrs$Dates[1, 1, , 1], "%m%d"), +rep("0111", 5) +) +expect_equal( +format(data$hcst$attrs$Dates[1, 1, , 1], "%Y"), +as.character(1999:2003), +) +expect_equal( +format(data$hcst$attrs$Dates[1, 1, 3, ], "%Y%m%d"), +c("20010111", "20010118", "20010125", "20010201") +) +# Obs metadata +expect_equal( +dim(data$obs$attrs$Dates), +c(sday = 1, sweek = 1, syear = 5, time = 4) +) +expect_equal( +format(data$obs$attrs$Dates[1, 1, , 1], "%m%d"), +rep("0114", 5) +) +expect_equal( +dim(data$obs$attrs$Dates), +dim(data$obs$attrs$load_parameters$dat1$file_date[[1]]) +) +expect_equal( +data$obs$attrs$load_parameters$dat1$file_date[[1]][1, 1, , 1], +c("19990108", "20000108", "20010108", "20020108", "20030108") +) +expect_equal( +data$hcst$data[1, 1, 1, 1, , 1, 1, 1, 1], +c(298.9691, 299.0720, 299.0578, 299.3024, 299.5754), +tolerance = 0.0001 +) +}) + +# Delete files +unlink(recipe$Run$output_dir, recursive = T) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 245e4e3dc42501c310f57410420e419bcafc022a..011344761ec1521f6bbebf3092ba94131f5fa01b 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -9,12 +9,17 @@ check_recipe <- function(recipe) { TIME_SETTINGS_SEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", "hcst_end") - TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") + TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") + + TIME_SETTINGS_SUBSEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", + "hcst_end", "week_day", "sweek_window", + "sday_window") PARAMS <- c("Horizon", "Time", "Variables", "Region", "Regrid", "Workflow", "Datasets") HORIZONS <- c("subseasonal", "seasonal", "decadal") ARCHIVE_SEASONAL <- "conf/archive.yml" ARCHIVE_DECADAL <- "conf/archive_decadal.yml" + ARCHIVE_SUBSEASONAL <- "conf/archive_subseasonal.yml" # Define error status variable error_status <- F @@ -33,14 +38,14 @@ check_recipe <- function(recipe) { error_status <- TRUE } - if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { + if (!any(HORIZONS %in% recipe$Analysis$Horizon)) { error(recipe$Run$logger, paste0("The element 'Horizon' in the recipe must be one of the ", "following: ", paste(HORIZONS, collapse = ", "), ".")) error_status <- T } # Check time settings - if (tolower(recipe$Analysis$Horizon) == "seasonal") { + if (recipe$Analysis$Horizon == "seasonal") { ## TODO: Specify filesystem archive <- read_yaml(ARCHIVE_SEASONAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { @@ -50,7 +55,7 @@ check_recipe <- function(recipe) { collapse = ", "), ".")) error_status <- TRUE } - } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + } else if (recipe$Analysis$Horizon == "decadal") { archive <- read_yaml(ARCHIVE_DECADAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { error(recipe$Run$logger, @@ -59,6 +64,15 @@ check_recipe <- function(recipe) { collapse = ", "), ".")) error_status <- TRUE } + } else if (recipe$Analysis$Horizon == "subseasonal") { + archive <- read_yaml(ARCHIVE_SUBSEASONAL)[[recipe$Run$filesystem]] + if (!all(TIME_SETTINGS_SUBSEASONAL %in% names(recipe$Analysis$Time))) { + error(recipe$Run$logger, + paste0("The element 'Time' in 
diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index 245e4e3dc42501c310f57410420e419bcafc022a..011344761ec1521f6bbebf3092ba94131f5fa01b 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -9,12 +9,17 @@ check_recipe <- function(recipe) {
 
   TIME_SETTINGS_SEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start",
                               "hcst_end")
-  TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end")
+  TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end")
+
+  TIME_SETTINGS_SUBSEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start",
+                                 "hcst_end", "week_day", "sweek_window",
+                                 "sday_window")
   PARAMS <- c("Horizon", "Time", "Variables", "Region", "Regrid", "Workflow",
               "Datasets")
   HORIZONS <- c("subseasonal", "seasonal", "decadal")
   ARCHIVE_SEASONAL <- "conf/archive.yml"
   ARCHIVE_DECADAL <- "conf/archive_decadal.yml"
+  ARCHIVE_SUBSEASONAL <- "conf/archive_subseasonal.yml"
   # Define error status variable
   error_status <- F
@@ -33,14 +38,14 @@ check_recipe <- function(recipe) {
     error_status <- TRUE
   }
-  if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) {
+  if (!any(HORIZONS %in% recipe$Analysis$Horizon)) {
     error(recipe$Run$logger,
           paste0("The element 'Horizon' in the recipe must be one of the ",
                  "following: ", paste(HORIZONS, collapse = ", "), "."))
     error_status <- T
   }
   # Check time settings
-  if (tolower(recipe$Analysis$Horizon) == "seasonal") {
+  if (recipe$Analysis$Horizon == "seasonal") {
     ## TODO: Specify filesystem
     archive <- read_yaml(ARCHIVE_SEASONAL)[[recipe$Run$filesystem]]
     if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) {
       error(recipe$Run$logger,
             paste0("The element 'Time' in the recipe must contain all of the ",
                    "following: ", paste(TIME_SETTINGS_SEASONAL,
                                         collapse = ", "), "."))
       error_status <- TRUE
     }
-  } else if (tolower(recipe$Analysis$Horizon) == "decadal") {
+  } else if (recipe$Analysis$Horizon == "decadal") {
     archive <- read_yaml(ARCHIVE_DECADAL)[[recipe$Run$filesystem]]
     if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) {
       error(recipe$Run$logger,
             paste0("The element 'Time' in the recipe must contain all of the ",
                    "following: ", paste(TIME_SETTINGS_DECADAL,
                                         collapse = ", "), "."))
       error_status <- TRUE
     }
+  } else if (recipe$Analysis$Horizon == "subseasonal") {
+    archive <- read_yaml(ARCHIVE_SUBSEASONAL)[[recipe$Run$filesystem]]
+    if (!all(TIME_SETTINGS_SUBSEASONAL %in% names(recipe$Analysis$Time))) {
+      error(recipe$Run$logger,
+            paste0("The element 'Time' in the recipe must contain all of the ",
+                   "following: ", paste(TIME_SETTINGS_SUBSEASONAL,
+                                        collapse = ", "), "."))
+      error_status <- TRUE
+    }
   } else {
     archive <- NULL
   }
@@ -101,7 +115,7 @@ check_recipe <- function(recipe) {
       isFALSE(recipe$Analysis$Datasets$Multimodel)) {
     recipe$Analysis$Datasets$Multimodel <- list(execute = FALSE)
   }
-  if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'false') {
+  if (isFALSE(recipe$Analysis$Datasets$Multimodel$execute) ||
+      identical(recipe$Analysis$Datasets$Multimodel$execute, 'false')) {
     multimodel <- FALSE
   } else {
     multimodel <- TRUE
@@ -117,14 +131,14 @@ check_recipe <- function(recipe) {
            "Please specify yes/true, no/false or 'both'."))
     error_status <- TRUE
   }
-  if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in%
-      MULTIMODEL_METHODS) {
+  if (!(recipe$Analysis$Datasets$Multimodel$approach %in%
+        MULTIMODEL_METHODS)) {
     error(recipe$Run$logger,
           paste("The specified approach for the multimodel is not valid.",
                 "Please specify pooled.")) #, mean or median."))
     error_status <- TRUE
   }
-  if (!tolower(recipe$Analysis$Datasets$Multimodel$createFrom) %in%
-      MULTIMODEL_CREATEFROM) {
+  if (!(recipe$Analysis$Datasets$Multimodel$createFrom %in%
+        MULTIMODEL_CREATEFROM)) {
     error(recipe$Run$logger,
           paste("The specified 'createFrom' for the multimodel is not valid.",
@@ -133,7 +147,7 @@ check_recipe <- function(recipe) {
     }
   }
   } else {
-    recipe$Analysis$Datasets$Multimodel$execute <- FALSE
+    recipe$Analysis$Datasets$Multimodel$execute <- FALSE
   }
   # Check ftime_min and ftime_max
   if ((!(recipe$Analysis$Time$ftime_min > 0)) ||
@@ -171,13 +185,37 @@ check_recipe <- function(recipe) {
                "'hcst_end' cannot be smaller than 'hcst_start'.")
     error_status <- TRUE
   }
-  ## TODO: Is this needed?
-  if (is.null(recipe$Analysis$Time$fcst_year) ||
-      identical(tolower(recipe$Analysis$Time$fcst_year), 'none')) {
-    stream <- "hindcast"
-    # recipe$Analysis$Time$fcst_year <- 'YYYY'
-  } else {
-    stream <- "fcst"
+  # Subseasonal-specific checks
+  if (recipe$Analysis$Horizon == "subseasonal") {
+    if (is.null(recipe$Analysis$Time$week_day)) {
+      recipe$Analysis$Time$week_day <- "thursday"
+      info(recipe$Run$logger,
+           "Initialisation day set to Thursday.")
+    }
+    sww <- recipe$Analysis$Time$sweek_window
+    if (is.null(sww) || !is.numeric(sww) || (sww %% 2) != 1) {
+      recipe$Analysis$Time$sweek_window <- 9
+      info(recipe$Run$logger,
+           "Number of initialisation 'weeks' for skill assessment set to 9.")
+    }
+    sdw <- recipe$Analysis$Time$sday_window
+    if (is.null(sdw) || !is.numeric(sdw) || (sdw %% 2) != 1) {
+      # The default depends on whether a calibration method is requested:
+      # without calibration, a single initialisation 'day' is enough
+      if (is.null(recipe$Analysis$Workflow$Calibration$method) ||
+          (is.logical(recipe$Analysis$Workflow$Calibration$method) &&
+           recipe$Analysis$Workflow$Calibration$method == FALSE) ||
+          recipe$Analysis$Workflow$Calibration$method == 'none') {
+        recipe$Analysis$Time$sday_window <- 1
+        info(recipe$Run$logger,
+             "Number of initialisation 'days' for calibration set to 1.")
+      } else {
+        recipe$Analysis$Time$sday_window <- 3
+        info(recipe$Run$logger,
+             "Number of initialisation 'days' for calibration set to 3.")
+      }
+    }
   }
 
   ## TODO: To be implemented in the future
@@ -188,27 +226,12 @@ check_recipe <- function(recipe) {
   #   "Element fcst_day in recipe set as 1.")
   #   recipe$Analysis$Time$sdate$fcst_day <- '01'
   # }
-  ## TODO: Delete, this parameter was deprecated
-  # if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) {
-  #   error(recipe$Run$logger,
-  #     paste("The element 'fcst_sday' in the recipe should be defined."))
-  # }
   if (is.null(recipe$Analysis$Time$fcst_year)) {
     warn(recipe$Run$logger,
          paste("The element 'fcst_year' is not
defined in the recipe.", "No forecast year will be used.")) } - ## TODO: Adapt and move this inside 'if'? - # fcst.sdate <- NULL - # for (syear in recipe$Analysis$Time$fcst_year) { - # for (sday in recipe$Analysis$Time$sdate) { - # fcst.sdate <- c(fcst.sdate, - # paste0(syear, - # sprintf("%04d", as.numeric(sday)))) - # } - # } - # fcst.sdate <- list(stream = stream, fcst.sdate = fcst.sdate) # Regrid checks: if (length(recipe$Analysis$Regrid) != 2) { @@ -313,8 +336,8 @@ check_recipe <- function(recipe) { "'Workflow:Time_aggregation:method' is not defined in the recipe.") error_status <- TRUE } else { - if (!tolower(recipe$Analysis$Workflow$Time_aggregation$method) %in% - TIME_AGG_METHODS) { + if (!(recipe$Analysis$Workflow$Time_aggregation$method %in% + TIME_AGG_METHODS)) { error(recipe$Run$logger, paste0("Time_aggregation method should be one of: ", paste(TIME_AGG_METHODS, collapse = ", "), ".")) @@ -328,8 +351,7 @@ check_recipe <- function(recipe) { "add 'ini' and 'end' or 'user_def'.")) error_status <- TRUE } - if (all(c('ini', 'end') %in% - tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) { + if (all(c('ini', 'end') %in% names(recipe$Analysis$Workflow$Time_aggregation))) { if (!is.numeric(recipe$Analysis$Workflow$Time_aggregation$ini) || !is.numeric(recipe$Analysis$Workflow$Time_aggregation$end)) { error(recipe$Run$logger, @@ -353,9 +375,9 @@ check_recipe <- function(recipe) { } } } - if ('user_def' %in% tolower(names(recipe$Analysis$Workflow$Time_aggregation))) { + if ('user_def' %in% names(recipe$Analysis$Workflow$Time_aggregation)) { if (all(c('ini', 'end') %in% - tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) { + names(recipe$Analysis$Workflow$Time_aggregation))) { recipe$Analysis$Workflow$Time_aggregation$user_def <- NULL warn(recipe$Run$logger, paste("Both 'ini' and 'end' and 'user_def' are defined under", @@ -391,7 +413,7 @@ check_recipe <- function(recipe) { ## TODO: Review this check if ((is.logical(recipe$Analysis$Workflow$Calibration$method) && recipe$Analysis$Workflow$Calibration$method == FALSE) || - tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || + recipe$Analysis$Workflow$Calibration$method == 'none' || is.null(recipe$Analysis$Workflow$Calibration$method)) { warn(recipe$Run$logger, "No Calibration method was specified, raw data verification.") @@ -448,7 +470,7 @@ check_recipe <- function(recipe) { # Downscaling if ("Downscaling" %in% names(recipe$Analysis$Workflow)) { - downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower) + downscal_params <- recipe$Analysis$Workflow$Downscaling # Define accepted entries DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", @@ -551,7 +573,7 @@ check_recipe <- function(recipe) { if ("Indices" %in% names(recipe$Analysis$Workflow)) { nino_indices <- paste0("nino", c("1+2", "3", "3.4", "4")) indices <- c("nao", nino_indices) - if (!("anomalies" %in% tolower(names(recipe$Analysis$Workflow)))) { + if (!("Anomalies" %in% names(recipe$Analysis$Workflow))) { error(recipe$Run$logger, paste0("Indices uses Anomalies as input, but Anomalies are missing", "in the recipe.")) @@ -605,14 +627,14 @@ check_recipe <- function(recipe) { } else { requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] - if (!all(tolower(requested_metrics) %in% AVAILABLE_METRICS)) { + if (!all(requested_metrics %in% AVAILABLE_METRICS)) { error(recipe$Run$logger, paste0("Some of the metrics 
requested under 'Skill' are not ",
                  "available in SUNSET. Check the documentation to see the ",
                  "full list of accepted skill metrics."))
      error_status <- TRUE
    }
-    if (tolower(recipe$Analysis$Output_format) != 'scorecards') {
+    if (recipe$Analysis$Output_format != 'scorecards') {
      if (any(grepl('_syear', requested_metrics))) {
        recipe$Analysis$Output_format <- 'scorecards'
        warn(recipe$Run$logger,
@@ -722,7 +744,7 @@ check_recipe <- function(recipe) {
   # Check user-defined file format for plots
   PLOT_FILE_FORMATS <- c("pdf", "png", "jpg", "jpeg", "eps")
   if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) &&
-      !(tolower(recipe$Analysis$Workflow$Visualization$file_format)) %in%
-      PLOT_FILE_FORMATS) {
+      !(recipe$Analysis$Workflow$Visualization$file_format %in%
+        PLOT_FILE_FORMATS)) {
     error(recipe$Run$logger,
           paste0("Visualization:file_format must be one of the following: ",
@@ -747,7 +769,7 @@ check_recipe <- function(recipe) {
     sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric,
                            ", | |,")[[1]]
     if (recipe$Analysis$Workflow$Scorecards$metric_aggregation == 'score') {
-      if ('rpss' %in% tolower(sc_metrics)) {
+      if ('rpss' %in% sc_metrics) {
        if (!('rps_clim_syear' %in% requested_metrics)) {
          requested_metrics <- c(requested_metrics, 'rps_clim_syear')
        }
@@ -755,7 +777,7 @@ check_recipe <- function(recipe) {
          requested_metrics <- c(requested_metrics, 'rps_syear')
        }
      }
-      if ('crpss' %in% tolower(sc_metrics)) {
+      if ('crpss' %in% sc_metrics) {
        if (!('crps_clim_syear' %in% requested_metrics)) {
          requested_metrics <- c(requested_metrics, 'crps_clim_syear')
        }
@@ -763,7 +785,7 @@ check_recipe <- function(recipe) {
          requested_metrics <- c(requested_metrics, 'crps_syear')
        }
      }
-      if ('enscorr' %in% tolower(sc_metrics) &&
+      if (('enscorr' %in% sc_metrics) &&
          !all(c('std', 'cov', 'n_eff') %in% statistics)) {
        error(recipe$Run$logger,
              paste("For 'enscorr' to be plotted in the Scorecards with",
@@ -775,12 +797,12 @@ check_recipe <- function(recipe) {
      recipe$Analysis$Workflow$Skill$metric <- paste0(requested_metrics,
                                                      collapse = " ")
    }
-    if (tolower(recipe$Analysis$Output_format) != 'scorecards') {
+    if (recipe$Analysis$Output_format != 'scorecards') {
      warn(recipe$Run$logger,
           "Scorecards requested: setting output format as 'Scorecards'")
      recipe$Analysis$Output_format <- 'scorecards'
    }
-    if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) {
+    if (!all(sc_metrics %in% requested_metrics)) {
      error(recipe$Run$logger,
            paste0("All of the metrics requested under 'Scorecards' must ",
                   "be requested in the 'Skill' section."))
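As a side note on the metric checks above: the Skill and Scorecards metric lists are plain strings, split with a small regex that accepts a comma, a space, or both as separators. A standalone illustration of the parsing:

    strsplit("rpss crpss, enscorr", ", | |,")[[1]]
    # [1] "rpss"    "crpss"   "enscorr"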
diff --git a/tools/data_summary.R b/tools/data_summary.R
index b76101bac4bba40b1a26ff2cdea1c1c16bae9580..6e1ce9022db0b8efac3ce9b254130b37a2094255 100644
--- a/tools/data_summary.R
+++ b/tools/data_summary.R
@@ -10,18 +10,28 @@ data_summary <- function(data_cube, recipe) {
   object_name <- deparse(substitute(data_cube))
   if (recipe$Analysis$Variables$freq == "monthly_mean") {
     date_format <- "%b %Y"
-  } else if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) {
+  } else if (recipe$Analysis$Variables$freq %in%
+             c("daily", "daily_mean", "weekly_mean")) {
     date_format <- "%b %d %Y"
   }
   months <- unique(format(as.Date(data_cube$attrs$Dates), format = '%B'))
   months <- paste(as.character(months), collapse=", ")
-  sdate_min <- format(min(as.Date(data_cube$attrs$Dates)),
+  sdate_min <- format(min(as.Date(data_cube$attrs$Dates), na.rm = TRUE),
                       format = date_format)
-  sdate_max <- format(max(as.Date(data_cube$attrs$Dates)),
+  sdate_max <- format(max(as.Date(data_cube$attrs$Dates), na.rm = TRUE),
                       format = date_format)
   # Log the summary
   info(recipe$Run$logger, "DATA SUMMARY:")
-  info(recipe$Run$logger, paste(object_name, "months:", months))
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
+    info(recipe$Run$logger, paste(object_name, "months:", months))
+  } else if (recipe$Analysis$Variables$freq == "weekly_mean") {
+    sdate <- recipe$Analysis$Time$sdate
+    ftime_min <- recipe$Analysis$Time$ftime_min
+    ftime_max <- recipe$Analysis$Time$ftime_max
+    week_valid_ini <- (ftime_min:ftime_max - 1) * 7 + ymd(sdate) + 4 # the +4 day offset assumes a Thursday start date
+    info(recipe$Run$logger, paste(object_name, "corresponding forecast times:",
+                                  paste(week_valid_ini, collapse = ' ')))
+  } # Should we include daily summaries?
   info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to",
                                 sdate_max))
   info(recipe$Run$logger, paste(object_name, "dimensions:"))
@@ -49,3 +59,4 @@ data_summary <- function(data_cube, recipe) {
   info(recipe$Run$logger, "---------------------------------------------")
   invisible(gc())
 }
+
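To make the new weekly logging concrete: the valid dates are derived from the start date with simple day arithmetic. A standalone sketch, assuming lubridate and a hypothetical Thursday start date (the example values are illustrative, not from the patch):

    library(lubridate)
    sdate <- "20010104"  # a Thursday
    ftime_min <- 1
    ftime_max <- 4
    (ftime_min:ftime_max - 1) * 7 + ymd(sdate) + 4
    # [1] "2001-01-08" "2001-01-15" "2001-01-22" "2001-01-29"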
diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R
index 3b2e6eee3399fec5be8daebe097222951f65123e..ceb696f44a1a7979c9bc5a94a522413157a13976 100644
--- a/tools/divide_recipe.R
+++ b/tools/divide_recipe.R
@@ -33,13 +33,6 @@ divide_recipe <- function(recipe) {
       all_recipes[[var]]$Analysis$Variables <-
         recipe$Analysis$Variables[[var]]
     }
-  # for (dep in verifications$dependent) {
-  #   all_recipes[[i]]$Analysis$Variables <- dep
-  #   i = i + 1
-  #   all_recipes <- append(all_recipes, list(beta_recipe))
-  # }
-  # all_recipes <- all_recipes[-length(all_recipes)] # wth does this do
-
   # duplicate recipe by Datasets:
   # check Systems
   # If a single system is not given inside a list, rebuild structure
@@ -61,7 +54,7 @@ divide_recipe <- function(recipe) {
   }
   for (sys in 1:n_models) {
     for (reci in 1:length(all_recipes)) {
-      if (sys == length(recipe$Analysis$Datasets$System)+1){
+      if (sys == length(recipe$Analysis$Datasets$System)+1) {
         # seasonal only needs the model name; decadal also needs the members
         if (tolower(recipe$Analysis$Horizon) == 'seasonal') {
           m <- unlist(recipe$Analysis$Datasets$System)
@@ -169,6 +162,44 @@ divide_recipe <- function(recipe) {
                    ftime_min = recipe$Analysis$Time$ftime_min,
                    ftime_max = recipe$Analysis$Time$ftime_max)
     }
+  } else if (tolower(recipe$Analysis$Horizon) == "subseasonal") {
+    DAYS_OF_WEEK <- list(monday = 1, tuesday = 2, wednesday = 3, thursday = 4,
+                         friday = 5, saturday = 6, sunday = 0)
+    # If an 'sdate' is a year (YYYY), expand it into all the days of that year
+    # that fall on the day specified in 'week_day' (by default, Thursday)
+    day_of_the_week_ini <- c()
+    for (sdate in 1:length(recipe$Analysis$Time$sdate)) {
+      if (nchar(recipe$Analysis$Time$sdate[sdate]) == 4) {
+        sdate_weekday <- DAYS_OF_WEEK[[tolower(recipe$Analysis$Time$week_day)]]
+        days <- as.POSIXlt(paste(recipe$Analysis$Time$sdate[sdate],
+                                 1:366, sep = "-"), format = "%Y-%j")
+        # which() drops the NA produced by day 366 in non-leap years
+        day_of_the_week_ini <-
+          c(day_of_the_week_ini,
+            format(days[which(days$wday == sdate_weekday)], "%Y%m%d"))
+      } else if (nchar(recipe$Analysis$Time$sdate[sdate]) == 8) {
+        day_of_the_week_ini <- c(day_of_the_week_ini,
+                                 recipe$Analysis$Time$sdate[sdate])
+      } else {
+        stop("Subseasonal 'sdate' must be a year (YYYY) or a date (YYYYMMDD).")
+      }
+    }
+    for (sdate in 1:length(day_of_the_week_ini)) {
+      for (reci in 1:length(all_recipes)) {
+        all_recipes[[reci]]$Analysis$Time <-
+          list(sdate = day_of_the_week_ini[sdate],
+               fcst_year = recipe$Analysis$Time$fcst_year,
+               hcst_start = recipe$Analysis$Time$hcst_start,
+               hcst_end = recipe$Analysis$Time$hcst_end,
+               ftime_min = recipe$Analysis$Time$ftime_min,
+               ftime_max = recipe$Analysis$Time$ftime_max,
+               week_day = recipe$Analysis$Time$week_day,
+               sweek_window = recipe$Analysis$Time$sweek_window,
+               sday_window = recipe$Analysis$Time$sday_window)
+      }
+      if (sdate == 1) {
+        recipes <- all_recipes
+      } else {
+        recipes <- append(recipes, all_recipes)
+      }
+    }
+    all_recipes <- recipes
+    rm(list = 'recipes')
   } # Rest of horizons
   # Save all recipes in separate YAML files
   chunk <- 1
diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R
index 16a34d321959d2bf0c3b3c20e23aea8f6ac35829..eab9c4fe63ff7a3070f008451cc32f1617e86dd9 100644
--- a/tools/prepare_outputs.R
+++ b/tools/prepare_outputs.R
@@ -108,6 +108,18 @@ prepare_outputs <- function(recipe_file,
   if (restructure) {
     recipe <- restructure_recipe(recipe)
   }
+  # Pre-processing: convert all strings in Analysis section to lowercase,
+  # except for system, reference and region names
+  system <- recipe$Analysis$Datasets$System
+  reference <- recipe$Analysis$Datasets$Reference
+  region <- recipe$Analysis$Region
+  recipe$Analysis <- rapply(recipe$Analysis, tolower,
+                            how = "replace",
+                            classes = "character")
+  recipe$Analysis$Datasets$System <- system
+  recipe$Analysis$Datasets$Reference <- reference
+  recipe$Analysis$Region <- region
+
   # Run recipe checker
   if (disable_checks) {
     warn(recipe$Run$logger,