diff --git a/conf/archive_subseasonal.yml b/conf/archive_subseasonal.yml
index 3fdfe10ea4ae844bed20b651f143f69086716d44..8c80b679902a706996dc22f919eb802c9b8ddc85 100644
--- a/conf/archive_subseasonal.yml
+++ b/conf/archive_subseasonal.yml
@@ -4,19 +4,20 @@ gpfs:
     ECMWF-ENS-EXT:
       name: "ECMWF-ENS-EXT"
       insitution: "ECMWF"
+      onthefly: TRUE
       srchc: "exp/ecmwf/s2s-monthly_ensforhc/"
       srcfc: "exp/ecmwf/s2s-monthly_ensfor/"
       weekly_mean: {"prlr":"weekly_mean/prlr_s0-6h/"}
       nmember:
-        fcst: 51
-        hcst: 11
+        fcst: 101
+        hcst: 11
       calendar: "gregorian"
       time_stamp_lag: "0"
       reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/s2s-monthly_ensforhc/weekly_mean/prlr_s0-6h/prlr_20221205.nc"
     NCEP-CFSv2:
       name: "NCEP CFSv2"
       institution: "NOAA NCEP" #?
-      src: "exp/ncep/cfs-v2/"
+      onthefly: FALSE
       srchc: "exp/ncep/cfs-v2/"
       srcfc: "exp/ncep/cfs-v2/"
       weekly_mean: {"tas":"weekly_mean/s2s/tas_f24h/",
@@ -40,10 +41,27 @@ gpfs:
 esarchive:
   src_sys: "/esarchive/"
   System:
+    ECMWF-S2S:
+      name: "ECMWF-S2S"
+      institution: "ECMWF"
+      onthefly: yes
+      srchc: "exp/ecmwf/s2s-monthly_ensforhc/"
+      srcfc: "exp/ecmwf/s2s-monthly_ensfor/"
+      weekly_mean: {"prlr":"weekly_mean/prlr_s0-6h/",
+                    "tas":"weekly_mean/tas_f24h/"} # to review
+      daily_mean: {"tas":"daily/tas/"}
+      nmember:
+        fcst: 51
+        hcst: 11
+      calendar: "gregorian"
+      time_stamp_lag: "0"
+      reference_grid: "/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/tas/20241024/tas_20231024.nc"
     NCEP-CFSv2:
       name: "NCEP CFSv2"
       institution: "NOAA NCEP" #?
-      src: "exp/ncep/cfs-v2/"
+      onthefly: no
+      srchc: "exp/ncep/cfs-v2/"
+      srcfc: "exp/ncep/cfs-v2/"
       weekly_mean: {"tas":"weekly_mean/s2s/tas_f24h/",
                     "prlr":"weekly_mean/s2s/prlr_f24h/",
                     "tasmax":"weekly_mean/s2s/tasmax_f24h/",
diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R
index 16e24647338c60eaf93d62ef41b58b34b84e387f..61c05fb991a599e2d0fcc5035e2b1186d88c1b5d 100644
--- a/modules/Loading/R/dates2load.R
+++ b/modules/Loading/R/dates2load.R
@@ -8,7 +8,8 @@
 #'
 #'@param recipe Auto-S2S configuration recipe as returned by read_yaml()
 #'@param logger object of class logger containing log output file information
-#'
+#'@param exp_descrip system description from the archive, used to know whether
+#'  the hindcast is stored on the fly and to build its directory paths
 #'@return a list of two arrays containing file dates for hcst and fcst
 #'
 #'@export
@@ -16,89 +17,66 @@
 library(lubridate)
 source("modules/Loading/R/subseas_file_dates.R")
 
-dates2load <- function(recipe, logger) {
+dates2load <- function(recipe, logger, exp_descrip) {
   # from recipe it uses:
   # temporal frequency of the variable requested
   # the details in $Time request: hcst period, forecast times ...
   temp_freq <- recipe$Analysis$Variables$freq
   system_name <- recipe$Analysis$Datasets$System$name
-  recipe <- recipe$Analysis$Time
+  hcst_start <- recipe$Analysis$Time$hcst_start
+  hcst_end <- recipe$Analysis$Time$hcst_end
+  sdate <- recipe$Analysis$Time$sdate
+  fcst_year <- recipe$Analysis$Time$fcst_year
+  horizon <- recipe$Analysis$Horizon
 
-  ## TODO: May be needed for subseasonal daily
-  # if (nchar(recipe$sdate) == 8 &&) {
-  #   recipe$sdate <- substr(recipe$sdate, 5, 8)
-  # }
-  if (temp_freq == "monthly_mean") {
-    # hcst dates
-    file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end),
-                         recipe$sdate)
-    # Note: UKMO models are always missing the start date of January 1993.
-    # This piece of code removes that date from the start dates.
- UKMO_MODELS <- c("UK-MetOffice-Glosea600", "UK-MetOffice-Glosea601", - "UK-MetOffice-Glosea602", "UK-MetOffice-Glosea603") - if ((system_name %in% UKMO_MODELS) && (file_dates[1] == "19930101")) { - file_dates <- file_dates[-1] - warn(logger, - paste("January 1993 start date is not available for", system_name, - "and has been removed from the list of start dates.")) - } - file_dates <- .add_dims(file_dates) - } else if (temp_freq == "weekly_mean") { - sday <- recipe$sday_window - if (is.null(sday)) { - sday <- 3 - } - n.skill.weeks <- recipe$sweek_window - if (n.skill.weeks %% 2 != 0) { - fcst_sweek_ind <- (n.skill.weeks + 1)/2 + initializations <- NULL + file_dates.obs <- NULL + if (horizon == "subseasonal") { + subseas_dates <- subseas_file_dates(recipe, exp_descrip) + file_dates <- subseas_dates$hcst + file_dates.fcst <- subseas_dates$fcst + file_dates.obs <- subseas_dates$obs + initializations <- subseas_dates$hcst_init + } else { + if (temp_freq == "monthly_mean") { + # hcst dates + file_dates <- paste0(strtoi(hcst_start):strtoi(hcst_end), + sdate) + # Note: UKMO models are always missing the start date of January 1993. + # This piece of code removes that date from the start dates. + UKMO_MODELS <- c("UK-MetOffice-Glosea600", "UK-MetOffice-Glosea601", + "UK-MetOffice-Glosea602", "UK-MetOffice-Glosea603") + if ((system_name %in% UKMO_MODELS) && (file_dates[1] == "19930101")) { + file_dates <- file_dates[-1] + warn(logger, + paste("January 1993 start date is not available for", system_name, + "and has been removed from the list of start dates.")) + } + file_dates <- .add_dims(file_dates) } else { - fcst_sweek_ind <- (n.skill.weeks)/2 + file_dates <- paste0(strtoi(hcst_start):strtoi(hcst_end), sdate) + file_dates <- .add_dims(file_dates) } - fcst.sdate <- recipe$sdate - if (fcst_sweek_ind != 1) { - if (fcst_sweek_ind %% 2 == 0) { - fcst.sdate_to_start <- as.character(format(as.Date(as.character(fcst.sdate), - format = "%Y%m%d") + 4 + 7 * ((fcst_sweek_ind/2) - 1), "%Y%m%d")) + # fcst dates (if fcst_year empty it creates an empty object) + if (!is.null(fcst_year)) { + if (temp_freq == "monthly_mean" || temp_freq == "daily_mean") { + file_dates.fcst <- paste0(fcst_year, sdate) + file_dates.fcst <- .add_dims(file_dates.fcst) + # } else if (temp_freq == "weekly_mean") { + # ## NOTE: Why are we calling this twice? 
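+      #   ## (The subseasonal branch above already returns the weekly fcst
+      #   ## dates via subseas_file_dates(), so this second call would be redundant.)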
+ # file_dates.fcst <- subseas_file_dates(recipe, exp_descrip) } else { - fcst.sdate_to_start <- as.character(format(as.Date(as.character(fcst.sdate), - format = "%Y%m%d") + 7 *((fcst_sweek_ind - 1)/2), "%Y%m%d")) + stop("Unknown temporal frequency.") } } else { - fcst.sdate_to_start <- recipe$sdate + # if no fcst year is requested: + file_dates.fcst <- NULL } - file_dates <- subseas_file_dates(startdate = fcst.sdate_to_start, - n.skill.weeks = n.skill.weeks, - n.days = sday, - hcst.start = as.numeric(recipe$hcst_start), - hcst.end = as.numeric(recipe$hcst_end), - ftime_min = recipe$ftime_min, - ftime_max = recipe$ftime_max, out = 'hcst') - } else { - file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), - recipe$sdate) - file_dates <- .add_dims(file_dates) - } - # fcst dates (if fcst_year empty it creates an empty object) - if (!is.null(recipe$fcst_year)) { - if (temp_freq == "monthly_mean" || temp_freq == "daily_mean") { - file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) - file_dates.fcst <- .add_dims(file_dates.fcst) - } else if (temp_freq == "weekly_mean") { - file_dates.fcst <- subseas_file_dates(startdate = fcst.sdate_to_start, - n.skill.weeks = n.skill.weeks, - n.days = sday, - hcst.start = as.numeric(recipe$hcst_start), - hcst.end = as.numeric(recipe$hcst_end), - ftime_min = recipe$ftime_min, - ftime_max = recipe$ftime_max, out = 'fcst') - } else { - stop("Unknown temporal frequency.") - } - } else { - # if no fcst year is requested: - file_dates.fcst <- NULL } - return(list(hcst = file_dates, fcst = file_dates.fcst)) + return(list(hcst = file_dates, + fcst = file_dates.fcst, + obs = file_dates.obs, + hcst_init = initializations)) } # adds the correspondent dims to each sdate array diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R index 97623b0212e69b04d29eebf2577b82becb0f1b00..54f7806baaba4d0310d3b6cd887b440b5d17954f 100644 --- a/modules/Loading/R/load_subseasonal.R +++ b/modules/Loading/R/load_subseasonal.R @@ -21,10 +21,6 @@ load_subseasonal <- function(recipe) { variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] store.freq <- recipe$Analysis$Variables$freq - # get sdates array - ## LOGGER: Change dates2load to extract logger from recipe? - sdates <- dates2load(recipe, recipe$Run$logger) - ## TODO: Examine this verifications part, verify if it's necessary # stream <- verifications$stream # sdates <- verifications$fcst.sdate @@ -35,13 +31,18 @@ load_subseasonal <- function(recipe) { # get datasets dict: archive <- get_archive(recipe) # read_yaml("conf/archive_subseasonal.yml")[[recipe$Run$filesystem]] exp_descrip <- archive$System[[exp.name]] - + + # get sdates array + ## LOGGER: Change dates2load to extract logger from recipe? 
+ sdates <- dates2load(recipe, recipe$Run$logger, exp_descrip) + + freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]]) reference_descrip <- archive$Reference[[ref.name]] freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]]) obs.dir <- reference_descrip$src - fcst.dir <- exp_descrip$src - hcst.dir <- exp_descrip$src + fcst.dir <- exp_descrip$srcfc + hcst.dir <- exp_descrip$srchc fcst.nmember <- exp_descrip$nmember$fcst hcst.nmember <- exp_descrip$nmember$hcst @@ -66,15 +67,17 @@ load_subseasonal <- function(recipe) { } var_dir_obs <- reference_descrip[[frequency]][variable] var_dir_exp <- exp_descrip[[frequency]][variable] - + # include $var$ in the path + #var_dir_exp <- lapply(var_dir_exp, function(x){gsub(variable, "$var$", x)}) + # ----------- obs.path <- paste0(archive$src_ref, obs.dir, "$var_dir$", "$var$_$file_date$.nc") hcst.path <- paste0(archive$src_sys, hcst.dir, "$var_dir$", - "$var$_$file_date$.nc") + "$initialization$/$var$_$file_date$.nc") - fcst.path <- paste0(archive$src_sys, hcst.dir, "$var_dir$", + fcst.path <- paste0(archive$src_sys, fcst.dir, "$var_dir$", "$var$_$file_date$.nc") # Define regrid parameters: @@ -85,21 +88,14 @@ load_subseasonal <- function(recipe) { #------------------------------------------------------------------- circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) - if (recipe$Analysis$Variables$freq == "weekly_mean") { - split_multiselected_dims = TRUE - } else { - split_multiselected_dims = FALSE - } - # Load hindcast #------------------------------------------------------------------- - ## NOTE: metadata_dims has to be specified as 'file_date' to be able to get - ## the metadata when the first file is missing. However, when retrieving two - ## variables, it must be 'var'. Start() does not admit both. 
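+  ## With on-the-fly hindcasts, each hindcast file sits under the directory of
+  ## its forecast initialization, e.g. (illustrative path, following the
+  ## reference_grid entry in conf/archive_subseasonal.yml):
+  ## /esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/tas/20241024/tas_20041024.nc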
hcst <- Start(dat = hcst.path, var = variable, var_dir = var_dir_exp, file_date = sdates$hcst, + initialization = sdates$hcst_init, + initialization_depends = "file_date", time = recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max, var_dir_depends = 'var', latitude = values(list(lats.min, lats.max)), @@ -119,32 +115,18 @@ load_subseasonal <- function(recipe) { return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, + split_multiselected_dims = TRUE, retrieve = TRUE) # Remove var_dir dimension if ("var_dir" %in% names(dim(hcst))) { hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") } - - if (store.freq %in% c("daily_mean", "daily")) { - # Adjusts dims for daily case, could be removed if startR allows - # multidim split - names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(hcst))] <- dim(hcst) - dim(hcst) <- default_dims - # Change time attribute dimensions - default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) - names(dim(attr(hcst, "Variables")$common$time))[which(names( - dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" - default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- - dim(attr(hcst, "Variables")$common$time) - dim(attr(hcst, "Variables")$common$time) <- default_time_dims + # Remove initialization dimension + if ("initialization" %in% names(dim(hcst))) { + hcst <- Subset(hcst, along = "initialization", indices = 1, drop = "selected") } - + # Convert hcst to s2dv_cube object hcst_dates <- attr(hcst, "Variables")$common$time hcst <- as.s2dv_cube(hcst) @@ -183,31 +165,13 @@ load_subseasonal <- function(recipe) { return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, + split_multiselected_dims = TRUE, retrieve = TRUE) if ("var_dir" %in% names(dim(fcst))) { fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") } - if (store.freq %in% c("daily_mean", "daily")) { - # Adjusts dims for daily case, could be removed if startR allows - # multidim split - names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(fcst))] <- dim(fcst) - dim(fcst) <- default_dims - # Change time attribute dimensions - default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) - names(dim(attr(fcst, "Variables")$common$time))[which(names( - dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" - default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- - dim(attr(fcst, "Variables")$common$time) - dim(attr(fcst, "Variables")$common$time) <- default_time_dims - } - # Convert fcst to s2dv_cube fcst <- as.s2dv_cube(fcst) # Adjust dates for models where the time stamp goes into the next month @@ -226,37 +190,27 @@ load_subseasonal <- function(recipe) { # Obtain dates and date dimensions from the loaded hcst data to make sure # the corresponding observations are loaded correctly. 
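+  ## The observation dates are pre-computed in subseas_file_dates() and arrive
+  ## here as sdates$obs, so no additional date arithmetic is needed.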
 ## TODO: Generalize to other models
-  dates <- Apply(sdates$hcst,
-                 margins = list(x = c("syear", "sweek", "sday")),
-                 fun = function(x, idx_min, idx_max) {
-                   x <- as.POSIXct(x, format = "%Y%m%d", tz = "UTC") %m+% weeks(idx_min:idx_max) %m+% days(4)
-                   return(as.array(x))
-                 },
-                 output_dims = c("time"),
-                 idx_min = recipe$Analysis$Time$ftime_min - 1,
-                 idx_max = recipe$Analysis$Time$ftime_max - 1,
-                 ncores = 1)[[1]]
-  dates <- Reorder(dates, c("sday", "sweek", "syear", "time"))
-  dates_dims <- dim(dates)
-  dates <- as.POSIXct(dates, origin = "1970-01-01", tz = "UTC")
-  dates <- format(dates, "%Y%m%d")
-  dim(dates) <- dates_dims
+  ## NOTE: This does not work for daily!
   if (any(is.na(hcst_dates))) {
     warn(recipe$Run$logger,
          paste("Missing files found for the hindcast, the corresponding",
               "observations will be filled with NA values."))
-    dates[is.na(hcst_dates)] <- NA
+    if (store.freq == "weekly_mean") {
+      sdates$obs[is.na(hcst_dates)] <- NA
+    } else {
+      ## TODO: Mask the missing dates in the daily observations as well
+    }
   }
 
   ## Separate Start() call for monthly vs daily data
   if (store.freq == "weekly_mean") {
-    ## Is this needed...?
+
     obs <- Start(dat = obs.path,
                  var = variable,
                  var_dir = var_dir_obs,
                  var_dir_depends = 'var',
-                 file_date = dates,
+                 file_date = sdates$obs,
                  latitude = values(list(lats.min, lats.max)),
                  latitude_reorder = Sort(),
                  longitude = values(list(lons.min, lons.max)),
@@ -276,22 +230,16 @@
                  retrieve = TRUE)
 
   } else if (store.freq %in% c("daily_mean", "daily")) {
-
-    # Get year and month for file_date
-    dates_file <- sapply(dates, format, '%Y%m%d')
-    dim(dates_file) <- dim(dates)
-    # Set hour to 12:00 to ensure correct date retrieval for daily data
-    lubridate::hour(dates) <- 12
-    lubridate::minute(dates) <- 00
-    # Restore correct dimensions
-    dim(idxs$hcst) <- dim(dates_file)
+
+    obs_file_dates <- sort(unique(format(sdates$obs, "%Y%m")))
+    obs_file_dates <- obs_file_dates[!is.na(obs_file_dates)]
 
     obs <- Start(dat = obs.path,
                  var = variable,
                  var_dir = var_dir_obs,
                  var_dir_depends = 'var',
-                 file_date = sort(unique(dates_file)),
-                 time = idxs$hcst,
+                 file_date = obs_file_dates,
+                 time = sdates$obs,
                  time_var = 'time',
                  time_across = 'file_date',
                  merge_across_dims = TRUE,
diff --git a/modules/Loading/R/subseas_file_dates.R b/modules/Loading/R/subseas_file_dates.R
index eca66c5572807abef6cc5496f5154accca380055..1e5ec5835b52bb38c0e45ed28a4bc8972ca82d6d 100644
--- a/modules/Loading/R/subseas_file_dates.R
+++ b/modules/Loading/R/subseas_file_dates.R
@@ -3,106 +3,116 @@
 # The number of days that will be used for calibration
 # the hindcast period and the forecast times
 # It returns an array with dimension sday, sweek, syear
-# for the hcst is out = 'hcst' or for the forecast if out = 'fcst'
-subseas_file_dates <- function(startdate, n.skill.weeks, n.days,
-                               hcst.start, hcst.end,
-                               ftime_min, ftime_max, out) {
-  # Generate the sday_window vectors:
-  ## Only for Thursdays and Mondays
-  ## Create a diagonal matrix of 3 and 4 days matching Mondays/Thursdays
-  ## To be substracted to startdate in the loop below
-  prev_sday <- t(sapply(1:((n.days + 1)/2 - 1), function(x) {
-    if (x %% 2 == 0) {
-      res <- rep(4, (n.days + 1)/2 - 1)
-    } else {
-      res <- rep(3, (n.days + 1)/2 - 1)
-    }
-    return(res)}))
-  prev_sday[lower.tri(prev_sday)] <- 0
-
-  next_sday <- t(sapply(1:((n.days + 1)/2 - 1), function(x) {
-    if (x %% 2 == 0) {
-      res <- rep(3, (n.days + 1)/2 - 1)
-    } else {
-      res <- rep(4, (n.days + 1)/2 - 1)
-    }
-    return(res)}))
-  next_sday[lower.tri(next_sday)] <- 0
-
-  if (n.days > 1) {
-    prev_sday <- colSums(prev_sday)
-    next_sday <- colSums(next_sday)
-    thrusday_win <- c(rev(prev_sday) * -1, 0, next_sday)
-    monday_win <- c(rev(next_sday) * -1, 0, prev_sday)
-  } else {
-    thrusday_win <- monday_win <- 0
+subseas_file_dates <- function(recipe, exp_descrip) {
+  sdate <- recipe$Analysis$Time$sdate
+  sweek <- recipe$Analysis$Time$sweek_window
+  sday <- recipe$Analysis$Time$sday_window
+  if (is.null(sday)) {
+    sday <- 3
  }
-  #### END Generation sday_window vectors
-
-  ftime_min <- as.numeric(substr(as.character(startdate), 1, 4)) - hcst.end
-  ftime_max <- as.numeric(substr(as.character(startdate), 1, 4)) - hcst.start
-  startdate <- as.Date(toString(startdate), "%Y%m%d")
-  sdates <- numeric(0)
-  while (length(sdates) < n.skill.weeks){
-    if (format(startdate, "%a") == "Thu" || format(startdate, "%a") == "Mon") {
-      sdates <- c(sdates, format(startdate, "%Y%m%d"))
-    }
-    startdate <- startdate - 1
+  hcst.start <- recipe$Analysis$Time$hcst_start
+  hcst.end <- recipe$Analysis$Time$hcst_end
+  weekday <- recipe$Analysis$Time$week_day
+  fcst_year <- recipe$Analysis$Time$fcst_year
+  freq <- recipe$Analysis$Variables$freq
+  # Convert sdate to date
+  sdate <- as.Date(as.character(sdate), format = "%Y%m%d")
+  if (tolower(weekday) %in% c("thursday", "monday")) {
+    # Use sweek_window to define all initialisations
+    ## first a window of days wide enough to cover the period
+    timeframe <- ((sweek - 1)/2 + (sday - 1)/2) * 4 # 4 is the maximum gap in days between consecutive Monday/Thursday start dates
+    inidates <- seq(sdate - timeframe, sdate + timeframe, 1)
+    # Subset Mondays and Thursdays
+    inidates <- inidates[which(wday(inidates) %in% c(2, 5))] # lubridate wday(): 1 = Sunday, so 2 = Monday and 5 = Thursday
+  } else if (weekday == 2) {
+    timeframe <- ((sweek - 1)/2 + (sday - 1)/2) * 2
+    inidates <- seq(sdate - timeframe, sdate + timeframe, 1)
+    inidates <- inidates[seq(1, length(inidates), 2)]
+  }
+  ## Check that the number of dates matches the requested sweek and sday windows
+  if (length(inidates) > (sweek + (sday - 1))) {
+    # keep the window of sweek + sday - 1 dates centred on sdate
+    center <- which(inidates == sdate)
+    half <- (sweek + sday - 2)/2
+    inidates <- inidates[(center - half):(center + half)]
+  } else if (length(inidates) < (sweek + (sday - 1))) {
+    stop("Check how initialization dates are created")
  }
+  ## inidates is a vector of Thursdays and Mondays of length sweek plus the extra dates needed for sday
+  # create sday sample
+  inidates <- do.call("c", lapply(1:sweek, function(x) {
+    inidates[x:(x + sday - 1)]}))
+  # format for path strings
+  inidates <- format(inidates, "%Y%m%d")
+  # add dimensions for Start()
+  dim(inidates) <- c(sday = sday, sweek = sweek)
 
-  file_dates <- array(numeric(), c(0, n.days, (ftime_max - ftime_min) + 1))
-  file_dates.fcst <- array(numeric(), c(0, n.days, (ftime_max - ftime_min) + 1))
-  for (sdate in sdates) {
-    ## TODO: take into account year of the fcst to select hcst years
-    fcst.day <- substr(sdate, 7, 8)
-    fcst.month <- substr(sdate, 5, 6)
-    fcst.year <- substr(sdate, 1, 4)
+  # generalize for all years in hindcast period
+  file_dates <- Apply(inidates,
+                      margins = list(x = c("sday", "sweek")),
+                      fun = function(x, yr_min, yr_max) {
+                        x <- as.POSIXct(x, format = "%Y%m%d", tz = "UTC") %m-% years(yr_min:yr_max)
+                        x <- format(x, "%Y%m%d")
+                        return(as.array(x))
+                      },
+                      output_dims = c("syear"),
+                      yr_min = as.numeric(fcst_year) - as.numeric(hcst.start),
+                      yr_max = as.numeric(fcst_year) - as.numeric(hcst.end),
+                      ncores = 1)[[1]]
+  file_dates <- Reorder(file_dates, c("sday", "sweek", "syear"))
 
-    startdate <- as.Date(toString(sdate), "%Y%m%d")
+  ## 'initializations' is a list of all the forecast initializations, linked
+  ## to the corresponding file dates by the list names. The list elements are
+  ## blank if the hindcast is not on-the-fly.
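+  ## e.g. with fcst_year = "2024", the hindcast file date "20041024" is mapped
+  ## to the initialization directory "20241024"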
+  if (exp_descrip$onthefly) {
+    initializations <- lapply(file_dates,
+                              function(x) {
+                                paste0(fcst_year, format(as.Date(x, "%Y%m%d"), "%m%d"))
+                              })
+  } else {
+    initializations <- as.list(rep("", length(file_dates)))
+  }
+  names(initializations) <- file_dates
 
-    # makes sure of giving a: [M T M || T M T] combination
-    if(format(startdate, "%a") == "Thu") {
-      hcst.sdays <- startdate + thrusday_win
-#      hcst.sdays <- c(startdate - 3, startdate, startdate + 4, recursive = T)
-    } else {
-      hcst.sdays <- startdate + monday_win
-#      hcst.sdays <- c(startdate - 4, startdate, startdate + 3, recursive = T)
-    }
-    hcst.sdays <- format(as.Date(hcst.sdays,"%Y%m%d"), "%Y%m%d")
+  # Forecast:
+  files.fcst <- format(sdate, "%Y%m%d")
+  dim(files.fcst) <- c(sday = 1, sweek = 1, syear = 1)
 
-    sdays.file_dates <- array(numeric(),c(0, (ftime_max - ftime_min) + 1))
-    sdays.file_dates.fcst <- array(hcst.sdays, c(n.days,
-                                                 (ftime_max - ftime_min) + 1))
-    for (sday.sdate in hcst.sdays) {
-      sday.year <- substr(sday.sdate, 1, 4)
-      sday.mday <- substr(sday.sdate, 5, 8)
-      sday.years <- paste((strtoi(sday.year)- ftime_max):
-                          (strtoi(sday.year) - ftime_min), sep = "")
-      sday.dates <- apply(expand.grid(sday.years, sday.mday),
-                          1, paste, collapse = "")
-      sdays.file_dates <- abind(sdays.file_dates, sday.dates, along = 1)
-    }
+  # Observations:
+  # +4 days because hcst weekly means are built this way in esarchive
+  if (freq == "weekly_mean") {
+    files.obs <- Apply(file_dates,
+                       margins = list(x = c("syear", "sweek", "sday")),
+                       fun = function(x, idx_min, idx_max) {
+                         x <- as.POSIXct(x, format = "%Y%m%d", tz = "UTC") %m+% weeks(idx_min:idx_max) %m+% days(4)
+                         return(as.array(x))
+                       },
+                       output_dims = c("time"),
+                       idx_min = recipe$Analysis$Time$ftime_min - 1,
+                       idx_max = recipe$Analysis$Time$ftime_max - 1,
+                       ncores = 1)[[1]]
+    files.obs <- Reorder(files.obs, c("sday", "sweek", "syear", "time"))
+    time_dims <- dim(files.obs)
+    files.obs <- as.POSIXct(files.obs, origin = "1970-01-01", tz = "UTC")
+    files.obs <- format(files.obs, "%Y%m%d")
+    dim(files.obs) <- time_dims
 
-    names(dim(sdays.file_dates)) <- c('sday','syear')
-    file_dates <- abind(file_dates, sdays.file_dates, along = 1)
-    file_dates.fcst <- abind(file_dates.fcst, sdays.file_dates.fcst, along = 1)
+  } else if (freq %in% c("daily", "daily_mean")) {
+    files.obs <- Apply(file_dates,
+                       margins = list(x = c("syear", "sweek", "sday")),
+                       fun = function(x, idx_min, idx_max) {
+                         x <- as.POSIXct(x, format = "%Y%m%d", tz = "UTC") %m+% days(idx_min:idx_max)
+                         return(as.array(x))
+                       },
+                       output_dims = c("time"),
+                       idx_min = recipe$Analysis$Time$ftime_min - 1,
+                       idx_max = recipe$Analysis$Time$ftime_max - 1,
+                       ncores = 1)[[1]]
+    files.obs <- Reorder(files.obs, c("sday", "sweek", "syear", "time"))
+    time_dims <- dim(files.obs)
+    files.obs <- as.POSIXct(files.obs, origin = "1970-01-01", tz = "UTC")
+    dim(files.obs) <- time_dims
   }
-  names(dim(file_dates)) <- c('sweek','sday','syear')
-  names(dim(file_dates.fcst)) <- c('sweek','sday','syear')
-#  dir_dates <- paste0(file_dates.fcst,"/",var,"_",file_dates)
-#  dim(dir_dates) <- dim(file_dates)
-#  names(dim(dir_dates)) <- c('sweek','sday','syear')
-#  dir_dates <- substr(dir_dates,10,24)
-#  return(dir_dates)
-  if (out == 'fcst') {
-    central_day <- (n.days + 1)/2
-    central_week <- (n.skill.weeks + 1)/2
-    file_dates <- file_dates.fcst[central_week,central_day, 1]
-    dim(file_dates) <- c(sday = 1, sweek = 1, syear = 1)
-  }
-  file_dates <- Reorder(file_dates, c('sday', 'sweek', 'syear'))
-  return(file_dates)
+  return(list(hcst = file_dates, fcst = files.fcst, obs = files.obs,
+              hcst_init = initializations))
 }
diff --git a/recipes/atomic_recipes/recipe_subseasonal_ecmwf.yml b/recipes/atomic_recipes/recipe_subseasonal_ecmwf.yml
new file mode 100644
index 0000000000000000000000000000000000000000..138602d028001497ec64392659e240c7e854143a
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_subseasonal_ecmwf.yml
@@ -0,0 +1,177 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N.Pérez-Zanón
+  Info: # Complete recipe containing all possible fields.
+Analysis:
+  Horizon: subseasonal # Mandatory, str: 'subseasonal', 'seasonal' or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'weekly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
+#    - {name: 'tas', freq: 'weekly_mean', units: 'C'}
+    name: 'tas'
+    freq: 'daily_mean'
+    units: 'C'
+    # To request more variables to be divided in atomic recipes, add them this way:
+    # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}}
+  Datasets:
+    System:
+      # name: System name (Mandatory, str)
+      # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+      - {name: 'ECMWF-S2S', member: 'all'}
+      # To request more Systems to be divided in atomic recipes, add them this way:
+      # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies # Which module should the anomalies be created from (str)
+    Reference:
+      - {name: 'ERA5'} # Reference name (Mandatory, str)
+      # To request more References to be divided into atomic recipes, add them this way:
+      # - {name: 'ERA5Land'}
+  Time:
+    sdate: 20240104 #%Y%m%d
+    #- '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: '2024' # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '2004' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '2008' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step, in days for daily frequencies. Starts at “1”. (Mandatory, int)
+    ftime_max: 14 # Last forecast time step, in days for daily frequencies. Starts at “1”. (Mandatory, int)
+    week_day: Thursday
+    sweek_window: 5
+    sday_window: 3
+  Region:
+    # latmin: minimum latitude (Mandatory, int)
+    # latmax: maximum latitude (Mandatory, int)
+    # lonmin: # minimum longitude (Mandatory, int)
+    # lonmax: # maximum longitude (Mandatory, int)
+    #- {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+    # To request more regions to be divided in atomic recipes, add them this way:
+    {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 10}
+  Regrid:
+    method: bilinear # Interpolation method (Mandatory, str)
+    type: to_system # Interpolate to: 'to_system', 'to_reference', 'none',
+                    # or CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Calibration:
+      method: mse_min # Calibration method. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Anomalies:
+      compute: yes # Either yes/true or no/false (Mandatory, bool)
+      cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool)
+      save: 'fcst_only' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Downscaling:
+      # Assumption 1: leave-one-out cross-validation is always applied
+      # Assumption 2: for analogs, we select the best analog (minimum distance)
+      type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.
+      int_method: conservative # regridding method accepted by CDO. (Mandatory, str)
+      bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'.
+      lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn'
+      log_reg_method: # If type=logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+      target_grid: /esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20000202.nc # nc file or grid accepted by CDO
+      nanalogs: # If type=analogs. Number of analogs to be searched
+      save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str)
+    Time_aggregation:
+      execute: yes # Either yes/true or no/false. Defaults to false. (Mandatory, bool)
+      method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, string)
+      # ini and end: list, pairs of initial and final time steps to aggregate.
+      # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3
+      ini: [1, 2, 1]
+      end: [2, 3, 3]
+      # user_def: List of lists, custom user-defined forecast times to aggregate.
+      # Elements should be named; names can be chosen by the user.
+      # An R expression can be entered using '!expr'; it will be evaluated by the code.
+      # If both ini/end and user_def are defined, ini/end takes preference.
+      #user_def:
+      #  DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times
+      #  DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,...
+    Indices:
+      ## Indices available: - NAO (for psl and/or z500);
+      # - Nino1+2, Nino3, Nino3.4, Nino4 (for tos)
+      ## Each index can only be computed if its area is within the selected region.
+      # obsproj: NAO computation method (see s2dv::NAO()) Default is yes/true. (Optional, bool)
+      # save: What to save. Options: 'all'/'none'. Default is 'all'.
+      # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool)
+      # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool)
+      # alpha: Significance threshold. Default value is 0.05 (Optional, numeric)
+      #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      # Also available if variable is psl and/or z500:
+      # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes}
+    Skill:
+      metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds
+      # for quantiles and probability categories. 
Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: forecast_ensemble_mean # Types of plots to generate (Optional, str) + multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool) + projection: 'cylindrical_equidistant' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: no # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 10 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'S2S4E' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: ./tests/out-logs/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git3/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. 
Only for non-atomic recipes (not running interactively)
+  # fill only if using autosubmit
+  auto_conf:
+    script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script
+    expid: a6wq # replace with your EXPID
+    hpc_user: bsc032762 # replace with your hpc username
+    wallclock: 01:00 # wallclock for single-model jobs, hh:mm
+    wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used.
+    processors_per_job: 4 # processors to request for each single-model job.
+    processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used.
+    custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs.
+    custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used.
+    platform: nord3v2 # platform (for now, only nord3v2 is available)
+    email_notifications: yes # enable/disable email notifications. Change it if you want to.
+    email_address: victoria.agudetse@bsc.es # replace with your email address
+    notify_completed: yes # notify me by email when a job finishes
+    notify_failed: yes # notify me by email when a job fails
+
diff --git a/recipes/atomic_recipes/recipe_subseasonal_ecmwf_weekly.yml b/recipes/atomic_recipes/recipe_subseasonal_ecmwf_weekly.yml
new file mode 100644
index 0000000000000000000000000000000000000000..22d42b425af79adfcbfcb81c896f19e122d68079
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_subseasonal_ecmwf_weekly.yml
@@ -0,0 +1,179 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N.Pérez-Zanón
+  Info: # Complete recipe containing all possible fields.
+Analysis:
+  Horizon: subseasonal # Mandatory, str: 'subseasonal', 'seasonal' or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'weekly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
+#    - {name: 'tas', freq: 'weekly_mean', units: 'C'}
+    name: 'prlr'
+    freq: 'weekly_mean'
+    units: 'mm'
+    # To request more variables to be divided in atomic recipes, add them this way:
+    # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}}
+  Datasets:
+    System:
+      # name: System name (Mandatory, str)
+      # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+      - {name: 'NCEP-CFSv2', member: 'all'}
+      # To request more Systems to be divided in atomic recipes, add them this way:
+      # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies # Which module should the anomalies be created from (str)
+    Reference:
+      - {name: 'ERA5'} # Reference name (Mandatory, str)
+      # To request more References to be divided into atomic recipes, add them this way:
+      # - {name: 'ERA5Land'}
+  Time:
+    sdate:
+      - 20240830 #%Y%m%d
+      - 20250513
+    #- '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: '2025' # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '2004' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '2008' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step, in weeks for weekly frequencies. Starts at “1”. (Mandatory, int)
+    ftime_max: 4 # Last forecast time step, in weeks for weekly frequencies. Starts at “1”. (Mandatory, int)
+    week_day: Thursday
+    sweek_window: 5
+    sday_window: 3
+  Region:
+    # latmin: minimum latitude (Mandatory, int)
+    # latmax: maximum latitude (Mandatory, int)
+    # lonmin: # minimum longitude (Mandatory, int)
+    # lonmax: # maximum longitude (Mandatory, int)
+    #- {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+    # To request more regions to be divided in atomic recipes, add them this way:
+    {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 10}
+  Regrid:
+    method: bilinear # Interpolation method (Mandatory, str)
+    type: to_system # Interpolate to: 'to_system', 'to_reference', 'none',
+                    # or CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Calibration:
+      method: mse_min # Calibration method. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Anomalies:
+      compute: yes # Either yes/true or no/false (Mandatory, bool)
+      cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool)
+      save: 'fcst_only' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Downscaling:
+      # Assumption 1: leave-one-out cross-validation is always applied
+      # Assumption 2: for analogs, we select the best analog (minimum distance)
+      type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.
+      int_method: conservative # regridding method accepted by CDO. (Mandatory, str)
+      bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'.
+      lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn'
+      log_reg_method: # If type=logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+      target_grid: /esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20000202.nc # nc file or grid accepted by CDO
+      nanalogs: # If type=analogs. Number of analogs to be searched
+      save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str)
+    Time_aggregation:
+      execute: yes # Either yes/true or no/false. Defaults to false. (Mandatory, bool)
+      method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, string)
+      # ini and end: list, pairs of initial and final time steps to aggregate.
+      # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3
+      ini: [1, 2, 1]
+      end: [2, 3, 3]
+      # user_def: List of lists, custom user-defined forecast times to aggregate.
+      # Elements should be named; names can be chosen by the user.
+      # An R expression can be entered using '!expr'; it will be evaluated by the code.
+      # If both ini/end and user_def are defined, ini/end takes preference.
+      #user_def:
+      #  DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times
+      #  DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,...
+    Indices:
+      ## Indices available: - NAO (for psl and/or z500);
+      # - Nino1+2, Nino3, Nino3.4, Nino4 (for tos)
+      ## Each index can only be computed if its area is within the selected region.
+      # obsproj: NAO computation method (see s2dv::NAO()) Default is yes/true. (Optional, bool)
+      # save: What to save. Options: 'all'/'none'. Default is 'all'.
+      # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool)
+      # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool)
+      # alpha: Significance threshold. Default value is 0.05 (Optional, numeric)
+      #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      # Also available if variable is psl and/or z500:
+      # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes}
+    Skill:
+      metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds
+      # for quantiles and probability categories. Each set of thresholds should be
+      # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional)
+      save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str)
+    Visualization:
+      plots: forecast_ensemble_mean # Types of plots to generate (Optional, str)
+      multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool)
+      projection: 'cylindrical_equidistant' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str)
+      mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      dots_terciles: no # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF.
+    Scorecards:
+      execute: no # yes/no
+      regions:
+        # Mandatory: Define regions for which the spatial aggregation will be performed.
+        # The regions must be included within the area defined in the 'Analysis:Region' section.
+        Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+      start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. 
+ metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 10 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'S2S4E' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: ./tests/out-logs/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git3/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+    email_address: victoria.agudetse@bsc.es # replace with your email address
+    notify_completed: yes # notify me by email when a job finishes
+    notify_failed: yes # notify me by email when a job fails
+
diff --git a/tests/testthat/test-subseasonal_visualization_metadata.R b/tests/testthat/test-subseasonal_visualization_metadata.R
index ffa1ba0ac359fd8a340c095bf42490e9f7bb085b..d6d52f1bee26bef6bb6542925cd572aae7326e33 100644
--- a/tests/testthat/test-subseasonal_visualization_metadata.R
+++ b/tests/testthat/test-subseasonal_visualization_metadata.R
@@ -16,9 +16,9 @@
 recipe_file <- "tests/recipes/recipe-subseasonal_visualization.yml"
 recipe <- prepare_outputs(recipe_file, disable_checks = FALSE)
 
 # Load datasets
-suppressWarnings({invisible(capture.output(
+#suppressWarnings({invisible(capture.output(
 data <- Loading(recipe)
-))})
+#))})
 
 # Units transformation
 suppressWarnings({invisible(capture.output(
diff --git a/tests/testthat/test-subseasonal_weekly.R b/tests/testthat/test-subseasonal_weekly.R
index bb0759ac1572a2e0c006b61507ac918a68c1ff44..1e91c4d3110776601fd669652e67215e19dba4a7 100644
--- a/tests/testthat/test-subseasonal_weekly.R
+++ b/tests/testthat/test-subseasonal_weekly.R
@@ -77,7 +77,7 @@ c("0108", "0111", "0115"),
 )
 expect_equal(
 format(data$hcst$attrs$Dates[2, , 3, 1], "%m%d"),
-c("0118", "0115", "0111", "0108", "0104")
+c("0104", "0108", "0111", "0115", "0118")
 )
 expect_equal(
 format(data$hcst$attrs$Dates[2, 3, 3, ], "%Y%m%d"),
diff --git a/tests/testthat/test-subseasonal_weekly_crossval.R b/tests/testthat/test-subseasonal_weekly_crossval.R
index 1d65ce1a23b5d31e48ca354f1b3b0202c605e2bf..19198de120c026a2aec99d869c41e53c1e9afe77 100644
--- a/tests/testthat/test-subseasonal_weekly_crossval.R
+++ b/tests/testthat/test-subseasonal_weekly_crossval.R
@@ -57,7 +57,7 @@ expect_equal(mean(calibrated$fcst$data),
 expect_equal(mean(calibrated$hcst$data, na.rm = TRUE),
 299.4878,
 tolerance = 0.0001)
-expect_equal(as.vector(drop(calibrated$hcst$data)[2,, 1, 2, 3, 4]),
+expect_equal(as.vector(drop(calibrated$hcst$data)[4, , 1, 2, 3, 4]),
 c(298.6695, 298.6082, 299.1191, 299.1713),
 tolerance = 0.0001)
 expect_equal(range(calibrated$fcst$data),
diff --git a/tests/testthat/test-utils-subseas_file_dates.R b/tests/testthat/test-utils-subseas_file_dates.R
new file mode 100644
index 0000000000000000000000000000000000000000..7250ecde1725e7b1fc078864e3d59c0f69b45801
--- /dev/null
+++ b/tests/testthat/test-utils-subseas_file_dates.R
@@ -0,0 +1,81 @@
+context("subseas_file_dates.R")
+
+source("modules/Loading/Loading.R")
+source("modules/Loading/R/subseas_file_dates.R")
+
+test_that("NCEP", {
+  recipe <- list(Analysis = list(Variables = list(name = 'tas',
+                                                  freq = 'weekly_mean'),
+                                 Time = list(
+                                   sdate = 20241024,
+                                   fcst_year = "2024",
+                                   hcst_start = 2004,
+                                   hcst_end = 2016,
+                                   ftime_min = 1,
+                                   ftime_max = 4,
+                                   week_day = "Thursday",
+                                   sday_window = 3,
+                                   sweek_window = 9)))
+  des <- list(onthefly = FALSE)
+  test1 <- subseas_file_dates(recipe = recipe, exp_descrip = des)
+
+  # Results
+  expect_equal(length(test1), 4)
+  expect_equal(names(test1), c("hcst", "fcst", "obs", "hcst_init"))
+  # hcst
+  expect_equal(dim(test1$hcst), c(sday = 3, sweek = 9, syear = 13))
+  expect_equal(test1$hcst[2, 5, 1], "20041024")
+  expect_equal(substr(test1$hcst[2, 5, ], 1, 4),
+               as.character(recipe$Analysis$Time$hcst_start:
+                            recipe$Analysis$Time$hcst_end))
+  expect_equal(substr(test1$hcst[, 5, 1], 5, 8), c("1021", "1024", "1028"))
+  # hcst initialization
+  expect_equal(test1$hcst_init[[1]], "")
+  # fcst
+  expect_equal(dim(test1$fcst), c(sday = 1, sweek = 1, syear = 1))
+  expect_equal(test1$fcst[1], as.character(recipe$Analysis$Time$sdate))
+  # obs
+  expect_equal(dim(test1$obs), c(sday = 3, sweek = 9, syear = 13, time = 4))
+  expect_equal(substr(test1$obs[2, 5, , 1], 1, 4),
+               as.character(recipe$Analysis$Time$hcst_start:
+                            recipe$Analysis$Time$hcst_end))
+  expect_equal(substr(test1$obs[2, 5, 1, 1], 1, 8), "20041028")
+})
+
+test_that("ECMWF-S2S Thurs", {
+  recipe <- list(Analysis = list(Variables = list(name = 'tas',
+                                                  freq = 'weekly_mean'),
+                                 Time = list(
+                                   sdate = 20241024,
+                                   fcst_year = "2024",
+                                   hcst_start = 2004,
+                                   hcst_end = 2016,
+                                   ftime_min = 1,
+                                   ftime_max = 4,
+                                   week_day = "Thursday",
+                                   sday_window = 3,
+                                   sweek_window = 9)))
+  des <- list(onthefly = TRUE)
+  test2 <- subseas_file_dates(recipe = recipe, exp_descrip = des)
+
+  # Results
+  expect_equal(length(test2), 4)
+  expect_equal(names(test2), c("hcst", "fcst", "obs", "hcst_init"))
+  # hcst
+  expect_equal(dim(test2$hcst), c(sday = 3, sweek = 9, syear = 13))
+  expect_equal(test2$hcst[2, 5, 1], "20041024")
+  expect_equal(substr(test2$hcst[2, 5, ], 1, 4),
+               as.character(recipe$Analysis$Time$hcst_start:
+                            recipe$Analysis$Time$hcst_end))
+  expect_equal(substr(test2$hcst[, 5, 1], 5, 8), c("1021", "1024", "1028"))
+  # fcst
+  expect_equal(dim(test2$fcst), c(sday = 1, sweek = 1, syear = 1))
+  expect_equal(test2$fcst[1], as.character(recipe$Analysis$Time$sdate))
+  # obs
+  expect_equal(dim(test2$obs), c(sday = 3, sweek = 9, syear = 13, time = 4))
+  expect_equal(substr(test2$obs[2, 5, , 1], 1, 4),
+               as.character(recipe$Analysis$Time$hcst_start:
+                            recipe$Analysis$Time$hcst_end))
+  expect_equal(substr(test2$obs[2, 5, 1, 1], 1, 8), "20041028")
+})