diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R index eda5f2392ff23c953d173c2a7bd4d91147d37824..28d7905e08405f0a27d35069f260ef1dc920d08b 100644 --- a/modules/data_load/dates2load.R +++ b/modules/data_load/dates2load.R @@ -12,20 +12,29 @@ #'@return a list of two arrays containing file dates for hcst and fcst #' #'@export + +library(lubridate) + dates2load <- function(recipe, logger){ + temp_freq <- recipe$Analysis$Variables$freq recipe <- recipe$Analysis$Time # hcst dates file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate$fcst_sday) - file_dates <- .add_dims(file_dates, "hcst") + + if (temp_freq == "monthly_mean"){ + file_dates <- .add_dims(file_dates, "hcst") + } # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$sdate$fcst_syear)){ file_dates.fcst <- paste0(recipe$sdate$fcst_syear, recipe$sdate$fcst_sday) - file_dates.fcst <- .add_dims(file_dates.fcst, "fcst") + if (temp_freq == "monthly_mean"){ + file_dates.fcst <- .add_dims(file_dates.fcst, "fcst") + } } else { file_dates.fcst <- NULL info(logger, @@ -51,4 +60,52 @@ dates2load <- function(recipe, logger){ dim(data) <- default_dims return(data) - } +} + +# Gets the corresponding dates or indices according +# to the sdate/leadtimes requested in the recipe +# +# The leadtimes are defined by months +# Ex. 20201101 with leadtimes 1-4 corresponds to +# the forecasting times covering December to March +get_timeidx <- function(sdates, ltmin, ltmax, + time_freq="monthly_mean"){ + + if (time_freq == "daily_mean"){ + + sdates <- ymd(sdates) + idx_min <- sdates + months(ltmin) + idx_max <- sdates + months(ltmax+1) - days(1) + + indxs <- array(numeric(), c( + file_date=length(sdates), + time=(as.integer(idx_max[1]-idx_min[1])+1) + #syear=length(sdates) + #sday=1,sweek=1)) + )) + + for (sdate in 1:length(sdates)){ + days <- seq(idx_min[sdate],idx_max[sdate], by='days') + indxs[sdate,] <- days[!(format(days, "%m%d") == "0229")] + } + indxs <- as.POSIXct(indxs*86400, + tz = 'UTC', origin = '1970-01-01') + lubridate::hour(indxs) <- 12 + lubridate::minute(indxs) <- 00 + dim(indxs) <- list(file_date=length(sdates), + time=(as.integer(idx_max[1]-idx_min[1])+1)) + + } else if (time_freq == "monthly_mean") { + + idx_min <- ltmin + idx_max <- ltmax + indxs <- indices(idx_min:idx_max) + + } + + # TODO: 6 hourly case + #idx1 <- (sdates + months(ltmin-1) - sdates)*4 + #idx2 <- idx1 + ndays*4 - 1 + + return(indxs) +} diff --git a/modules/data_load/load.R b/modules/data_load/load.R index a6b2a5f063b444cf5c7a6cdfeabcbab49826a173..0083e9a910d1e1ae4c51894cd61b797833f26bcd 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -1,12 +1,14 @@ - - -## TODO: remove paths to personal scratchs +## TODO: remove paths to personal scratchs source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R") # Load required libraries/funs source("modules/data_load/dates2load.R") source("tools/libs.R") +path <- "/esarchive/scratch/lpalma/git/startR-test-attrDims/R/" +ff <- lapply(list.files(path), function(x) paste0(path, x)) +invisible(lapply(ff, source)) + ## TODO: Move to CSOperational # Conversion from startR_array to s2dv_array #------------------------------------------------------------------- @@ -33,16 +35,17 @@ startR_to_s2dv <- function(startR_array){ dim(dates_start) <- dates_dims dim(dates_end) <- dates_dims #dim(sdates) <- sdates_dims + ## TODO: change this line when time attributes work correctly? time_dims <- c("time", "sday", "sweek", "syear", "fcst_syear") s2dv_object <- s2dv_cube(data = attr2array(startR_array), lon = attr2array(attr(startR_array, - 'Variables')$dat1$longitude), + 'Variables')$dat1$longitude), lat = attr2array(attr(startR_array, - 'Variables')$dat1$latitude), - Variable = list(varName = names(attr(startR_array, - 'Variables')$common)[2], + 'Variables')$dat1$latitude), + Variable = list(varName=names(attr(startR_array, + 'Variables')$common)[2], level = NULL), Dates = list(start = dates_start, end = dates_end), @@ -50,17 +53,17 @@ startR_to_s2dv <- function(startR_array){ time_dims = time_dims, when = Sys.time(), source_files = attr2array(attr(startR_array, "Files")) - #Datasets = list(exp1 = list(InitializationsDates = list(Member_1 = "01011990", - # Members = "Member_1"))) + #Datasets=list(exp1=list(InitializationsDates=list(Member_1="01011990", + # Members="Member_1"))) ) - return(s2dv_object) - } # RECIPE FOR TESTING # -------------------------------------------------------------------------------- +# recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml" # recipe_file <- "modules/data_load/testing_recipes/recipe_2.yml" +# recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml" load_datasets <- function(recipe_file){ @@ -74,13 +77,12 @@ load_datasets <- function(recipe_file){ folder <- logger$foldername log_file <- logger$logname logger <- logger$logger + # ------------------------------------------- - # Set params ----------------------------------------- + hcst.inityear <- recipe$Analysis$Time$hcst_start hcst.endyear <- recipe$Analysis$Time$hcst_end - ltmin <- recipe$Analysis$Time$leadtimemin - ltmax <- recipe$Analysis$Time$leadtimemax lats.min <- recipe$Analysis$Region$latmin lats.max <- recipe$Analysis$Region$latmax lons.min <- recipe$Analysis$Region$lonmin @@ -90,7 +92,21 @@ load_datasets <- function(recipe_file){ variable <- recipe$Analysis$Variables$name store.freq <- recipe$Analysis$Variables$freq - + + # get sdates array + sdates <- dates2load(recipe, logger) + + idxs <- NULL + idxs$hcst <- get_timeidx(sdates$hcst, + recipe$Analysis$Time$leadtimemin, + recipe$Analysis$Time$leadtimemax, + time_freq=store.freq) + + idxs$fcst <- get_timeidx(sdates$fcst, + recipe$Analysis$Time$leadtimemin, + recipe$Analysis$Time$leadtimemax, + time_freq=store.freq) + ## TODO: Examine this verifications part, verify if it's necessary # stream <- verifications$stream # sdates <- verifications$fcst.sdate @@ -98,9 +114,6 @@ load_datasets <- function(recipe_file){ ## TODO: define fcst.name ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name - # get sdates array - sdates <- dates2load(recipe, logger) - # get esarchive datasets dict: archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive exp_descrip <- archive$System[[exp.name]] @@ -124,15 +137,15 @@ load_datasets <- function(recipe_file){ # ----------- obs.path <- paste0(archive$src, obs.dir, store.freq, "/$var$", - reference_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") + reference_descrip[[store.freq]][[variable]],"$var$_$file_date$.nc") hcst.path <- paste0(archive$src, hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") + exp_descrip[[store.freq]][[variable]],"$var$_$file_date$.nc") fcst.path <- paste0(archive$src, hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], "$var$_$fcst_syear$.nc") + exp_descrip[[store.freq]][[variable]],"$var$_$file_date$.nc") # Define regrid parameters: #------------------------------------------------------------------- @@ -145,13 +158,19 @@ load_datasets <- function(recipe_file){ } else { circularsort <- CircularSort(-180, 180) } + + if (recipe$Analysis$Variables$freq == "monthly_mean"){ + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } # Load hindcast #------------------------------------------------------------------- hcst <- Start(dat = hcst.path, var = variable, file_date = sdates$hcst, - time = indices(ltmin:ltmax), + time = idxs$hcst, latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), @@ -162,24 +181,37 @@ load_datasets <- function(recipe_file){ transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), + ensemble = c('member', 'ensemble')), ensemble = indices(1:hcst.nmember), return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), - split_multiselected_dims = TRUE, + split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - + + if (recipe$Analysis$Variables$freq == "daily_mean"){ + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sweek = 1, + sday = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + } hcst <- startR_to_s2dv(hcst) # Load forecast #------------------------------------------------------------------- if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)){ + # the call uses file_date instead of fcst_syear so that it can work + # with the daily case and the current version of startR not allowing + # multiple dims split fcst <- Start(dat = fcst.path, var = variable, - fcst_syear = sdates$fcst, - time = indices(ltmin:ltmax), + file_date = sdates$fcst, + time = idxs$fcst, latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), @@ -194,10 +226,11 @@ load_datasets <- function(recipe_file){ ensemble = indices(1:fcst.nmember), return_vars = list(latitude = 'dat', longitude = 'dat', - time = 'fcst_syear'), - split_multiselected_dims = TRUE, + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "fcst_year" fcst <- startR_to_s2dv(fcst) } else { @@ -206,27 +239,18 @@ load_datasets <- function(recipe_file){ # Load reference #------------------------------------------------------------------- - - # Get reference dates from hcst Start call - - ## TODO: Replace with new code once An-Chi's fix is released in StartR - #dates <- attr(hcst, 'Variables')$common$time dates <- hcst$Dates$start - # Get year and month for file_date - dates_file <- sapply(dates, format, '%Y%m') - dates.dims <- c(sday = 1, sweek = 1, time = 1) - dates.dims[names(dim(dates))] <- dim(dates) - # Set hour to 12:00 to ensure correct date retrieval for daily data - lubridate::hour(dates) <- 12 - lubridate::minute(dates) <- 00 - # Restore correct dimensions - dim(dates) <- dates.dims - names(dim(dates))[4] <- "syear" - dim(dates_file) <- dim(dates) - + # Separate Start() call for monthly vs daily data if (store.freq == "monthly_mean") { + dates_file <- sapply(dates, format, '%Y%m%d') + dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") + dim(dates_file) <- dim(Subset(hcst$data, + along=c('dat','var', + 'latitude', 'longitude', 'ensemble'), + list(1,1,1,1,1), drop="selected")) + obs <- Start(dat = obs.path, var = variable, file_date = dates_file, @@ -249,7 +273,16 @@ load_datasets <- function(recipe_file){ retrieve = TRUE) } else if (store.freq == "daily_mean") { - + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + obs <- Start(dat = obs.path, var = variable, file_date = sort(unique(dates_file)), diff --git a/modules/data_load/testing_recipes/recipe_1.yml b/modules/data_load/testing_recipes/recipe_1.yml index c7cdfca14de6d2bc9ededa3f8382a4048e39cf61..68715462779ad4f4a8b2a67dc80c75945df8ba11 100644 --- a/modules/data_load/testing_recipes/recipe_1.yml +++ b/modules/data_load/testing_recipes/recipe_1.yml @@ -1,6 +1,6 @@ Description: - Author: N.Pérez-Zanón - '': split version + Author: V. Agudetse + Analysis: Horizon: Seasonal Variables: @@ -14,11 +14,11 @@ Analysis: name: era5 Time: sdate: - fcst_syear: '2017' - fcst_sday: '0101' + fcst_syear: '2020' + fcst_sday: '1101' hcst_start: '1993' hcst_end: '2016' - leadtimemin: 2 + leadtimemin: 1 leadtimemax: 4 Region: latmin: -10 @@ -39,5 +39,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/modules/data_load/testing_recipes/recipe_2.yml b/modules/data_load/testing_recipes/recipe_2.yml index b8d81ad7be688b0079b275e83a653a5033e04ec5..b8f2d13acadda77d5b985807534eaa37b59dc4eb 100644 --- a/modules/data_load/testing_recipes/recipe_2.yml +++ b/modules/data_load/testing_recipes/recipe_2.yml @@ -1,6 +1,6 @@ Description: Author: V. Agudetse - '': split version + Analysis: Horizon: Seasonal Variables: diff --git a/modules/data_load/testing_recipes/recipe_3.yml b/modules/data_load/testing_recipes/recipe_3.yml new file mode 100644 index 0000000000000000000000000000000000000000..ed82f43c9b34971c8692833a0d77b49492d32b86 --- /dev/null +++ b/modules/data_load/testing_recipes/recipe_3.yml @@ -0,0 +1,43 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: system5c3s + Multimodel: no + Reference: + name: era5 + Time: + sdate: + fcst_syear: '2020' + fcst_sday: '1101' + hcst_start: '1993' + hcst_end: '1995' + leadtimemin: 0 + leadtimemax: 0 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: SBC + Skill: + metric: RPSS + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/modules/test.R b/modules/test.R index 9bd30de4d9dfddc4f90fd7972a398914bc63fd56..744f6ecdfff443da670f147a199a6c54cc5a21d9 100644 --- a/modules/test.R +++ b/modules/test.R @@ -1,31 +1,37 @@ - - +recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml" recipe_file <- "modules/data_load/testing_recipes/recipe_2.yml" +recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml" + source("modules/data_load/load.R") data <- load_datasets(recipe_file) -hcst <- data$hcst -obs <- data$obs - method <- "bias" mm=F ncores=4 na.rm=T # Hcst calibration -hcst <- CSTools::CST_Calibration(hcst,obs, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - alpha = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - - +#hcst <- CSTools::CST_Calibration(data$hcst, data$obs, +# cal.method = method, +# eval.method = "leave-one-out", +# multi.model = mm, +# na.fill = TRUE, +# na.rm = na.rm, +# apply_to = NULL, +# alpha = NULL, +# memb_dim = "ensemble", +# sdate_dim = "syear", +# ncores = ncores) + +# For daily +hcst <- CSTools::CST_QuantileMapping(data$hcst, + data$obs, + exp_cor = NULL, + sample_dims = c("syear", "time", "ensemble"), + sample_length = NULL, + method = "QUANT", + ncores = ncores, + n.rm = na.rm) diff --git a/out-logs/ini b/out-logs/ini deleted file mode 100755 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 06f8270fcfd89ddc3412777536cb4149334e8cf8..5efa87d4a9f09816afd29828679e1da917dd50e1 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -31,6 +31,8 @@ prepare_outputs <- function(recipe) { # Create output folders: folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$filename)), "_", gsub(" ", "", gsub(":", "", gsub("-", "", Sys.time())))) + + print(output_dir) print(folder_name) dir.create(file.path(output_dir, folder_name, 'plots'), recursive = TRUE)