diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 9f85c4394b4a5f084684d7e9d0216f383fbfa3a9..62fc4de4d57d74b3f0362d54172377d75d3f9926 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -2,6 +2,7 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") # Load required libraries/funs source("modules/Loading/dates2load.R") +source("modules/Loading/check_latlon.R") source("tools/libs.R") # RECIPE FOR TESTING diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 97b18c01fbab373a280a185d5fb6b916ef8c6686..fe615594d41f61c80595524cc88c5775ce1e3302 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -8,13 +8,14 @@ setwd('/esarchive/scratch/aho/git/auto-s2s/') source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R") # Load required libraries/funs -source("modules/data_load/dates2load.R") +source("modules/Loading/get_daily_time_ind.R") +source("modules/Loading/check_latlon.R") source("tools/libs.R") #==================================================================== -# recipe_file <- "modules/data_load/testing_recipes/recipe_decadal.yml" -# recipe_file <- "modules/data_load/testing_recipes/recipe_decadal_daily.yml" +# recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml" +# recipe_file <- "modules/Loading/testing_recipes/recipe_decadal_daily.yml" load_datasets <- function(recipe_file) { @@ -38,14 +39,16 @@ load_datasets <- function(recipe_file) { # change to: sdates <- dates2load(recipe, logger) sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015 sdates_fcst <- as.numeric(recipe$Analysis$Time$fcst) - - time <- as.numeric(recipe$Analysis$Time$leadtimemin):as.numeric(recipe$Analysis$Time$leadtimemax) - #TODO: daily data - # If daily data & time is "month", need a function to calculate the indices. - # E.g., time = 1:4 means Dec to March, then indices will be c(31:(31+30+31+28+31)) for non-leap yr and c(31:(31+30+31+28), (31+30+31+28+2):(31+30+31+28+2+30)) for leap yr. - if (store.freq == "daily_mean") { - # for 1993-1994, Dec to March - time <- c(31:(31+31+31+28+30)) + + #NOTE: time in recipe starts from 0, but in Start() it starts from 1 + if (store.freq == "monthly_mean") { + time <- (as.numeric(recipe$Analysis$Time$leadtimemin):as.numeric(recipe$Analysis$Time$leadtimemax)) + 1 + } else if (store.freq == "daily_mean") { + time <- get_daily_time_ind(leadtimemin = as.numeric(recipe$Analysis$Time$leadtimemin), + leadtimemax = as.numeric(recipe$Analysis$Time$leadtimemax), + initial_month = archive$System[[exp.name]]$initial_month, + sdates = sdates_hcst, + calendar = archive$System[[exp.name]][[store.freq]]$calendar) } #NOTE: May be used in the future @@ -116,9 +119,27 @@ load_datasets <- function(recipe_file) { time = c('syear', 'chunk')), retrieve = T) + tmp_time_attr <- attr(hcst, 'Variables')$common$time + + # If daily data with 0229, need to remove 0229 and extra time + #TODO: Make Start() able to read the correct time indices for each sdate, so we + # don't need this awkward correction. + if (store.freq == "daily_mean" & any(format(tmp_time_attr, "%m%d") == "0229")) { + # Save hcst attributes + hcst_attr <- attributes(hcst)[-1] # without $dim + new_hcst <- Apply(hcst, + target_dims = c('syear', 'time'), + output_dims = list(data = c('syear', 'time')), + fun = correct_daily_for_leap, + time_attr = tmp_time_attr, return_time = F)$data + hcst <- Reorder(new_hcst, names(dim(hcst))) + attributes(hcst) <- c(attributes(hcst), hcst_attr) + tmp_time_attr <- correct_daily_for_leap(data = NULL, time_attr = tmp_time_attr, return_time = T)$time_attr + attr(hcst, 'Variables')$common$time <- tmp_time_attr + } + # change syear to c(sday, sweek, syear) dim(hcst) <- c(dim(hcst)[1:3], sday = 1, sweek = 1, dim(hcst)[4:7]) - tmp_time_attr <- attr(hcst, 'Variables')$common$time if (!identical(dim(tmp_time_attr), dim(hcst)[6:7])) { stop("hcst has problem in matching data and time attr dimension.") } @@ -183,6 +204,8 @@ load_datasets <- function(recipe_file) { stop("hcst and fcst do not share the same dimension structure.") } + } else { + fcst <- NULL } #------------------------------------------- @@ -320,7 +343,26 @@ load_datasets <- function(recipe_file) { as.vector(obs$lon))) stop("hcst and obs don't share the same longitude.") - + # Check fcst + if (!is.null(fcst)) { + # dimension + if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { + stop("hcst and fcst don't share the same dimension names.") + } else { + ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) + match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) + if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) + stop("hcst and fcst don't share the same dimension length.") + } + + # lat and lon attributes + if (!identical(as.vector(hcst$lat), + as.vector(fcst$lat))) + stop("hcst and fcst don't share the same latitude.") + if (!identical(as.vector(hcst$lon), + as.vector(fcst$lon))) + stop("hcst and fcst don't share the same longitude.") + } return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/tools/check_latlon.R b/modules/Loading/check_latlon.R similarity index 100% rename from tools/check_latlon.R rename to modules/Loading/check_latlon.R diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/get_daily_time_ind.R new file mode 100644 index 0000000000000000000000000000000000000000..3e41cddb6e64c923825cd9297aba103477b51e6e --- /dev/null +++ b/modules/Loading/get_daily_time_ind.R @@ -0,0 +1,98 @@ +# This function is for decadal-daily data loading. +# The time input (leadtimemin & leadtimemax) units in the recipe is month. For example, +# leadtimemin = 0 means the first month, and leadtimemax = 2 means the 3rd month. +# For daily data, if initial month is November, leadtimemin = 0 equals to c(1:30), +# and leadtimemax = 2, which means January next year, equals to c(31+31:31+31+30). +get_daily_time_ind <- function(leadtimemin, leadtimemax, initial_month, sdates, calendar) { + #TODO: calendar & leap year + # E.g., time = 1:4 means Dec to March, then indices will be c(31:(31+30+31+28+31)) for non-leap yr and c(31:(31+30+31+28), (31+30+31+28+2):(31+30+31+28+2+30)) for leap yr. Calendar needs to be considered too. + + if (!calendar %in% c('360-day', '365_day', 'noleap', 'standard', 'proleptic_gregorian', + 'gregorian')) + stop("The calendar is not recognized. Please contact maintainers.") + + total_months <- (initial_month + leadtimemin):(initial_month + leadtimemax) + total_months <- ifelse(total_months > 12, total_months %% 12, total_months) + total_months <- ifelse(total_months == 0, 12, total_months) + + if (calendar == '360-day') { + daily_time_ind <- 1:(30 * length(total_months)) + + } else { + + total_days_sum <- sum(days_in_month(total_months)) + if (leadtimemin == 0) { + daily_time_ind <- 1:total_days_sum + } else { + before_start_months <- initial_month:(initial_month + leadtimemin - 1) + before_start_months <- ifelse(before_start_months > 12, before_start_months %% 12, before_start_months) + before_start_months <- ifelse(before_start_months == 0, 12, before_start_months) + before_start_total_days_sum <- sum(days_in_month(before_start_months)) + daily_time_ind <- (before_start_total_days_sum + 1):(before_start_total_days_sum + total_days_sum) + } + + + if (calendar %in% c('365_day', 'noleap')) { + # daily_time_ind + } else if (calendar %in% c('standard', 'proleptic_gregorian', 'gregorian')) { + +# leap <- leap_year(sdates) + sdates_full <- ymd(paste0(sdates, sprintf('%02d',initial_month), '01')) + idx_min <- sdates_full + months(leadtimemin) + idx_max <- sdates_full + months(leadtimemax + 1) - days(1) + + day0229 <- rep(NA, length(sdates)) + for (i_sdate in 1:length(sdates_full)) { + days <- seq(idx_min[i_sdate],idx_max[i_sdate], by = 'days') + day0229[i_sdate] <- sum(format(days, "%m%d") == "0229") + } + + if (all(day0229 == 0)) { # no 0229 + # daily_time_ind + } else { + # get extra indices and remove 0229 & extra indices after retrieval + #TODO: Make Start() can handle time as a list or array for this case + daily_time_ind <- c(daily_time_ind, + (tail(daily_time_ind, 1) + 1):((tail(daily_time_ind, 1) + 1) + (max(day0229) - 1))) + } + + } + } + + return(daily_time_ind) +} + + +correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) { + # data & time_attr: [syear, time] + #NOTE: return_time = TRUE, only return time_attr; FALSE, only return data + + new_tmp_time_attr <- array(NA, dim = dim(time_attr)) + if (!is.null(data)) { + new_data <- array(NA, dim = dim(time_attr)) + } else { + new_data <- NULL + } + for (i_sdate in 1:dim(time_attr)[1]) { + ind_not_0229 <- !(format(time_attr[i_sdate, ], "%m%d") == "0229") + new_tmp_time_attr[i_sdate, 1:sum(ind_not_0229)] <- time_attr[i_sdate, ][ind_not_0229] + if (!is.null(data)) { + new_data[i_sdate, 1:sum(ind_not_0229)] <- data[i_sdate, ][ind_not_0229] + } + } + real_time_length <- min(which(is.na(new_tmp_time_attr),arr.ind = T)[, 2]) - 1 + new_tmp_time_attr <- new_tmp_time_attr[, 1:real_time_length] + new_tmp_time_attr <- as.POSIXct(new_tmp_time_attr, tz = 'UTC', origin = '1970-01-01') + if (!is.null(data)) { + new_data <- new_data[, 1:real_time_length] + } + + if (any(!new_tmp_time_attr %in% time_attr | any(is.na(new_tmp_time_attr)))) { + stop("Wrong time attribute modification.") + } + if (return_time) { + return(list(time_attr = new_tmp_time_attr)) + } else { + return(list(data = new_data)) + } +} diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index b7b6a10978c4e2f58cfc14bbfd9be04b7522ff8d..0661fb7bf9e2f65a3afee2bb5fdc581ec9be42b1 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -20,8 +20,8 @@ Analysis: hcst_start: 1990 #'1993' hcst_end: 1992 #'2016' # season: 'Annual' - leadtimemin: 1 - leadtimemax: 14 + leadtimemin: 0 + leadtimemax: 13 Region: latmin: 10 #-90 latmax: 20 #90 diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index b314303595441b473def8d0bc5e869d114099c84..a025dca345e030f6d5c94797ad327cabcd1db313 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -15,12 +15,12 @@ Analysis: name: ERA5 Time: sdate: - fcst: 2021 + fcst: # fcst_sday: '1101' - hcst_start: 1993 - hcst_end: 1994 + hcst_start: 1990 + hcst_end: 1992 season: 'Annual' - leadtimemin: 1 + leadtimemin: 2 leadtimemax: 4 Region: latmin: 10 #-90 diff --git a/tools/libs.R b/tools/libs.R index 020082c06706c6171b5ec87f7bafc5953a139de0..317286508f6929bb3258443ebf5eec29867dbd51 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -28,5 +28,4 @@ source("tools/check_recipe.R") source("tools/prepare_outputs.R") source("tools/divide_recipe.R") source("tools/data_summary.R") -source("tools/check_latlon.R") # source("tools/add_dims.R") # Not sure if necessary yet