From cd10405a42d191bf04e98138ed0eac9db909a9e3 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 16 Jul 2024 12:11:21 +0200 Subject: [PATCH 1/5] Unify multipath = TRUE and multipath = FALSE options for decadal loading (WIP) --- modules/Loading/R/helper_loading_decadal.R | 66 ++++++---------------- modules/Loading/R/load_decadal.R | 45 +++------------ 2 files changed, 26 insertions(+), 85 deletions(-) diff --git a/modules/Loading/R/helper_loading_decadal.R b/modules/Loading/R/helper_loading_decadal.R index b93f3279..444739a3 100644 --- a/modules/Loading/R/helper_loading_decadal.R +++ b/modules/Loading/R/helper_loading_decadal.R @@ -109,57 +109,25 @@ correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) { # table, grid, version: A list with variables as name. E.g., list(tas = 'Amon') get_dcpp_path <- function(archive, exp.name, table, grid, version, sdates) { - # Define path (monthly and daily) - multi_path <- FALSE - if (is.null(archive$System[[exp.name]]$src$first_dcppB_syear) | - isTRUE(all(sdates < archive$System[[exp.name]]$src$first_dcppB_syear))) { # only dcppA - if (length(table) == 1) { # only one variable - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, - '$ensemble$', table, '$var$', grid, version) - fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - } else { # multiple vars - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, - '$ensemble$', '$table$', '$var$', '$grid$', '$version$') - fcst.files <- paste0('$var$_', '$table$', '_*_dcppA-hindcast_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') - } - path_list <- file.path(fcst.path, fcst.files) + if (length(table) == 1) { # only one variable + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', table, '$var$', grid, version) + fcst.files <- paste0('$var$_', table, '_*_$dcpp$_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + } else { # multiple vars + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', '$table$', '$var$', '$grid$', '$version$') + fcst.files <- paste0('$var$_', '$table$', '_*_$dcpp$_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') + } + path_list <- file.path(fcst.path, fcst.files) - } else { - if (all(sdates >= archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppB - if (length(table) == 1) { # only one variable - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, - '$ensemble$', table, '$var$', grid, version) - - fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - } else { - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, - '$ensemble$', '$table$', '$var$', '$grid$', '$version$') - fcst.files <- paste0('$var$_', '$table$', '_*_dcppB-forecast_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') - } - path_list <- file.path(fcst.path, fcst.files) - - } else { # have both dcppA and dcppB - # Create one path for each sdate - #TODO: When *_depends = 'syear' can be more than one, use one path with $dcpp$ - multi_path <- TRUE - path_list <- vector('list', length = length(sdates)) - for (i_sdate in 1:length(sdates)) { - if (sdates[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear) { - path_list[[i_sdate]] <- - list(path = file.path(archive$src, archive$System[[exp.name]]$src$fcst, - '$ensemble$', table, '$var$', grid, #version, - paste0('v*/$var$_', table, '_*_dcppB-forecast_s', sdates[i_sdate], - '-$ensemble$_', grid, '_$chunk$.nc'))) - } else { - path_list[[i_sdate]] <- - list(path = file.path(archive$src, archive$System[[exp.name]]$src$hcst, - '$ensemble$', table, '$var$', grid, #version, - paste0('v*/$var$_', table, '_*_dcppA-hindcast_s', sdates[i_sdate], - '-$ensemble$_', grid, '_$chunk$.nc'))) - } - } + dcpp_list <- vector('list', length = length(sdates)) + for (i_sdate in 1:length(sdates)) { + if (sdates[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear) { + dcpp_list[[i_sdate]] <- "dcppB-forecast*" + } else { + dcpp_list[[i_sdate]] <- "dcppA-hindcast" } } - return(list(path_list = path_list, multi_path = multi_path)) + return(list(path_list = path_list, dcpp_list = dcpp_list)) } diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index d3b4f439..ea890014 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -88,6 +88,7 @@ load_decadal <- function(recipe) { version = version, sdates = sdates_hcst) path_list <- tmp$path_list multi_path <- tmp$multi_path + dcpp_list <- tmp$dcpp_list #TODO: to make this case work; enhance Start() if it's possible if (multi_path & length(variable) > 1) { @@ -100,6 +101,8 @@ load_decadal <- function(recipe) { syear = paste0(sdates_hcst), chunk = 'all', chunk_depends = 'syear', + dcpp = dcpp_list, + dcpp_depends = 'syear', time = indices(time_ind), time_across = 'chunk', merge_across_dims = TRUE, @@ -129,38 +132,13 @@ load_decadal <- function(recipe) { metadata_dims = 'var')) } - if (!multi_path) { - Start_hcst_arg_list <- Start_default_arg_list - hcst <- do.call(Start, Start_hcst_arg_list) + Start_hcst_arg_list <- Start_default_arg_list + hcst <- do.call(Start, Start_hcst_arg_list) - } else { - Start_hcst_arg_list <- Start_default_arg_list - Start_hcst_arg_list[['syear']] <- NULL - Start_hcst_arg_list[['chunk_depends']] <- NULL - remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear') - Start_hcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] - - hcst <- do.call(Start, Start_hcst_arg_list) - - # Reshape and reorder dimensions - ## dat should be 1, syear should be length of dat; reorder dimensions - dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) - hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) - - # Manipulate time attr because Start() cannot read it correctly - wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only - tmp <- array(dim = c(dim(hcst)[c('syear', 'time')])) - tmp[1, ] <- wrong_time_attr - yr_diff <- (sdates_hcst - sdates_hcst[1])[-1] #diff(sdates_hcst) - for (i_syear in 1:length(yr_diff)) { - tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) - } - attr(hcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') - - } - tmp_time_attr <- attr(hcst, 'Variables')$common$time - + + ## TODO: Remove extra dcpp dimension + ## TODO: Remove this part? # change syear to c(sday, sweek, syear) # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) @@ -171,11 +149,6 @@ load_decadal <- function(recipe) { } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) - #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 - if (multi_path) { - attributes(hcst)$Variables$common[[variable]] <- attributes(hcst)$Variables$dat1[[variable]] - } - # Change class from startR_array to s2dv_cube suppressWarnings( hcst <- as.s2dv_cube(hcst) @@ -191,7 +164,7 @@ load_decadal <- function(recipe) { path_list <- tmp$path_list multi_path <- tmp$multi_path - #TODO: to make this case work; enhance Start() if it's possible + ## TODO: Try this case if (multi_path & length(variable) > 1) { stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") } -- GitLab From e1b89c175b1c9d3b90681c5eaf85a0a67db7bba5 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 16 Jul 2024 16:12:37 +0200 Subject: [PATCH 2/5] Merge multi_path = TRUE and multi_path = FALSE for decadal loading --- conf/archive_decadal.yml | 16 +++++ modules/Loading/Loading.R | 4 ++ modules/Loading/R/helper_loading_decadal.R | 25 +++++-- modules/Loading/R/load_decadal.R | 76 +++++++--------------- 4 files changed, 63 insertions(+), 58 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 0697fc3f..da889a08 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -8,6 +8,7 @@ esarchive: src: hcst: "exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: + startR: "exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" monthly_mean: #NOTE: tos is under both Amon and Omon --> wait to be changed table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tos":["Amon", "Omon"]} @@ -31,6 +32,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: + startR: "exp/CMIP6/$dcpp$/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" monthly_mean: table: {"tas":"Amon"} grid: {"tas":"gr"} @@ -52,6 +54,7 @@ esarchive: src: hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" + startR: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" first_dcppB_syear: 2021 # hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" # fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" @@ -91,6 +94,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast/" + startR: "exp/CMIP6/$dcpp$/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/$dcpp$/" first_dcppB_syear: 2019 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"} @@ -113,6 +117,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/" fcst: + startR: "exp/CMIP6/$dcpp$/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn"} @@ -135,6 +140,7 @@ esarchive: src: hcst: "exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/" fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/" + startR: "exp/canesm5/cmip6-$dcpp$/original_files/cmorfiles/DCPP/CCCma/CanESM5/$dcpp$/" first_dcppB_syear: 2020 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon", "tos":"Omon"} @@ -158,6 +164,7 @@ esarchive: src: hcst: "exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast" fcst: + startR: "exp/ncar/cesm-dple-$dcpp$/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon"} grid: {"tas":"gn", "pr":"gn"} @@ -180,6 +187,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppB-forecast/" + startR: "exp/CMIP6/$dcpp$/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/$dcpp$/" first_dcppB_syear: 2020 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "prc":"Amon", "ts":"Amon"} @@ -201,6 +209,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppB-forecast/" + startR: "exp/CMIP6/$dcpp$/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/$dcpp$/" first_dcppB_syear: 2017 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tos":"Omon"} @@ -223,6 +232,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/" fcst: + startR: "exp/CMIP6/$dcpp$/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "sfcWind":"Amon"} grid: {"tas":"gr", "pr":"gr", "psl":"gr", "sfcWind":"gr"} @@ -243,6 +253,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" fcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" + startR: "exp/CMIP6/$dcpp$/MIROC6/DCPP/MIROC/MIROC6/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} @@ -263,6 +274,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/" fcst: + startR: "exp/CMIP6/$dcpp$/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} @@ -283,6 +295,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/dcppA-hindcast/" fcst: + startR: "exp/CMIP6/$dcpp$/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn", "ts":"gn"} @@ -303,6 +316,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/dcppA-hindcast/" fcst: + startR: "exp/CMIP6/$dcpp$/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn"} @@ -324,6 +338,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: + startR: "exp/CMIP6/$dcpp$/NorCPM1/DCPP/NCC/NorCPM1/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn"} @@ -344,6 +359,7 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: + startR: "exp/CMIP6/$dcpp$/NorCPM1/DCPP/NCC/NorCPM1/$dcpp$/" monthly_mean: table: {"pr":"Amon", "psl":"Amon"} grid: {"pr":"gn", "psl":"gn"} diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 6e219bf1..f5361218 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -1,6 +1,10 @@ source("tools/libs.R") Loading <- function(recipe) { + ## TODO: remove + path <- "/esarchive/scratch/vagudets/repos/startR/R/" + ff <- lapply(list.files(path), function(x) paste0(path, x)) + invisible(lapply(ff, source)) # Source correct function depending on filesystem and time horizon # Case: CERISE (Mars) if (tolower(recipe$Run$filesystem) == "mars") { diff --git a/modules/Loading/R/helper_loading_decadal.R b/modules/Loading/R/helper_loading_decadal.R index 444739a3..4533100f 100644 --- a/modules/Loading/R/helper_loading_decadal.R +++ b/modules/Loading/R/helper_loading_decadal.R @@ -109,21 +109,36 @@ correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) { # table, grid, version: A list with variables as name. E.g., list(tas = 'Amon') get_dcpp_path <- function(archive, exp.name, table, grid, version, sdates) { + if (length(table) == 1) { # only one variable - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$startR, '$ensemble$', table, '$var$', grid, version) fcst.files <- paste0('$var$_', table, '_*_$dcpp$_s$syear$-$ensemble$_', grid, '_$chunk$.nc') } else { # multiple vars - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$startR, '$ensemble$', '$table$', '$var$', '$grid$', '$version$') fcst.files <- paste0('$var$_', '$table$', '_*_$dcpp$_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') } path_list <- file.path(fcst.path, fcst.files) - + dcppB_string <- "dcppB-forecast" + ## NOTE: CanESM5 has a 'special' path pattern... Adding the global expression '*' + ## only works if Start() is loading both dcppA and dcppB. For dcppB only, + ## it crashes. + if (exp.name == "CanESM5") { + if (all(sdates >= archive$System[[exp.name]]$src$first_dcppB_syear)) { + # Replace first instance of $dcpp$ with the actual subdirectory name + path_list <- str_replace(path_list, fixed("$dcpp$"), "dcppB-forecast_i1p2") + } else { + # Add global expression to the string so that it will find 'dcppB-forecast_i1p2' + dcppB_string <- paste0(dcppB_string, "*") + } + } dcpp_list <- vector('list', length = length(sdates)) + names(dcpp_list) <- as.character(sdates) for (i_sdate in 1:length(sdates)) { - if (sdates[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear) { - dcpp_list[[i_sdate]] <- "dcppB-forecast*" + if ((!is.null(archive$System[[exp.name]]$src$first_dcppB_syear)) && + (sdates[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear)) { + dcpp_list[[i_sdate]] <- dcppB_string } else { dcpp_list[[i_sdate]] <- "dcppA-hindcast" } diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index ea890014..a598afe0 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -16,7 +16,7 @@ load_decadal <- function(recipe) { archive <- read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] # Print Start() info or not - DEBUG <- FALSE + DEBUG <- TRUE ## TODO: this should come from the main script # Create output folder and log: @@ -27,7 +27,6 @@ load_decadal <- function(recipe) { exp.name <- recipe$Analysis$Datasets$System$name # 'HadGEM3' ref.name <- recipe$Analysis$Datasets$Reference$name # 'era5' member <- strsplit(recipe$Analysis$Datasets$System$member, ', | |,')[[1]] #c("r1i1p1f2", "r2i1p1f2") - # variable <- recipe$Analysis$Variables$name #'tas' variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] store.freq <- recipe$Analysis$Variables$freq #monthly_mean lats.min <- as.numeric(recipe$Analysis$Region$latmin) #0 @@ -77,9 +76,11 @@ load_decadal <- function(recipe) { regrid_params <- get_regrid_params(recipe, archive) # Only if the time length in each chunk may differ that we need largest_dims_length to be TRUE. Otherwise, set FALSE to increase efficiency. - need_largest_dims_length <- ifelse(exp.name %in% c('HadGEM3-GC31-MM', 'EC-Earth3-i2'), TRUE, FALSE) - + need_largest_dims_length <- ifelse(exp.name %in% c('HadGEM3-GC31-MM', 'EC-Earth3-i2'), TRUE, FALSE) + ## TODO: Remove if possible + multi_path <- FALSE + #------------------------------------------- # Step 1: Load the hcst #------------------------------------------- @@ -87,10 +88,9 @@ load_decadal <- function(recipe) { tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, version = version, sdates = sdates_hcst) path_list <- tmp$path_list - multi_path <- tmp$multi_path dcpp_list <- tmp$dcpp_list - #TODO: to make this case work; enhance Start() if it's possible + #TODO: test this case if (multi_path & length(variable) > 1) { stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") } @@ -137,11 +137,10 @@ load_decadal <- function(recipe) { tmp_time_attr <- attr(hcst, 'Variables')$common$time - ## TODO: Remove extra dcpp dimension ## TODO: Remove this part? # change syear to c(sday, sweek, syear) # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) + dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:length(dim(hcst))]) if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { error(recipe$Run$logger, "hcst has problem in matching data and time attr dimension.") @@ -149,6 +148,8 @@ load_decadal <- function(recipe) { } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) + # Remove 'dcpp' dimension: + hcst <- Subset(hcst, along = "dcpp", indices = 1, drop = "selected") # Change class from startR_array to s2dv_cube suppressWarnings( hcst <- as.s2dv_cube(hcst) @@ -162,67 +163,35 @@ load_decadal <- function(recipe) { tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, version = version, sdates = sdates_fcst) path_list <- tmp$path_list - multi_path <- tmp$multi_path + dcpp_list <- tmp$dcpp_list ## TODO: Try this case if (multi_path & length(variable) > 1) { - stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") + stop("The recipe requests multiple variables and start dates from both", + "dpccA-hindcast and dcppB-forecast. This case is not available for now.") } # monthly & daily - if (!multi_path) { - #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst - Start_fcst_arg_list <- Start_default_arg_list - Start_fcst_arg_list[['dat']] <- path_list - Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) - fcst <- do.call(Start, Start_fcst_arg_list) - - - } else { # multi_path + Start_fcst_arg_list <- Start_default_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) + Start_fcst_arg_list[['dcpp']] <- dcpp_list + fcst <- do.call(Start, Start_fcst_arg_list) - #TODO: time attribute is not correct. Improve Start(). - Start_fcst_arg_list <- Start_default_arg_list - Start_fcst_arg_list[['dat']] <- path_list - Start_fcst_arg_list[['syear']] <- NULL - Start_fcst_arg_list[['chunk_depends']] <- NULL - remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') - Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] - fcst <- do.call(Start, Start_fcst_arg_list) - - # Reshape and reorder dimensions - ## dat should be 1, syear should be length of dat; reorder dimensions - ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) - fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) - - # Manipulate time attr because Start() cannot read it correctly - wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only - tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) - tmp[1, ] <- wrong_time_attr - yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) - for (i_syear in 1:length(yr_diff)) { - tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) - } - attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') - - } - tmp_time_attr <- attr(fcst, 'Variables')$common$time # change syear to c(sday, sweek, syear) # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) + dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:length(dim(fcst))]) if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { error(recipe$Run$logger, "fcst has problem in matching data and time attr dimension.") stop() } dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) - - #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 - if (multi_path) { - attributes(fcst)$Variables$common[[variable]] <- attributes(fcst)$Variables$dat1[[variable]] - } + + # Remove 'dcpp' dimension: + fcst <- Subset(fcst, along = "dcpp", indices = 1, drop = "selected") # Change class from startR_array to s2dv_cube suppressWarnings( @@ -230,7 +199,8 @@ load_decadal <- function(recipe) { ) # Only syear could be different - if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { + syear_dim <- which(names(dim(hcst$data)) == 'syear') + if (!identical(dim(hcst$data)[-syear_dim], dim(fcst$data)[-syear_dim])) { error(recipe$Run$logger, "hcst and fcst do not share the same dimension structure.") stop() -- GitLab From 11f089acf5051661622db34720a2d3329ca87598 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 16 Jul 2024 16:50:19 +0200 Subject: [PATCH 3/5] Minor change --- modules/Loading/R/load_decadal.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index a598afe0..d3ce1e1e 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -238,7 +238,7 @@ load_decadal <- function(recipe) { lubridate::minute(dates) <- 00 # Restore correct dimensions dim(dates) <- dim(dates_file) - + obs <- Start(dat = obs.path, var = variable, var_dir = var_dir_obs, @@ -268,7 +268,7 @@ load_decadal <- function(recipe) { #//////////////// # Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector #//////////////// - + obs <- Start(dat = obs.path, var = variable, var_dir = var_dir_obs, -- GitLab From bd3d8162e87dbef264c7eebd73852c32f90613d4 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 17 Jul 2024 17:07:34 +0200 Subject: [PATCH 4/5] Update archive and adapt for multivariable and daily case --- conf/archive_decadal.yml | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index da889a08..a2f340e2 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -390,10 +390,34 @@ esarchive: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" - monthly_mean: {"tas":"_f1h-r1440x721cds", "prlr":"_f1h-r1440x721cds", "psl":"_f1h-r1440x721cds", "tos":"_f1h-r1440x721cds"} - daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/", - "tos":"_f1h-r1440x721cds"} + daily_mean: {"tas":"daily_mean/tas_f1h-r1440x721cds/", + "rsds":"daily_mean/rsds_f1h-r1440x721cds/", + "prlr":"daily_mean/prlr_f1h-r1440x721cds/", + "g300":"daily_mean/g300_f1h-r1440x721cds/", + "g500":"daily_mean/g500_f1h-r1440x721cds/", + "g850":"daily_mean/g850_f1h-r1440x721cds/", + "sfcWind":"daily_mean/sfcWind_f1h-r1440x721cds/", + "tasmax":"daily/tasmax-r1440x721cds/", + "tasmin":"daily/tasmin-r1440x721cds/", + "ta300":"daily_mean/ta300_f1h-r1440x721cds/", + "ta500":"daily_mean/ta500_f1h-r1440x721cds/", + "ta850":"daily_mean/ta850_f1h-r1440x721cds/", + "hurs":"daily_mean/hurs_f1h-r1440x721cds/"} + monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + "rsds":"monthly_mean/rsds_f1h-r1440x721cds/", + "g300":"monthly_mean/g300_f1h-r1440x721cds/", + "g500":"monthly_mean/g500_f1h-r1440x721cds/", + "g850":"monthly_mean/g850_f1h-r1440x721cds/", + "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/", + "tasmax":"monthly_mean/tasmax_f1h-r1440x721cds/", + "tasmin":"monthly_mean/tasmin_f1h-r1440x721cds/", + "ta300":"montly_mean/ta300_f1h-r1440x721cds/", + "ta500":"monthly_mean/ta500_f1h-r1440x721cds/", + "ta850":"monthly_mean/ta850_f1h-r1440x721cds/", + "tos":"monthly_mean/tos_f1h-r1440x721cds/", + "sic":"monthly_mean/sic_f1h-r1440x721cds/"} calendar: "gregorian" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" @@ -406,8 +430,11 @@ esarchive: name: "JRA-55" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/jma/jra55/" - monthly_mean: {"tas":"_f6h", "psl":"_f6h", "tos":"", "pr":"_s0-3h", "prlr":"_s0-3h"} - daily_mean: {"tas":"_f6h", "psl":"_f6h", "prlr":"_s0-3h", "sfcWind":"_f6h"} + monthly_mean: {"tas":"monthly_mean/tas_f6h", "psl":"monthly_mean/psl_f6h", + "tos":"", "pr":"monthly_mean/pr_s0-3h", + "prlr":"monthly_mean/prlr_s0-3h"} + daily_mean: {"tas":"daily_mean/tas_f6h", "psl":"daily_mean/psl_f6h", + "prlr":"daily_mean/prlr_s0-3h", "sfcWind":"daily_mean/sfcWind_f6h"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/jma/jra55/monthly_mean/tas_f6h/tas_200811.nc" -- GitLab From 4fefe80c955dff194aa149b8d71949c8a7efad60 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 17 Jul 2024 17:07:54 +0200 Subject: [PATCH 5/5] Clean code; modifications for daily case and multivariable case --- modules/Loading/R/helper_loading_decadal.R | 11 +++-- modules/Loading/R/load_decadal.R | 57 ++++++++++------------ 2 files changed, 32 insertions(+), 36 deletions(-) diff --git a/modules/Loading/R/helper_loading_decadal.R b/modules/Loading/R/helper_loading_decadal.R index 4533100f..9b71c94b 100644 --- a/modules/Loading/R/helper_loading_decadal.R +++ b/modules/Loading/R/helper_loading_decadal.R @@ -12,7 +12,7 @@ get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calendar) { #NOTE: "sdates" is not needed if leap year is not considered - if (!calendar %in% c('360-day', '365_day', 'noleap', 'standard', 'proleptic_gregorian', + if (!calendar %in% c('360_day', '365_day', 'noleap', 'standard', 'proleptic_gregorian', 'gregorian')) stop("The calendar is not recognized. Please contact maintainers.") @@ -109,12 +109,13 @@ correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) { # table, grid, version: A list with variables as name. E.g., list(tas = 'Amon') get_dcpp_path <- function(archive, exp.name, table, grid, version, sdates) { - - if (length(table) == 1) { # only one variable + if (length(table) == 1) { + # only one variable, or the variables share a common path pattern fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$startR, - '$ensemble$', table, '$var$', grid, version) + '$ensemble$', table, '$var$', grid, 'v*') fcst.files <- paste0('$var$_', table, '_*_$dcpp$_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - } else { # multiple vars + } else { + # path pattern depends on the variable fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$startR, '$ensemble$', '$table$', '$var$', '$grid$', '$version$') fcst.files <- paste0('$var$_', '$table$', '_*_$dcpp$_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index d3ce1e1e..bfd28979 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -39,14 +39,14 @@ load_decadal <- function(recipe) { sdates_fcst <- recipe$Analysis$Time$fcst if (store.freq == "monthly_mean") { - time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) - - } else if (store.freq == "daily_mean") { + time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + } else if (store.freq %in% c("daily", "daily_mean")) { time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), initial_month = archive$System[[exp.name]]$initial_month, sdates = sdates_hcst, calendar = archive$System[[exp.name]]$calendar) + store.freq <- "daily_mean" } #NOTE: May be used in the future @@ -57,10 +57,13 @@ load_decadal <- function(recipe) { #------------------------- if (store.freq == "monthly_mean") { table <- archive$System[[exp.name]][[store.freq]]$table[variable] #list(tas = 'Amon') + grid <- archive$System[[exp.name]][[store.freq]]$grid[variable] #list(tas = 'gr') } else { table <- 'day' + # For grid, get first element as they are all the same? + grid <- archive$System[[exp.name]][[store.freq]]$grid[variable][[1]] } - grid <- archive$System[[exp.name]][[store.freq]]$grid[variable] #list(tas = 'gr') + # grid <- archive$System[[exp.name]][[store.freq]]$grid[variable] #list(tas = 'gr') version <- archive$System[[exp.name]][[store.freq]]$version[variable] #list(tas = 'v20210910') if (identical(member, 'all')) { member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]] @@ -78,9 +81,6 @@ load_decadal <- function(recipe) { # Only if the time length in each chunk may differ that we need largest_dims_length to be TRUE. Otherwise, set FALSE to increase efficiency. need_largest_dims_length <- ifelse(exp.name %in% c('HadGEM3-GC31-MM', 'EC-Earth3-i2'), TRUE, FALSE) - ## TODO: Remove if possible - multi_path <- FALSE - #------------------------------------------- # Step 1: Load the hcst #------------------------------------------- @@ -90,11 +90,6 @@ load_decadal <- function(recipe) { path_list <- tmp$path_list dcpp_list <- tmp$dcpp_list - #TODO: test this case - if (multi_path & length(variable) > 1) { - stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") - } - Start_default_arg_list <- list( dat = path_list, var = variable, @@ -112,12 +107,12 @@ load_decadal <- function(recipe) { longitude = values(list(lons.min, lons.max)), longitude_reorder = circularsort, ensemble = member, + path_glob_permissive = 2, # for version transform = regrid_params$fcst.transform, transform_extra_cells = 2, transform_params = list(grid = regrid_params$fcst.gridtype, method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), - # path_glob_permissive = 2, # for version synonims = list(longitude = c('lon', 'longitude'), latitude = c('lat', 'latitude')), return_vars = list(latitude = NULL, longitude = NULL, @@ -125,13 +120,14 @@ load_decadal <- function(recipe) { silent = !DEBUG, retrieve = T) - if (length(variable) > 1) { + if (length(table) > 1) { Start_default_arg_list <- c(Start_default_arg_list, list(table = table, grid = grid, version = version, table_depends = 'var', grid_depends = 'var', version_depends = 'var', metadata_dims = 'var')) + Start_default_arg_list[["path_glob_permissive"]] <- FALSE } - + Start_hcst_arg_list <- Start_default_arg_list hcst <- do.call(Start, Start_hcst_arg_list) @@ -148,8 +144,13 @@ load_decadal <- function(recipe) { } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) - # Remove 'dcpp' dimension: - hcst <- Subset(hcst, along = "dcpp", indices = 1, drop = "selected") + # Remove 'dcpp' and other extra dimensions: + if (length(table) > 1) { + hcst <- Subset(hcst, along = c("dcpp", "table", "grid", "version"), + indices = list(1, 1, 1, 1), drop = "selected") + } else { + hcst <- Subset(hcst, along = "dcpp", indices = 1, drop = "selected") + } # Change class from startR_array to s2dv_cube suppressWarnings( hcst <- as.s2dv_cube(hcst) @@ -165,12 +166,6 @@ load_decadal <- function(recipe) { path_list <- tmp$path_list dcpp_list <- tmp$dcpp_list - ## TODO: Try this case - if (multi_path & length(variable) > 1) { - stop("The recipe requests multiple variables and start dates from both", - "dpccA-hindcast and dcppB-forecast. This case is not available for now.") - } - # monthly & daily Start_fcst_arg_list <- Start_default_arg_list Start_fcst_arg_list[['dat']] <- path_list @@ -190,8 +185,13 @@ load_decadal <- function(recipe) { } dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) - # Remove 'dcpp' dimension: - fcst <- Subset(fcst, along = "dcpp", indices = 1, drop = "selected") + # Remove 'dcpp' and any other extra dimensions: + if (length(table) > 1) { + fcst <- Subset(fcst, along = c("dcpp", "table", "grid", "version"), + indices = list(1, 1, 1, 1), drop = "selected") + } else { + fcst <- Subset(fcst, along = "dcpp", indices = 1, drop = "selected") + } # Change class from startR_array to s2dv_cube suppressWarnings( @@ -214,7 +214,7 @@ load_decadal <- function(recipe) { # Step 3. Load the reference #------------------------------------------- obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, - store.freq, "$var$$var_dir$", "$var$_$file_date$.nc") + "$var_dir$", "$var$_$file_date$.nc") var_dir_obs <- archive$Reference[[ref.name]][[store.freq]][variable] # list(tas = "_f1h-r1440x721cds", tos = "_f1h-r1440x721cds") # obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, @@ -293,11 +293,6 @@ load_decadal <- function(recipe) { retrieve = TRUE) } - - #dim(attr(obs, 'Variables')$common$time) - # sday sweek syear time - # 1 1 2 14 - # Remove var_dir dimension obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") -- GitLab