From 16442f959ce6dcad362a68393fc7c913d6b8f971 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 29 Jul 2024 16:20:37 +0200 Subject: [PATCH] Revert "Merge branch 'dev-Loading_decadal_multipath' into 'master'" This reverts merge request !147 --- conf/archive_decadal.yml | 55 +------- modules/Loading/Loading.R | 4 - modules/Loading/R/helper_loading_decadal.R | 84 ++++++----- modules/Loading/R/load_decadal.R | 154 +++++++++++++++------ 4 files changed, 164 insertions(+), 133 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index a2f340e2..0697fc3f 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -8,7 +8,6 @@ esarchive: src: hcst: "exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: - startR: "exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" monthly_mean: #NOTE: tos is under both Amon and Omon --> wait to be changed table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tos":["Amon", "Omon"]} @@ -32,7 +31,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: - startR: "exp/CMIP6/$dcpp$/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" monthly_mean: table: {"tas":"Amon"} grid: {"tas":"gr"} @@ -54,7 +52,6 @@ esarchive: src: hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" - startR: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" first_dcppB_syear: 2021 # hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" # fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" @@ -94,7 +91,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast/" - startR: "exp/CMIP6/$dcpp$/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/$dcpp$/" first_dcppB_syear: 2019 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"} @@ -117,7 +113,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/" fcst: - startR: "exp/CMIP6/$dcpp$/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn"} @@ -140,7 +135,6 @@ esarchive: src: hcst: "exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/" fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/" - startR: "exp/canesm5/cmip6-$dcpp$/original_files/cmorfiles/DCPP/CCCma/CanESM5/$dcpp$/" first_dcppB_syear: 2020 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon", "tos":"Omon"} @@ -164,7 +158,6 @@ esarchive: src: hcst: "exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast" fcst: - startR: "exp/ncar/cesm-dple-$dcpp$/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon"} grid: {"tas":"gn", "pr":"gn"} @@ -187,7 +180,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppB-forecast/" - startR: "exp/CMIP6/$dcpp$/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/$dcpp$/" first_dcppB_syear: 2020 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "prc":"Amon", "ts":"Amon"} @@ -209,7 +201,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppB-forecast/" - startR: "exp/CMIP6/$dcpp$/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/$dcpp$/" first_dcppB_syear: 2017 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tos":"Omon"} @@ -232,7 +223,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/" fcst: - startR: "exp/CMIP6/$dcpp$/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "sfcWind":"Amon"} grid: {"tas":"gr", "pr":"gr", "psl":"gr", "sfcWind":"gr"} @@ -253,7 +243,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" fcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" - startR: "exp/CMIP6/$dcpp$/MIROC6/DCPP/MIROC/MIROC6/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} @@ -274,7 +263,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/" fcst: - startR: "exp/CMIP6/$dcpp$/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} @@ -295,7 +283,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/dcppA-hindcast/" fcst: - startR: "exp/CMIP6/$dcpp$/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn", "ts":"gn"} @@ -316,7 +303,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/dcppA-hindcast/" fcst: - startR: "exp/CMIP6/$dcpp$/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn"} @@ -338,7 +324,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: - startR: "exp/CMIP6/$dcpp$/NorCPM1/DCPP/NCC/NorCPM1/$dcpp$/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn"} @@ -359,7 +344,6 @@ esarchive: src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: - startR: "exp/CMIP6/$dcpp$/NorCPM1/DCPP/NCC/NorCPM1/$dcpp$/" monthly_mean: table: {"pr":"Amon", "psl":"Amon"} grid: {"pr":"gn", "psl":"gn"} @@ -390,34 +374,10 @@ esarchive: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" - daily_mean: {"tas":"daily_mean/tas_f1h-r1440x721cds/", - "rsds":"daily_mean/rsds_f1h-r1440x721cds/", - "prlr":"daily_mean/prlr_f1h-r1440x721cds/", - "g300":"daily_mean/g300_f1h-r1440x721cds/", - "g500":"daily_mean/g500_f1h-r1440x721cds/", - "g850":"daily_mean/g850_f1h-r1440x721cds/", - "sfcWind":"daily_mean/sfcWind_f1h-r1440x721cds/", - "tasmax":"daily/tasmax-r1440x721cds/", - "tasmin":"daily/tasmin-r1440x721cds/", - "ta300":"daily_mean/ta300_f1h-r1440x721cds/", - "ta500":"daily_mean/ta500_f1h-r1440x721cds/", - "ta850":"daily_mean/ta850_f1h-r1440x721cds/", - "hurs":"daily_mean/hurs_f1h-r1440x721cds/"} - monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", - "psl":"monthly_mean/psl_f1h-r1440x721cds/", - "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", - "rsds":"monthly_mean/rsds_f1h-r1440x721cds/", - "g300":"monthly_mean/g300_f1h-r1440x721cds/", - "g500":"monthly_mean/g500_f1h-r1440x721cds/", - "g850":"monthly_mean/g850_f1h-r1440x721cds/", - "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/", - "tasmax":"monthly_mean/tasmax_f1h-r1440x721cds/", - "tasmin":"monthly_mean/tasmin_f1h-r1440x721cds/", - "ta300":"montly_mean/ta300_f1h-r1440x721cds/", - "ta500":"monthly_mean/ta500_f1h-r1440x721cds/", - "ta850":"monthly_mean/ta850_f1h-r1440x721cds/", - "tos":"monthly_mean/tos_f1h-r1440x721cds/", - "sic":"monthly_mean/sic_f1h-r1440x721cds/"} + monthly_mean: {"tas":"_f1h-r1440x721cds", "prlr":"_f1h-r1440x721cds", "psl":"_f1h-r1440x721cds", "tos":"_f1h-r1440x721cds"} + daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/", + "tos":"_f1h-r1440x721cds"} calendar: "gregorian" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" @@ -430,11 +390,8 @@ esarchive: name: "JRA-55" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/jma/jra55/" - monthly_mean: {"tas":"monthly_mean/tas_f6h", "psl":"monthly_mean/psl_f6h", - "tos":"", "pr":"monthly_mean/pr_s0-3h", - "prlr":"monthly_mean/prlr_s0-3h"} - daily_mean: {"tas":"daily_mean/tas_f6h", "psl":"daily_mean/psl_f6h", - "prlr":"daily_mean/prlr_s0-3h", "sfcWind":"daily_mean/sfcWind_f6h"} + monthly_mean: {"tas":"_f6h", "psl":"_f6h", "tos":"", "pr":"_s0-3h", "prlr":"_s0-3h"} + daily_mean: {"tas":"_f6h", "psl":"_f6h", "prlr":"_s0-3h", "sfcWind":"_f6h"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/jma/jra55/monthly_mean/tas_f6h/tas_200811.nc" diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 9736c115..8e322ab1 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -1,10 +1,6 @@ source("tools/libs.R") Loading <- function(recipe) { - ## TODO: remove - path <- "/esarchive/scratch/vagudets/repos/startR/R/" - ff <- lapply(list.files(path), function(x) paste0(path, x)) - invisible(lapply(ff, source)) # Source correct function depending on filesystem and time horizon # Case: CERISE (Mars) if (tolower(recipe$Run$filesystem) == "mars") { diff --git a/modules/Loading/R/helper_loading_decadal.R b/modules/Loading/R/helper_loading_decadal.R index 9b71c94b..b93f3279 100644 --- a/modules/Loading/R/helper_loading_decadal.R +++ b/modules/Loading/R/helper_loading_decadal.R @@ -12,7 +12,7 @@ get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calendar) { #NOTE: "sdates" is not needed if leap year is not considered - if (!calendar %in% c('360_day', '365_day', 'noleap', 'standard', 'proleptic_gregorian', + if (!calendar %in% c('360-day', '365_day', 'noleap', 'standard', 'proleptic_gregorian', 'gregorian')) stop("The calendar is not recognized. Please contact maintainers.") @@ -109,41 +109,57 @@ correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) { # table, grid, version: A list with variables as name. E.g., list(tas = 'Amon') get_dcpp_path <- function(archive, exp.name, table, grid, version, sdates) { - if (length(table) == 1) { - # only one variable, or the variables share a common path pattern - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$startR, - '$ensemble$', table, '$var$', grid, 'v*') - fcst.files <- paste0('$var$_', table, '_*_$dcpp$_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - } else { - # path pattern depends on the variable - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$startR, - '$ensemble$', '$table$', '$var$', '$grid$', '$version$') - fcst.files <- paste0('$var$_', '$table$', '_*_$dcpp$_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') - } - path_list <- file.path(fcst.path, fcst.files) - dcppB_string <- "dcppB-forecast" - ## NOTE: CanESM5 has a 'special' path pattern... Adding the global expression '*' - ## only works if Start() is loading both dcppA and dcppB. For dcppB only, - ## it crashes. - if (exp.name == "CanESM5") { - if (all(sdates >= archive$System[[exp.name]]$src$first_dcppB_syear)) { - # Replace first instance of $dcpp$ with the actual subdirectory name - path_list <- str_replace(path_list, fixed("$dcpp$"), "dcppB-forecast_i1p2") - } else { - # Add global expression to the string so that it will find 'dcppB-forecast_i1p2' - dcppB_string <- paste0(dcppB_string, "*") + # Define path (monthly and daily) + multi_path <- FALSE + if (is.null(archive$System[[exp.name]]$src$first_dcppB_syear) | + isTRUE(all(sdates < archive$System[[exp.name]]$src$first_dcppB_syear))) { # only dcppA + if (length(table) == 1) { # only one variable + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', table, '$var$', grid, version) + fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + } else { # multiple vars + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', '$table$', '$var$', '$grid$', '$version$') + fcst.files <- paste0('$var$_', '$table$', '_*_dcppA-hindcast_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') } - } - dcpp_list <- vector('list', length = length(sdates)) - names(dcpp_list) <- as.character(sdates) - for (i_sdate in 1:length(sdates)) { - if ((!is.null(archive$System[[exp.name]]$src$first_dcppB_syear)) && - (sdates[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear)) { - dcpp_list[[i_sdate]] <- dcppB_string - } else { - dcpp_list[[i_sdate]] <- "dcppA-hindcast" + path_list <- file.path(fcst.path, fcst.files) + + } else { + if (all(sdates >= archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppB + if (length(table) == 1) { # only one variable + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, + '$ensemble$', table, '$var$', grid, version) + + fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + } else { + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, + '$ensemble$', '$table$', '$var$', '$grid$', '$version$') + fcst.files <- paste0('$var$_', '$table$', '_*_dcppB-forecast_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') + } + path_list <- file.path(fcst.path, fcst.files) + + } else { # have both dcppA and dcppB + # Create one path for each sdate + #TODO: When *_depends = 'syear' can be more than one, use one path with $dcpp$ + multi_path <- TRUE + path_list <- vector('list', length = length(sdates)) + for (i_sdate in 1:length(sdates)) { + if (sdates[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear) { + path_list[[i_sdate]] <- + list(path = file.path(archive$src, archive$System[[exp.name]]$src$fcst, + '$ensemble$', table, '$var$', grid, #version, + paste0('v*/$var$_', table, '_*_dcppB-forecast_s', sdates[i_sdate], + '-$ensemble$_', grid, '_$chunk$.nc'))) + } else { + path_list[[i_sdate]] <- + list(path = file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', table, '$var$', grid, #version, + paste0('v*/$var$_', table, '_*_dcppA-hindcast_s', sdates[i_sdate], + '-$ensemble$_', grid, '_$chunk$.nc'))) + } + } } } - return(list(path_list = path_list, dcpp_list = dcpp_list)) + return(list(path_list = path_list, multi_path = multi_path)) } diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index bfd28979..d3b4f439 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -16,7 +16,7 @@ load_decadal <- function(recipe) { archive <- read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] # Print Start() info or not - DEBUG <- TRUE + DEBUG <- FALSE ## TODO: this should come from the main script # Create output folder and log: @@ -27,6 +27,7 @@ load_decadal <- function(recipe) { exp.name <- recipe$Analysis$Datasets$System$name # 'HadGEM3' ref.name <- recipe$Analysis$Datasets$Reference$name # 'era5' member <- strsplit(recipe$Analysis$Datasets$System$member, ', | |,')[[1]] #c("r1i1p1f2", "r2i1p1f2") + # variable <- recipe$Analysis$Variables$name #'tas' variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] store.freq <- recipe$Analysis$Variables$freq #monthly_mean lats.min <- as.numeric(recipe$Analysis$Region$latmin) #0 @@ -39,14 +40,14 @@ load_decadal <- function(recipe) { sdates_fcst <- recipe$Analysis$Time$fcst if (store.freq == "monthly_mean") { - time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) - } else if (store.freq %in% c("daily", "daily_mean")) { + time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + + } else if (store.freq == "daily_mean") { time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), initial_month = archive$System[[exp.name]]$initial_month, sdates = sdates_hcst, calendar = archive$System[[exp.name]]$calendar) - store.freq <- "daily_mean" } #NOTE: May be used in the future @@ -57,13 +58,10 @@ load_decadal <- function(recipe) { #------------------------- if (store.freq == "monthly_mean") { table <- archive$System[[exp.name]][[store.freq]]$table[variable] #list(tas = 'Amon') - grid <- archive$System[[exp.name]][[store.freq]]$grid[variable] #list(tas = 'gr') } else { table <- 'day' - # For grid, get first element as they are all the same? - grid <- archive$System[[exp.name]][[store.freq]]$grid[variable][[1]] } - # grid <- archive$System[[exp.name]][[store.freq]]$grid[variable] #list(tas = 'gr') + grid <- archive$System[[exp.name]][[store.freq]]$grid[variable] #list(tas = 'gr') version <- archive$System[[exp.name]][[store.freq]]$version[variable] #list(tas = 'v20210910') if (identical(member, 'all')) { member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]] @@ -79,7 +77,8 @@ load_decadal <- function(recipe) { regrid_params <- get_regrid_params(recipe, archive) # Only if the time length in each chunk may differ that we need largest_dims_length to be TRUE. Otherwise, set FALSE to increase efficiency. - need_largest_dims_length <- ifelse(exp.name %in% c('HadGEM3-GC31-MM', 'EC-Earth3-i2'), TRUE, FALSE) + need_largest_dims_length <- ifelse(exp.name %in% c('HadGEM3-GC31-MM', 'EC-Earth3-i2'), TRUE, FALSE) + #------------------------------------------- # Step 1: Load the hcst @@ -88,7 +87,12 @@ load_decadal <- function(recipe) { tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, version = version, sdates = sdates_hcst) path_list <- tmp$path_list - dcpp_list <- tmp$dcpp_list + multi_path <- tmp$multi_path + + #TODO: to make this case work; enhance Start() if it's possible + if (multi_path & length(variable) > 1) { + stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") + } Start_default_arg_list <- list( dat = path_list, @@ -96,8 +100,6 @@ load_decadal <- function(recipe) { syear = paste0(sdates_hcst), chunk = 'all', chunk_depends = 'syear', - dcpp = dcpp_list, - dcpp_depends = 'syear', time = indices(time_ind), time_across = 'chunk', merge_across_dims = TRUE, @@ -107,12 +109,12 @@ load_decadal <- function(recipe) { longitude = values(list(lons.min, lons.max)), longitude_reorder = circularsort, ensemble = member, - path_glob_permissive = 2, # for version transform = regrid_params$fcst.transform, transform_extra_cells = 2, transform_params = list(grid = regrid_params$fcst.gridtype, method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), + # path_glob_permissive = 2, # for version synonims = list(longitude = c('lon', 'longitude'), latitude = c('lat', 'latitude')), return_vars = list(latitude = NULL, longitude = NULL, @@ -120,23 +122,48 @@ load_decadal <- function(recipe) { silent = !DEBUG, retrieve = T) - if (length(table) > 1) { + if (length(variable) > 1) { Start_default_arg_list <- c(Start_default_arg_list, list(table = table, grid = grid, version = version, table_depends = 'var', grid_depends = 'var', version_depends = 'var', metadata_dims = 'var')) - Start_default_arg_list[["path_glob_permissive"]] <- FALSE } - - Start_hcst_arg_list <- Start_default_arg_list - hcst <- do.call(Start, Start_hcst_arg_list) + + if (!multi_path) { + Start_hcst_arg_list <- Start_default_arg_list + hcst <- do.call(Start, Start_hcst_arg_list) + + } else { + Start_hcst_arg_list <- Start_default_arg_list + Start_hcst_arg_list[['syear']] <- NULL + Start_hcst_arg_list[['chunk_depends']] <- NULL + remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear') + Start_hcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] + + hcst <- do.call(Start, Start_hcst_arg_list) + + # Reshape and reorder dimensions + ## dat should be 1, syear should be length of dat; reorder dimensions + dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) + hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + # Manipulate time attr because Start() cannot read it correctly + wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only + tmp <- array(dim = c(dim(hcst)[c('syear', 'time')])) + tmp[1, ] <- wrong_time_attr + yr_diff <- (sdates_hcst - sdates_hcst[1])[-1] #diff(sdates_hcst) + for (i_syear in 1:length(yr_diff)) { + tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) + } + attr(hcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') + + } + tmp_time_attr <- attr(hcst, 'Variables')$common$time - - ## TODO: Remove this part? + # change syear to c(sday, sweek, syear) # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:length(dim(hcst))]) + dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { error(recipe$Run$logger, "hcst has problem in matching data and time attr dimension.") @@ -144,13 +171,11 @@ load_decadal <- function(recipe) { } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) - # Remove 'dcpp' and other extra dimensions: - if (length(table) > 1) { - hcst <- Subset(hcst, along = c("dcpp", "table", "grid", "version"), - indices = list(1, 1, 1, 1), drop = "selected") - } else { - hcst <- Subset(hcst, along = "dcpp", indices = 1, drop = "selected") + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 + if (multi_path) { + attributes(hcst)$Variables$common[[variable]] <- attributes(hcst)$Variables$dat1[[variable]] } + # Change class from startR_array to s2dv_cube suppressWarnings( hcst <- as.s2dv_cube(hcst) @@ -164,33 +189,66 @@ load_decadal <- function(recipe) { tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, version = version, sdates = sdates_fcst) path_list <- tmp$path_list - dcpp_list <- tmp$dcpp_list + multi_path <- tmp$multi_path + + #TODO: to make this case work; enhance Start() if it's possible + if (multi_path & length(variable) > 1) { + stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") + } # monthly & daily - Start_fcst_arg_list <- Start_default_arg_list - Start_fcst_arg_list[['dat']] <- path_list - Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) - Start_fcst_arg_list[['dcpp']] <- dcpp_list - fcst <- do.call(Start, Start_fcst_arg_list) + if (!multi_path) { + #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst + Start_fcst_arg_list <- Start_default_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) + fcst <- do.call(Start, Start_fcst_arg_list) + + + } else { # multi_path + + #TODO: time attribute is not correct. Improve Start(). + Start_fcst_arg_list <- Start_default_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- NULL + Start_fcst_arg_list[['chunk_depends']] <- NULL + remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') + Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] + fcst <- do.call(Start, Start_fcst_arg_list) + # Reshape and reorder dimensions + ## dat should be 1, syear should be length of dat; reorder dimensions + ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] + dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + + # Manipulate time attr because Start() cannot read it correctly + wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only + tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) + tmp[1, ] <- wrong_time_attr + yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) + for (i_syear in 1:length(yr_diff)) { + tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) + } + attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') + + } + tmp_time_attr <- attr(fcst, 'Variables')$common$time # change syear to c(sday, sweek, syear) # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:length(dim(fcst))]) + dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { error(recipe$Run$logger, "fcst has problem in matching data and time attr dimension.") stop() } dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) - - # Remove 'dcpp' and any other extra dimensions: - if (length(table) > 1) { - fcst <- Subset(fcst, along = c("dcpp", "table", "grid", "version"), - indices = list(1, 1, 1, 1), drop = "selected") - } else { - fcst <- Subset(fcst, along = "dcpp", indices = 1, drop = "selected") + + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 + if (multi_path) { + attributes(fcst)$Variables$common[[variable]] <- attributes(fcst)$Variables$dat1[[variable]] } # Change class from startR_array to s2dv_cube @@ -199,8 +257,7 @@ load_decadal <- function(recipe) { ) # Only syear could be different - syear_dim <- which(names(dim(hcst$data)) == 'syear') - if (!identical(dim(hcst$data)[-syear_dim], dim(fcst$data)[-syear_dim])) { + if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { error(recipe$Run$logger, "hcst and fcst do not share the same dimension structure.") stop() @@ -214,7 +271,7 @@ load_decadal <- function(recipe) { # Step 3. Load the reference #------------------------------------------- obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, - "$var_dir$", "$var$_$file_date$.nc") + store.freq, "$var$$var_dir$", "$var$_$file_date$.nc") var_dir_obs <- archive$Reference[[ref.name]][[store.freq]][variable] # list(tas = "_f1h-r1440x721cds", tos = "_f1h-r1440x721cds") # obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, @@ -238,7 +295,7 @@ load_decadal <- function(recipe) { lubridate::minute(dates) <- 00 # Restore correct dimensions dim(dates) <- dim(dates_file) - + obs <- Start(dat = obs.path, var = variable, var_dir = var_dir_obs, @@ -268,7 +325,7 @@ load_decadal <- function(recipe) { #//////////////// # Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector #//////////////// - + obs <- Start(dat = obs.path, var = variable, var_dir = var_dir_obs, @@ -293,6 +350,11 @@ load_decadal <- function(recipe) { retrieve = TRUE) } + + #dim(attr(obs, 'Variables')$common$time) + # sday sweek syear time + # 1 1 2 14 + # Remove var_dir dimension obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") -- GitLab