Loading dcppA-hindcast and dcppB-forecast data in the same Start() call with correct time metadata
Hi @vagudets,
Summary
This issue addresses the problem of loading start dates from both dcppA-hindcast and dcppB-forecast data in the same Start() call when the time dimension goes across different files.
Approach 1: passing a list of paths to `dat`, where each start date has its own path. The data is loaded correctly, but the time metadata is incorrect and the dimensions have to be manipulated afterwards. If `dat` is established as one of the dimensions that the time metadata depends on, Start() returns an error.
Approach 2: creating a single common path with a file dimension `$dcpp$` and establishing the dependency between `$dcpp$` and the start date dimension `syear`. In the example below this results in missing files and an error message.
Example
# Approach 1: one path pattern per dataset, passed to `dat` as a list
library(startR)

# The start date is hard-coded in each pattern: s2018 under dcppA-hindcast
# and s2019 under dcppB-forecast.
path_list <- list(
  dat1 = list(
    path = "/esarchive//exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast//$ensemble$/Amon/$var$/gn/v*/$var$_Amon_*_dcppA-hindcast_s2018-$ensemble$_gn_$chunk$.nc"
  ),
  dat2 = list(
    path = "/esarchive//exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast//$ensemble$/Amon/$var$/gn/v*/$var$_Amon_*_dcppB-forecast_s2019-$ensemble$_gn_$chunk$.nc"
  )
)

# The argument order is kept exactly as in the original call.
exp <- Start(
  dat = path_list,
  var = "tas",
  chunk = "all",
  time = 1:12,
  # syear = paste0(2018:2019)
  time_across = "chunk",
  merge_across_dims = TRUE,
  largest_dims_length = TRUE,
  latitude = values(list(0, 20)),
  latitude_reorder = Sort(decreasing = TRUE),
  longitude = values(list(0, 20)),
  longitude_reorder = CircularSort(0, 360),
  ensemble = c("r1i1p1f2", "r2i1p1f2", "r3i1p1f2"),
  # path_glob_permissive = 2, # for version
  synonims = list(
    longitude = c("lon", "longitude"),
    latitude = c("lat", "latitude")
  ),
  # Time metadata is requested as depending on `chunk` only.
  return_vars = list(
    latitude = NULL,
    longitude = NULL,
    time = "chunk"
  ),
  retrieve = FALSE
)

# Inspect the time metadata attached to the returned object.
attr(exp, "Variables")$common$time
# Observed output (only the first two time steps are filled in):
# [1] "2018-11-16 UTC" "2018-12-16 UTC" NA NA
# [5] NA NA NA NA
# [9] NA NA NA NA
If `time = c('chunk', 'dat')` is used in `return_vars`, then:
Error in `[.default`(x, TRUE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), : subscript out of bounds
# Approach 2: one common path pattern, with the experiment name as a file
# dimension (`$dcpp$`) whose value depends on the start date (`$syear$`)
library(startR)

path <- "/esarchive//exp/CMIP6/$dcpp$/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/$dcpp$/$ensemble$/Amon/$var$/gn/v*/$var$_Amon_*_$dcpp$_s$syear$-$ensemble$_gn_$chunk$.nc"

# The argument order is kept exactly as in the original call.
exp <- Start(
  dat = path,
  var = "tas",
  chunk = "all",
  time = 1:12,
  syear = as.character(2018:2019),
  # One `dcpp` value per start date, keyed by the `syear` value.
  dcpp = list("2018" = "dcppA-hindcast", "2019" = "dcppB-forecast"),
  dcpp_depends = "syear",
  time_across = "chunk",
  merge_across_dims = TRUE,
  largest_dims_length = TRUE,
  latitude = values(list(0, 20)),
  latitude_reorder = Sort(decreasing = TRUE),
  longitude = values(list(0, 20)),
  longitude_reorder = CircularSort(0, 360),
  ensemble = c("r1i1p1f2", "r2i1p1f2", "r3i1p1f2"),
  # path_glob_permissive = 2, # for version
  synonims = list(
    longitude = c("lon", "longitude"),
    latitude = c("lat", "latitude")
  ),
  # Time metadata is requested as depending on both `chunk` and `syear`.
  return_vars = list(
    latitude = NULL,
    longitude = NULL,
    time = c("chunk", "syear")
  ),
  retrieve = FALSE
)
Error in R_nc4_open: No such file or directory
(...)
Error in if (any(na.omit(unlist(sub_array_of_selectors)) < 1) || any(na.omit(unlist(sub_array_of_selectors)) > : missing value where TRUE/FALSE needed
Module and Package Version
R/4.1.2 and R/4.2.1; startR_2.3.1