Compute() fails when chunking by a dimension that another dimension depends on
I need to load members that come from different versions (v20210312 for members r1 to r6, and v20210719 for members r7 to r10). To do so, I pass to the Start() call the members, a named list giving the version for each member, and version_depends = 'member'.
This works with retrieve = TRUE:
# Case 1: load the data directly into memory (retrieve = TRUE).
# 'version' is a named list with one entry per member and is declared as
# depending on 'member' through 'version_depends'.
data_retrieveTRUE <- Start(
  dataset = '/esarchive/exp/CMIP6/dcppA-hindcast/cmcc-cm2-sr5/cmip6-dcppA-hindcast_i1p1/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Amon/$var$/gn/$version$/$var$_*_s$sdate$-$member$_gn_$aux$.nc',
  var = 'tas',
  sdate = paste0(1970:1972),
  fmonth = 1:12,
  # 'aux' is a free part of the file name that varies with the start date.
  aux = 'all',
  aux_depends = 'sdate',
  lat = values(list(-90, 90)),
  lon = values(list(0, 359.9)),
  member = paste0('r', 1:10, 'i1p1f1'),
  version = list(
    'r1i1p1f1' = 'v20210312',
    'r2i1p1f1' = 'v20210312',
    'r3i1p1f1' = 'v20210312',
    'r4i1p1f1' = 'v20210312',
    'r5i1p1f1' = 'v20210312',
    'r6i1p1f1' = 'v20210312',
    'r7i1p1f1' = 'v20210719',
    'r8i1p1f1' = 'v20210719',
    'r9i1p1f1' = 'v20210719',
    'r10i1p1f1' = 'v20210719'
  ),
  version_depends = 'member',
  synonims = list(
    fmonth = c('fmonth', 'time'),
    lon = c('lon', 'longitude'),
    lat = c('lat', 'latitude')
  ),
  return_vars = list(lat = 'dataset', lon = 'dataset'),
  num_procs = 1,
  retrieve = TRUE
)
It also works with retrieve = FALSE when chunking by a dimension other than 'member' (in this example, I chunk by 'sdate'):
# Case 2: same request as a lazy declaration (retrieve = FALSE), to be used
# as the input of a Compute() workflow.
# 'version' is a named list with one entry per member and is declared as
# depending on 'member' through 'version_depends'.
data_retrieveFALSE <- Start(
  dataset = '/esarchive/exp/CMIP6/dcppA-hindcast/cmcc-cm2-sr5/cmip6-dcppA-hindcast_i1p1/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Amon/$var$/gn/$version$/$var$_*_s$sdate$-$member$_gn_$aux$.nc',
  var = 'tas',
  sdate = paste0(1970:1972),
  fmonth = 1:12,
  # 'aux' is a free part of the file name that varies with the start date.
  aux = 'all',
  aux_depends = 'sdate',
  lat = values(list(-90, 90)),
  lon = values(list(0, 359.9)),
  member = paste0('r', 1:10, 'i1p1f1'),
  version = list(
    'r1i1p1f1' = 'v20210312',
    'r2i1p1f1' = 'v20210312',
    'r3i1p1f1' = 'v20210312',
    'r4i1p1f1' = 'v20210312',
    'r5i1p1f1' = 'v20210312',
    'r6i1p1f1' = 'v20210312',
    'r7i1p1f1' = 'v20210719',
    'r8i1p1f1' = 'v20210719',
    'r9i1p1f1' = 'v20210719',
    'r10i1p1f1' = 'v20210719'
  ),
  version_depends = 'member',
  synonims = list(
    fmonth = c('fmonth', 'time'),
    lon = c('lon', 'longitude'),
    lat = c('lat', 'latitude')
  ),
  return_vars = list(lat = 'dataset', lon = 'dataset'),
  num_procs = 1,
  retrieve = FALSE
)
# Average the input over its 'fmonth' dimension.
#
# data: array with a dimension named 'fmonth' (the target dimension).
# Returns the 'output1' element of the multiApply::Apply() result.
#
# Apply() is reached through the 'multiApply::' namespace prefix, so no
# library() call is needed inside the body; attaching a package on every
# call would only add a side effect on the search path.
fun <- function(data) {
  multiApply::Apply(data = data, target_dims = 'fmonth', fun = mean)$output1
}
# Declare 'fun' as a workflow step that consumes the 'fmonth' dimension and
# returns no named output dimensions.
step <- Step(
  fun = fun,
  use_libraries = 'multiApply',
  target_dims = 'fmonth',
  output_dims = NULL
)

# Build the workflow: the lazily declared data is the input of the step.
wf <- AddStep(inputs = data_retrieveFALSE, step_fun = step)
# Working case: run the workflow on the cluster, chunking along 'sdate',
# and keep the first output of the result.
INDEX_chunkingSdate <- Compute(
  workflow = wf,
  chunks = list(sdate = 3),
  threads_load = 1,
  threads_compute = 1,
  cluster = list(
    queue_host = 'nord1.bsc.es',
    queue_type = 'lsf',
    temp_dir = '/gpfs/scratch/bsc32/bsc32924/startR_hpc/',
    cores_per_job = 1,
    job_wallclock = '00:30',
    max_jobs = 10,
    extra_queue_params = list('#BSUB -q bsc_es -M 3000'),
    bidirectional = FALSE,
    polling_period = 10
  ),
  ecflow_suite_dir = '/home/Earth/cdelgado/Desktop/startR_local/',
  wait = TRUE
)$output1
However, the data cannot be loaded when chunking by 'member':
# Failing case: same workflow and cluster configuration, but chunking along
# 'member', the dimension that 'version' depends on.
INDEX_chunkingMember <- Compute(
  workflow = wf,
  chunks = list(member = 10),
  threads_load = 1,
  threads_compute = 1,
  cluster = list(
    queue_host = 'nord1.bsc.es',
    queue_type = 'lsf',
    temp_dir = '/gpfs/scratch/bsc32/bsc32924/startR_hpc/',
    cores_per_job = 1,
    job_wallclock = '00:30',
    max_jobs = 10,
    extra_queue_params = list('#BSUB -q bsc_es -M 3000'),
    bidirectional = FALSE,
    polling_period = 10
  ),
  ecflow_suite_dir = '/home/Earth/cdelgado/Desktop/startR_local/',
  wait = TRUE
)$output1
In the last case (the one that fails), I get the following error:
* Exploring files... This will take a variable amount of time depending
* on the issued request and the performance of the file server...
The data cannot be loaded.
See the original error message:
If providing selectors for the depending dimension 'version', a vector of selectors must be provided for each selector of the dimension it depends on, 'member'.
Current files in /dev/shm:
/.statelite/tmpfs/gpfs/scratch/bsc32/bsc32924/startR_hpc/STARTR_CHUNKING_2574156359
Files _2574156359_1_1_1_1_1_1_1_1_ has been removed.
Error in value[[3L]](cond) :
The job has failed while loading data. See original error reported above.
Calls: tryCatch -> tryCatchList -> tryCatchOne -> <Anonymous>
Could you please have a look at it?
Thanks a lot in advance,
Carlos