Compute(): Limitation of variable naming in Start() when sending jobs to Power9
Hi @aho,
As we have been seeing, Compute() fails when the names of the variables defined in Start() contain underscores (e.g. "lead_year"). This only happens when the job is sent to Power9; it works fine on the workstation.
This is an example of the code that returns an error:
library(startR)
library(s2dverification)
# Reproduction case: the file dimension is named with an underscore
# ('lead_year'). retrieve = FALSE is passed, so the result is meant to be fed
# to a workflow rather than used as loaded data.
# NOTE(review): the path pattern uses the tag $leadyear$ while the dimension
# below is called 'lead_year' -- confirm whether the tag should read
# $lead_year$ in this failing example.
data_exp <- Start(
  dataset = "/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/$var$_Amon_*_s$sdate$-$member$_gr_$leadyear$.nc",
  var = "tas",
  sdate = as.character(2000:2004),
  month = "all",
  lat = values(list(0, 14)),
  lon = values(list(0, 28)),
  lead_year = "all",
  member = 1:3,
  lead_year_depends = "sdate",
  month_across = "lead_year",
  synonims = list(
    month = c("month", "time"),
    lon = c("lon", "longitude"),
    lat = c("lat", "latitude")
  ),
  return_vars = list(lat = "dataset", lon = "dataset"),
  num_procs = 1,
  retrieve = FALSE
)
# Atomic function applied to each chunk: returns the mean of `x` for every
# index along its second dimension (all other dimensions are averaged out).
fun <- function(x) {
  apply(x, MARGIN = 2, FUN = mean)
}
# Wrap `fun` as a workflow step that consumes the 'month' and 'member'
# dimensions and yields a 'member' dimension, then attach it to the data
# declared by Start().
step <- Step(
  fun = fun,
  target_dims = c("month", "member"),
  output_dims = "member"
)
wf <- AddStep(inputs = data_exp, step_fun = step)
# Execute the workflow split into 2 x 2 chunks over lat/lon, submitting the
# jobs to the 'power' queue host through slurm. This is the call that raises
# the reported error.
res <- Compute(
  workflow = wf,
  chunks = list(lat = 2, lon = 2),
  threads_load = 2,
  threads_compute = 4,
  cluster = list(
    queue_host = "power",
    queue_type = "slurm",
    temp_dir = "/gpfs/scratch/bsc32/bsc32924/startR_hpc/",
    # lib_dir = '/gpfs/projects/bsc32/share/R_libs/3.5/',
    r_module = "R/3.5.0-foss-2018b",
    CDO_module = "CDO/1.9.5-foss-2018b",
    cores_per_job = 4,
    job_wallclock = "00:30:00",
    max_jobs = 4,
    extra_queue_params = list("#SBATCH --mem-per-cpu=3000"),
    bidirectional = FALSE,
    polling_period = 20
  ),
  ecflow_suite_dir = "/home/Earth/cdelgado/Desktop/startR_local/",
  wait = TRUE
)
The error doesn't occur when the variable "lead_year" is changed to "leadyear":
# Variant reported not to trigger the error: the file dimension is named
# 'leadyear' (no underscore), matching the $leadyear$ tag in the path pattern.
data_exp <- Start(
  dataset = "/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/$var$_Amon_*_s$sdate$-$member$_gr_$leadyear$.nc",
  var = "tas",
  sdate = as.character(2000:2004),
  month = "all",
  lat = values(list(0, 14)),
  lon = values(list(0, 28)),
  leadyear = "all",
  member = 1:3,
  leadyear_depends = "sdate",
  month_across = "leadyear",
  synonims = list(
    month = c("month", "time"),
    lon = c("lon", "longitude"),
    lat = c("lat", "latitude")
  ),
  return_vars = list(lat = "dataset", lon = "dataset"),
  num_procs = 1,
  retrieve = FALSE
)
Cheers,
Carlos