Dimension issue in Compute() depending on the startR Start() call
I think I have an issue caused by the dimensions when using a ClimProjDiags function in Compute().
First part of the script common to both cases:
library(startR)
library(ClimProjDiags)
source('/esarchive/scratch/Earth/acarreri/scripts/bkup_scripts_HR_forecast/scripts_R/region_lonlat.R')
# ---- Experiment / forecast configuration ----
expid <- 'a3vg'
model <- 'EC-Earth3-HR'
init_month <- 5
vari <- 'tosmean'
vari_obs <- 'tos'
compo <- 'Omon'
membs_num <- seq(1, 2)
reg <- 'Nino3'
startdates <- as.character(seq(1990, 1993))
length_forecast <- 4
obs_product <- 'hadisst_v1.1'
machine_call <- 'local'
retrieve_data_obs <- FALSE

# ---- Region box (region_lonlat() comes from the sourced region_lonlat.R) ----
region_box <- region_lonlat(reg)
lon_min <- region_box$lon_min
lon_max <- region_box$lon_max
lat_min <- region_box$lat_min
lat_max <- region_box$lat_max

print('Check the startdates asked: ')
print(startdates)

# ---- Members and forecast months ----
# paste0() has no `sep` argument: passing sep = "" only appended an empty
# string as an extra element, so it is dropped here (result is unchanged).
membs <- paste0('r', membs_num, 'i1p1f1')
nmemb <- length(membs)
print(paste0('Nb of members of the EC-Earth forecast asked: ', nmemb))
print(paste0('Month init ', init_month))
# NOTE(review): month_init is hard-coded and not derived from init_month;
# keep the two in sync by hand if init_month changes.
month_init <- 'May'
forecast_month <- seq(init_month, init_month + length_forecast - 1)
nstartdates <- length(startdates)
grid <- 'gn'

# ---- Observational product metadata ----
product_obs <- read.csv('/esarchive/scratch/Earth/acarreri/Obs_diags/product_obs.csv', header = TRUE)
# Select the row(s) of the requested product once and reuse them.
obs_row <- product_obs[product_obs$product == obs_product, ]
obs_type <- obs_row$type
obs_center <- obs_row$center
obs_freq <- obs_row$freq
obs_var_folder <- vari_obs
#' Reduce a field over a longitude/latitude box with WeightedMean
#'
#' Prints the dimensions of `x` (debug output) and then delegates to
#' `ClimProjDiags::WeightedMean()`, passing the box as `region` and no mask.
#' The positions of the longitude and latitude dimensions are looked up by
#' the dimension names 'lon' and 'lat' of `x`.
#'
#' @param x Array with named dimensions; must contain dimensions named
#'   'lon' and 'lat' for the position lookup to find them.
#' @param lons_data Longitude values forwarded as `lon`.
#' @param lats_data Latitude values forwarded as `lat`.
#' @param lon_min,lon_max,lat_min,lat_max Box limits, forwarded as
#'   `region = c(lon_min, lon_max, lat_min, lat_max)`.
#' @return Whatever `ClimProjDiags::WeightedMean()` returns for these inputs.
fun_spatial_mean <- function(x, lons_data, lats_data, lon_min, lon_max, lat_min, lat_max) {
  # Debug trace of the chunk shape received by the function.
  print(dim(x))

  dim_names <- names(dim(x))
  box <- c(lon_min, lon_max, lat_min, lat_max)

  ClimProjDiags::WeightedMean(
    data = x,
    lon = lons_data,
    lat = lats_data,
    region = box,
    mask = NULL,
    londim = which(dim_names == 'lon'),
    latdim = which(dim_names == 'lat')
  )
}
# Dimensions the step function receives (target) and returns (output).
output_dims <- 'time'
target_dims <- c('time', 'lat', 'lon')

# Wrap fun_spatial_mean as a startR step so it can be attached to a workflow.
step <- startR::Step(
  fun = fun_spatial_mean,
  target_dims = target_dims,
  output_dims = output_dims,
  use_libraries = c('startR', 'ClimProjDiags')
)
The load script which works:
# Observation file pattern; the $var$, $date$ and $time$ tags are resolved
# by Start() from the selectors given below.
repos_obs1 <- paste('/esarchive', obs_type, obs_center, obs_product, obs_freq, obs_var_folder,
                    '$var$_$date$$time$.nc', sep = '/')
print(repos_obs1)

# Declare the observations without loading them (retrieve = FALSE) so that
# the data can be processed through a startR workflow.
data_obs1 <- startR::Start(dat = repos_obs1,
                           var = vari_obs,
                           date = startdates,
                           # seq_len() stays empty for a zero-length forecast,
                           # whereas 1:0 would yield c(1, 0).
                           time = seq_len(length_forecast),
                           lat = 'all',
                           lat_reorder = Sort(decreasing = FALSE),
                           lon = 'all',
                           lon_reorder = CircularSort(-180, 180),
                           synonims = list(lon = c('lon', 'longitude'),
                                           lat = c('lat', 'latitude')),
                           return_vars = list(lat = 'dat', lon = 'dat', time = c('date')),
                           retrieve = FALSE)

# Coordinates as plain vectors, passed to the step function below.
lat_obs1 <- as.vector(attr(data_obs1, 'Variables')$dat1$lat)
lon_obs1 <- as.vector(attr(data_obs1, 'Variables')$dat1$lon)

print('Dimension of non-retrieved obs: ')
# Use the full attribute name ('Dimensions', as in the chunk definition
# below) instead of relying on attr()'s partial matching of 'Dimension'.
print(attr(data_obs1, 'Dimensions'))

wf_obs1 <- startR::AddStep(inputs = data_obs1,
                           step_fun = step,
                           lons_data = lon_obs1,
                           lats_data = lat_obs1,
                           lon_min = lon_min,
                           lon_max = lon_max,
                           lat_min = lat_min,
                           lat_max = lat_max)

# One chunk per element of the 'date' dimension.
chunks1 <- list(date = as.numeric(attr(data_obs1, 'Dimensions')['date']))
data_obs_mean1 <- Compute(workflow = wf_obs1, chunks = chunks1)$output1
print(dim(data_obs_mean1))
The load script that does not work is this one (here the dates come from an experiment):
# Experiment file pattern; the $memb$, $var$, $sdate$ and $chunk$ tags are
# resolved by Start() from the selectors given below.
path <- paste0('/esarchive/exp/ecearth/', expid,
               '/diags/DCPP/EC-Earth-Consortium/', model,
               '/dcppA-hindcast/$memb$/', compo,
               '/$var$/', grid,
               '/v*/$var$_', compo, '_', model,
               '_dcppA-hindcast_s$sdate$-$memb$_', grid,
               '_$chunk$.nc')
print(paste0('path of the exp: ', path))

# Declare the experiment data without loading it (retrieve = FALSE); only
# the metadata (in particular the time values) is used afterwards.
data_exp <- startR::Start(dat = path,
                          var = vari,
                          memb = membs,
                          region = reg,
                          region_var = 'region',
                          sdate = startdates,
                          # seq_along() is safe even if forecast_month is empty.
                          time = seq_along(forecast_month),
                          chunk = 'all',
                          chunk_depends = 'sdate',
                          time_across = 'chunk',
                          merge_across_dims = TRUE,
                          return_vars = list(time = c('sdate', 'chunk'), region = NULL),
                          retrieve = FALSE)

# Time values of the experiment, with length-1 dimensions dropped; they are
# used below to select the matching observation dates.
dates_to_use <- drop(attr(data_exp, 'Variables')$common$time)
print('Load observations')
# Observation file pattern; $var$ and $date$ are resolved by Start().
repos_obs <- paste('/esarchive', obs_type, obs_center, obs_product, obs_freq, obs_var_folder,
                   '$var$_$date$.nc', sep = '/')
print(repos_obs)

# Declare the observations at the experiment's dates without loading them
# (retrieve = FALSE). Files are selected by year-month; time steps are
# selected by value from dates_to_use.
data_obs <- startR::Start(dat = repos_obs,
                          var = vari_obs,
                          date = unique(format(dates_to_use, '%Y%m')),
                          time = values(dates_to_use),
                          lat = 'all',
                          lat_reorder = Sort(decreasing = FALSE),
                          lon = 'all',
                          lon_reorder = CircularSort(-180, 180),
                          synonims = list(lon = c('lon', 'longitude'),
                                          lat = c('lat', 'latitude')),
                          time_across = 'date',
                          merge_across_dims = TRUE,
                          merge_across_dims_narm = TRUE,
                          split_multiselected_dims = TRUE,
                          return_vars = list(lat = 'dat', lon = 'dat', time = c('date')),
                          retrieve = FALSE)

# Coordinates as plain vectors, passed to the step function below.
lat_obs <- as.vector(attr(data_obs, 'Variables')$dat1$lat)
lon_obs <- as.vector(attr(data_obs, 'Variables')$dat1$lon)

print('Dimension of non-retrieved obs: ')
# Use the full attribute name ('Dimensions', as in the chunk definition
# below) instead of relying on attr()'s partial matching of 'Dimension'.
print(attr(data_obs, 'Dimensions'))

wf_obs <- startR::AddStep(inputs = data_obs,
                          step_fun = step,
                          lons_data = lon_obs,
                          lats_data = lat_obs,
                          lon_min = lon_min,
                          lon_max = lon_max,
                          lat_min = lat_min,
                          lat_max = lat_max)

# NOTE(review): chunking by 'sdate' assumes that split_multiselected_dims = TRUE
# turned the multidimensional time selector into separate 'sdate' and 'time'
# dimensions -- confirm against the Dimensions printed above.
chunks <- list(sdate = as.numeric(attr(data_obs, 'Dimensions')['sdate']))
data_obs_mean <- Compute(workflow = wf_obs, chunks = chunks)$output1
print(dim(data_obs_mean))
Could you help me with the call to startR's Start()? I can't figure out what I'm doing wrong.
Thanks