Skip to content
GitLab
Projects Groups Topics Snippets
  • /
  • Help
    • Help
    • Support
    • Community forum
    • Submit feedback
  • Sign in
  • startR startR
  • Project information
    • Project information
    • Activity
    • Labels
    • Members
  • Repository
    • Repository
    • Files
    • Commits
    • Branches
    • Tags
    • Contributor statistics
    • Graph
    • Compare revisions
  • Issues 29
    • Issues 29
    • List
    • Boards
    • Service Desk
    • Milestones
  • Merge requests 7
    • Merge requests 7
  • CI/CD
    • CI/CD
    • Pipelines
    • Jobs
    • Schedules
  • Deployments
    • Deployments
    • Environments
    • Releases
  • Monitor
    • Monitor
    • Incidents
  • Analytics
    • Analytics
    • Value stream
    • CI/CD
    • Repository
  • Wiki
    • Wiki
  • Snippets
    • Snippets
  • Activity
  • Graph
  • Create a new issue
  • Jobs
  • Commits
  • Issue Boards
Collapse sidebar
  • Earth SciencesEarth Sciences
  • startRstartR
  • Issues
  • #119
Closed
Open
Issue created Oct 22, 2021 by acarreri@acarreriDeveloper

Issue of dimension in Compute function depending on the StartR call

Hi @aho , @nperez ,

I think I have an issue with the dimensions when using a ClimProjDiags function inside Compute().

First part of the script common to both cases:

library(startR)
library(ClimProjDiags)

source('/esarchive/scratch/Earth/acarreri/scripts/bkup_scripts_HR_forecast/scripts_R/region_lonlat.R')
# ---- Experiment and observation configuration ----
expid <- 'a3vg'
model <- 'EC-Earth3-HR'
init_month <- 5
vari <- 'tosmean'
vari_obs <- 'tos'
compo <- 'Omon'
membs_num <- seq(1, 2)
reg <- 'Nino3'
startdates <- as.character(seq(1990, 1993))
length_forecast <- 4
obs_product <- 'hadisst_v1.1'
machine_call <- 'local'
retrieve_data_obs <- FALSE

# ---- Region bounding box ----
# region_lonlat() is defined in the helper script sourced above; the code below
# reads its lon_min / lon_max / lat_min / lat_max elements.
region_box <- region_lonlat(reg)
lon_min <- region_box$lon_min
lon_max <- region_box$lon_max
lat_min <- region_box$lat_min
lat_max <- region_box$lat_max

# ---- Derived forecast settings ----
print('Check the startdates asked: ')
print(startdates)
# paste0() has no 'sep' argument: anything passed as sep= is pasted as one more
# string, so it is left out here (the resulting member labels are unchanged).
membs <- paste0('r', membs_num, 'i1p1f1')
nmemb <- length(membs)
print(paste0('Nb of members of the EC-Earth forecast asked: ', nmemb))
print(paste0('Month init ', init_month))
# Hard-coded label; keep it in sync with init_month when that value changes.
month_init <- 'May'
forecast_month <- seq(init_month, init_month + length_forecast - 1)
nstartdates <- length(startdates)
grid <- 'gn'

# ---- Observation product metadata ----
# Look up type / center / freq of the selected product in the CSV table.
product_obs <- read.csv('/esarchive/scratch/Earth/acarreri/Obs_diags/product_obs.csv', header = TRUE) #, stringsAsFactors = TRUE)
obs_type <- product_obs[product_obs$product == obs_product, ]$type
obs_center <- product_obs[product_obs$product == obs_product, ]$center
obs_freq <- product_obs[product_obs$product == obs_product, ]$freq
obs_var_folder <- vari_obs

# Spatial mean of `x` over a longitude/latitude box.
#
# Prints dim(x), then passes `x` to ClimProjDiags::WeightedMean() with the box
# c(lon_min, lon_max, lat_min, lat_max) as `region` and no mask. The positions
# of the longitude and latitude dimensions are found from the names of dim(x),
# which are expected to include 'lon' and 'lat'.
#
# x                   Named-dimension array to average.
# lons_data/lats_data Coordinate vectors handed to WeightedMean() as lon / lat.
# lon_min..lat_max    Limits of the averaging box.
#
# Returns whatever WeightedMean() returns.
fun_spatial_mean <- function(x, lons_data, lats_data, lon_min, lon_max, lat_min, lat_max){
    dim_names <- names(dim(x))
    print(dim(x))
    box <- c(lon_min, lon_max, lat_min, lat_max)
    ClimProjDiags::WeightedMean(data = x,
                                lon = lons_data,
                                lat = lats_data,
                                region = box,
                                mask = NULL,
                                londim = which(dim_names == 'lon'),
                                latdim = which(dim_names == 'lat'))
}
# Wrap fun_spatial_mean as a startR step: it is applied over the
# ('time', 'lat', 'lon') dimensions and its output is declared as 'time'.
target_dims <- c('time', 'lat', 'lon')
output_dims <- c('time')
step <- startR::Step(
  fun = fun_spatial_mean,
  use_libraries = c('startR', 'ClimProjDiags'),
  target_dims = target_dims,
  output_dims = output_dims
)

The load script which works:

# Observation file pattern built from the product metadata; the $var$, $date$
# and $time$ tags are left in the string for Start().
repos_obs1 <- paste('/esarchive', obs_type, obs_center, obs_product, obs_freq, obs_var_folder,
                    '$var$_$date$$time$.nc', sep='/')
print(repos_obs1)

# Declare the observation data without reading it (retrieve = FALSE), so that
# it can be processed later through Compute().
data_obs1 <- startR::Start(
  dat = repos_obs1,
  var = vari_obs,
  date = startdates,
  time = seq_len(length_forecast),
  lat = 'all',
  lat_reorder = Sort(decreasing = FALSE),
  lon = 'all',
  lon_reorder = CircularSort(-180, 180),
  synonims = list(lon = c('lon', 'longitude'),
                  lat = c('lat', 'latitude')),
  return_vars = list(lat = 'dat', lon = 'dat', time = c('date')),
  retrieve = FALSE
)

# Coordinates of the first dataset, passed on to the spatial-mean step.
lat_obs1 <- as.vector(attr(data_obs1, 'Variables')$dat1$lat)
lon_obs1 <- as.vector(attr(data_obs1, 'Variables')$dat1$lon)
print('Dimension of non-retrieved obs: ')
# Use the full attribute name ('Dimensions', as in the chunking line below)
# instead of relying on attr()'s partial matching of 'Dimension'.
print(attr(data_obs1, 'Dimensions'))

# Attach the step and its extra arguments to the declared data.
wf_obs1 <- startR::AddStep(inputs = data_obs1,
                           step_fun = step,
                           lons_data = lon_obs1,
                           lats_data = lat_obs1,
                           lon_min = lon_min,
                           lon_max = lon_max,
                           lat_min = lat_min,
                           lat_max = lat_max)

# Chunk along 'date' using the full length of that dimension as the chunk count.
chunks1 <- list(date = as.numeric(attr(data_obs1, 'Dimensions')['date']))
data_obs_mean1 <- Compute(workflow = wf_obs1, chunks = chunks1)$output1
print(dim(data_obs_mean1))

The load that doesn't work is the following one (here the dates come from an experiment):

# Experiment file pattern; the $memb$, $var$, $sdate$ and $chunk$ tags are left
# in the string for Start().
path <- paste0('/esarchive/exp/ecearth/', expid,
               '/diags/DCPP/EC-Earth-Consortium/', model,
               '/dcppA-hindcast/$memb$/', compo, '/$var$/', grid,
               '/v*/$var$_', compo, '_', model,
               '_dcppA-hindcast_s$sdate$-$memb$_', grid, '_$chunk$.nc')
print(paste0('path of the exp: ', path))

# Declare the experiment without reading it; it is only used here to obtain
# the time values that the observations must match.
data_exp <- startR::Start(
  dat = path,
  var = vari,
  memb = membs,
  region = reg,
  region_var = 'region',
  sdate = startdates,
  time = seq_along(forecast_month),
  chunk = 'all',
  chunk_depends = 'sdate',
  time_across = 'chunk',
  merge_across_dims = TRUE,
  return_vars = list(time = c('sdate', 'chunk'), region = NULL),
  retrieve = FALSE
)
dates_to_use <- drop(attr(data_exp, 'Variables')$common$time)

print('Load observations')
repos_obs <- paste('/esarchive', obs_type, obs_center, obs_product, obs_freq, obs_var_folder,'$var$_$date$.nc', sep='/')
print(repos_obs)

# Declare the observations selected by the experiment dates: files are chosen
# from the year-month of each date and 'time' is selected by value.
data_obs <- startR::Start(
  dat = repos_obs,
  var = vari_obs,
  date = unique(format(dates_to_use, '%Y%m')),
  time = values(dates_to_use),
  lat = 'all',
  lat_reorder = Sort(decreasing = FALSE),
  lon = 'all',
  lon_reorder = CircularSort(-180, 180),
  synonims = list(lon = c('lon', 'longitude'),
                  lat = c('lat', 'latitude')),
  time_across = 'date',
  merge_across_dims = TRUE,
  merge_across_dims_narm = TRUE,
  split_multiselected_dims = TRUE,
  return_vars = list(lat = 'dat', lon = 'dat', time = c('date')),
  retrieve = FALSE
)

# Coordinates of the first dataset, passed on to the spatial-mean step.
lat_obs <- as.vector(attr(data_obs, 'Variables')$dat1$lat)
lon_obs <- as.vector(attr(data_obs, 'Variables')$dat1$lon)
print('Dimension of non-retrieved obs: ')
# Use the full attribute name ('Dimensions', as in the chunking line below)
# instead of relying on attr()'s partial matching of 'Dimension'.
print(attr(data_obs, 'Dimensions'))

# Attach the step and its extra arguments to the declared data.
wf_obs <- startR::AddStep(inputs = data_obs,
                          step_fun = step,
                          lons_data = lon_obs,
                          lats_data = lat_obs,
                          lon_min = lon_min,
                          lon_max = lon_max,
                          lat_min = lat_min,
                          lat_max = lat_max)

# NOTE(review): 'sdate' is not an argument of the Start() call above; this
# chunking presumably relies on split_multiselected_dims = TRUE splitting
# 'time' into the dimensions of dates_to_use. Confirm that 'sdate' appears in
# the Dimensions printed above before chunking along it.
chunks <- list(sdate = as.numeric(attr(data_obs, 'Dimensions')['sdate']))
data_obs_mean <- Compute(workflow = wf_obs, chunks = chunks)$output1
print(dim(data_obs_mean))

Could you help me with the call to startR's Start()? I can't figure out what I'm doing wrong.

Thanks

Assignee
Assign to
Time tracking