ex2_11_multiStart_insert_NA.R 7.58 KB
Newer Older
aho's avatar
aho committed
#------------------------------------------------------
# This use case shows the basic usage of multiStart(), the wrapper of Start()
# for multiple datasets retrieval that have different selector requirement.
# The usage is almost the same as Start(). To assign different selectors for 
# each dataset, use the same format as the pattern_dim: a list of lists, first
# item 'name' is the dataset name and the second item '<dim_name>' is the selector.
#
# This use case wants to put HadGEM3 and MPI-ESM datasets together in one call.
# The major differences between them are the spatial grids and calendar type.
# HadGEM3 uses 360_day calendar and MPI-ESM uses standard calendar. For the grid
# difference, the 'tramsform' parameters can deal with it; while for the calendar
# difference, Start() has difficulty align the time steps. We cannot assign NAs
# as the selector in Start(), and Start() coarces the values to the nearest found 
# values, producing the hidden errors.
#
# Therefore, we use multiStart() to insert NAs manually. Three days, 30th Dec.,
# 31st Dec., and 1st Jan. are requested. HadGEM3 with 360_day calendar doesn't 
# have 31st Dec., so we need to insert one NA at the 2nd time step. Be sure that
# the length (NA included) of two time selectors are equal.
#
# Here are some limitations for now:
# 1. The selector for each dataset can only be indices(), not values() or 'all'.
#    If the selector is the common one (e.g., 'sdate', 'lat', and 'lon' in this
#    use case), then it follows the rule of Start().
# 2. The reshape options are not all available. See ex1_13 for details.
# 3. The checker of multiStart() is not comprehensive. Please carefully examine
#    the results you get.
#------------------------------------------------------

#=======================================================
# Define the paths and selectors
#=======================================================

  # Define paths
  # HadGEM3 daily data (360_day)
  path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/',
                         'cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/',
                         'dcppA-hindcast/r1i1p1f2/day/$var$/gn/v20200101/',
                         '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$fyear$.nc')
  ## tasmax_day_HadGEM3-GC31-MM_dcppA-hindcast_s2000-r1i1p1f2_gn_20001101-20101230.nc

  # MPI-ESM1.2-HR (standard)
  path_mpi_esm <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/mpi-esm1-2-hr/',
                         'cmip6-dcppA-hindcast_i1p1/DCPP/MPI-M/MPI-ESM1-2-HR/',
                         'dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200101/',
                         '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$fyear$.nc')
  ## tasmax_day_MPI-ESM1-2-HR_dcppA-hindcast_s2000-r1i1p1f1_gn_20001101-20101231.nc

  # Define selectors for each dataset
  sdate <- '2000'
  fyear_hadgem3 <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1230')
  fyear_mpi_esm <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1231')
  # To align the time steps, insert an NA in hadgem3
  time_hadgem3 <- indices(c(60, NA, 61)) #12/30, NA, 1/1
  time_mpi_esm <- indices(60:62) #12/30, 12/31, 1/1


#=======================================================
# retrieve = TRUE; interpolation
#=======================================================

  res <- multiStart(
         dat = list(list(name = 'hadgem3', path = path_hadgem3),
                    list(name = 'mpi_esm', path = path_mpi_esm)),
         var = 'tasmax', 
         sdate = '2000',
         fyear = list(list(name = 'hadgem3', fyear = fyear_hadgem3),
                      list(name = 'mpi_esm', fyear = fyear_mpi_esm)),
         time = list(list(name = 'hadgem3', time = time_hadgem3),
                     list(name = 'mpi_esm', time = time_mpi_esm)),
         lat = values(list(-30, 30)), 
         lon = values(list(0, 50)),
         transform = CDORemapper,
         transform_params = list(grid = 'r360x181',
                                 method = 'conservative',
                                 crop = c(0, 50, -30, 30)),
         transform_vars = c('lat', 'lon'),
         return_vars = list(lat = NULL, lon = NULL, time = 'sdate'),
         retrieve = TRUE)

  # Check the returned values. hadgem3 has NA in the 2nd time step.
  dim(res)
  #  dat   var sdate fyear  time   lat   lon 
  #    2     1     1     1     3    61    51 
  res[,1,1,1,,10,10]
  #         [,1]     [,2]     [,3]
  #[1,] 292.5023       NA 292.0899
  #[2,] 293.0914 293.4608 292.8224



#=======================================================
# retrieve = FALSE; Compute()
#=======================================================
aho's avatar
aho committed
  res <- multiStart(
aho's avatar
aho committed
         dat = list(list(name = 'hadgem3', path = path_hadgem3),
                    list(name = 'mpi_esm', path = path_mpi_esm)),
aho's avatar
aho committed
         var = 'tasmax',
         sdate = '2000',
aho's avatar
aho committed
         fyear = list(list(name = 'hadgem3', fyear = fyear_hadgem3),
                      list(name = 'mpi_esm', fyear = fyear_mpi_esm)),
         time = list(list(name = 'hadgem3', time = time_hadgem3),
                     list(name = 'mpi_esm', time = time_mpi_esm)),
aho's avatar
aho committed
         lat = values(list(-30, 30)),
         lon = values(list(0, 50)),
         transform = CDORemapper,
         transform_params = list(grid = 'r360x181',
                                 method = 'conservative',
                                 crop = c(0, 50, -30, 30)),
         transform_vars = c('lat', 'lon'),
         return_vars = list(lat = NULL, lon = NULL, time = 'sdate'),
aho's avatar
aho committed
         retrieve = FALSE)

str(res)
#List of 2
aho's avatar
aho committed
# $ hadgem3: language Start ...
#  ..- attr(*, "Dimensions") ...
aho's avatar
aho committed
#  ..
aho's avatar
aho committed
#  ..- attr(*, "Variables") ...
aho's avatar
aho committed
#  ..
aho's avatar
aho committed
# $ mpi_esm: language Start ...
#  ..- attr(*, "Dimensions") ...
aho's avatar
aho committed
#  ..
aho's avatar
aho committed
#  ..- attr(*, "Variables") ...
aho's avatar
aho committed
#  ..

str(attr(res[[1]], 'na_pos_record'))
#List of 1
# $ hadgem3:List of 7
#  ..$ dat  : num 0
#  ..$ var  : num 0
#  ..$ sdate: num 0
#  ..$ fyear: num 0
#  ..$ time : int 2
#  ..$ lat  : num 0
#  ..$ lon  : num 0

str(attr(res[[2]], 'na_pos_record'))
#List of 1
# $ mpi_esm:List of 7
#  ..$ dat  : num 0
#  ..$ var  : num 0
#  ..$ sdate: num 0
#  ..$ fyear: num 0
#  ..$ time : num 0
#  ..$ lat  : num 0
#  ..$ lon  : num 0

func_sum <- function(x, y) {
  return(x + y)
}

# 'time' and 'fyear' can also be the operational dimension
aho's avatar
aho committed
step <- Step(func_sum, target_dims = list(hadgem3 = c('var'),
                                          mpi_esm = c('var')),
                       output_dims = c('var'))
aho's avatar
aho committed

wf <- AddStep(res, step)

# Option 1: run locally
aho's avatar
aho committed
output <- Compute(wf, chunks = list(time = 2))
aho's avatar
aho committed

# Option 2: run on Nord3
#-------------------user-defined---------------------
aho's avatar
aho committed
  queue_host <- 'nord3'
  temp_dir <- '/gpfs/scratch/bsc32/bsc32<id>/startR_hpc/'
  ecflow_suite_dir <- '/home/Earth/<use_id>/startR_local/'
aho's avatar
aho committed
#----------------------------------------------------
aho's avatar
aho committed
 output <- Compute(wf,
aho's avatar
aho committed
                   chunks = list(time = 2),
aho's avatar
aho committed
                   threads_load = 2,
                   threads_compute = 4,
                   cluster = list(queue_host = queue_host,
                     queue_type = 'lsf',
                     temp_dir = temp_dir,
                     cores_per_job = 2,
                     job_wallclock = '05:00',
                     max_jobs = 4,
                     extra_queue_params = list('#BSUB -q bsc_es'),
                     bidirectional = FALSE,
                     polling_period = 10
                   ),
                   ecflow_suite_dir = ecflow_suite_dir,
                   wait = TRUE
aho's avatar
aho committed

dim(output$output1)
aho's avatar
aho committed
#  var   time   dat sdate fyear   lat   lon 
#    1      3     1     1     1    61    51
output$output1[1, , 1, 1, 1, 1, 1]
[1] 584.9990       NA 587.8201