Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#------------------------------------------------------
# This use case shows the basic usage of multiStart(), the wrapper of Start()
# for multiple datasets retrieval that have different selector requirement.
# The usage is almost the same as Start(). To assign different selectors for
# each dataset, use the same format as the pattern_dim: a list of lists, first
# item 'name' is the dataset name and the second item '<dim_name>' is the selector.
#
# This use case wants to put HadGEM3 and MPI-ESM datasets together in one call.
# The major differences between them are the spatial grids and calendar type.
# HadGEM3 uses 360_day calendar and MPI-ESM uses standard calendar. For the grid
# difference, the 'tramsform' parameters can deal with it; while for the calendar
# difference, Start() has difficulty align the time steps. We cannot assign NAs
# as the selector in Start(), and Start() coarces the values to the nearest found
# values, producing the hidden errors.
#
# Therefore, we use multiStart() to insert NAs manually. Three days, 30th Dec.,
# 31st Dec., and 1st Jan. are requested. HadGEM3 with 360_day calendar doesn't
# have 31st Dec., so we need to insert one NA at the 2nd time step. Be sure that
# the length (NA included) of two time selectors are equal.
#
# Here are some limitations for now:
# 1. The selector for each dataset can only be indices(), not values() or 'all'.
# If the selector is the common one (e.g., 'sdate', 'lat', and 'lon' in this
# use case), then it follows the rule of Start().
# 2. The reshape options are not all available. See ex1_13 for details.
# 3. The checker of multiStart() is not comprehensive. Please carefully examine
# the results you get.
#------------------------------------------------------
#=======================================================
# Define the paths and selectors
#=======================================================
# Define paths
# HadGEM3 daily data (360_day)
path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/',
'cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/',
'dcppA-hindcast/r1i1p1f2/day/$var$/gn/v20200101/',
'$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$fyear$.nc')
## tasmax_day_HadGEM3-GC31-MM_dcppA-hindcast_s2000-r1i1p1f2_gn_20001101-20101230.nc
# MPI-ESM1.2-HR (standard)
path_mpi_esm <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/mpi-esm1-2-hr/',
'cmip6-dcppA-hindcast_i1p1/DCPP/MPI-M/MPI-ESM1-2-HR/',
'dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200101/',
'$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$fyear$.nc')
## tasmax_day_MPI-ESM1-2-HR_dcppA-hindcast_s2000-r1i1p1f1_gn_20001101-20101231.nc
# Define selectors for each dataset
sdate <- '2000'
fyear_hadgem3 <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1230')
fyear_mpi_esm <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1231')
# To align the time steps, insert an NA in hadgem3
time_hadgem3 <- indices(c(60, NA, 61)) #12/30, NA, 1/1
time_mpi_esm <- indices(60:62) #12/30, 12/31, 1/1
#=======================================================
# retrieve = TRUE; interpolation
#=======================================================
res <- multiStart(
dat = list(list(name = 'hadgem3', path = path_hadgem3),
list(name = 'mpi_esm', path = path_mpi_esm)),
var = 'tasmax',
sdate = '2000',
fyear = list(list(name = 'hadgem3', fyear = fyear_hadgem3),
list(name = 'mpi_esm', fyear = fyear_mpi_esm)),
time = list(list(name = 'hadgem3', time = time_hadgem3),
list(name = 'mpi_esm', time = time_mpi_esm)),
lat = values(list(-30, 30)),
lon = values(list(0, 50)),
transform = CDORemapper,
transform_params = list(grid = 'r360x181',
method = 'conservative',
crop = c(0, 50, -30, 30)),
transform_vars = c('lat', 'lon'),
return_vars = list(lat = NULL, lon = NULL, time = 'sdate'),
retrieve = TRUE)
# Check the returned values. hadgem3 has NA in the 2nd time step.
dim(res)
# dat var sdate fyear time lat lon
# 2 1 1 1 3 61 51
res[,1,1,1,,10,10]
# [,1] [,2] [,3]
#[1,] 292.5023 NA 292.0899
#[2,] 293.0914 293.4608 292.8224
#=======================================================
# retrieve = FALSE; Compute()
#=======================================================
dat = list(list(name = 'hadgem3', path = path_hadgem3),
list(name = 'mpi_esm', path = path_mpi_esm)),
fyear = list(list(name = 'hadgem3', fyear = fyear_hadgem3),
list(name = 'mpi_esm', fyear = fyear_mpi_esm)),
time = list(list(name = 'hadgem3', time = time_hadgem3),
list(name = 'mpi_esm', time = time_mpi_esm)),
lat = values(list(-30, 30)),
lon = values(list(0, 50)),
transform = CDORemapper,
transform_params = list(grid = 'r360x181',
method = 'conservative',
crop = c(0, 50, -30, 30)),
transform_vars = c('lat', 'lon'),
return_vars = list(lat = NULL, lon = NULL, time = 'sdate'),
# $ hadgem3: language Start ...
# ..- attr(*, "Dimensions") ...
# $ mpi_esm: language Start ...
# ..- attr(*, "Dimensions") ...
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# ..
str(attr(res[[1]], 'na_pos_record'))
#List of 1
# $ hadgem3:List of 7
# ..$ dat : num 0
# ..$ var : num 0
# ..$ sdate: num 0
# ..$ fyear: num 0
# ..$ time : int 2
# ..$ lat : num 0
# ..$ lon : num 0
str(attr(res[[2]], 'na_pos_record'))
#List of 1
# $ mpi_esm:List of 7
# ..$ dat : num 0
# ..$ var : num 0
# ..$ sdate: num 0
# ..$ fyear: num 0
# ..$ time : num 0
# ..$ lat : num 0
# ..$ lon : num 0
func_sum <- function(x, y) {
return(x + y)
}
# 'time' and 'fyear' can also be the operational dimension
step <- Step(func_sum, target_dims = list(hadgem3 = c('var'),
mpi_esm = c('var')),
output_dims = c('var'))
wf <- AddStep(res, step)
# Option 1: run locally
output <- Compute(wf, chunks = list(time = 2))
# Option 2: run on Nord3
#-------------------user-defined---------------------
queue_host <- 'nord3'
temp_dir <- '/gpfs/scratch/bsc32/bsc32<id>/startR_hpc/'
ecflow_suite_dir <- '/home/Earth/<use_id>/startR_local/'
#----------------------------------------------------
threads_load = 2,
threads_compute = 4,
cluster = list(queue_host = queue_host,
queue_type = 'lsf',
temp_dir = temp_dir,
cores_per_job = 2,
job_wallclock = '05:00',
max_jobs = 4,
extra_queue_params = list('#BSUB -q bsc_es'),
bidirectional = FALSE,
polling_period = 10
),
ecflow_suite_dir = ecflow_suite_dir,
wait = TRUE
# var time dat sdate fyear lat lon
# 1 3 1 1 1 61 51
output$output1[1, , 1, 1, 1, 1, 1]
[1] 584.9990 NA 587.8201