Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#------------------------------------------------------
# This use case shows the basic usage of multiStart(), the wrapper of Start()
# for multiple datasets retrieval that have different selector requirement.
# The usage is almost the same as Start(). To assign different selectors for
# each dataset, use the same format as the pattern_dim: a list of lists, first
# item 'name' is the dataset name and the second item '<dim_name>' is the selector.
#
# This use case wants to put HadGEM3 and MPI-ESM datasets together in one call.
# The major differences between them are the spatial grids and calendar type.
# HadGEM3 uses 360_day calendar and MPI-ESM uses standard calendar. For the grid
# difference, the 'tramsform' parameters can deal with it; while for the calendar
# difference, Start() has difficulty align the time steps. We cannot assign NAs
# as the selector in Start(), and Start() coarces the values to the nearest found
# values, producing the hidden errors.
#
# Therefore, we use multiStart() to insert NAs manually. Three days, 30th Dec.,
# 31st Dec., and 1st Jan. are requested. HadGEM3 with 360_day calendar doesn't
# have 31st Dec., so we need to insert one NA at the 2nd time step. Be sure that
# the length (NA included) of two time selectors are equal.
#
# Here are some limitations for now:
# 1. The selector for each dataset can only be indices(), not values() or 'all'.
# If the selector is the common one (e.g., 'sdate', 'lat', and 'lon' in this
# use case), then it follows the rule of Start().
# 2. The reshape options are not all available. See ex1_13 for details.
# 3. The checker of multiStart() is not comprehensive. Please carefully examine
# the results you get.
#------------------------------------------------------
#=======================================================
# Define the paths and selectors
#=======================================================
# Define paths
# HadGEM3 daily data (360_day)
path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/',
'cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/',
'dcppA-hindcast/r1i1p1f2/day/$var$/gn/v20200101/',
'$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$fyear$.nc')
## tasmax_day_HadGEM3-GC31-MM_dcppA-hindcast_s2000-r1i1p1f2_gn_20001101-20101230.nc
# MPI-ESM1.2-HR (standard)
path_mpi_esm <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/mpi-esm1-2-hr/',
'cmip6-dcppA-hindcast_i1p1/DCPP/MPI-M/MPI-ESM1-2-HR/',
'dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200101/',
'$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$fyear$.nc')
## tasmax_day_MPI-ESM1-2-HR_dcppA-hindcast_s2000-r1i1p1f1_gn_20001101-20101231.nc
# Define selectors for each dataset
sdate <- '2000'
fyear_hadgem3 <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1230')
fyear_mpi_esm <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1231')
# To align the time steps, insert an NA in hadgem3
time_hadgem3 <- indices(c(60, NA, 61)) #12/30, NA, 1/1
time_mpi_esm <- indices(60:62) #12/30, 12/31, 1/1
#=======================================================
# retrieve = TRUE; interpolation
#=======================================================
res <- multiStart(
dat = list(list(name = 'hadgem3', path = path_hadgem3),
list(name = 'mpi_esm', path = path_mpi_esm)),
var = 'tasmax',
sdate = '2000',
fyear = list(list(name = 'hadgem3', fyear = fyear_hadgem3),
list(name = 'mpi_esm', fyear = fyear_mpi_esm)),
time = list(list(name = 'hadgem3', time = time_hadgem3),
list(name = 'mpi_esm', time = time_mpi_esm)),
lat = values(list(-30, 30)),
lon = values(list(0, 50)),
transform = CDORemapper,
transform_params = list(grid = 'r360x181',
method = 'conservative',
crop = c(0, 50, -30, 30)),
transform_vars = c('lat', 'lon'),
return_vars = list(lat = NULL, lon = NULL, time = 'sdate'),
retrieve = TRUE)
# Check the returned values. hadgem3 has NA in the 2nd time step.
dim(res)
# dat var sdate fyear time lat lon
# 2 1 1 1 3 61 51
res[,1,1,1,,10,10]
# [,1] [,2] [,3]
#[1,] 292.5023 NA 292.0899
#[2,] 293.0914 293.4608 292.8224
#=======================================================
# retrieve = FALSE; Compute()
#=======================================================
#NOTE: To show a clearer concept of multiStart usage, the data are not interpolated
# here, but they should be. If interpolated, the chunking dimensions cannot be
# lat and lon (or the results contain many NAs.)
res <- multiStart(
dat = list(list(name = 'hadgem3', path = path_hadgem3),
list(name = 'mpi_esm', path = path_mpi_esm)),
var = 'tasmax', sdate = '2000',
fyear = list(list(name = 'hadgem3', fyear = fyear_hadgem3),
list(name = 'mpi_esm', fyear = fyear_mpi_esm)),
time = list(list(name = 'hadgem3', time = time_hadgem3),
list(name = 'mpi_esm', time = time_mpi_esm)),
lat = indices(3:5), lon = indices(1),
return_vars = list(lat = 'dat', lon = 'dat', time = 'dat'),
retrieve = FALSE)
str(res)
#List of 2
# $ hadgem3:
# ..
# ..
# $ mpi_esm:
# ..
# ..
str(attr(res[[1]], 'na_pos_record'))
#List of 1
# $ hadgem3:List of 7
# ..$ dat : num 0
# ..$ var : num 0
# ..$ sdate: num 0
# ..$ fyear: num 0
# ..$ time : int 2
# ..$ lat : num 0
# ..$ lon : num 0
str(attr(res[[2]], 'na_pos_record'))
#List of 1
# $ mpi_esm:List of 7
# ..$ dat : num 0
# ..$ var : num 0
# ..$ sdate: num 0
# ..$ fyear: num 0
# ..$ time : num 0
# ..$ lat : num 0
# ..$ lon : num 0
func_sum <- function(x, y) {
return(x + y)
}
# 'time' and 'fyear' can also be the operational dimension
step <- Step(func_sum, target_dims = list(hadgem3 = c('var', 'lon'),
mpi_esm = c('var', 'lon')),
output_dims = c('var', 'lon'))
wf <- AddStep(res, step)
# Option 1: run locally
output <- Compute(wf, chunks = list(lat = 2))
output <- Compute(wf, chunks = list(lat = 2, time = 2))
# Option 2: run on Nord3
#-------------------user-defined---------------------
queue_host <- 'nord1'
temp_dir <- '/gpfs/scratch/bsc32/bsc32734/startR_hpc/'
ecflow_suite_dir <- '/home/Earth/aho/startR_local/'
#----------------------------------------------------
output <- Compute(wf,
chunks = list(lat = 2, time = 2),
threads_load = 2,
threads_compute = 4,
cluster = list(queue_host = queue_host,
queue_type = 'lsf',
temp_dir = temp_dir,
cores_per_job = 2,
job_wallclock = '05:00',
max_jobs = 4,
extra_queue_params = list('#BSUB -q bsc_es'),
bidirectional = FALSE,
polling_period = 10
),
ecflow_suite_dir = ecflow_suite_dir,
wait = TRUE
)
dim(output$output1)
# var lon time dat sdate fyear lat
# 1 1 3 1 1 1 3
output$output1[1, 1, , 1, 1, 1, ]
# [,1] [,2] [,3]
#[1,] 503.0177 502.6464 503.0280
#[2,] NA NA NA
#[3,] 507.7226 507.1748 507.9795