Newer
Older
#---------------------------------------------------------------------
# This script tells you how to load experimental and observational data in a
# consistent way, facilating the following comparison. We use the attributes of
# the experimental data to define the selectors of obs Start() call, so they
# can have the same dimension structure.
# Spatial dimensions:
# The exp and obs data happen to have the same spatial resolution (256x512) and
# the grids are not shifted, so we don't need to regrid them. However, their latitude
# orders are opposite. exp has ascending order while obs has descending order.
# To make them consistent, we need to use "_reorder" parameter. In fact, it is
# always recommended using "lat_reorder" and "lon_reorder" to ensure you get
# the expected latitude and longitude.
# Temporal dimensions:
# The exp and obs files have different date/time structure. exp has one file per year and
# each file has 12 time steps. obs has one file per month and each file has 1 time step.
# To shape the obs array as exp, we need to use the time attribute of exp to define
# the date/time selector of obs. You can see how to use parameter '*_across', 'merge_across_dims',
# and 'split_multiselected_dims' to achieve the goal.
#---------------------------------------------------------------------
library(startR)
# exp
repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/',
'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/',
'$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_$sdate$01-$sdate$12.nc')
exp <- Start(dat = repos_exp,
var = 'tas',
sdate = as.character(c(2005:2008)),
time = indices(1:3),
lat = 'all',
lon_reorder = CircularSort(0, 360),
synonims = list(lat = c('lat', 'latitude'),
lon = c('lon', 'longitude')),
return_vars = list(lon = NULL,
lat = NULL,
time = 'sdate'),
retrieve = FALSE)
# Retrieve attributes for observational data retrieval.
## Because latitude order in experiment is [-90, 90] but in observation is [90, -90],
## latitude values need to be retrieved and used below.
lats <- attr(exp, 'Variables')$common$lat
lons <- attr(exp, 'Variables')$common$lon
## The 'time' attribute is a two-dim array
dates <- attr(exp, 'Variables')$common$time
dates
# [1] "2005-01-16 12:00:00 UTC" "2006-01-16 12:00:00 UTC"
# [3] "2007-01-16 12:00:00 UTC" "2008-01-16 12:00:00 UTC"
# [5] "2005-02-15 00:00:00 UTC" "2006-02-15 00:00:00 UTC"
# [7] "2007-02-15 00:00:00 UTC" "2008-02-15 12:00:00 UTC"
# [9] "2005-03-16 12:00:00 UTC" "2006-03-16 12:00:00 UTC"
#[11] "2007-03-16 12:00:00 UTC" "2008-03-16 12:00:00 UTC"
#-------------------------------------------
# obs
# 1. For lat, use the experiment attribute or reversed indices. For lon, it is not necessary
# because their lons are identical, but either way works.
# 2. For dimension 'date', it is a vector involving the 3 months (ftime) of the four years (sdate).
# 3. Dimension 'time' is assigned by the matrix, so we can split it into 'sdate' and 'time'
# by 'split_multiselected_dims'.
# 4. Because 'time' is actually across all the files, so we need to specify 'time_across'.
# Then, use 'merge_across_dims' to make dimension 'date' disappears.
# At this moment, the dimension is 'time = 12'.
# 5. However, we want to seperate year and month (which are 'sdate' and 'ftime' in
# experimental data). So we use 'split_multiselected_dims' to split 'time' into the two dimensions.
repos_obs <- '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_f6h/$var$_$date$.nc'
obs <- Start(dat = repos_obs,
var = 'tas',
date = unique(format(dates, '%Y%m')),
time = values(dates), #dim: [sdate = 4, time = 3]
lat = 'all',
lat_reorder = Sort(),
lon = 'all',
lon_reorder = CircularSort(0, 360),
time_across = 'date',
merge_across_dims = TRUE,
split_multiselected_dims = TRUE,
synonims = list(lat = c('lat', 'latitude'),
lon = c('lon', 'longitude')),
return_vars = list(lon = NULL,
lat = NULL,
time = 'date'),
retrieve = FALSE)
#====================================================
# Check the attributes. They should be all identical.
#====================================================
##-----dimension-----
print(attr(exp, 'Dimensions'))
# dat var sdate time lat lon
# 1 1 4 3 256 512
print(attr(obs, 'Dimensions'))
# dat var sdate time lat lon
# 1 1 4 3 256 512
##-----time-----
## They're not identical but the years and months are. See below for more details.
print(attr(exp, 'Variables')$common$time)
# [1] "2005-01-16 12:00:00 UTC" "2006-01-16 12:00:00 UTC"
# [3] "2007-01-16 12:00:00 UTC" "2008-01-16 12:00:00 UTC"
# [5] "2005-02-15 00:00:00 UTC" "2006-02-15 00:00:00 UTC"
# [7] "2007-02-15 00:00:00 UTC" "2008-02-15 12:00:00 UTC"
# [9] "2005-03-16 12:00:00 UTC" "2006-03-16 12:00:00 UTC"
#[11] "2007-03-16 12:00:00 UTC" "2008-03-16 12:00:00 UTC"
print(attr(obs, 'Variables')$common$time)
# [1] "2005-01-31 18:00:00 UTC" "2006-01-31 18:00:00 UTC"
# [3] "2007-01-31 18:00:00 UTC" "2008-01-31 18:00:00 UTC"
# [5] "2005-02-28 18:00:00 UTC" "2006-02-28 18:00:00 UTC"
# [7] "2007-02-28 18:00:00 UTC" "2008-02-29 18:00:00 UTC"
# [9] "2005-03-31 18:00:00 UTC" "2006-03-31 18:00:00 UTC"
#[11] "2007-03-31 18:00:00 UTC" "2008-03-31 18:00:00 UTC"
##-----lat-----
print(attr(exp, 'Variables')$common$lat[1:3])
#[1] -89.46282 -88.76695 -88.06697
print(attr(exp, 'Variables')$common$lat[256])
#[1] 89.46282
print(attr(obs, 'Variables')$common$lat[1:3])
#[1] -89.46282 -88.76695 -88.06697
print(attr(obs, 'Variables')$common$lat[256])
#[1] 89.46282
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
##-----lon-----
print(attr(exp, 'Variables')$common$lon[1:3])
#[1] 0.000000 0.703125 1.406250
print(attr(obs, 'Variables')$common$lon[1:3])
#[1] 0.000000 0.703125 1.406250
#=======================
# About time attributes
#=======================
# You may notice that the date and time of exp and obs are not the same.
# In this case, the data are monthly data, so only the years and months matter.
# The thing worth noticing is that the actual time values of obs are half month different
# from the values we assigned. For example, the first time from exp is "2005-01-16 12:00:00 UTC",
# and the obs time we get is "2005-01-31 18:00:00 UTC".
# If the provided selector is value, Start() looks for the closest value in the data.
# So for "2005-01-16 12:00:00 UTC", the two closest obs values are "2004-12-31 18:00:00 UTC" and
# "2005-01-31 18:00:00 UTC", and the later one is the closest and happen to be the desired one.
# It's fortunate that in this case, all the provided values are closer to the values we want.
#----- 1. Manually adjust the values -----
# It is always necessary to check the data attributes before and after data retrieval.
# If the provided exp values are quite in the middle of two values in obs, we can adjust a bit to
# make exp values closer to the desired obs values.
dates_adjust <- dates + 86400*15
dates_adjust
# [1] "2005-01-31 12:00:00 UTC" "2006-01-31 12:00:00 UTC"
# [3] "2007-01-31 12:00:00 UTC" "2008-01-31 12:00:00 UTC"
# [5] "2005-03-02 00:00:00 UTC" "2006-03-02 00:00:00 UTC"
# [7] "2007-03-02 00:00:00 UTC" "2008-03-01 12:00:00 UTC"
# [9] "2005-03-31 12:00:00 UTC" "2006-03-31 12:00:00 UTC"
#[11] "2007-03-31 12:00:00 UTC" "2008-03-31 12:00:00 UTC"
obs2 <- Start(dat = repos_obs,
var = 'tas',
date = unique(format(dates, '%Y%m')),
time = values(dates_adjust), # use the adjust ones
lat = 'all',
lat_reorder = Sort(),
lon = 'all',
lon_reorder = CircularSort(0, 360),
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
time_across = 'date',
merge_across_dims = TRUE,
split_multiselected_dims = TRUE,
synonims = list(lat = c('lat', 'latitude'),
lon = c('lon', 'longitude')),
return_vars = list(lon = NULL,
lat = NULL,
time = 'date'),
retrieve = FALSE)
# The time should be the same as obs above.
print(attr(obs2, 'Variables')$common$time)
# [1] "2005-01-31 18:00:00 UTC" "2006-01-31 18:00:00 UTC"
# [3] "2007-01-31 18:00:00 UTC" "2008-01-31 18:00:00 UTC"
# [5] "2005-02-28 18:00:00 UTC" "2006-02-28 18:00:00 UTC"
# [7] "2007-02-28 18:00:00 UTC" "2008-02-29 18:00:00 UTC"
# [9] "2005-03-31 18:00:00 UTC" "2006-03-31 18:00:00 UTC"
#[11] "2007-03-31 18:00:00 UTC" "2008-03-31 18:00:00 UTC"
#----- 2. Set the tolerance -----
# Sometimes, it may be useful to set the tolerance. If the provided values are too much different
# from the values in obs, Start() returns an error directly (if none of the data found) or returns
# incorrect time attributes.
obs3 <- Start(dat = repos_obs,
var = 'tas',
date = unique(format(dates, '%Y%m')),
time = values(dates),
lat = 'all',
lat_reorder = Sort(),
lon = 'all',
lon_reorder = CircularSort(0, 360),
time_across = 'date',
time_tolerance = as.difftime(15, units = 'days'),
merge_across_dims = TRUE,
split_multiselected_dims = TRUE,
synonims = list(lat = c('lat', 'latitude'),
lon = c('lon', 'longitude')),
return_vars = list(lon = NULL,
lat = NULL,
time = 'date'),
retrieve = FALSE)
# We lose many data because there are no data within 15 days from the provided time values.
print(attr(obs3, 'Variables')$common$time)
#[1] "2005-02-28 18:00:00 UTC" "2006-02-28 18:00:00 UTC"
#[3] "2007-02-28 18:00:00 UTC" "2008-02-29 18:00:00 UTC"