#--------------------------------------------------------------------- # This script tells you how to load experimental and observational data in a # consistent way, facilating the following comparison. We use the attributes of # the experimental data to define the selectors of obs Start() call, so they # can have the same dimension structure. # Spatial dimensions: # The exp and obs data happen to have the same spatial resolution (256x512) and # the grids are not shifted, so we don't need to regrid them. However, their latitude # orders are opposite. exp has ascending order while obs has descending order. # To make them consistent, we need to use "_reorder" parameter. In fact, it is # always recommended using "lat_reorder" and "lon_reorder" to ensure you get # the expected latitude and longitude. # Temporal dimensions: # The exp and obs files have different date/time structure. exp has one file per year and # each file has 12 time steps. obs has one file per month and each file has 1 time step. # To shape the obs array as exp, we need to use the time attribute of exp to define # the date/time selector of obs. You can see how to use parameter '*_across', 'merge_across_dims', # and 'split_multiselected_dims' to achieve the goal. #--------------------------------------------------------------------- library(startR) # exp repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_$sdate$01-$sdate$12.nc') exp <- Start(dat = repos_exp, var = 'tas', sdate = as.character(c(2005:2008)), time = indices(1:3), lat = 'all', lat_reorder = Sort(), lon = 'all', lon_reorder = CircularSort(0, 360), synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), return_vars = list(lon = NULL, lat = NULL, time = 'sdate'), retrieve = FALSE) # Retrieve attributes for observational data retrieval. ## Because latitude order in experiment is [-90, 90] but in observation is [90, -90], ## latitude values need to be retrieved and used below. lats <- attr(exp, 'Variables')$common$lat lons <- attr(exp, 'Variables')$common$lon ## The 'time' attribute is a two-dim array dates <- attr(exp, 'Variables')$common$time dim(dates) #sdate time # 4 3 dates # [1] "2005-01-16 12:00:00 UTC" "2006-01-16 12:00:00 UTC" # [3] "2007-01-16 12:00:00 UTC" "2008-01-16 12:00:00 UTC" # [5] "2005-02-15 00:00:00 UTC" "2006-02-15 00:00:00 UTC" # [7] "2007-02-15 00:00:00 UTC" "2008-02-15 12:00:00 UTC" # [9] "2005-03-16 12:00:00 UTC" "2006-03-16 12:00:00 UTC" #[11] "2007-03-16 12:00:00 UTC" "2008-03-16 12:00:00 UTC" #------------------------------------------- # obs # 1. For lat, use the experiment attribute or reversed indices. For lon, it is not necessary # because their lons are identical, but either way works. # 2. For dimension 'date', it is a vector involving the 3 months (ftime) of the four years (sdate). # 3. Dimension 'time' is assigned by the matrix, so we can split it into 'sdate' and 'time' # by 'split_multiselected_dims'. # 4. Because 'time' is actually across all the files, so we need to specify 'time_across'. # Then, use 'merge_across_dims' to make dimension 'date' disappears. # At this moment, the dimension is 'time = 12'. # 5. However, we want to seperate year and month (which are 'sdate' and 'ftime' in # experimental data). So we use 'split_multiselected_dims' to split 'time' into the two dimensions. repos_obs <- '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_f6h/$var$_$date$.nc' obs <- Start(dat = repos_obs, var = 'tas', date = unique(format(dates, '%Y%m')), time = values(dates), #dim: [sdate = 4, time = 3] lat = 'all', lat_reorder = Sort(), lon = 'all', lon_reorder = CircularSort(0, 360), time_across = 'date', merge_across_dims = TRUE, split_multiselected_dims = TRUE, synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), return_vars = list(lon = NULL, lat = NULL, time = 'date'), retrieve = FALSE) #==================================================== # Check the attributes. They should be all identical. #==================================================== ##-----dimension----- print(attr(exp, 'Dimensions')) # dat var sdate time lat lon # 1 1 4 3 256 512 print(attr(obs, 'Dimensions')) # dat var sdate time lat lon # 1 1 4 3 256 512 ##-----time----- ## They're not identical but the years and months are. See below for more details. print(attr(exp, 'Variables')$common$time) # [1] "2005-01-16 12:00:00 UTC" "2006-01-16 12:00:00 UTC" # [3] "2007-01-16 12:00:00 UTC" "2008-01-16 12:00:00 UTC" # [5] "2005-02-15 00:00:00 UTC" "2006-02-15 00:00:00 UTC" # [7] "2007-02-15 00:00:00 UTC" "2008-02-15 12:00:00 UTC" # [9] "2005-03-16 12:00:00 UTC" "2006-03-16 12:00:00 UTC" #[11] "2007-03-16 12:00:00 UTC" "2008-03-16 12:00:00 UTC" print(attr(obs, 'Variables')$common$time) # [1] "2005-01-31 18:00:00 UTC" "2006-01-31 18:00:00 UTC" # [3] "2007-01-31 18:00:00 UTC" "2008-01-31 18:00:00 UTC" # [5] "2005-02-28 18:00:00 UTC" "2006-02-28 18:00:00 UTC" # [7] "2007-02-28 18:00:00 UTC" "2008-02-29 18:00:00 UTC" # [9] "2005-03-31 18:00:00 UTC" "2006-03-31 18:00:00 UTC" #[11] "2007-03-31 18:00:00 UTC" "2008-03-31 18:00:00 UTC" ##-----lat----- print(attr(exp, 'Variables')$common$lat[1:3]) #[1] -89.46282 -88.76695 -88.06697 print(attr(exp, 'Variables')$common$lat[256]) #[1] 89.46282 print(attr(obs, 'Variables')$common$lat[1:3]) #[1] -89.46282 -88.76695 -88.06697 print(attr(obs, 'Variables')$common$lat[256]) #[1] 89.46282 ##-----lon----- print(attr(exp, 'Variables')$common$lon[1:3]) #[1] 0.000000 0.703125 1.406250 print(attr(obs, 'Variables')$common$lon[1:3]) #[1] 0.000000 0.703125 1.406250 #======================= # About time attributes #======================= # You may notice that the date and time of exp and obs are not the same. # In this case, the data are monthly data, so only the years and months matter. # The thing worth noticing is that the actual time values of obs are half month different # from the values we assigned. For example, the first time from exp is "2005-01-16 12:00:00 UTC", # and the obs time we get is "2005-01-31 18:00:00 UTC". # If the provided selector is value, Start() looks for the closest value in the data. # So for "2005-01-16 12:00:00 UTC", the two closest obs values are "2004-12-31 18:00:00 UTC" and # "2005-01-31 18:00:00 UTC", and the later one is the closest and happen to be the desired one. # It's fortunate that in this case, all the provided values are closer to the values we want. #----- 1. Manually adjust the values ----- # It is always necessary to check the data attributes before and after data retrieval. # If the provided exp values are quite in the middle of two values in obs, we can adjust a bit to # make exp values closer to the desired obs values. dates_adjust <- dates + 86400*15 dates_adjust # [1] "2005-01-31 12:00:00 UTC" "2006-01-31 12:00:00 UTC" # [3] "2007-01-31 12:00:00 UTC" "2008-01-31 12:00:00 UTC" # [5] "2005-03-02 00:00:00 UTC" "2006-03-02 00:00:00 UTC" # [7] "2007-03-02 00:00:00 UTC" "2008-03-01 12:00:00 UTC" # [9] "2005-03-31 12:00:00 UTC" "2006-03-31 12:00:00 UTC" #[11] "2007-03-31 12:00:00 UTC" "2008-03-31 12:00:00 UTC" obs2 <- Start(dat = repos_obs, var = 'tas', date = unique(format(dates, '%Y%m')), time = values(dates_adjust), # use the adjust ones lat = 'all', lat_reorder = Sort(), lon = 'all', lon_reorder = CircularSort(0, 360), time_across = 'date', merge_across_dims = TRUE, split_multiselected_dims = TRUE, synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), return_vars = list(lon = NULL, lat = NULL, time = 'date'), retrieve = FALSE) # The time should be the same as obs above. print(attr(obs2, 'Variables')$common$time) # [1] "2005-01-31 18:00:00 UTC" "2006-01-31 18:00:00 UTC" # [3] "2007-01-31 18:00:00 UTC" "2008-01-31 18:00:00 UTC" # [5] "2005-02-28 18:00:00 UTC" "2006-02-28 18:00:00 UTC" # [7] "2007-02-28 18:00:00 UTC" "2008-02-29 18:00:00 UTC" # [9] "2005-03-31 18:00:00 UTC" "2006-03-31 18:00:00 UTC" #[11] "2007-03-31 18:00:00 UTC" "2008-03-31 18:00:00 UTC" #----- 2. Set the tolerance ----- # Sometimes, it may be useful to set the tolerance. If the provided values are too much different # from the values in obs, Start() returns an error directly (if none of the data found) or returns # incorrect time attributes. obs3 <- Start(dat = repos_obs, var = 'tas', date = unique(format(dates, '%Y%m')), time = values(dates), lat = 'all', lat_reorder = Sort(), lon = 'all', lon_reorder = CircularSort(0, 360), time_across = 'date', time_tolerance = as.difftime(15, units = 'days'), merge_across_dims = TRUE, split_multiselected_dims = TRUE, synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), return_vars = list(lon = NULL, lat = NULL, time = 'date'), retrieve = FALSE) # We lose many data because there are no data within 15 days from the provided time values. print(attr(obs3, 'Variables')$common$time) #[1] "2005-02-28 18:00:00 UTC" "2006-02-28 18:00:00 UTC" #[3] "2007-02-28 18:00:00 UTC" "2008-02-29 18:00:00 UTC" # If 'time_tolerance' is changed to "as.difftime(1, units = 'days')", an error shows: # Selectors do not match any of the possible values for the dimension 'time'.