ex1_3_attr_loadin.R 5.95 KB
Newer Older
#---------------------------------------------------------------------
# This usecase shows you how to load experimental and observational data in a 
# consistent way.

# First, load the experimental data. Then, use the time attributes of experimental data to define the selectors for observational data.

# You can see how to use parameter '*_across', 'merge_across_dims', 'merge_across_dims_narm',
# and 'split_multiselected_dims' to create the same dimension structure.
#---------------------------------------------------------------------

# experimental data
# May to December 1994, monthly file with 6-hourly frequency
aho's avatar
aho committed
  repos <- paste0('/esarchive/exp/ecmwf/system4_m1/6hourly/',
                  '$var$/$var$_$sdate$.nc')
  sdate <- sapply(1994, function(x) paste0(x, sprintf('%02d', 5:12), '01'))

aho's avatar
aho committed
  system4 <- Start(dat = repos,
                   var = 'tas',
aho's avatar
aho committed
                   time = indices(seq(1, 124, 4)),  #first time step per day
                   ensemble = 'all',
                   latitude = indices(1:10),
                   longitude = indices(1:10),
                   return_vars = list(latitude = NULL,
                                      longitude = NULL,
                                      time = c('sdate')))

#-------- Check exp data -----------
 attr(system4, 'Dimensions')
#      dat       var     sdate      time  ensemble  latitude longitude 
#        1         1         8        31        51        10        10 
#-----------------------------------

# ------- retrieve the attributes for obs load-in ----------
  dates <- attr(system4, 'Variables')$common$time
aho's avatar
aho committed
#sdate  time 
#    8    31 

# NOTE: Even though June, September, and November only have 30 days, it reads
#       31 days for each month. Therefore, the last day of these months is the
#       first day of the following month. No NA in this dates array.

# Manually retrieve the observation dates in the required format 
aho's avatar
aho committed
  dates_file <- sort(unique(gsub('-', '', sapply(as.character(dates),
  substr, 1, 7))))
aho's avatar
aho committed
#[1] "199405" "199406" "199407" "199408" "199409" "199410" "199411" "199412"
# -----------------------------------------------------------
  
# observational data
# May to December 1994, monthly file with 6-hourly frequency
aho's avatar
aho committed
  repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/6hourly/',
                      '$var$/$var$_$file_date$.nc')
  erai <- Start(dat = repos_obs,
                var = 'tas',
                file_date = dates_file,
                time = values(dates), 
                latitude = indices(1:10),
                longitude = indices(1:10),
                time_var = 'time',
                #because time is assigned by 'values', set the tolerance to avoid too distinct match
                time_tolerance = as.difftime(1, units = 'hours'), 
                #time values are across all the files
                time_across = 'file_date',  
                #combine time and file_date dims 
                merge_across_dims = TRUE,
                #exclude the additional NAs generated by merge_across_dims
                merge_across_dims_narm = TRUE,
                #split time dim, because it is two-dimensional
                split_multiselected_dims = TRUE,
                return_vars = list(latitude = NULL,
                                   longitude = NULL,
                                   time = 'file_date'),
                retrieve = TRUE)

# NOTE: 'merge_across_dims_narm = TRUE' is necessary because the observational 
#       data have unequal time length of 30-day and 31-day months.
#       If the NAs are not removed, unwanted NAs will exist and make the
#       values misplaced in the array.
aho's avatar
aho committed

#------- Check erai -----------
aho's avatar
aho committed
#      dat       var     sdate      time  latitude longitude 
#        1         1         8        31        10        10 

erai[1, 1, 1, , 1, 1]
# [1] 255.0120 256.8095 254.3654 254.6059 257.0551 255.5087 256.8167 257.9717
# [9] 258.7491 259.2942 259.6682 260.7215 260.0988 261.2605 263.3590 265.6683
#[17] 262.4813 262.6136 263.0591 262.8377 261.7276 263.9910 264.7755 266.0213
#[25] 268.5927 267.8699 268.9210 269.4702 267.6735 267.9255 268.2216

erai[1, 1, , 1, 2, 2]
#[1] 254.5793 269.6221 274.5021 274.0269 269.5855 253.7458 243.9750 244.2415

# NOTE: You will see that the observation array is the same as experiment one
#       that the last day in 30-day months are the first day of the following
#       month.
erai[1, 1, 3, 1, 1, 1]  # 1st March
#[1] 274.6019
erai[1, 1, 2, 31, 1, 1]  # 1st March also, since June only has 30 days
#[1] 274.6019
#------------------------------

# The experimental and observational data are comparable with same structure.

aho's avatar
aho committed
#---------Check time attributes--------
dim(attr(erai, 'Variables')$common$time)
#file_date      time 
#        8        31 
attr(erai, 'Variables')$common$time[1, ]
# [1] "1994-05-01 UTC" "1994-05-02 UTC" "1994-05-03 UTC" "1994-05-04 UTC"
# [5] "1994-05-05 UTC" "1994-05-06 UTC" "1994-05-07 UTC" "1994-05-08 UTC"
# [9] "1994-05-09 UTC" "1994-05-10 UTC" "1994-05-11 UTC" "1994-05-12 UTC"
#[13] "1994-05-13 UTC" "1994-05-14 UTC" "1994-05-15 UTC" "1994-05-16 UTC"
#[17] "1994-05-17 UTC" "1994-05-18 UTC" "1994-05-19 UTC" "1994-05-20 UTC"
#[21] "1994-05-21 UTC" "1994-05-22 UTC" "1994-05-23 UTC" "1994-05-24 UTC"
#[25] "1994-05-25 UTC" "1994-05-26 UTC" "1994-05-27 UTC" "1994-05-28 UTC"
#[29] "1994-05-29 UTC" "1994-05-30 UTC" "1994-05-31 UTC"
attr(erai, 'Variables')$common$time[2, ]
# [1] "1994-06-01 UTC" "1994-06-02 UTC" "1994-06-03 UTC" "1994-06-04 UTC"
# [5] "1994-06-05 UTC" "1994-06-06 UTC" "1994-06-07 UTC" "1994-06-08 UTC"
# [9] "1994-06-09 UTC" "1994-06-10 UTC" "1994-06-11 UTC" "1994-06-12 UTC"
#[13] "1994-06-13 UTC" "1994-06-14 UTC" "1994-06-15 UTC" "1994-06-16 UTC"
#[17] "1994-06-17 UTC" "1994-06-18 UTC" "1994-06-19 UTC" "1994-06-20 UTC"
#[21] "1994-06-21 UTC" "1994-06-22 UTC" "1994-06-23 UTC" "1994-06-24 UTC"
#[25] "1994-06-25 UTC" "1994-06-26 UTC" "1994-06-27 UTC" "1994-06-28 UTC"
#[29] "1994-06-29 UTC" "1994-06-30 UTC" NA