From f2781fa00f0673eff0590a66051a68e335257c33 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 11 May 2020 14:06:34 +0200 Subject: [PATCH] Improve the explanation of usecase ex1_7 and FAQ how-to-17. --- inst/doc/faq.md | 21 +++++++++++++++++++-- inst/doc/usecase/ex1_7_split_merge.R | 28 +++++++++++++++++++--------- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 66df1e1..bcb937a 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -588,7 +588,7 @@ List of 1 ### 17. Use parameter 'split_multiselected_dims' in Start() The selectors can be not only vectors, but also multidimensional array. For instance, the 'time' dimension -can be assigned by a two-dimensional array `[sdate = 12, time = 31]`, which is the daily data of 12 months. +can be assigned by a two-dimensional array `[sdate = 12, time = 31]`, which is 31 timesteps for 12 start dates. You may want to have both 'sdate' and 'time' in the output dimension, even though 'sdate' is not explicitly specified in Start(). The parameter 'split_multiselected_dims' is for this goal. It is common in the case that uses experimental data attributes to get the corresponding observational data. @@ -636,7 +636,7 @@ It is important to check if **the order of file_date is in line with dates dimen Regardless 'time', which is explicitly specified in Start(), the vector should list 'sdate' first, then 'syear'. As you can see below, the first element '199607' is sdate = 1, and the second element '199612' is sdate = 2. If the order is wrong, you will still get a return data but with mixed dimensions. Because 'sdate' and 'syear' are only -implied in the given selectors, Start() cannot check if the order of file_date and dates are consistent or not. +implied in the given selector vector without any attributes, Start() cannot check if the order of file_date and dates are consistent or not. 
```r dates <- attr(hcst, 'Variables')$common$time @@ -649,6 +649,23 @@ file_date <- sort(unique(gsub('-', '', print(file_date) #[1] "199607" "199612" "199701" "199707" "199712" "199801" "199807" "199812" #[9] "199901" + +obs <- Start(dat = path.obs, + var = var_name, + file_date = file_date, # a vector with the information of sdate and syear + latitude = indices(1:10), + longitude = indices(1:10), + time = values(dates), # a 3-dim array (sdate, syear, time) + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + split_multiselected_dims = TRUE, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + retrieve = T) ``` diff --git a/inst/doc/usecase/ex1_7_split_merge.R b/inst/doc/usecase/ex1_7_split_merge.R index e72ccee..86a704d 100644 --- a/inst/doc/usecase/ex1_7_split_merge.R +++ b/inst/doc/usecase/ex1_7_split_merge.R @@ -1,11 +1,17 @@ #--------------------------------------------------------------------- # This usecase shows the things to be noticed when the parameters # 'split_multiselected_dims' and 'merge_across_dims' are both used. -# The problem may occur when the dimension number of the splitted selector -# is more than two. - -# If you're not familiar with the usage of these parameters, please see usecases ex1_2 and ex1_3 first, which are less complicated. +# The problem may occur when the dimension number of the splitted selector is +# more than two. If you're not familiar with the usage of these parameters, +# please see usecases ex1_2 and ex1_3 first, which are less complicated. # See FAQ How-to-#17 for more explanation. + +# In this example, hindcast data corresponding to 2 start dates ('20160704', '20161222') +# is retrieved. 
The hindcasts consist of daily values for the 47 following days +# after each start date (in this case only the first 12 days are retrieved) and +# for the 20 years of hindcast (in this case only the first 3 years are retrieved). +# Once the hindcast data is retrieved/read, the corresponding ERA5 values for each +# hindcast time and year are retrieved using the 'dates' metadata from the hindcast object. #--------------------------------------------------------------------- library(startR) @@ -18,7 +24,7 @@ path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/ hcst <- Start(dat = path.exp, var = var_name, sdate = c('20160704', '20161222'), - syear = indices(1:3), #2016:2018 + syear = indices(1:3), #1996:1998 syear_depends = 'sdate', time = indices(1:12), #4th-15th Jul; 22nd Dec-2nd Jan latitude = indices(1:10), @@ -44,11 +50,15 @@ dim(dates) #----------------------------------------------------------------------- # This two lines should NOT be used!! It is an example showing the potential # problem when using 'split_multiselected_dims' and 'merge_across_dims'. -# If 'dates' is reordered to 'syear' ahead of 'sdate', while 'file_date' below -# remains the same, the result will have mixed dimension. +# If 'dates' is reordered so that 'syear' comes ahead of 'sdate', while +# 'file_date' below remains the same, the result will have mixed dimension. +# The order of 'time', which is the name of the inner dimension `time = values(dates)` +# below, is not important. We only need to pay attention to 'sdate' and 'syear' +# because they are relevant to the required files (i.e., 'file_date' below). +# See more explanation in FAQ How-to-#17. 
library(s2dv) -dates <- Reorder(dates, c('syear','time','sdate')) +dates <- Reorder(dates, c('syear', 'sdate', 'time')) #----------------------------------------------------------------------- #----------------------------------------------------------------------- @@ -70,7 +80,7 @@ path.obs <- '/esarchive/recon/ecmwf/era5/1hourly/$var$/$var$_$file_date$.nc' obs <- Start(dat = path.obs, var = var_name, - file_date = file_date, # a vector + file_date = file_date, # a vector with the information of sdate and syear latitude = indices(1:10), longitude = indices(1:10), time = values(dates), # a 3-dim array (sdate, syear, time) -- GitLab