From 03c8de6deefb4f900e4316779e5d8e5381077645 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 7 May 2020 14:59:06 +0200 Subject: [PATCH 1/4] Create example script for startR data check. --- inst/doc/usecase/ex3_1_data_check.R | 318 ++++++++++++++++++++++++++++ 1 file changed, 318 insertions(+) create mode 100644 inst/doc/usecase/ex3_1_data_check.R diff --git a/inst/doc/usecase/ex3_1_data_check.R b/inst/doc/usecase/ex3_1_data_check.R new file mode 100644 index 0000000..9b6376b --- /dev/null +++ b/inst/doc/usecase/ex3_1_data_check.R @@ -0,0 +1,318 @@ +#------------------------------------------------------------------------- +# Since startR hasn't been fully stable with many developments going on, +# it is important to self-check if the retrieving data array is as you expect. +# This script provided several tools and tips to ensure everything is fine +# before you head to further analysis. + +# Other data retrieval tools: (1) s2dv::Load (2) ncdf4 (3) easyNCDF (4) ncview +# Tips: (a) dimensions (b) attributes (c) summary (d) individual values +# Extra examination: (5) Compute() +#------------------------------------------------------------------------- + +#========== (1) s2dv::Load =========== +# If you used Load() before, it is convenient to compare with its output directly. +# It is useful when transformation is applied. Other methods provided here +# can only show original data. Also, it helps check the retrieved data structure, +# not only the values. + +# startR +library(startR) + +obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h/$var$_$sdate$.nc' +var_name <- 'sfcWind' +sdate <- '201811' +lons.min <- 10 +lons.max <- 20 +lats.min <- 0 +lats.max <- 10 + +obs1 <- Start(dat = obs_path, + var = var_name, + sdate = sdate, + time = 'all', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = T), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative', + crop = c(lons.min, lons.max, lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(time = NULL, + latitude = 'dat', + longitude = 'dat'), + retrieve = T) + +# Load() +library(s2dv) +pobs <- paste0('/esarchive/recon/ecmwf/era5/monthly_mean/', + '$VAR_NAME$_f1h/$VAR_NAME$_$YEAR$$MONTH$.nc') + +obs2 <- Load(var = var_name, + obs = list(list(path = pobs)), + sdates = '20181101', + leadtimemin = 1, + leadtimemax = 1, + output = 'lonlat', + grid = 'r360x181', + method = 'conservative', + storefreq = 'monthly', + latmin = lats.min, + latmax = lats.max, + lonmin = lons.min, + lonmax = lons.max) + +# Check +## (a) dimensions +dim(obs1) +# dat var sdate time latitude longitude +# 1 1 1 1 11 11 +dim(obs2$obs) +# dataset member sdate ftime lat lon +# 1 1 1 1 11 11 + +## (b) attributes +attr(obs1, 'Variables')$dat1$longitude +# [1] 10 11 12 13 14 15 16 17 18 19 20 +as.vector(obs2$lon) +# [1] 10 11 12 13 14 15 16 17 18 19 20 +attr(obs1, 'Variables')$dat1$latitude +# [1] 10 9 8 7 6 5 4 3 2 1 0 +as.vector(obs2$lat) +# [1] 10 9 8 7 6 5 4 3 2 1 0 + +## (c) summary +diff <- drop(obs1) - drop(obs2$obs) +range(diff) +#[1] -4.965553e-05 4.929314e-05 +summary(obs1) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 1.090 1.313 1.479 1.636 1.936 2.803 +summary(obs2$obs) +# Min. 1st Qu. Median Mean 3rd Qu. Max. 
+# 1.090 1.313 1.479 1.636 1.936 2.803 + +## (d) individual values +obs1[1, 1, 1, 1, 1:3, 1:2] +# [,1] [,2] +#[1,] 2.345519 2.365326 +#[2,] 2.219121 1.958319 +#[3,] 1.840537 1.386617 +obs2$obs[1, 1, 1, 1, 1:3, 1:2] +# [,1] [,2] +#[1,] 2.3455 2.3653 +#[2,] 2.2191 1.9583 +#[3,] 1.8405 1.3866 + + +#========== (2) ncdf4 =========== +# It is resource-consumning if the whole domain is retrieved through ncdf4.. +# If doing so, use fatnodes/Power9/Nord3 interactive session for more memory space. +# Check ncdf4 documentation to learn the dimension structure first. + +# startR +library(startR) +obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h/$var$_$sdate$.nc' +var_name <- 'sfcWind' +sdate <- '201811' + +obs1 <- Start(dat = obs_path, + var = var_name, + sdate = sdate, + time = 'all', + latitude = indices(1:10), + longitude = indices(1:10), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(time = 'sdate', + latitude = 'dat', + longitude = 'dat'), + retrieve = T) + +# ncdf4 +library(ncdf4) +file <- nc_open('/esarchive/recon/ecmwf/era5/monthly_mean/sfcWind_f1h/sfcWind_201811.nc') +obs2 <- ncvar_get(file, 'sfcWind', start = c(1, 1, 1), count = c(10, 10, 1)) +nc_close(file) + +# Check +## (a) dimensions +dim(obs1) +# dat var sdate time latitude longitude +# 1 1 1 1 10 10 +dim(obs2) +#[1] 10 10 + +## (c) summary +summary(obs1) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 4.934 4.954 5.025 5.035 5.105 5.165 +summary(as.vector(obs2)) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 4.934 4.954 5.025 5.035 5.105 5.165 + +## (d) individual values +obs1[1, 1, 1, 1, , 2] +# [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 +# [9] 5.095344 5.051348 +obs2[2, ] +# [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 +# [9] 5.095344 5.051348 + + +#========== (3) easyNCDF =========== +# startR +# Use the same one as (2). + +# easyNCDF +library(easyNCDF) +file <- NcOpen('/esarchive/recon/ecmwf/era5/monthly_mean/sfcWind_f1h/sfcWind_201811.nc') + +obs2 <- NcToArray(file, vars_to_read = 'sfcWind', + dim_indices = list(longitude = c(1:10), + latitude = c(1:10), + time = c(1))) +NcClose(file) + +# Check +## (a) dimensions +dim(obs1) +# dat var sdate time latitude longitude +# 1 1 1 1 10 10 +dim(obs2) +# var longitude latitude time +# 1 10 10 1 + +## (c) summary +summary(obs1) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 4.934 4.954 5.025 5.035 5.105 5.165 +summary(as.vector(obs2)) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 4.934 4.954 5.025 5.035 5.105 5.165 + +## (d) individual values +obs1[1, 1, 1, 1, , 2] +# [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 +# [9] 5.095344 5.051348 +obs2[1, 2, , 1] +# [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 +# [9] 5.095344 5.051348 + + +#========== (4) ncview =========== +# In the terminal, type 'ncview '. Then check the values dynamically in the pop-up window. +# You can also plot the map to compare with ncview. It is more clear if you use a bigger domain (rather than the example here, which only has 10*10 points) + +s2dv::PlotEquiMap(obs1[1,1,1,1,,], + lon = attributes(obs1)$Variable$dat1$longitude, + lat = attributes(obs1)$Variable$dat1$latitude) + +#========== (5) Compute() =========== +# You can compare the result of Compute() with what you get from Start(retrieve = TRUE), or with any tool mentioned above. 
+ +# startR (retrieve = TRUE) +library(startR) + +obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h/$var$_$sdate$.nc' +var_name <- 'sfcWind' +sdate <- '201811' +lons.min <- 10 +lons.max <- 20 +lats.min <- 0 +lats.max <- 10 + +obs1 <- Start(dat = obs_path, + var = var_name, + sdate = sdate, + time = 'all', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = T), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative', + crop = c(lons.min, lons.max, lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(time = NULL, + latitude = 'dat', + longitude = 'dat'), + retrieve = T) + + +# startR (retrieve = FALSE, use the whole workflow) + +obs2 <- Start(dat = obs_path, + var = var_name, + sdate = sdate, + time = 'all', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = T), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative', + crop = c(lons.min, lons.max, lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(time = NULL, + latitude = 'dat', + longitude = 'dat'), + retrieve = F) + +func <- function(x) { + return(x) +} + +step <- Step(func, + target_dims = 'latitude', + output_dims = 'latitude') + +wf <- AddStep(obs2, step) + +res <- Compute(wf, chunks = list(time = 1))$output1 + +# Check +## (a) dimensions +dim(obs1) +# dat var sdate time latitude longitude +# 1 1 1 1 11 11 +dim(res) +# latitude dat var sdate time longitude +# 11 1 1 1 1 11 + +## (c) summary +diff <- drop(obs1) - drop(res) +range(diff) +#[1] 0 0 +summary(obs1) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 1.090 1.313 1.479 1.636 1.936 2.803 +summary(res) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 1.090 1.313 1.479 1.636 1.936 2.803 + +## (d) individual values +obs1[1, 1, 1, 1, 1:3, 1:2] +# [,1] [,2] +#[1,] 2.345519 2.365326 +#[2,] 2.219121 1.958319 +#[3,] 1.840537 1.386617 +res[1:3, 1, 1, 1, 1, 1:2] +# [,1] [,2] +#[1,] 2.345519 2.365326 +#[2,] 2.219121 1.958319 +#[3,] 1.840537 1.386617 + + -- GitLab From 2d4f3ecc05f1b380a53d96d1745496c7b35cea68 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 7 May 2020 19:16:23 +0200 Subject: [PATCH 2/4] Create .md file to replace .R file --- .../ex3_1_data_check.R => data_check.md} | 125 +++++++++++++----- 1 file changed, 92 insertions(+), 33 deletions(-) rename inst/doc/{usecase/ex3_1_data_check.R => data_check.md} (70%) diff --git a/inst/doc/usecase/ex3_1_data_check.R b/inst/doc/data_check.md similarity index 70% rename from inst/doc/usecase/ex3_1_data_check.R rename to inst/doc/data_check.md index 9b6376b..e40c9a4 100644 --- a/inst/doc/usecase/ex3_1_data_check.R +++ b/inst/doc/data_check.md @@ -1,20 +1,38 @@ -#------------------------------------------------------------------------- -# Since startR hasn't been fully stable with many developments going on, -# it is important to self-check if the retrieving data array is as you expect. -# This script provided several tools and tips to ensure everything is fine -# before you head to further analysis. 
- -# Other data retrieval tools: (1) s2dv::Load (2) ncdf4 (3) easyNCDF (4) ncview -# Tips: (a) dimensions (b) attributes (c) summary (d) individual values -# Extra examination: (5) Compute() -#------------------------------------------------------------------------- - -#========== (1) s2dv::Load =========== -# If you used Load() before, it is convenient to compare with its output directly. -# It is useful when transformation is applied. Other methods provided here -# can only show original data. Also, it helps check the retrieved data structure, -# not only the values. +# Retrieved Data Check +Since startR has not been fully stable with many developments going on, +it is important to self-check if the retrieving data array is as you expect before you go to further analysis. + +It is recommended to use small datasets as the testing sample. For example, reducing 'ensemble' dimension since it is a less-trouble one. +Shortening 'time' dimension to the length that is light-weighted but also can be representative. Take the monthly file of daily data for example, it is better to include both 30-day and 31-day months. +As for latitude and longitude, keep both the negative and positive values. +Here are some tools and tips for data comparison. + +## Index +- Data retrieval tools + 1. [s2dv::Load] + 2. [ncdf4] + 3. [easyNCDF] + 4. [ncview]() + +- Extra examination + (5) [Compute()] + +- Tips + a. [Dimensions] + b. [Attributes] + c. [Summary] + d. [Individual values] + + + +## Data retrieval tools +### (1) s2dv::Load +If you used Load() before, it is convenient to compare with its output dir +ectly. +It is useful when transformation is applied. Other methods provided here can only show original data. Also, it helps check the retrieved data structure, not only the values. + +```r # startR library(startR) @@ -108,12 +126,16 @@ obs2$obs[1, 1, 1, 1, 1:3, 1:2] #[2,] 2.2191 1.9583 #[3,] 1.8405 1.3866 +``` -#========== (2) ncdf4 =========== -# It is resource-consumning if the whole domain is retrieved through ncdf4.. -# If doing so, use fatnodes/Power9/Nord3 interactive session for more memory space. -# Check ncdf4 documentation to learn the dimension structure first. +### (2) ncdf4 +It shows original data values in netCDF files. You can get the same domain if the +interpolation is not applied in Start(), while the data structure will be different. +It is resource-consumning if the whole domain is retrieved through ncdf4. +If doing so, it is better to use fatnodes/Power9/Nord3 interactive session for more memory space. +Check ncdf4 documentation to learn the dimension structure first. +```r # startR library(startR) obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h/$var$_$sdate$.nc' @@ -124,7 +146,7 @@ obs1 <- Start(dat = obs_path, var = var_name, sdate = sdate, time = 'all', - latitude = indices(1:10), + latitude = indices(1:10), longitude = indices(1:10), synonims = list(longitude = c('lon', 'longitude'), latitude = c('lat', 'latitude')), @@ -162,9 +184,12 @@ obs1[1, 1, 1, 1, , 2] obs2[2, ] # [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 # [9] 5.095344 5.051348 +``` +### (3) easyNCDF +It is a wrapper package of ncdf4. The idea is similar to ncdf4 but with some more-friendly feature. -#========== (3) easyNCDF =========== +```r # startR # Use the same one as (2). 
@@ -172,8 +197,8 @@ obs2[2, ] library(easyNCDF) file <- NcOpen('/esarchive/recon/ecmwf/era5/monthly_mean/sfcWind_f1h/sfcWind_201811.nc') -obs2 <- NcToArray(file, vars_to_read = 'sfcWind', - dim_indices = list(longitude = c(1:10), +obs2 <- NcToArray(file, vars_to_read = 'sfcWind', + dim_indices = list(longitude = c(1:10), latitude = c(1:10), time = c(1))) NcClose(file) @@ -202,22 +227,31 @@ obs1[1, 1, 1, 1, , 2] obs2[1, 2, , 1] # [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 # [9] 5.095344 5.051348 +``` +### (4) ncview +Using ncview to check netCDF file is the easiest way. While it may be slow. +It is convenient if you want to check with the map. -#========== (4) ncview =========== -# In the terminal, type 'ncview '. Then check the values dynamically in the pop-up window. -# You can also plot the map to compare with ncview. It is more clear if you use a bigger domain (rather than the example here, which only has 10*10 points) +In the terminal, type 'ncview '. Then check the values dynamically in the pop-up window. +You can also plot the map to compare with ncview. It is more clear if you use a bigger domain (rather than the example here, which only has 10*10 points). -s2dv::PlotEquiMap(obs1[1,1,1,1,,], +```r +s2dv::PlotEquiMap(obs1[1,1,1,1,,], lon = attributes(obs1)$Variable$dat1$longitude, lat = attributes(obs1)$Variable$dat1$latitude) +``` -#========== (5) Compute() =========== -# You can compare the result of Compute() with what you get from Start(retrieve = TRUE), or with any tool mentioned above. -# startR (retrieve = TRUE) -library(startR) +## Extra examination +### (5) Compute() +Some problems hide in Compute() but not Start(). If your final goal is to use Compute() +for analysis, you can do some simple/dumb tests to ensure the data remain the same +after going through the workflow. +You can compare the result of Compute() with what you get from Start(retrieve = TRUE), or with any tool mentioned above. +```r +library(startR) obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h/$var$_$sdate$.nc' var_name <- 'sfcWind' sdate <- '201811' @@ -226,6 +260,7 @@ lons.max <- 20 lats.min <- 0 lats.max <- 10 +# startR (retrieve = TRUE) obs1 <- Start(dat = obs_path, var = var_name, sdate = sdate, @@ -249,7 +284,6 @@ obs1 <- Start(dat = obs_path, # startR (retrieve = FALSE, use the whole workflow) - obs2 <- Start(dat = obs_path, var = var_name, sdate = sdate, @@ -315,4 +349,29 @@ res[1:3, 1, 1, 1, 1, 1:2] #[2,] 2.219121 1.958319 #[3,] 1.840537 1.386617 +``` + + +## Tips +### (a) Dimensions +Dimension is the most basic thing to check if the data structure is align with +your expectation. + +### (b) Attributes +Check longitude, latitude, and time, etc. Even the dimension length is correct, +the order may be opposite or out of range. To learn how to get the proper attributes +by Start(), see [FAQ How-to-#16](#16-use-parameter-return_vars-in-start). + +### (c) Summary +After checking the data structure, you can look into the values to see if the data +are retrieved correctly and filled in the right place. First of all, summary +(mean, min, max, etc.) gives you a general view of the values. If the summary is +consistent, you know Start() finds the correct files and data position. + +### (d) Individual values +There is a chance that the dimensions are mixed, or a few data points are incorrect due to interpolation or other reasons. +By the overall summary, these mistakes may not be detected. Therefore, checking +a few data points is recommended. 
+It is better to check not only the first element in each dimension but also some points in the middle of the array.
+

--
GitLab


From 2bbb06191278114b9db1914da0e56eb041cb114e Mon Sep 17 00:00:00 2001
From: aho
Date: Thu, 7 May 2020 21:52:15 +0200
Subject: [PATCH 3/4] Fix hyperlink and index

---
 inst/doc/data_check.md | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/inst/doc/data_check.md b/inst/doc/data_check.md
index e40c9a4..0ed8b42 100644
--- a/inst/doc/data_check.md
+++ b/inst/doc/data_check.md
@@ -9,20 +9,20 @@ As for latitude and longitude, keep both the negative and positive values.
 Here are some tools and tips for data comparison.
 
 ## Index
-- Data retrieval tools
-  1. [s2dv::Load]
-  2. [ncdf4]
-  3. [easyNCDF]
-  4. [ncview]()
-
-- Extra examination
-  (5) [Compute()]
-
-- Tips
-  a. [Dimensions]
-  b. [Attributes]
-  c. [Summary]
-  d. [Individual values]
+- Data retrieval tools
+  (1) [s2dv::Load](inst/doc/data_check.md#1-s2dvload)
+  (2) [ncdf4](inst/doc/data_check.md#2-ncdf4)
+  (3) [easyNCDF](inst/doc/data_check.md#3-easyncdf)
+  (4) [ncview](inst/doc/data_check.md#4-ncview)
+
+- Extra examination
+  (5) [Compute()](inst/doc/data_check.md#5-compute)
+
+- Tips
+  (a) [Dimensions](inst/doc/data_check.md#a-dimensions)
+  (b) [Attributes](inst/doc/data_check.md#b-attributes)
+  (c) [Summary](inst/doc/data_check.md#c-summary)
+  (d) [Individual values](inst/doc/data_check.md#d-individual-values)

--
GitLab


From cbd21018a176ade84215663a5a6793bd0939a450 Mon Sep 17 00:00:00 2001
From: aho
Date: Fri, 8 May 2020 09:41:35 +0200
Subject: [PATCH 4/4] Adjust section order and finalize the format.

---
 inst/doc/data_check.md | 88 +++++++++++++++++++++++-------------------
 1 file changed, 49 insertions(+), 39 deletions(-)

diff --git a/inst/doc/data_check.md b/inst/doc/data_check.md
index 0ed8b42..9d42c56 100644
--- a/inst/doc/data_check.md
+++ b/inst/doc/data_check.md
@@ -1,14 +1,18 @@
 # Retrieved Data Check
 Since startR has not been fully stable with many developments going on,
-it is important to self-check if the retrieving data array is as you expect before you go to further analysis.
-
-It is recommended to use small datasets as the testing sample. For example, reducing 'ensemble' dimension since it is a less-trouble one.
-Shortening 'time' dimension to the length that is light-weighted but also can be representative. Take the monthly file of daily data for example, it is better to include both 30-day and 31-day months.
-As for latitude and longitude, keep both the negative and positive values.
+it is important to check manually that the retrieved data array is as you expect before you go on to further analysis, especially when several
+parameters are used.
 
-Here are some tools and tips for data comparison.
+Here we list some tips worth paying attention to, and some tools for data comparison.
 
 ## Index
+- Tips
+  (a) [Small dataset](inst/doc/data_check.md#a-small-dataset)
+  (b) [Dimensions](inst/doc/data_check.md#b-dimensions)
+  (c) [Attributes](inst/doc/data_check.md#c-attributes)
+  (d) [Summary](inst/doc/data_check.md#d-summary)
+  (e) [Individual values](inst/doc/data_check.md#e-individual-values)
+
 - Data retrieval tools
   (1) [s2dv::Load](inst/doc/data_check.md#1-s2dvload)
   (2) [ncdf4](inst/doc/data_check.md#2-ncdf4)
   (3) [easyNCDF](inst/doc/data_check.md#3-easyncdf)
   (4) [ncview](inst/doc/data_check.md#4-ncview)
 - Extra examination
   (5) [Compute()](inst/doc/data_check.md#5-compute)
 
-- Tips
-  (a) [Dimensions](inst/doc/data_check.md#a-dimensions)
-  (b) [Attributes](inst/doc/data_check.md#b-attributes)
-  (c) [Summary](inst/doc/data_check.md#c-summary)
-  (d) [Individual values](inst/doc/data_check.md#d-individual-values)
 
 
+## Tips
+Here is the general idea. See the [Tools](#data-retrieval-tools) section for example code.
+
+### (a) Small dataset
+It is recommended to use a small dataset as the testing sample. For example, reduce the 'ensemble' dimension, since it is one of the less troublesome ones,
+and shorten the 'time' dimension to a length that is lightweight but still representative.
+Taking monthly files of daily data as an example, it is better to include both 30-day and 31-day months.
+As for latitude and longitude, keep both negative and positive values,
+and define the selectors in the same way (i.e., values(), indices(), or 'all') as you would for the full retrieval.
+
+### (b) Dimensions
+The dimensions are the most basic thing to check to see whether the data structure aligns with
+your expectation. You can check them with or without retrieving the data.
+
+### (c) Attributes
+Check longitude, latitude, time, etc. Even if the dimension lengths are correct,
+the order may be reversed or the values out of range. You can check them with or without retrieving the data.
+To learn how to get the proper attributes
+from Start(), see [FAQ How-to-#16](#16-use-parameter-return_vars-in-start).
+
+### (d) Summary
+After checking the data structure, you can look into the values to see if the data
+are retrieved correctly and filled in the right place. First of all, a summary
+(mean, min, max, etc.) gives you a general view of the values. If the summary is
+consistent, you know Start() found the correct files and data positions.
+
+### (e) Individual values
+There is a chance that the dimensions are mixed up, or that a few data points are incorrect due to interpolation or other reasons.
+Such mistakes may not be detected by the overall summary. Therefore, checking
+a few individual data points is recommended.
+It is better to check not only the first element of each dimension but also some points in the middle of the array.
 
 ## Data retrieval tools
 ### (1) s2dv::Load
-If you used Load() before, it is convenient to compare with its output dir
-ectly.
-It is useful when transformation is applied. Other methods provided here can only show original data.
+If you used Load() before, it is convenient to compare with its output directly.
+It is useful when transformation is applied. Other methods provided here can only show original data.
+Also, it helps check the retrieved data structure, not only the values.
 
 ```r
 # startR
@@ -131,9 +161,9 @@ obs2$obs[1, 1, 1, 1, 1:3, 1:2]
 ### (2) ncdf4
 It shows the original data values in netCDF files. You can get the same domain if no
 interpolation is applied in Start(), though the data structure will be different.
-It is resource-consumning if the whole domain is retrieved through ncdf4.
+It is resource-consuming if the whole domain is retrieved through ncdf4.
 If doing so, it is better to use a fatnodes/Power9/Nord3 interactive session for more memory space.
-Check ncdf4 documentation to learn the dimension structure first.
+Check the [ncdf4 documentation](https://cran.r-project.org/web/packages/ncdf4/index.html) to learn how to use it.
 
 ```r
 # startR
@@ -187,7 +217,9 @@ obs2[2, ]
 ```
 
 ### (3) easyNCDF
 It is a wrapper package of ncdf4. The idea is similar to ncdf4, but with some more user-friendly features.
+It is a wrapper package of ncdf4. The idea is similar to ncdf4 but with some more-friendly feature. +Check [easyNCDF documentation](https://cran.r-project.org/web/packages/easyNCDF/index.html) to learn how to use it. + ```r # startR @@ -352,26 +384,4 @@ res[1:3, 1, 1, 1, 1, 1:2] ``` -## Tips -### (a) Dimensions -Dimension is the most basic thing to check if the data structure is align with -your expectation. - -### (b) Attributes -Check longitude, latitude, and time, etc. Even the dimension length is correct, -the order may be opposite or out of range. To learn how to get the proper attributes -by Start(), see [FAQ How-to-#16](#16-use-parameter-return_vars-in-start). - -### (c) Summary -After checking the data structure, you can look into the values to see if the data -are retrieved correctly and filled in the right place. First of all, summary -(mean, min, max, etc.) gives you a general view of the values. If the summary is -consistent, you know Start() finds the correct files and data position. - -### (d) Individual values -There is a chance that the dimensions are mixed, or a few data points are incorrect due to interpolation or other reasons. -By the overall summary, these mistakes may not be detected. Therefore, checking -a few data points is recommended. -It is better to check not only the first element in each dimension but also some points in the middle of the array. - -- GitLab