From 03c8de6deefb4f900e4316779e5d8e5381077645 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 7 May 2020 14:59:06 +0200 Subject: [PATCH 1/4] Create example script for startR data check. --- inst/doc/usecase/ex3_1_data_check.R | 318 ++++++++++++++++++++++++++++ 1 file changed, 318 insertions(+) create mode 100644 inst/doc/usecase/ex3_1_data_check.R diff --git a/inst/doc/usecase/ex3_1_data_check.R b/inst/doc/usecase/ex3_1_data_check.R new file mode 100644 index 0000000..9b6376b --- /dev/null +++ b/inst/doc/usecase/ex3_1_data_check.R @@ -0,0 +1,318 @@ +#------------------------------------------------------------------------- +# Since startR hasn't been fully stable with many developments going on, +# it is important to self-check if the retrieving data array is as you expect. +# This script provided several tools and tips to ensure everything is fine +# before you head to further analysis. + +# Other data retrieval tools: (1) s2dv::Load (2) ncdf4 (3) easyNCDF (4) ncview +# Tips: (a) dimensions (b) attributes (c) summary (d) individual values +# Extra examination: (5) Compute() +#------------------------------------------------------------------------- + +#========== (1) s2dv::Load =========== +# If you used Load() before, it is convenient to compare with its output directly. +# It is useful when transformation is applied. Other methods provided here +# can only show original data. Also, it helps check the retrieved data structure, +# not only the values. + +# startR +library(startR) + +obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h/$var$_$sdate$.nc' +var_name <- 'sfcWind' +sdate <- '201811' +lons.min <- 10 +lons.max <- 20 +lats.min <- 0 +lats.max <- 10 + +obs1 <- Start(dat = obs_path, + var = var_name, + sdate = sdate, + time = 'all', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = T), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative', + crop = c(lons.min, lons.max, lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(time = NULL, + latitude = 'dat', + longitude = 'dat'), + retrieve = T) + +# Load() +library(s2dv) +pobs <- paste0('/esarchive/recon/ecmwf/era5/monthly_mean/', + '$VAR_NAME$_f1h/$VAR_NAME$_$YEAR$$MONTH$.nc') + +obs2 <- Load(var = var_name, + obs = list(list(path = pobs)), + sdates = '20181101', + leadtimemin = 1, + leadtimemax = 1, + output = 'lonlat', + grid = 'r360x181', + method = 'conservative', + storefreq = 'monthly', + latmin = lats.min, + latmax = lats.max, + lonmin = lons.min, + lonmax = lons.max) + +# Check +## (a) dimensions +dim(obs1) +# dat var sdate time latitude longitude +# 1 1 1 1 11 11 +dim(obs2$obs) +# dataset member sdate ftime lat lon +# 1 1 1 1 11 11 + +## (b) attributes +attr(obs1, 'Variables')$dat1$longitude +# [1] 10 11 12 13 14 15 16 17 18 19 20 +as.vector(obs2$lon) +# [1] 10 11 12 13 14 15 16 17 18 19 20 +attr(obs1, 'Variables')$dat1$latitude +# [1] 10 9 8 7 6 5 4 3 2 1 0 +as.vector(obs2$lat) +# [1] 10 9 8 7 6 5 4 3 2 1 0 + +## (c) summary +diff <- drop(obs1) - drop(obs2$obs) +range(diff) +#[1] -4.965553e-05 4.929314e-05 +summary(obs1) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 1.090 1.313 1.479 1.636 1.936 2.803 +summary(obs2$obs) +# Min. 1st Qu. Median Mean 3rd Qu. Max. 
+# 1.090 1.313 1.479 1.636 1.936 2.803 + +## (d) individual values +obs1[1, 1, 1, 1, 1:3, 1:2] +# [,1] [,2] +#[1,] 2.345519 2.365326 +#[2,] 2.219121 1.958319 +#[3,] 1.840537 1.386617 +obs2$obs[1, 1, 1, 1, 1:3, 1:2] +# [,1] [,2] +#[1,] 2.3455 2.3653 +#[2,] 2.2191 1.9583 +#[3,] 1.8405 1.3866 + + +#========== (2) ncdf4 =========== +# It is resource-consumning if the whole domain is retrieved through ncdf4.. +# If doing so, use fatnodes/Power9/Nord3 interactive session for more memory space. +# Check ncdf4 documentation to learn the dimension structure first. + +# startR +library(startR) +obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h/$var$_$sdate$.nc' +var_name <- 'sfcWind' +sdate <- '201811' + +obs1 <- Start(dat = obs_path, + var = var_name, + sdate = sdate, + time = 'all', + latitude = indices(1:10), + longitude = indices(1:10), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(time = 'sdate', + latitude = 'dat', + longitude = 'dat'), + retrieve = T) + +# ncdf4 +library(ncdf4) +file <- nc_open('/esarchive/recon/ecmwf/era5/monthly_mean/sfcWind_f1h/sfcWind_201811.nc') +obs2 <- ncvar_get(file, 'sfcWind', start = c(1, 1, 1), count = c(10, 10, 1)) +nc_close(file) + +# Check +## (a) dimensions +dim(obs1) +# dat var sdate time latitude longitude +# 1 1 1 1 10 10 +dim(obs2) +#[1] 10 10 + +## (c) summary +summary(obs1) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 4.934 4.954 5.025 5.035 5.105 5.165 +summary(as.vector(obs2)) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 4.934 4.954 5.025 5.035 5.105 5.165 + +## (d) individual values +obs1[1, 1, 1, 1, , 2] +# [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 +# [9] 5.095344 5.051348 +obs2[2, ] +# [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 +# [9] 5.095344 5.051348 + + +#========== (3) easyNCDF =========== +# startR +# Use the same one as (2). + +# easyNCDF +library(easyNCDF) +file <- NcOpen('/esarchive/recon/ecmwf/era5/monthly_mean/sfcWind_f1h/sfcWind_201811.nc') + +obs2 <- NcToArray(file, vars_to_read = 'sfcWind', + dim_indices = list(longitude = c(1:10), + latitude = c(1:10), + time = c(1))) +NcClose(file) + +# Check +## (a) dimensions +dim(obs1) +# dat var sdate time latitude longitude +# 1 1 1 1 10 10 +dim(obs2) +# var longitude latitude time +# 1 10 10 1 + +## (c) summary +summary(obs1) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 4.934 4.954 5.025 5.035 5.105 5.165 +summary(as.vector(obs2)) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 4.934 4.954 5.025 5.035 5.105 5.165 + +## (d) individual values +obs1[1, 1, 1, 1, , 2] +# [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 +# [9] 5.095344 5.051348 +obs2[1, 2, , 1] +# [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 +# [9] 5.095344 5.051348 + + +#========== (4) ncview =========== +# In the terminal, type 'ncview '. Then check the values dynamically in the pop-up window. +# You can also plot the map to compare with ncview. It is more clear if you use a bigger domain (rather than the example here, which only has 10*10 points) + +s2dv::PlotEquiMap(obs1[1,1,1,1,,], + lon = attributes(obs1)$Variable$dat1$longitude, + lat = attributes(obs1)$Variable$dat1$latitude) + +#========== (5) Compute() =========== +# You can compare the result of Compute() with what you get from Start(retrieve = TRUE), or with any tool mentioned above. 
+ +# startR (retrieve = TRUE) +library(startR) + +obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h/$var$_$sdate$.nc' +var_name <- 'sfcWind' +sdate <- '201811' +lons.min <- 10 +lons.max <- 20 +lats.min <- 0 +lats.max <- 10 + +obs1 <- Start(dat = obs_path, + var = var_name, + sdate = sdate, + time = 'all', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = T), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative', + crop = c(lons.min, lons.max, lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(time = NULL, + latitude = 'dat', + longitude = 'dat'), + retrieve = T) + + +# startR (retrieve = FALSE, use the whole workflow) + +obs2 <- Start(dat = obs_path, + var = var_name, + sdate = sdate, + time = 'all', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = T), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative', + crop = c(lons.min, lons.max, lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(time = NULL, + latitude = 'dat', + longitude = 'dat'), + retrieve = F) + +func <- function(x) { + return(x) +} + +step <- Step(func, + target_dims = 'latitude', + output_dims = 'latitude') + +wf <- AddStep(obs2, step) + +res <- Compute(wf, chunks = list(time = 1))$output1 + +# Check +## (a) dimensions +dim(obs1) +# dat var sdate time latitude longitude +# 1 1 1 1 11 11 +dim(res) +# latitude dat var sdate time longitude +# 11 1 1 1 1 11 + +## (c) summary +diff <- drop(obs1) - drop(res) +range(diff) +#[1] 0 0 +summary(obs1) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 1.090 1.313 1.479 1.636 1.936 2.803 +summary(res) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 1.090 1.313 1.479 1.636 1.936 2.803 + +## (d) individual values +obs1[1, 1, 1, 1, 1:3, 1:2] +# [,1] [,2] +#[1,] 2.345519 2.365326 +#[2,] 2.219121 1.958319 +#[3,] 1.840537 1.386617 +res[1:3, 1, 1, 1, 1, 1:2] +# [,1] [,2] +#[1,] 2.345519 2.365326 +#[2,] 2.219121 1.958319 +#[3,] 1.840537 1.386617 + + -- GitLab From 2d4f3ecc05f1b380a53d96d1745496c7b35cea68 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 7 May 2020 19:16:23 +0200 Subject: [PATCH 2/4] Create .md file to replace .R file --- .../ex3_1_data_check.R => data_check.md} | 125 +++++++++++++----- 1 file changed, 92 insertions(+), 33 deletions(-) rename inst/doc/{usecase/ex3_1_data_check.R => data_check.md} (70%) diff --git a/inst/doc/usecase/ex3_1_data_check.R b/inst/doc/data_check.md similarity index 70% rename from inst/doc/usecase/ex3_1_data_check.R rename to inst/doc/data_check.md index 9b6376b..e40c9a4 100644 --- a/inst/doc/usecase/ex3_1_data_check.R +++ b/inst/doc/data_check.md @@ -1,20 +1,38 @@ -#------------------------------------------------------------------------- -# Since startR hasn't been fully stable with many developments going on, -# it is important to self-check if the retrieving data array is as you expect. -# This script provided several tools and tips to ensure everything is fine -# before you head to further analysis. 
- -# Other data retrieval tools: (1) s2dv::Load (2) ncdf4 (3) easyNCDF (4) ncview -# Tips: (a) dimensions (b) attributes (c) summary (d) individual values -# Extra examination: (5) Compute() -#------------------------------------------------------------------------- - -#========== (1) s2dv::Load =========== -# If you used Load() before, it is convenient to compare with its output directly. -# It is useful when transformation is applied. Other methods provided here -# can only show original data. Also, it helps check the retrieved data structure, -# not only the values. +# Retrieved Data Check +Since startR has not been fully stable with many developments going on, +it is important to self-check if the retrieving data array is as you expect before you go to further analysis. + +It is recommended to use small datasets as the testing sample. For example, reducing 'ensemble' dimension since it is a less-trouble one. +Shortening 'time' dimension to the length that is light-weighted but also can be representative. Take the monthly file of daily data for example, it is better to include both 30-day and 31-day months. +As for latitude and longitude, keep both the negative and positive values. +Here are some tools and tips for data comparison. + +## Index +- Data retrieval tools + 1. [s2dv::Load] + 2. [ncdf4] + 3. [easyNCDF] + 4. [ncview]() + +- Extra examination + (5) [Compute()] + +- Tips + a. [Dimensions] + b. [Attributes] + c. [Summary] + d. [Individual values] + + + +## Data retrieval tools +### (1) s2dv::Load +If you used Load() before, it is convenient to compare with its output dir +ectly. +It is useful when transformation is applied. Other methods provided here can only show original data. Also, it helps check the retrieved data structure, not only the values. + +```r # startR library(startR) @@ -108,12 +126,16 @@ obs2$obs[1, 1, 1, 1, 1:3, 1:2] #[2,] 2.2191 1.9583 #[3,] 1.8405 1.3866 +``` -#========== (2) ncdf4 =========== -# It is resource-consumning if the whole domain is retrieved through ncdf4.. -# If doing so, use fatnodes/Power9/Nord3 interactive session for more memory space. -# Check ncdf4 documentation to learn the dimension structure first. +### (2) ncdf4 +It shows original data values in netCDF files. You can get the same domain if the +interpolation is not applied in Start(), while the data structure will be different. +It is resource-consumning if the whole domain is retrieved through ncdf4. +If doing so, it is better to use fatnodes/Power9/Nord3 interactive session for more memory space. +Check ncdf4 documentation to learn the dimension structure first. +```r # startR library(startR) obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h/$var$_$sdate$.nc' @@ -124,7 +146,7 @@ obs1 <- Start(dat = obs_path, var = var_name, sdate = sdate, time = 'all', - latitude = indices(1:10), + latitude = indices(1:10), longitude = indices(1:10), synonims = list(longitude = c('lon', 'longitude'), latitude = c('lat', 'latitude')), @@ -162,9 +184,12 @@ obs1[1, 1, 1, 1, , 2] obs2[2, ] # [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 # [9] 5.095344 5.051348 +``` +### (3) easyNCDF +It is a wrapper package of ncdf4. The idea is similar to ncdf4 but with some more-friendly feature. -#========== (3) easyNCDF =========== +```r # startR # Use the same one as (2). 
@@ -172,8 +197,8 @@ obs2[2, ] library(easyNCDF) file <- NcOpen('/esarchive/recon/ecmwf/era5/monthly_mean/sfcWind_f1h/sfcWind_201811.nc') -obs2 <- NcToArray(file, vars_to_read = 'sfcWind', - dim_indices = list(longitude = c(1:10), +obs2 <- NcToArray(file, vars_to_read = 'sfcWind', + dim_indices = list(longitude = c(1:10), latitude = c(1:10), time = c(1))) NcClose(file) @@ -202,22 +227,31 @@ obs1[1, 1, 1, 1, , 2] obs2[1, 2, , 1] # [1] 4.960041 4.959622 4.956322 4.939547 4.991117 5.075977 5.143179 5.157578 # [9] 5.095344 5.051348 +``` +### (4) ncview +Using ncview to check netCDF file is the easiest way. While it may be slow. +It is convenient if you want to check with the map. -#========== (4) ncview =========== -# In the terminal, type 'ncview '. Then check the values dynamically in the pop-up window. -# You can also plot the map to compare with ncview. It is more clear if you use a bigger domain (rather than the example here, which only has 10*10 points) +In the terminal, type 'ncview '. Then check the values dynamically in the pop-up window. +You can also plot the map to compare with ncview. It is more clear if you use a bigger domain (rather than the example here, which only has 10*10 points). -s2dv::PlotEquiMap(obs1[1,1,1,1,,], +```r +s2dv::PlotEquiMap(obs1[1,1,1,1,,], lon = attributes(obs1)$Variable$dat1$longitude, lat = attributes(obs1)$Variable$dat1$latitude) +``` -#========== (5) Compute() =========== -# You can compare the result of Compute() with what you get from Start(retrieve = TRUE), or with any tool mentioned above. -# startR (retrieve = TRUE) -library(startR) +## Extra examination +### (5) Compute() +Some problems hide in Compute() but not Start(). If your final goal is to use Compute() +for analysis, you can do some simple/dumb tests to ensure the data remain the same +after going through the workflow. +You can compare the result of Compute() with what you get from Start(retrieve = TRUE), or with any tool mentioned above. +```r +library(startR) obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h/$var$_$sdate$.nc' var_name <- 'sfcWind' sdate <- '201811' @@ -226,6 +260,7 @@ lons.max <- 20 lats.min <- 0 lats.max <- 10 +# startR (retrieve = TRUE) obs1 <- Start(dat = obs_path, var = var_name, sdate = sdate, @@ -249,7 +284,6 @@ obs1 <- Start(dat = obs_path, # startR (retrieve = FALSE, use the whole workflow) - obs2 <- Start(dat = obs_path, var = var_name, sdate = sdate, @@ -315,4 +349,29 @@ res[1:3, 1, 1, 1, 1, 1:2] #[2,] 2.219121 1.958319 #[3,] 1.840537 1.386617 +``` + + +## Tips +### (a) Dimensions +Dimension is the most basic thing to check if the data structure is align with +your expectation. + +### (b) Attributes +Check longitude, latitude, and time, etc. Even the dimension length is correct, +the order may be opposite or out of range. To learn how to get the proper attributes +by Start(), see [FAQ How-to-#16](#16-use-parameter-return_vars-in-start). + +### (c) Summary +After checking the data structure, you can look into the values to see if the data +are retrieved correctly and filled in the right place. First of all, summary +(mean, min, max, etc.) gives you a general view of the values. If the summary is +consistent, you know Start() finds the correct files and data position. + +### (d) Individual values +There is a chance that the dimensions are mixed, or a few data points are incorrect due to interpolation or other reasons. +By the overall summary, these mistakes may not be detected. Therefore, checking +a few data points is recommended. 
+It is better to check not only the first element in each dimension but also some points in the middle of the array.
+

--
GitLab


From 2bbb06191278114b9db1914da0e56eb041cb114e Mon Sep 17 00:00:00 2001
From: aho
Date: Thu, 7 May 2020 21:52:15 +0200
Subject: [PATCH 3/4] Fix hyperlink and index

---
 inst/doc/data_check.md | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/inst/doc/data_check.md b/inst/doc/data_check.md
index e40c9a4..0ed8b42 100644
--- a/inst/doc/data_check.md
+++ b/inst/doc/data_check.md
@@ -9,20 +9,20 @@ As for latitude and longitude, keep both the negative and positive values.
 Here are some tools and tips for data comparison.
 
 ## Index
-- Data retrieval tools
-  1. [s2dv::Load]
-  2. [ncdf4]
-  3. [easyNCDF]
-  4. [ncview]()
-
-- Extra examination
-  (5) [Compute()]
-
-- Tips
-  a. [Dimensions]
-  b. [Attributes]
-  c. [Summary]
-  d. [Individual values]
+- Data retrieval tools
+  (1) [s2dv::Load](inst/doc/data_check.md#1-s2dvload)
+  (2) [ncdf4](inst/doc/data_check.md#2-ncdf4)
+  (3) [easyNCDF](inst/doc/data_check.md#3-easyncdf)
+  (4) [ncview](inst/doc/data_check.md#4-ncview)
+
+- Extra examination
+  (5) [Compute()](inst/doc/data_check.md#5-compute)
+
+- Tips
+  (a) [Dimensions](inst/doc/data_check.md#a-dimensions)
+  (b) [Attributes](inst/doc/data_check.md#b-attributes)
+  (c) [Summary](inst/doc/data_check.md#c-summary)
+  (d) [Individual values](inst/doc/data_check.md#d-individual-values)

--
GitLab


From cbd21018a176ade84215663a5a6793bd0939a450 Mon Sep 17 00:00:00 2001
From: aho
Date: Fri, 8 May 2020 09:41:35 +0200
Subject: [PATCH 4/4] Adjust section order and finalize the format.

---
 inst/doc/data_check.md | 88 +++++++++++++++++++++++-------------------
 1 file changed, 49 insertions(+), 39 deletions(-)

diff --git a/inst/doc/data_check.md b/inst/doc/data_check.md
index 0ed8b42..9d42c56 100644
--- a/inst/doc/data_check.md
+++ b/inst/doc/data_check.md
@@ -1,14 +1,18 @@
 # Retrieved Data Check
 Since startR has not been fully stable with many developments going on,
-it is important to self-check if the retrieving data array is as you expect before you go to further analysis.
-
-It is recommended to use small datasets as the testing sample. For example, reducing 'ensemble' dimension since it is a less-trouble one.
-Shortening 'time' dimension to the length that is light-weighted but also can be representative. Take the monthly file of daily data for example, it is better to include both 30-day and 31-day months.
-As for latitude and longitude, keep both the negative and positive values.
+it is important to check manually that the retrieved data array is as you expect before you go on to further analysis, especially when several
+parameters are used.
 
-Here are some tools and tips for data comparison.
+Here we list some tips worth paying attention to, and some tools for data comparison.
 
 ## Index
+- Tips
+  (a) [Small dataset](inst/doc/data_check.md#a-small-dataset)
+  (b) [Dimensions](inst/doc/data_check.md#b-dimensions)
+  (c) [Attributes](inst/doc/data_check.md#c-attributes)
+  (d) [Summary](inst/doc/data_check.md#d-summary)
+  (e) [Individual values](inst/doc/data_check.md#e-individual-values)
+
 - Data retrieval tools
   (1) [s2dv::Load](inst/doc/data_check.md#1-s2dvload)
   (2) [ncdf4](inst/doc/data_check.md#2-ncdf4)
   (3) [easyNCDF](inst/doc/data_check.md#3-easyncdf)
   (4) [ncview](inst/doc/data_check.md#4-ncview)
 - Extra examination
   (5) [Compute()](inst/doc/data_check.md#5-compute)
 
-- Tips
-  (a) [Dimensions](inst/doc/data_check.md#a-dimensions)
-  (b) [Attributes](inst/doc/data_check.md#b-attributes)
-  (c) [Summary](inst/doc/data_check.md#c-summary)
-  (d) [Individual values](inst/doc/data_check.md#d-individual-values)
 
 
+## Tips
+Here is the general idea. See the [Tools](#data-retrieval-tools) section for example code.
+
+### (a) Small dataset
+It is recommended to use a small dataset as the testing sample. For example, reduce the 'ensemble' dimension, since it is one of the less troublesome ones,
+and shorten the 'time' dimension to a length that is lightweight but still representative.
+Taking monthly files of daily data as an example, it is better to include both 30-day and 31-day months.
+As for latitude and longitude, keep both negative and positive values,
+and define the selectors in the same way (i.e., values(), indices(), or 'all') as you would for the full retrieval.
+
+### (b) Dimensions
+The dimensions are the most basic thing to check to see whether the data structure aligns with
+your expectation. You can check them with or without retrieving the data.
+
+### (c) Attributes
+Check longitude, latitude, time, etc. Even if the dimension lengths are correct,
+the order may be reversed or the values out of range. You can check them with or without retrieving the data.
+To learn how to get the proper attributes
+from Start(), see [FAQ How-to-#16](#16-use-parameter-return_vars-in-start).
+
+### (d) Summary
+After checking the data structure, you can look into the values to see if the data
+are retrieved correctly and filled in the right place. First of all, a summary
+(mean, min, max, etc.) gives you a general view of the values. If the summary is
+consistent, you know Start() found the correct files and data positions.
+
+### (e) Individual values
+There is a chance that the dimensions are mixed up, or that a few data points are incorrect due to interpolation or other reasons.
+Such mistakes may not be detected by the overall summary. Therefore, checking
+a few individual data points is recommended.
+It is better to check not only the first element of each dimension but also some points in the middle of the array.
 
 ## Data retrieval tools
 ### (1) s2dv::Load
-If you used Load() before, it is convenient to compare with its output dir
-ectly.
-It is useful when transformation is applied. Other methods provided here can only show original data.
+If you used Load() before, it is convenient to compare with its output directly.
+It is useful when transformation is applied. Other methods provided here can only show original data.
+Also, it helps check the retrieved data structure, not only the values.
 
 ```r
 # startR
@@ -131,9 +161,9 @@ obs2$obs[1, 1, 1, 1, 1:3, 1:2]
 ### (2) ncdf4
 It shows the original data values in netCDF files. You can get the same domain if no
 interpolation is applied in Start(), though the data structure will be different.
-It is resource-consumning if the whole domain is retrieved through ncdf4.
+It is resource-consuming if the whole domain is retrieved through ncdf4.
 If doing so, it is better to use a fatnodes/Power9/Nord3 interactive session for more memory space.
-Check ncdf4 documentation to learn the dimension structure first.
+Check the [ncdf4 documentation](https://cran.r-project.org/web/packages/ncdf4/index.html) to learn how to use it.
 
 ```r
 # startR
@@ -187,7 +217,9 @@ obs2[2, ]
 ```
 
 ### (3) easyNCDF
 It is a wrapper package of ncdf4. The idea is similar to ncdf4, but with some more user-friendly features.
+It is a wrapper package of ncdf4. The idea is similar to ncdf4 but with some more-friendly feature. +Check [easyNCDF documentation](https://cran.r-project.org/web/packages/easyNCDF/index.html) to learn how to use it. + ```r # startR @@ -352,26 +384,4 @@ res[1:3, 1, 1, 1, 1, 1:2] ``` -## Tips -### (a) Dimensions -Dimension is the most basic thing to check if the data structure is align with -your expectation. - -### (b) Attributes -Check longitude, latitude, and time, etc. Even the dimension length is correct, -the order may be opposite or out of range. To learn how to get the proper attributes -by Start(), see [FAQ How-to-#16](#16-use-parameter-return_vars-in-start). - -### (c) Summary -After checking the data structure, you can look into the values to see if the data -are retrieved correctly and filled in the right place. First of all, summary -(mean, min, max, etc.) gives you a general view of the values. If the summary is -consistent, you know Start() finds the correct files and data position. - -### (d) Individual values -There is a chance that the dimensions are mixed, or a few data points are incorrect due to interpolation or other reasons. -By the overall summary, these mistakes may not be detected. Therefore, checking -a few data points is recommended. -It is better to check not only the first element in each dimension but also some points in the middle of the array. - -- GitLab