diff --git a/inst/doc/data_check.md b/inst/doc/data_check.md index 156a2531ce091bac515310c240e297c282b7619f..abe07c64580b14422eac040339af823d3923a4d8 100644 --- a/inst/doc/data_check.md +++ b/inst/doc/data_check.md @@ -221,7 +221,6 @@ obs2[2, ] It is a wrapper package of ncdf4. The idea is similar to ncdf4 but with some more-friendly feature. Check [easyNCDF documentation](https://cran.r-project.org/web/packages/easyNCDF/index.html) to learn how to use it. - ```r # startR # Use the same one as (2). @@ -231,9 +230,15 @@ library(easyNCDF) file <- NcOpen('/esarchive/recon/ecmwf/era5/monthly_mean/sfcWind_f1h/sfcWind_201811.nc') obs2 <- NcToArray(file, vars_to_read = 'sfcWind', - dim_indices = list(longitude = c(1:10), - latitude = c(1:10), + dim_indices = list(lon = c(1:10), + lat = c(1:10), time = c(1))) +lats2 <- NcToArray(file, + dim_indices = list(lat = 1:10), + vars_to_read = 'lat') +lons2 <- NcToArray(file, + dim_indices = list(lon = 1:10), + vars_to_read = 'lon') NcClose(file) # Check @@ -245,6 +250,12 @@ dim(obs2) # var longitude latitude time # 1 10 10 1 +## (b) attributes +all.equal(as.vector(attr(obs1, 'Variables')$dat1$longitude), as.vector(lons2)) +#[1] TRUE +all.equal(as.vector(attr(obs1, 'Variables')$dat1$latitude), as.vector(lats2)) +#[1] TRUE + ## (c) summary summary(obs1) # Min. 1st Qu. Median Mean 3rd Qu. Max. @@ -252,6 +263,8 @@ summary(obs1) summary(as.vector(obs2)) # Min. 1st Qu. Median Mean 3rd Qu. Max. 
# 4.934 4.954 5.025 5.035 5.105 5.165 +all.equal(as.vector(obs1), as.vector(aperm(obs2, c(1, 3, 2, 4)))) +#[1] TRUE ## (d) individual values obs1[1, 1, 1, 1, , 2] diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 23b1deabe54d9cf1975dd13f27c1a18fd186a955..9aa3889a85eedbbf258bf6da8ac1dff16147da4e 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -475,6 +475,8 @@ When trying to load both start dates at once using Start(), the order in which t - `sdates = c('19991101', '19990901')`, the member dimension will be of length 51, showing missing values for the members 26 to 51 in the second start date; - `sdates = c('19990901', '19991101')`, the member dimension will be of length 25, any member will be missing. +To ensure that all the members are retrieved, we can use the parameter 'largest_dims_length'. See [FAQ 21](https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/faq.md#21-retrieve-the-complete-data-when-the-dimension-length-varies-among-files) for details. + The code to reproduce this behaviour could be found in the Use Cases section, [example 1.4](/inst/doc/usecase/ex1_4_variable_nmember.R). ### 11. Select the longitude/latitude region @@ -859,6 +861,8 @@ by a named integer vector, for example, `largest_dims_length = c(member = 51)`. adopt the provided ones and use the first valid file to decide the rest of dimensions. By this means, the efficiency can be similar to `largest_dims_length = FALSE`. +Find an example in [use case ex1_4](/inst/doc/usecase/ex1_4_variable_nmember.R). + ### 22. 
Define the selector when the indices in the files are not aligned When the data structure between the requested files is not identical, we need to give different diff --git a/inst/doc/usecase/ex1_2_exp_obs_attr.R b/inst/doc/usecase/ex1_2_exp_obs_attr.R index fa2323d6aaa5ace57195ea52e127a8ad1fa1b3c7..19874efc730861847f23ceb39db8140e65c3d20b 100644 --- a/inst/doc/usecase/ex1_2_exp_obs_attr.R +++ b/inst/doc/usecase/ex1_2_exp_obs_attr.R @@ -5,13 +5,12 @@ # can have the same dimension structure. # Spatial dimensions: -# The exp and obs data happen to have the same spatial resolution (256x512) and +# The exp and obs data happen to have the same spatial resolution (256x512) and # the grids are not shifted, so we don't need to regrid them. However, their latitude # orders are opposite. exp has ascending order while obs has descending order. -# To make them consistent, we cannot simply use 'all' as the selector of obs because -# for now, the reordering parameter '*_Sort' is only functional when the dimension is -# defined by values(). We can use either `indices(256:1)` or the exp attributes (`values()`) -# to define the latitude of obs. +# To make them consistent, we need to use the "_reorder" parameters. In fact, it is +# always recommended to use "lat_reorder" and "lon_reorder" to ensure you get +# the expected latitude and longitude. # Temporal dimensions: # The exp and obs files have different date/time structure. exp has one file per year and @@ -33,7 +32,9 @@ exp <- Start(dat = repos_exp, sdate = as.character(c(2005:2008)), time = indices(1:3), lat = 'all', + lat_reorder = Sort(), lon = 'all', + lon_reorder = CircularSort(0, 360), synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), return_vars = list(lon = NULL, @@ -41,10 +42,6 @@ exp <- Start(dat = repos_exp, time = 'sdate'), retrieve = FALSE) -attr(exp, 'Dimensions') -# dat var sdate time lat lon -# 1 1 4 3 256 512 - # Retrieve attributes for observational data retrieval. 
## Because latitude order in experiment is [-90, 90] but in observation is [90, -90], ## latitude values need to be retrieved and used below. @@ -83,8 +80,10 @@ obs <- Start(dat = repos_obs, var = 'tas', date = unique(format(dates, '%Y%m')), time = values(dates), #dim: [sdate = 4, time = 3] - lat = values(lats), # indices(256:1), - lon = values(lons), # 'all', + lat = 'all', + lat_reorder = Sort(), + lon = 'all', + lon_reorder = CircularSort(0, 360), time_across = 'date', merge_across_dims = TRUE, split_multiselected_dims = TRUE, @@ -95,10 +94,6 @@ obs <- Start(dat = repos_obs, time = 'date'), retrieve = FALSE) -attr(obs, 'Dimensions') -# dat var sdate time lat lon -# 1 1 4 3 256 512 - #==================================================== # Check the attributes. They should be all identical. #==================================================== @@ -177,8 +172,10 @@ obs2 <- Start(dat = repos_obs, var = 'tas', date = unique(format(dates, '%Y%m')), time = values(dates_adjust), # use the adjust ones - lat = values(lats), - lon = values(lons), + lat = 'all', + lat_reorder = Sort(), + lon = 'all', + lon_reorder = CircularSort(0, 360), time_across = 'date', merge_across_dims = TRUE, split_multiselected_dims = TRUE, @@ -207,8 +204,10 @@ obs3 <- Start(dat = repos_obs, var = 'tas', date = unique(format(dates, '%Y%m')), time = values(dates), - lat = values(lats), - lon = values(lons), + lat = 'all', + lat_reorder = Sort(), + lon = 'all', + lon_reorder = CircularSort(0, 360), time_across = 'date', time_tolerance = as.difftime(15, units = 'days'), merge_across_dims = TRUE, @@ -222,10 +221,9 @@ obs3 <- Start(dat = repos_obs, # We lose many data because there are no data within 15 days from the provided time values. 
print(attr(obs3, 'Variables')$common$time) -[1] "2005-02-28 18:00:00 UTC" "2006-02-28 18:00:00 UTC" -[3] "2007-02-28 18:00:00 UTC" "2008-02-29 18:00:00 UTC" +#[1] "2005-02-28 18:00:00 UTC" "2006-02-28 18:00:00 UTC" +#[3] "2007-02-28 18:00:00 UTC" "2008-02-29 18:00:00 UTC" # If 'time_tolerance' is changed to "as.difftime(1, units = 'days')", an error shows: # Selectors do not match any of the possible values for the dimension 'time'. - diff --git a/inst/doc/usecase/ex1_4_variable_nmember.R b/inst/doc/usecase/ex1_4_variable_nmember.R index 495c6f8278685482cd865378c59ed40faca3e38f..2e90fe6ab9d2733d99d096c8a151f58ecdb10ad5 100644 --- a/inst/doc/usecase/ex1_4_variable_nmember.R +++ b/inst/doc/usecase/ex1_4_variable_nmember.R @@ -1,11 +1,13 @@ # This code shows that the number of members could depend on the start date -# and the order of start dates requested -# See FAQ 10 [The members depends on the start date](/inst/doc/faq.md) +# and the order of start dates impacts the length of the member dimension. +# See FAQ 10 [The number of members depends on the start date](https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/faq.md#10-the-number-of-members-depends-on-the-start-date) and +# FAQ 21 [Retrieve the complete data when the dimension length varies among files](https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/faq.md#21-retrieve-the-complete-data-when-the-dimension-length-varies-among-files) +# for more explanation. 
library(startR) path_list <- list(list(name = 'system5', - path = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc')) + path = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc')) sdates_exp <- c('19991101', '19990901') data_Nov_Sep <- Start(dat = path_list, var = 'psl', @@ -13,8 +15,9 @@ data_Nov_Sep <- Start(dat = path_list, sdate = sdates_exp, time = indices(1), latitude = values(list(0, 20)), - latitude_reorder=Sort(), + latitude_reorder = Sort(), longitude = values(list(0, 5)), + longitude_reorder = CircularSort(0, 360), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), member = c('ensemble', 'realization')), @@ -23,9 +26,13 @@ data_Nov_Sep <- Start(dat = path_list, dim(data_Nov_Sep) # dat var member sdate time latitude longitude # 1 1 51 2 1 71 19 + apply(data_Nov_Sep, 4, function(x){sum(is.na(x))}) -# 26 missing values for the second start date +#[1] 0 35074 +# --> 26*71*19 = 35074 missing values for the second start date. + +#-------- exchange the order of sdate ------------- sdates_exp <- c('19990901', '19991101') data_Sep_Nov <- Start(dat = path_list, var = 'psl', @@ -33,20 +40,44 @@ data_Sep_Nov <- Start(dat = path_list, sdate = sdates_exp, time = indices(1), latitude = values(list(0, 20)), - latitude_reorder=Sort(), + latitude_reorder = Sort(), longitude = values(list(0, 5)), + longitude_reorder = CircularSort(0, 360), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), - member = c('ensemble', 'realization')), + member = c('ensemble', 'realization')), retrieve = TRUE) -# 25 members available +# Only 25 members available dim(data_Sep_Nov) # dat var member sdate time latitude longitude # 1 1 25 2 1 71 19 -# Any missing value: apply(data_Sep_Nov, 4, function(x){sum(is.na(x))}) +#[1] 0 0 +#--> No missing value. 
+#-----------Use 'largest_dims_length' to get full members ------------- +data_largest_len <- Start(dat = path_list, + var = 'psl', + member = 'all', + largest_dims_length = c(member = 51), #TRUE, + sdate = sdates_exp, + time = indices(1), + latitude = values(list(0, 20)), + latitude_reorder = Sort(), + longitude = values(list(0, 5)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + member = c('ensemble', 'realization')), + retrieve = TRUE) +dim(data_largest_len) +# dat var member sdate time latitude longitude +# 1 1 51 2 1 71 19 +# Switch the sdate order of data_largest_len and compare to the first data +all.equal(as.vector(data_largest_len[, , , 2:1, , , ]), + as.vector(data_Nov_Sep), check.attributes = FALSE) +#[1] TRUE diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 8ac0760502fe1924b109f00d14616145606bf2b0..11e866d859618d07b8e1cc770d06812241ecb989 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -48,7 +48,7 @@ expect_equal( }) test_that("2. 365_day, daily, unit = 'days since 1984-01-01'", { -path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200408/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' +path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' suppressWarnings( data <- Start(dat = path_bcc_csm2,