From 37d7df08c8df1924544542c3fdce9cfc74cd40a6 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Feb 2020 13:19:34 +0100 Subject: [PATCH 01/11] FAQ and use case --- inst/doc/faq.md | 14 ++++++ inst/doc/usecase/ex_1_4_variable_nmember.R | 54 ++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 inst/doc/usecase/ex_1_4_variable_nmember.R diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 087e00a..65326c0 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -13,6 +13,7 @@ This document intends to be the first reference for any doubts that you may have 7. [Avoid or specify a node from cluster in Compute()](#7-avoid-or-specify-a-node-from-cluster-in-compute) 8. [Define a path with multiple dependencies](#8-define-a-path-with-multiple-dependencies) 9. [Use CDORemap() in function](#9-use-cdoremap-in-function) + 10. [The number of members depends on the start date](#10-the-number-of-members-depends-on-the-start-date) 2. **Something goes wrong...** @@ -387,6 +388,19 @@ If you want to interpolate data by s2dverification::CDORemap in function, you ne machine which CDO module to use. Therefore, `CDO_module = 'CDO/1.9.5-foss-2018b'` should be added in Compute() cluster list. See the example in usecase [ex2_3_cdo.R](inst/doc/usecase/ex2_3_cdo.R). +### 10. The number of member depends on the start date + +In seasonal forecast, there are a few start dates that are more widely used (e.g. November 1st) than others. For those start dates extensively used, the number of members available is greater than for other start dates. This is the case of the model system5_m1: + - for the start date November 1st, 1999, there are 51 members available, while + - for the start date September 1st, 1999, there are 25 members available. 
+ +When trying to load both start dates at once using Start(), the order in which the start dates is specify will impact on the dimensions of the dataset if all member are loaded with `member = 'all'`: + - `sdates = c('19991101', '19990901')`, the member dimension will be of length 51, showing missing values for the members 26 to 51 in the second start date; + - `sdates = c('19990901', '19991101')`, the member dimension will be of length 25, any member will be missing. + +The code to reproduce this behaviour could be found in the Use Cases section, [example 1.4](/inst/doc/usecase/ex_1_4_variable_nmember.R). + + ## Something goes wrong... diff --git a/inst/doc/usecase/ex_1_4_variable_nmember.R b/inst/doc/usecase/ex_1_4_variable_nmember.R new file mode 100644 index 0000000..58cdcac --- /dev/null +++ b/inst/doc/usecase/ex_1_4_variable_nmember.R @@ -0,0 +1,54 @@ +# This code shows that the number of members could depend on the start date +# and the order of start dates requested +# See FAQ 10 [The members depends on the start date](/inst/doc/faq.md) + +library(startR) +lats.min <- -90 +lats.max <- 90 +lons.min <- 0 +lons.max <- 360 + +path_list <- list(list(name = 'system5', + path = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc')) +sdates_exp <- c('19991101', '19990901') +data_Nov_Sep <- Start(dat = path_list, + var = 'psl', + member = 'all', + sdate = sdates_exp, + time = indices(1), + latitude = values(list(0, 20)), + longitude = values(list(0, 5)), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + member = c('ensemble', 'realization')), + retrieve = TRUE) +# 51 members +dim(data_Nov_Sep) +# dat var member sdate time latitude longitude +# 1 1 51 2 1 640 19 +apply(data_Nov_Sep, 4, function(x){sum(is.na(x))}) +# 26 missing values for the second start date + +sdates_exp <- c('19990901', '19991101') +data_Sep_Nov <- Start(dat = path_list, + var = 'psl', + member = 'all', + sdate = sdates_exp, + time = 
indices(1), + latitude = values(list(0, 20)), + longitude = values(list(0, 5)), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + member = c('ensemble', 'realization')), + retrieve = TRUE) + +# 25 members available +dim(data_Sep_Nov) +# dat var member sdate time latitude longitude +# 1 1 25 2 1 640 19 + +# Any missing value: +apply(data_Sep_Nov, 4, function(x){sum(is.na(x))}) + + + -- GitLab From 8fcbbafed6f0fafffd6a96dac84a5d8fec9ff9e5 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Feb 2020 15:57:37 +0100 Subject: [PATCH 02/11] Including link in usecase.md to new code for member vs startdate --- inst/doc/usecase.md | 4 +++- .../{ex_1_4_variable_nmember.R => ex1_4_variable_nmember.R} | 0 2 files changed, 3 insertions(+), 1 deletion(-) rename inst/doc/usecase/{ex_1_4_variable_nmember.R => ex1_4_variable_nmember.R} (100%) diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 8f02b8e..3518793 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -18,7 +18,9 @@ In this document, you can link to the example scripts for various demands. For t 3. [Use experimental data attribute to load in oberservational data](inst/doc/usecase/ex1_3_attr_loadin.R) Load the experimental data first (with `retrieve = FALSE`), then retreive its dates and time attributes to use in the observational data load-in. It also shows how to use parameters `xxx_tolerance`, `xxx_across`, `merge_across_dims`, `split_multiselected_dims`. - + + 4. [Checking impact of start date order in the number of members](inst/doc/usecase/ex1_4_variable_nmember.R) + Mixing start dates of different months can lead to load different number of members, check the code provided and the [FAQ 10](/inst/doc/faq.md). 2. **Execute computation (use `Compute()`)** 1. 
[Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R) diff --git a/inst/doc/usecase/ex_1_4_variable_nmember.R b/inst/doc/usecase/ex1_4_variable_nmember.R similarity index 100% rename from inst/doc/usecase/ex_1_4_variable_nmember.R rename to inst/doc/usecase/ex1_4_variable_nmember.R -- GitLab From 3441fd6d8fdd7f344ae25dff54528267c7b02a97 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Feb 2020 15:58:53 +0100 Subject: [PATCH 03/11] fixing link in faq 10 --- inst/doc/faq.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 65326c0..7f48bc8 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -13,7 +13,7 @@ This document intends to be the first reference for any doubts that you may have 7. [Avoid or specify a node from cluster in Compute()](#7-avoid-or-specify-a-node-from-cluster-in-compute) 8. [Define a path with multiple dependencies](#8-define-a-path-with-multiple-dependencies) 9. [Use CDORemap() in function](#9-use-cdoremap-in-function) - 10. [The number of members depends on the start date](#10-the-number-of-members-depends-on-the-start-date) + 10. [The number of members depends on the start date](#10-the-number-of-members-depends-on-the-start-date) 2. **Something goes wrong...** -- GitLab From 4281bdeea05b9532b0805a231fc8037d78f7c989 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Feb 2020 16:04:31 +0100 Subject: [PATCH 04/11] improving text and link in faq10 --- inst/doc/faq.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 7f48bc8..c561cac 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -388,9 +388,9 @@ If you want to interpolate data by s2dverification::CDORemap in function, you ne machine which CDO module to use. Therefore, `CDO_module = 'CDO/1.9.5-foss-2018b'` should be added in Compute() cluster list. See the example in usecase [ex2_3_cdo.R](inst/doc/usecase/ex2_3_cdo.R). -### 10. 
The number of member depends on the start date +### 10. The number of members depends on the start date -In seasonal forecast, there are a few start dates that are more widely used (e.g. November 1st) than others. For those start dates extensively used, the number of members available is greater than for other start dates. This is the case of the model system5_m1: +In seasonal forecast, some start dates, such us November 1st, are more widely used than others. For those start dates extensively used, the number of members available is greater than for other start dates. This is the case of the model system5_m1: - for the start date November 1st, 1999, there are 51 members available, while - for the start date September 1st, 1999, there are 25 members available. -- GitLab From 605693ca5478031711e77ea0ccd8f06a0dd6adab Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Feb 2020 16:07:03 +0100 Subject: [PATCH 05/11] fix FAQ 10 link to usecase 1.4 --- inst/doc/faq.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index c561cac..ab10e3c 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -398,7 +398,7 @@ When trying to load both start dates at once using Start(), the order in which t - `sdates = c('19991101', '19990901')`, the member dimension will be of length 51, showing missing values for the members 26 to 51 in the second start date; - `sdates = c('19990901', '19991101')`, the member dimension will be of length 25, any member will be missing. -The code to reproduce this behaviour could be found in the Use Cases section, [example 1.4](/inst/doc/usecase/ex_1_4_variable_nmember.R). +The code to reproduce this behaviour could be found in the Use Cases section, [example 1.4](/inst/doc/usecase/ex1_4_variable_nmember.R). 
-- GitLab From a6d8407183732b1310b4a0610c0a3c05ed3c76f5 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Feb 2020 16:10:04 +0100 Subject: [PATCH 06/11] Formatting usecase.md 1.4 --- inst/doc/usecase.md | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 3518793..e1e4614 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -20,6 +20,7 @@ In this document, you can link to the example scripts for various demands. For t Load the experimental data first (with `retrieve = FALSE`), then retreive its dates and time attributes to use in the observational data load-in. It also shows how to use parameters `xxx_tolerance`, `xxx_across`, `merge_across_dims`, `split_multiselected_dims`. 4. [Checking impact of start date order in the number of members](inst/doc/usecase/ex1_4_variable_nmember.R) + Mixing start dates of different months can lead to load different number of members, check the code provided and the [FAQ 10](/inst/doc/faq.md). 2. **Execute computation (use `Compute()`)** -- GitLab From 5345d2e449c364768dc884217a9956ee2d5abd84 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Feb 2020 16:11:12 +0100 Subject: [PATCH 07/11] Formatting usecase.md 1.4 removing line --- inst/doc/usecase.md | 1 - 1 file changed, 1 deletion(-) diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index e1e4614..3518793 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -20,7 +20,6 @@ In this document, you can link to the example scripts for various demands. For t Load the experimental data first (with `retrieve = FALSE`), then retreive its dates and time attributes to use in the observational data load-in. It also shows how to use parameters `xxx_tolerance`, `xxx_across`, `merge_across_dims`, `split_multiselected_dims`. 4. 
[Checking impact of start date order in the number of members](inst/doc/usecase/ex1_4_variable_nmember.R) - Mixing start dates of different months can lead to load different number of members, check the code provided and the [FAQ 10](/inst/doc/faq.md). 2. **Execute computation (use `Compute()`)** -- GitLab From fba022ce905769b4ae0eaa9bc4462c3c750c1cdb Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Feb 2020 16:11:59 +0100 Subject: [PATCH 08/11] Formatting usecase.md 1.4 adding line --- inst/doc/usecase.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 3518793..15f2faa 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -20,7 +20,7 @@ In this document, you can link to the example scripts for various demands. For t Load the experimental data first (with `retrieve = FALSE`), then retreive its dates and time attributes to use in the observational data load-in. It also shows how to use parameters `xxx_tolerance`, `xxx_across`, `merge_across_dims`, `split_multiselected_dims`. 4. [Checking impact of start date order in the number of members](inst/doc/usecase/ex1_4_variable_nmember.R) - Mixing start dates of different months can lead to load different number of members, check the code provided and the [FAQ 10](/inst/doc/faq.md). + Mixing start dates of different months can lead to load different number of members, check the code provided and the [FAQ 10](/inst/doc/faq.md). 2. **Execute computation (use `Compute()`)** 1. 
[Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R) -- GitLab From d12745a9b75fdef6963a4f19fb25e44e27cce764 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Feb 2020 16:13:47 +0100 Subject: [PATCH 09/11] Formatting usecase.md 1.4 adding spaces --- inst/doc/usecase.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 15f2faa..fcaa782 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -19,8 +19,9 @@ In this document, you can link to the example scripts for various demands. For t 3. [Use experimental data attribute to load in oberservational data](inst/doc/usecase/ex1_3_attr_loadin.R) Load the experimental data first (with `retrieve = FALSE`), then retreive its dates and time attributes to use in the observational data load-in. It also shows how to use parameters `xxx_tolerance`, `xxx_across`, `merge_across_dims`, `split_multiselected_dims`. - 4. [Checking impact of start date order in the number of members](inst/doc/usecase/ex1_4_variable_nmember.R) - Mixing start dates of different months can lead to load different number of members, check the code provided and the [FAQ 10](/inst/doc/faq.md). + 4. [Checking the impact of start date order on the number of members](inst/doc/usecase/ex1_4_variable_nmember.R) + Mixing start dates of different months can lead to loading a different number of members; check the code provided and [FAQ 10](/inst/doc/faq.md). + 2. **Execute computation (use `Compute()`)** 1. 
[Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R) -- GitLab From 61bb6ba815414251dc143c40946cb958a0483343 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Feb 2020 16:27:36 +0100 Subject: [PATCH 10/11] Removing unnecessary lines and adding lat Sort --- inst/doc/usecase/ex1_4_variable_nmember.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/inst/doc/usecase/ex1_4_variable_nmember.R b/inst/doc/usecase/ex1_4_variable_nmember.R index 58cdcac..495c6f8 100644 --- a/inst/doc/usecase/ex1_4_variable_nmember.R +++ b/inst/doc/usecase/ex1_4_variable_nmember.R @@ -3,10 +3,6 @@ # See FAQ 10 [The members depends on the start date](/inst/doc/faq.md) library(startR) -lats.min <- -90 -lats.max <- 90 -lons.min <- 0 -lons.max <- 360 path_list <- list(list(name = 'system5', path = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc')) @@ -17,6 +13,7 @@ data_Nov_Sep <- Start(dat = path_list, sdate = sdates_exp, time = indices(1), latitude = values(list(0, 20)), + latitude_reorder=Sort(), longitude = values(list(0, 5)), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), @@ -25,7 +22,7 @@ data_Nov_Sep <- Start(dat = path_list, # 51 members dim(data_Nov_Sep) # dat var member sdate time latitude longitude -# 1 1 51 2 1 640 19 +# 1 1 51 2 1 71 19 apply(data_Nov_Sep, 4, function(x){sum(is.na(x))}) # 26 missing values for the second start date @@ -36,6 +33,7 @@ data_Sep_Nov <- Start(dat = path_list, sdate = sdates_exp, time = indices(1), latitude = values(list(0, 20)), + latitude_reorder=Sort(), longitude = values(list(0, 5)), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), @@ -45,7 +43,7 @@ data_Sep_Nov <- Start(dat = path_list, # 25 members available dim(data_Sep_Nov) # dat var member sdate time latitude longitude -# 1 1 25 2 1 640 19 +# 1 1 25 2 1 71 19 # Any missing value: apply(data_Sep_Nov, 4, function(x){sum(is.na(x))}) -- GitLab From 
90213b855c3aaf07b89ef4abf1c1915336ac3f09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ver=C3=B3nica=20Torralba-Fern=C3=A1ndez?= Date: Wed, 26 Feb 2020 16:40:02 +0100 Subject: [PATCH 11/11] Update faq.md --- inst/doc/faq.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index ab10e3c..572bff8 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -390,11 +390,11 @@ added in Compute() cluster list. See the example in usecase [ex2_3_cdo.R](inst/d ### 10. The number of members depends on the start date -In seasonal forecast, some start dates, such us November 1st, are more widely used than others. For those start dates extensively used, the number of members available is greater than for other start dates. This is the case of the model system5_m1: +In seasonal forecasting, some start dates, such as November 1st, are more widely used than others. For those extensively used start dates, the number of members available is greater than for other start dates. This is the case for the seasonal forecast system ECMWF SEAS5 (system5_m1): - for the start date November 1st, 1999, there are 51 members available, while - for the start date September 1st, 1999, there are 25 members available. -When trying to load both start dates at once using Start(), the order in which the start dates is specify will impact on the dimensions of the dataset if all member are loaded with `member = 'all'`: +When trying to load both start dates at once using Start(), the order in which the start dates are specified will affect the dimensions of the dataset if all members are loaded with `member = 'all'`: - `sdates = c('19991101', '19990901')`, the member dimension will be of length 51, showing missing values for the members 26 to 51 in the second start date; - `sdates = c('19990901', '19991101')`, the member dimension will be of length 25, any member will be missing. -- GitLab