From b988d356533b98926227489f45fa8a223f198387 Mon Sep 17 00:00:00 2001
From: aho
Date: Wed, 27 Nov 2019 10:10:22 +0100
Subject: [PATCH 1/2] Revise invalid scripts and add ex1_3

---
 inst/doc/usecase.md                                |  12 +-
 inst/doc/usecase/ex1_2_plotmap.R                   |  22 ++-
 inst/doc/usecase/ex1_3_attr_loadin.R               |  57 ++++++++
 inst/doc/usecase/ex2_5_exp_and_obs.R               |  50 -------
 .../ex2_7_seasonal_forecast_verification.R         | 138 ++++++++++++------
 inst/doc/usecase/ex2_8_calibration.R               |  12 +-
 6 files changed, 194 insertions(+), 97 deletions(-)
 create mode 100644 inst/doc/usecase/ex1_3_attr_loadin.R
 delete mode 100644 inst/doc/usecase/ex2_5_exp_and_obs.R

diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md
index af65e8d..4e2ebe6 100644
--- a/inst/doc/usecase.md
+++ b/inst/doc/usecase.md
@@ -2,11 +2,13 @@
 In this document, you can link to the example scripts for various demands. For the beginners, it is highly recommended to read the [practical guide](inst/doc/practical_guide.md) carefully first. You can find basic scripts and the configuration for different machines there.
 
-1. **Retrieve data (use `Start()` only)**
+1. **Retrieve data (use `Start()` only)**It also shows how to use parameters
     1. [Interpolation in Start()](inst/doc/usecase/ex1_1_tranform.R)
   Do the interpolation within Start(), and compare with Load() result. When the Start() parameter `transform_extra_cells = 2`, the two results will be the same.
     2. [Use s2dverification map plotting functions for exp and obs data](inst/doc/usecase/ex1_2_plotmap.R)
-  Use s2dverification::PlotEquiMap, PlotStereoMap, PlotLayout to visualize load-in data, and use experimental data attributes to load in associated observational data.
+  Use `s2dverification::PlotEquiMap, PlotStereoMap, PlotLayout` to visualize the loaded data, and use the experimental data attributes to load in the associated observational data. It also shows how to use the parameters `xxx_reorder`, `xxx_across`, `merge_across_dims`, `split_multiselected_dims`.
+    3. [Use experimental data attributes to load in observational data](inst/doc/usecase/ex1_3_attr_loadin.R)
+  Load the experimental data first (with `retrieve = FALSE`), then retrieve its dates and time attributes to use in the observational data load-in. It also shows how to use the parameters `xxx_tolerance`, `xxx_across`, `merge_across_dims`, `split_multiselected_dims`.
 
 2. **Execute computation (use `Compute()`)**
     1. [Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R)
     2. 
@@ -15,7 +17,9 @@ In this document, you can link to the example scripts for various demands. For t
     3. [Use function CDORemap for interpolation](inst/doc/usecase/ex2_3_cdo.R)
   Using parameter `CDO_module` is only available in startR_v0.1.3 or above. Interpolate data by using `s2dverification::CDORemap` in the workflow.
     4. [Use two functions in workflow](inst/doc/usecase/ex2_4_two_func.R)
-    5. [Using experimental and (date-corresponding) observational data](inst/doc/usecase/ex2_5_exp_and_obs.R)
+    5. 
     6. [Use external parameters in atomic function](inst/doc/usecase/ex2_6_ext_param_func.R)
-    7. [Seasonal forecast verification on cca](inst/doc/usecase/ex2_7_seasonal_forecast_verification.R)
+    7. [Calculate the ensemble-adjusted Continuous Ranked Probability Score (CRPS)](inst/doc/usecase/ex2_7_seasonal_forecast_crps.R)
+  Use `SpecsVerification::EnsCrps` to calculate the ensemble-adjusted Continuous Ranked Probability Score (CRPS) for ECMWF experimental data, and do ensemble mean. Use `s2dverification::PlotEquiMap` to plot the CRPS map.
+    8. [Use CSTools Calibration function](inst/doc/usecase/ex2_8_calibration.R)
+  Use `CSTools:::.cal`, the interior function of `CSTools::CST_Calibration`, to do the bias adjustment for ECMWF experimental monthly mean data.
diff --git a/inst/doc/usecase/ex1_2_plotmap.R b/inst/doc/usecase/ex1_2_plotmap.R
index 0287a77..f6230b4 100644
--- a/inst/doc/usecase/ex1_2_plotmap.R
+++ b/inst/doc/usecase/ex1_2_plotmap.R
@@ -1,6 +1,7 @@
 #---------------------------------------------------------------------
 # 1. Check the data with s2dverification map plotting functions
 # 2. Read associated data of another data set matching the dates of the first one
+# 3. How to use parameters `xxx_reorder`, `xxx_across`, `merge_across_dims`, `split_multiselected_dims`
 #---------------------------------------------------------------------
 library(startR)
 library(s2dverification)
@@ -29,6 +30,12 @@ header <- Start(dat = repos,
                 time = 'sdate')
                )
 
+# ------ Check exp data ------
+ attr(header, 'Dimensions')
+#  dat  var member sdate ftime  lat  lon
+#    1    1      5    21     3  256  512
+# ----------------------------
+
 lon <- attr(header, 'Variables')$common$lon
 lat <- attr(header, 'Variables')$common$lat
 
@@ -67,6 +74,9 @@ PlotLayout(PlotStereoMap,
 # Associated obs data
 #-----------------------------
 dates <- attr(header, 'Variables')$common$time  #use date attributes in exp data
+#> dim(dates)
+#sdate ftime
+#   21     3
 
 path <- '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_*/'
 repos <- paste0(path, '$var$_$date$.nc')
@@ -78,16 +88,26 @@ data_obs <- Start(dat = repos,
                   lon = 'all',
                   lat_reorder = Sort(),
                   lon_reorder = CircularSort(0, 360),
+                  # ftime continues along date
                   ftime_across = 'date',
+                  # merge ftime and date into one dim
                   merge_across_dims = TRUE,
+                  # split the ftime selector, which is two-dimensional [sdate = 21, ftime = 3]
                   split_multiselected_dims = TRUE,
                   synonims = list(ftime = 'time',
                                   lat = c('lat', 'latitude'),
                                   lon = c('lon', 'longitude')),
                   return_vars = list(lon = NULL,
-                                     lat = NULL)
+                                     lat = NULL,
+                                     ftime = 'date')
                   )
 
+# ------ Check obs data ------
+ attr(data_obs, 'Dimensions')
+#  dat  var sdate ftime  lat  lon
+#    1    1    21     3  256  512
+# ----------------------------
+# retrieve obs data into the workstation
 data_obs <- eval(data_obs)
 
 # check 3 ftimes
diff --git a/inst/doc/usecase/ex1_3_attr_loadin.R b/inst/doc/usecase/ex1_3_attr_loadin.R
new file mode 100644
index 0000000..ce37db5
--- /dev/null
+++ b/inst/doc/usecase/ex1_3_attr_loadin.R
@@ -0,0 +1,57 @@
+ repos <- paste0('/esarchive/exp/ecmwf/system4_m1/6hourly/',
+                 '$var$/$var$_$sdate$.nc')
+
+ system4 <- Start(dat = repos,
+                  var = 'tas',
+                  sdate = sapply(1994, function(x) paste0(x, sprintf('%02d', 5:12), '01')),
+                  time = indices(seq(1, 124, 4)),  # first time step per day
+                  ensemble = 'all',
+                  latitude = indices(1:10),
+                  longitude = indices(1:10),
+                  return_vars = list(latitude = NULL,
+                                     longitude = NULL,
+                                     time = c('sdate')))
+
+#-------- Check exp data -----------
+ attr(system4, 'Dimensions')
+#  dat  var sdate  time ensemble latitude longitude
+#    1    1     8    31       51       10        10
+#-----------------------------------
+
+# ------- retrieve the attributes for obs load-in ----------
+ dates <- attr(system4, 'Variables')$common$time
+#> dim(dates)
+#sdate  time
+#    8    31
+ dates_file <- sort(unique(gsub('-', '', sapply(as.character(dates),
+                                                substr, 1, 7))))
+#> dates_file
+#[1] "199405" "199406" "199407" "199408" "199409" "199410" "199411" "199412"
+# -----------------------------------------------------------
+
+ repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/6hourly/',
+                     '$var$/$var$_$file_date$.nc')
+ erai <- Start(dat = repos_obs,
+               var = 'tas',
+               file_date = dates_file,
+               time = values(dates),  # use dates from exp
+               latitude = indices(1:10),
+               longitude = indices(1:10),
+               time_var = 'time',
+               # because time is assigned by 'values', set the tolerance to avoid matching dates that are too far apart
+               time_tolerance = as.difftime(1, units = 'hours'),
+               # time is defined by 'dates', whose dimension is [sdate = 8, time = 31]
+               time_across = 'file_date',
+               return_vars = list(latitude = NULL,
+                                  longitude = NULL,
+                                  time = 'file_date'),
+               # combine time and file_date into one dim
+               merge_across_dims = TRUE,
+               # split time, because its selector is two-dimensional
+               split_multiselected_dims = TRUE)
+
+#------- Check erai -----------
+ attr(erai, 'Dimensions')
+#  dat  var sdate  time latitude longitude
+#    1    1     8    31       10        10
+#------------------------------
diff --git a/inst/doc/usecase/ex2_5_exp_and_obs.R b/inst/doc/usecase/ex2_5_exp_and_obs.R
deleted file mode 100644
index 71874e6..0000000
--- a/inst/doc/usecase/ex2_5_exp_and_obs.R
+++ /dev/null
@@ -1,50 +0,0 @@
- repos <- paste0('/esnas/exp/ecmwf/system4_m1/6hourly/',
-                 '$var$/$var$_$sdate$.nc')
-
- system4 <- Start(dat = repos,
-                  var = 'sfcWind',
-                  sdate = paste0(1981:1984, '1101'),
-                  time = indices((30*4+1):(30*4+4)),
-                  ensemble = 'all',
-                  latitude = indices(1:10),
-                  longitude = indices(1:10),
-                  return_vars = list(latitude = NULL,
-                                     longitude = NULL,
-                                     time = c('sdate')))
-
- repos <- paste0('/esnas/recon/ecmwf/erainterim/6hourly/',
-                 '$var$/$var$_$file_date$.nc')
-
- dates <- attr(system4, 'Variables')$common$time
- dates_file <- sort(unique(gsub('-', '', sapply(as.character(dates),
-                                                substr, 1, 7))))
-
- erai <- Start(dat = repos,
-               var = 'sfcWind',
-               file_date = dates_file,
-               time = values(dates),
-               latitude = indices(1:10),
-               longitude = indices(1:10),
-               time_var = 'time',
-               time_tolerance = as.difftime(1, units = 'hours'),
-               time_across = 'file_date',
-               return_vars = list(latitude = NULL,
-                                  longitude = NULL,
-                                  time = 'file_date'),
-               merge_across_dims = TRUE,
-               split_multiselected_dims = TRUE)
-
- step <- Step(eqmcv_atomic,
-              list(a = c('ensemble', 'sdate'),
-                   b = c('sdate')),
-              list(c = c('ensemble', 'sdate')))
-
- res <- Compute(step, list(system4, erai),
-                chunks = list(latitude = 5,
-                              longitude = 5,
-                              time = 2),
-                cluster = list(queue_host = 'bsceslogin01.bsc.es',
-                               max_jobs = 4,
-                               cores_per_job = 2),
-                shared_dir = '/esnas/scratch/nmanuben/test_bychunk',
-                wait = FALSE)
diff --git a/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R b/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R
index bcd11a5..7c7d5d8 100644
--- a/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R
+++ b/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R
@@ -1,45 +1,101 @@
 library(startR)
 
- repos <- '/perm/ms/spesiccf/c3ah/qa4seas/data/seasonal/g1x1/ecmf-system4/msmm/atmos/seas/tprate/12/ecmf-system4_msmm_atmos_seas_sfc_$date$_tprate_g1x1_init12.nc'
-
- data <- Start(dat = repos,
-               var = 'tprate',
-               date = 'all',
-               time = 'all',
-               number = 'all',
-               latitude = 'all',
-               longitude = 'all',
-               return_vars = list(time = 'date'))
-
- dates <- attr(data, 'Variables')$common$time
-
- repos <- '/perm/ms/spesiccf/c3ah/qa4seas/data/ecmf-ei_msmm_atmos_seas_sfc_19910101-20161201_t2m_g1x1_init02.nc'
-
- obs <- Start(dat = repos,
-              var = 't2m',
-              time = values(dates),
-              latitude = 'all',
-              longitude = 'all',
-              split_multiselected_dims = TRUE)
-
- crps <- function(x, y) {
-   mean(SpecsVerification::EnsCrps(x, y, R.new = Inf))
+# exp data
+ repos <- paste0('/esarchive/exp/ecmwf/system4_m1/monthly_mean/',
+                 '$var$_f6h/$var$_$sdate$.nc')
+ sdates <- sapply(2012:2016, function(x) paste0(x, sprintf('%02d', 1:12), '01'))
+
+ exp <- Start(dat = repos,
+              var = 'tas',
+              sdate = sdates,
+              time = indices(1),
+              ensemble = 'all',
+              latitude = 'all',
+              longitude = 'all',
+              synonims = list(longitude = c('lon', 'longitude'),
+                              latitude = c('lat', 'latitude')),
+              retrieve = FALSE)
+
+# obs data
+ repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/monthly_mean/',
+                     '$var$_f6h/$var$_$sdate$.nc')
+ sdates_obs <- sapply(2012:2016, function(x) paste0(x, sprintf('%02d', 1:12)))
+
+ obs <- Start(dat = repos_obs,
+              var = 'tas',
+              sdate = sdates_obs,
+              time = indices(1),
+              latitude = 'all',
+              longitude = 'all',
+              synonims = list(longitude = c('lon', 'longitude'),
+                              latitude = c('lat', 'latitude')),
+              retrieve = FALSE)
+
+
+ func <- function(x, y) {
+   crps <- mean(SpecsVerification::EnsCrps(x, y, R.new = Inf))
+   return(crps)
 }
-
- s <- Step(crps, target_dims = list(c('date', 'number'), c('date')),
-           output_dims = NULL)
- wf <- AddStep(list(data, obs), s)
-
- r <- Compute(wf,
-              chunks = list(latitude = 10,
-                            longitude = 3),
-              cluster = list(queue_host = 'cca',
-                             queue_type = 'pbs',
-                             max_jobs = 10,
-                             init_commands = list('module load ecflow'),
-                             r_module = 'R/3.3.1',
-                             extra_queue_params = list('#PBS -l EC_billing_account=spesiccf')),
-              ecflow_output_dir = '/perm/ms/spesiccf/c3ah/startR_test/',
-              is_ecflow_output_dir_shared = FALSE
-             )
+ step <- Step(func, target_dims = list(c('sdate', 'ensemble'), c('sdate')),
+              output_dims = NULL)
+ wf <- AddStep(list(exp, obs), step)
+
+
+# Compute() on fatnodes
+ res <- Compute(wf,
+                chunks = list(latitude = 2,
+                              longitude = 2),
+                threads_load = 2,
+                threads_compute = 4,
+                cluster = list(queue_host = 'fat',
+                               queue_type = 'slurm',
+                               cores_per_job = 2,
+                               job_wallclock = '01:10:00',
+                               max_jobs = 4,
+                               bidirectional = TRUE
+                               ),
+                ecflow_suite_dir = '/home/Earth/aho/startR_local/'
+                )
+
+
+# Compute() on Power 9
+ res <- Compute(wf,
+                chunks = list(latitude = 2,
+                              longitude = 2),
+                threads_load = 2,
+                threads_compute = 4,
+                cluster = list(queue_host = 'p1',
+                               queue_type = 'slurm',
+                               temp_dir = '/gpfs/scratch/bsc32/bsc32734/startR_hpc/',
+                               r_module = 'R/3.5.0-foss-2018b',
+                               job_wallclock = '00:30:00',
+                               cores_per_job = 4,
+                               max_jobs = 4,
+                               bidirectional = FALSE,
+                               polling_period = 50),
+                ecflow_suite_dir = '/home/Earth/aho/startR_local/',
+                wait = TRUE
+                )
+
+# Results
+ dim(res$output1)
+#  dat  var time latitude longitude
+#    1    1    1      256       512
+ summary(res$output1)
+#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+# 0.09941 0.37760 0.71640 0.83570 1.20300 6.23400
+
+
+# Plotting
+library(s2dverification)
+
+ lon <- seq(from = 0, to = 359, length.out = 512)
+ lat <- seq(from = 90, to = -90, length.out = 256)
+
+ brks <- seq(-0.1, 4, length.out = 11)
+ PlotEquiMap(res$output1[1, 1, 1, , ], lon, lat,
+             color_fun = clim.palette('yellowred'), brks = brks,
+             filled.continents = FALSE, triangle_ends = c(TRUE, TRUE),
+             toptitle = 'ECMWF monthly mean tas CRPS 2012-2016', title_scale = 0.6)
+
diff --git a/inst/doc/usecase/ex2_8_calibration.R b/inst/doc/usecase/ex2_8_calibration.R
index 12c14e9..a8bd575 100644
--- a/inst/doc/usecase/ex2_8_calibration.R
+++ b/inst/doc/usecase/ex2_8_calibration.R
@@ -65,10 +65,20 @@ res <- Compute(wf, chunks = list(latitude = 2, longitude = 2),
 
 # Declaration of HPC and execution
 ## ECFlow is required
-res_fat2 <- Compute(wf,chunks = list(latitude = 2, longitude = 2),
+res_fat2 <- Compute(wf,
+                    chunks = list(latitude = 2, longitude = 2),
                     threads_load = 2,
                     threads_compute = 4,
                     cluster = list(queue_host = "bsceslogin01.bsc.es",
                                    cores_per_job = 2, max_jobs = 4,
                                    job_wallclock = '00:10:00'),
                     ecflow_suite_dir = "/esarchive/scratch/nperez/ecflow") # your path!
+
+# Results
+ dim(res$output1)
+# ensemble sdate  dat  var time latitude longitude
+#       15    11    1    1    1       14        15
+ summary(res$output1)
+#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+# 292.7   300.6   301.1   301.1   301.6   306.8
+
-- 
GitLab


From fc5629e79874eede471e7d5a85928dbde5a57f2e Mon Sep 17 00:00:00 2001
From: aho
Date: Wed, 27 Nov 2019 10:12:44 +0100
Subject: [PATCH 2/2] Small fix

---
 inst/doc/usecase.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md
index 4e2ebe6..880020d 100644
--- a/inst/doc/usecase.md
+++ b/inst/doc/usecase.md
@@ -2,7 +2,7 @@
 In this document, you can link to the example scripts for various demands. For the beginners, it is highly recommended to read the [practical guide](inst/doc/practical_guide.md) carefully first. You can find basic scripts and the configuration for different machines there.
 
-1. **Retrieve data (use `Start()` only)**It also shows how to use parameters
+1. **Retrieve data (use `Start()` only)**
     1. [Interpolation in Start()](inst/doc/usecase/ex1_1_tranform.R)
   Do the interpolation within Start(), and compare with Load() result. When the Start() parameter `transform_extra_cells = 2`, the two results will be the same.
     2. [Use s2dverification map plotting functions for exp and obs data](inst/doc/usecase/ex1_2_plotmap.R)
-- 
GitLab
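
A quick way to try the revised ex2_7 workflow is a local run. This is only a sketch, not part of the patch: it assumes startR's Compute() executes the chunks on the local machine when no `cluster` argument is given, and it reuses the `wf` workflow object built in the script above; `res_local` is a name chosen here for illustration.

# Sketch only (assumption: Compute() runs locally when 'cluster' is not supplied).
# 'wf' is the CRPS workflow from ex2_7 above; adjust chunks/threads to your machine.
 res_local <- Compute(wf,
                      chunks = list(latitude = 2,
                                    longitude = 2),
                      threads_load = 2,
                      threads_compute = 4)
 dim(res_local$output1)   # expected dims: dat var time latitude longitude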