From ea9400cc581c06227741dec2bde93d7c7884aa94 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 28 Sep 2023 18:22:06 +0200 Subject: [PATCH 01/27] Add run_dir document; add Autosubmit use case --- inst/doc/practical_guide.md | 1 + inst/doc/usecase/ex2_1_timedim.R | 44 +++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index b22c629..91c11ee 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -746,6 +746,7 @@ To have the good practice, note down the expid if it is automatically created by - `hpc_user`: Your user ID on the HPC (i.e., "bsc32xxx"). It is required if "queue_host" is not 'local'. - `data_dir`: The path to the data repository if the data is not shared. - `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module. +- `run_dir`: The directory to run the startR jobs. It is useful when the self-defined function has a relative path. - `init_commands`: The initial commands in bash script before R script runs. For example, the modules required by computation can be loaded here. - `r_module`: Name of the UNIX environment module to be used for R. If not specified, `module load R` will be used. - `CDO_module`: Name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `tranform` is required in Start(). 
diff --git a/inst/doc/usecase/ex2_1_timedim.R b/inst/doc/usecase/ex2_1_timedim.R index 15ef37d..0c0f36c 100644 --- a/inst/doc/usecase/ex2_1_timedim.R +++ b/inst/doc/usecase/ex2_1_timedim.R @@ -45,9 +45,9 @@ library(startR) ## on Power9 #-----------modify according to your personal info--------- - queue_host = 'cte-power' #your own host name for power9 - temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' - ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory + queue_host <- 'cte-power' #your own host name for power9 + temp_dir <- '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' + ecflow_suite_dir <- '/home/Earth/nperez/startR_local/' #your own local directory #------------------------------------------------------------ res <- Compute(wf1, chunks = list(ensemble = 20, @@ -66,11 +66,12 @@ library(startR) ecflow_suite_dir = ecflow_suite_dir, wait = TRUE) -## on Nord3 + +## on Nord3 with ecFlow #-----------modify according to your personal info--------- - queue_host = 'nord4' - temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' - ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory + queue_host <- 'nord4' + temp_dir <- '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' + ecflow_suite_dir <- '/home/Earth/nperez/startR_local/' #your own local directory #------------------------------------------------------------ res <- Compute(wf1, chunks = list(ensemble = 20, @@ -88,3 +89,32 @@ library(startR) ecflow_suite_dir = ecflow_suite_dir, wait = TRUE) + + +## on Nord3 with Autosubmit +#-----------modify according to your personal info--------- + hpc_user <- "bsc32xxx" + expid <- "xxxx" # autosubmit exp id; can be NULL + autosubmit_suite_dir <- "/home/Earth//startR_local_autosubmit/" +#------------------------------------------------------------ + res <- Compute(wf1, + chunks = list(ensemble = 20, sdate = 2), + threads_load = 2, + threads_compute = 4, + cluster = list( + queue_host = 'nord3', + r_module = "R/4.1.2-foss-2019b", + 
autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3', + cores_per_job = 2, + job_wallclock = '01:00:00', + max_jobs = 40, + polling_period = 10, + extra_queue_params = list('#SBATCH --constraint=medmem', '#SBATCH --exclusive'), + expid = NULL, + hpc_user = hpc_user + ), + workflow_manager = 'autosubmit', + autosubmit_suite_dir = autosubmit_suite_dir, + autosubmit_server = NULL, #'bscesautosubmit01', + wait = TRUE + ) -- GitLab From 00a0f58ea97515a4b2adf73a74cb778a5a029f5d Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 11 Oct 2023 16:21:09 +0200 Subject: [PATCH 02/27] first draft --- .../tutorial/PATC2023/griddes_system7c3s.txt | 19 +++ .../PATC2023/handson_1-data-loading.md | 124 ++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 inst/doc/tutorial/PATC2023/griddes_system7c3s.txt create mode 100644 inst/doc/tutorial/PATC2023/handson_1-data-loading.md diff --git a/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt b/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt new file mode 100644 index 0000000..b6f1847 --- /dev/null +++ b/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt @@ -0,0 +1,19 @@ +# Grid description file for Meteofrance System 7 (C3S) +# Serves as reference_grid for archive.ym +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = longitude +xlongname = "longitude" +xunits = "degrees_east" +yname = latitude +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md new file mode 100644 index 0000000..0eb4b0c --- /dev/null +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -0,0 +1,124 @@ +# Hands-on 1: Load data by startR + +## Goal +Use startR to load the data and learn how to adjust data while loading data. + +## 0. Load required packages + +```r +# Clean the session +rm(list = ls()) + +library(startR) +``` +## 1. 
Load data from data repository (esarchive/) + +#TODO: update it +**Data description**: +This sample data set contains a small cutout of gridded seasonal precipitation +forecast data from the Copernicus Climate Change ECMWF-System 5 forecast system. +Specifically, for the 'prlr' (precipitation) variable, for the first 6 forecast +ensemble members, daily values, for all 31 days in March following the forecast +starting dates in November of years 2010 to 2012, for a small 4x4 pixel cutout in +a region in the North-Western Italian Alps (44N-47N, 6E-9E). The data resolution is 1 degree. + +Use the above information to define the variable, start dates, longitude and latitude. + +```r +#TODO: update the path + # Use this one if on workstation or nord3 (have access to /esarchive) + path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" + + # Use this one if on Marenostrum4 and log in with PATC2021 account + path_exp <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', + 'exp/ecmwf/system5c3s/daily_mean/', + '$var$_s0-24h/$var$_$sdate$.nc') + + var <- 'tas' + sdate_hsct <- paste0(1993:2016, '1101') + sdate_fcst <- '20201101' + lon.min <- -20 + lon.max <- 40 + lat.min <- 20 + lat.max <- 80 +``` + +Use Start() to load the data. 
+ +```r + hcst <- Start(dat = path_exp, + var = var, + syear = sdate_fcst, + ensemble = 'all', + time = 1:2, + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(-180, 180), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +```r + fcst <- Start(dat = path_exp, + var = var, + syear = sdate_fcst, + ensemble = 'all', + time = 1:2, + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(-180, 180), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + + +path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' + +# Create an date array from hcst dates +Check the time attributes of hcst: Is it correct? +To load the corresponding obs data, we can use these time values as the selectors in obs Start() call. + +dim(attributes(hcst)$Variables$common$time) +syear time + 24 2 + +str(attributes(hcst)$Variables$common$time) + POSIXct[1:48], format: "1993-12-01" "1994-12-01" "1995-12-01" "1996-12-01" "1997-12-01" ... 
+ +# Adjust the day to the correct month +attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) + +date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') +sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) + +```r + obs <- Start(dat = path_obs, + var = var, + syear = sdate_obs, + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = './griddes_system7c3s.txt', + method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + + -- GitLab From f80f6fc46ba65e86cb6d9b5539b1ac4892a6c917 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 11 Oct 2023 16:31:18 +0200 Subject: [PATCH 03/27] several fixes --- .../tutorial/PATC2023/handson_1-data-loading.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 0eb4b0c..0daf8eb 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -35,7 +35,7 @@ Use the above information to define the variable, start dates, longitude and lat '$var$_s0-24h/$var$_$sdate$.nc') var <- 'tas' - sdate_hsct <- paste0(1993:2016, '1101') + sdate_hcst <- paste0(1993:2016, '1101') sdate_fcst <- '20201101' lon.min <- -20 lon.max <- 40 @@ -48,7 +48,7 @@ Use Start() to load the data. 
```r hcst <- Start(dat = path_exp, var = var, - syear = sdate_fcst, + syear = sdate_hcst, ensemble = 'all', time = 1:2, latitude = values(list(lat.min, lat.max)), @@ -82,12 +82,12 @@ Use Start() to load the data. ``` -path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' - # Create an date array from hcst dates Check the time attributes of hcst: Is it correct? + To load the corresponding obs data, we can use these time values as the selectors in obs Start() call. +```r dim(attributes(hcst)$Variables$common$time) syear time 24 2 @@ -100,8 +100,11 @@ attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) +``` ```r + path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' + obs <- Start(dat = path_obs, var = var, syear = sdate_obs, @@ -110,7 +113,9 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) longitude = values(list(lon.min, lon.max)), longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, - transform_params = list(grid = './griddes_system7c3s.txt', +#TODO: Change to relative path +# transform_params = list(grid = './griddes_system7c3s.txt', + transform_params = list(grid = '/esarchive/scratch/aho/git/startR/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), -- GitLab From e4d467e0b2e21a990db624813b68590df425396e Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 11 Oct 2023 16:40:57 +0200 Subject: [PATCH 04/27] Turn data into s2dv_cube --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 0daf8eb..8aeb5fb 100644 --- 
a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -126,4 +126,12 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) retrieve = TRUE) ``` +# Turn the data into s2dv_cube +```r +library(CSTools) + +hcst <- as.s2dv_cube(hcst) +fcst <- as.s2dv_cube(fcst) +obs <- as.s2dv_cube(obs) +``` -- GitLab From d9dc000e4527dc0d9426c5a9957784bccb25882a Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 11 Oct 2023 16:58:34 +0200 Subject: [PATCH 05/27] Remove as.s2dv_cube --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 8aeb5fb..6b280a4 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -125,13 +125,3 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) longitude = NULL, latitude = NULL), retrieve = TRUE) ``` - -# Turn the data into s2dv_cube - -```r -library(CSTools) - -hcst <- as.s2dv_cube(hcst) -fcst <- as.s2dv_cube(fcst) -obs <- as.s2dv_cube(obs) -``` -- GitLab From d5c5edd156ae06c5aa415877c3cd123d9587298c Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 16 Oct 2023 09:47:15 +0200 Subject: [PATCH 06/27] Add split_multiselected_dims = T in obs call. 
Add checks for dataset consistency --- .../PATC2023/handson_1-data-loading.md | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 6b280a4..69da163 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -108,6 +108,7 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) obs <- Start(dat = path_obs, var = var, syear = sdate_obs, + split_multiselected_dims = TRUE, latitude = values(list(lat.min, lat.max)), latitude_reorder = Sort(), longitude = values(list(lon.min, lon.max)), @@ -125,3 +126,23 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) longitude = NULL, latitude = NULL), retrieve = TRUE) ``` + +## 2. Check if the datasets are consistent + +```r +lat_hcst <- attributes(hcst)$Variables$common$latitude +lon_hcst <- attributes(hcst)$Variables$common$longitude +lat_obs <- attributes(obs)$Variables$common$latitude +lon_obs <- attributes(obs)$Variables$common$longitude + +identical(c(lat_obs), c(lat_hcst)) +[1] TRUE +identical(c(lon_obs), c(lon_hcst)) +[1] TRUE + +time_hcst <- attributes(hcst)$Variables$common$time +time_obs <- attributes(obs)$Variables$common$time + +identical(format(time_hcst, '%Y%m'), format(time_obs, '%Y%m')) +[1] TRUE +``` -- GitLab From d81063e1f1129dba7ba221654109cc0921ccfbf3 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 17 Oct 2023 14:28:07 +0200 Subject: [PATCH 07/27] Suppress message of all unit tests because testthat edition 3 shows the messages. Run unit test on GitLab. 
--- .Rbuildignore | 2 +- tests/testthat/test-AddStep-DimNames.R | 2 ++ tests/testthat/test-Compute-CDORemap.R | 2 ++ tests/testthat/test-Compute-NumChunks.R | 2 ++ tests/testthat/test-Compute-chunk_depend_dim.R | 2 ++ tests/testthat/test-Compute-chunk_split_dim.R | 2 ++ tests/testthat/test-Compute-extra_params.R | 2 ++ tests/testthat/test-Compute-inconsistent_target_dim.R | 2 ++ tests/testthat/test-Compute-irregular_regrid.R | 2 ++ tests/testthat/test-Compute-timedim.R | 2 ++ tests/testthat/test-Compute-transform_all.R | 2 ++ tests/testthat/test-Compute-transform_indices.R | 2 ++ tests/testthat/test-Compute-transform_values.R | 2 ++ tests/testthat/test-Compute-two_data.R | 2 ++ tests/testthat/test-Compute-use_attribute.R | 2 ++ tests/testthat/test-Start-DCPP-across-depends.R | 2 ++ tests/testthat/test-Start-calendar.R | 2 ++ tests/testthat/test-Start-depends_values.R | 2 ++ tests/testthat/test-Start-first_file_missing.R | 2 ++ tests/testthat/test-Start-global-lon-across_meridian.R | 2 ++ tests/testthat/test-Start-implicit_dependency_by_selector.R | 2 ++ tests/testthat/test-Start-implicit_inner_dim.R | 2 ++ tests/testthat/test-Start-indices_list_vector.R | 2 ++ tests/testthat/test-Start-largest_dims_length.R | 2 ++ tests/testthat/test-Start-line_order-consistency.R | 2 ++ tests/testthat/test-Start-metadata_dims.R | 2 ++ tests/testthat/test-Start-metadata_filedim_dependency.R | 2 ++ tests/testthat/test-Start-metadata_reshaping.R | 2 ++ tests/testthat/test-Start-multiple-sdates.R | 2 ++ tests/testthat/test-Start-path_glob_permissive.R | 2 ++ tests/testthat/test-Start-reorder-lat.R | 2 ++ tests/testthat/test-Start-reorder-latCoarse.R | 2 ++ tests/testthat/test-Start-reorder-lon-180to180.R | 2 ++ tests/testthat/test-Start-reorder-lon-transform_-180to180.R | 2 ++ tests/testthat/test-Start-reorder-lon-transform_0to360.R | 2 ++ tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R | 2 ++ tests/testthat/test-Start-reorder-lon0to360.R | 2 ++ 
tests/testthat/test-Start-reorder-lon0to360Coarse.R | 2 ++ tests/testthat/test-Start-reorder-metadata.R | 2 ++ tests/testthat/test-Start-reorder-retrieve.R | 2 ++ tests/testthat/test-Start-reorder_all.R | 2 ++ tests/testthat/test-Start-reorder_indices.R | 2 ++ tests/testthat/test-Start-reshape.R | 2 ++ tests/testthat/test-Start-return_vars_name.R | 2 ++ tests/testthat/test-Start-split-merge.R | 2 ++ tests/testthat/test-Start-time_unit.R | 2 ++ tests/testthat/test-Start-transform-all.R | 2 ++ tests/testthat/test-Start-transform-border.R | 2 ++ tests/testthat/test-Start-transform-lat-Sort-all.R | 2 ++ tests/testthat/test-Start-transform-lat-Sort-indices.R | 2 ++ tests/testthat/test-Start-transform-lat-Sort-values.R | 2 ++ tests/testthat/test-Start-transform-lon-across_meridian.R | 2 ++ tests/testthat/test-Start-transform-metadata.R | 2 ++ tests/testthat/test-Start-transform-three-selectors.R | 2 ++ tests/testthat/test-Start-two_dats.R | 2 ++ tests/testthat/test-Start-values_list_vector.R | 2 ++ 56 files changed, 111 insertions(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index aa7059a..98316cc 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,7 +9,7 @@ ^inst/doc$ ^\.gitlab-ci\.yml$ ## unit tests should be ignored when building the package for CRAN -^tests$ +#^tests$ ^inst/PlotProfiling\.R$ ^.gitlab$ # Suggested by http://r-pkgs.had.co.nz/package.html diff --git a/tests/testthat/test-AddStep-DimNames.R b/tests/testthat/test-AddStep-DimNames.R index 5e1fe9c..e20ecfa 100644 --- a/tests/testthat/test-AddStep-DimNames.R +++ b/tests/testthat/test-AddStep-DimNames.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("Single File - Local execution", { @@ -28,3 +29,4 @@ suppressWarnings( "The target dimensions required by 'step_fun' for the input 1 are not present in the corresponding provided object in 'inputs'.") }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-CDORemap.R 
b/tests/testthat/test-Compute-CDORemap.R index fb31d00..580bf6e 100644 --- a/tests/testthat/test-Compute-CDORemap.R +++ b/tests/testthat/test-Compute-CDORemap.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_3", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -54,3 +55,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-NumChunks.R b/tests/testthat/test-Compute-NumChunks.R index ffce880..d47b0f7 100644 --- a/tests/testthat/test-Compute-NumChunks.R +++ b/tests/testthat/test-Compute-NumChunks.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("Single File - Local execution", { @@ -51,3 +52,4 @@ ignore_attr = TRUE ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-chunk_depend_dim.R b/tests/testthat/test-Compute-chunk_depend_dim.R index 101bfb5..80f407b 100644 --- a/tests/testthat/test-Compute-chunk_depend_dim.R +++ b/tests/testthat/test-Compute-chunk_depend_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests the chunking over depended and depending dimension. # ex1_14 # 1. depending dim is values() @@ -221,3 +222,4 @@ Start(dat = path, ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-chunk_split_dim.R b/tests/testthat/test-Compute-chunk_split_dim.R index 0c1da4a..5e43067 100644 --- a/tests/testthat/test-Compute-chunk_split_dim.R +++ b/tests/testthat/test-Compute-chunk_split_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test is to check chunking over the split dim. It involves # how to arrange the chunks in a correct order even when chunking is happening. 
@@ -224,3 +225,4 @@ c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 46 }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-extra_params.R b/tests/testthat/test-Compute-extra_params.R index f055e96..49c36da 100644 --- a/tests/testthat/test-Compute-extra_params.R +++ b/tests/testthat/test-Compute-extra_params.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_6", { @@ -125,3 +126,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-inconsistent_target_dim.R b/tests/testthat/test-Compute-inconsistent_target_dim.R index 58f96a9..5a816ef 100644 --- a/tests/testthat/test-Compute-inconsistent_target_dim.R +++ b/tests/testthat/test-Compute-inconsistent_target_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # If dataset are more than 1 (e.g., exp and obs), ByChunks() checks if # they have consistent dimensions in favor of Apply() computation. However, # only margin dimensions need to be identical. Target dimensions can have @@ -138,3 +139,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-irregular_regrid.R b/tests/testthat/test-Compute-irregular_regrid.R index 7de1471..ba07d71 100644 --- a/tests/testthat/test-Compute-irregular_regrid.R +++ b/tests/testthat/test-Compute-irregular_regrid.R @@ -1,3 +1,4 @@ +suppressMessages({ library(s2dv) test_that("1. 
ex2_13", { @@ -73,3 +74,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-timedim.R b/tests/testthat/test-Compute-timedim.R index fbc5af0..922edfb 100644 --- a/tests/testthat/test-Compute-timedim.R +++ b/tests/testthat/test-Compute-timedim.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_1", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -54,3 +55,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-transform_all.R b/tests/testthat/test-Compute-transform_all.R index 05d5de6..785070c 100644 --- a/tests/testthat/test-Compute-transform_all.R +++ b/tests/testthat/test-Compute-transform_all.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. Chunk along non-lat/lon dim", { #skip_on_cran() @@ -119,3 +120,4 @@ res4 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-transform_indices.R b/tests/testthat/test-Compute-transform_indices.R index c2d3e35..9c8cc39 100644 --- a/tests/testthat/test-Compute-transform_indices.R +++ b/tests/testthat/test-Compute-transform_indices.R @@ -1,3 +1,4 @@ +suppressMessages({ # Using indinces() to assign lat and lon, and transform the data. # Also test transform + chunk along lat/lon. @@ -376,3 +377,4 @@ as.vector(drop(res4$output1)) ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-transform_values.R b/tests/testthat/test-Compute-transform_values.R index 25a803f..32a544e 100644 --- a/tests/testthat/test-Compute-transform_values.R +++ b/tests/testthat/test-Compute-transform_values.R @@ -1,3 +1,4 @@ +suppressMessages({ # Using values() to assign lat and lon, and transform the data. # Also test transform + chunk along lat/lon. 
@@ -603,3 +604,4 @@ res3_180 #================================================================ }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-two_data.R b/tests/testthat/test-Compute-two_data.R index dfa579a..33d6631 100644 --- a/tests/testthat/test-Compute-two_data.R +++ b/tests/testthat/test-Compute-two_data.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_7", { # exp data @@ -81,3 +82,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-use_attribute.R b/tests/testthat/test-Compute-use_attribute.R index 6f218e6..07ecd13 100644 --- a/tests/testthat/test-Compute-use_attribute.R +++ b/tests/testthat/test-Compute-use_attribute.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_2", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -53,3 +54,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-DCPP-across-depends.R b/tests/testthat/test-Start-DCPP-across-depends.R index bfe44b1..0f49a38 100644 --- a/tests/testthat/test-Start-DCPP-across-depends.R +++ b/tests/testthat/test-Start-DCPP-across-depends.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("Chunks of DCPP files- Local execution", { path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) @@ -52,3 +53,4 @@ expect_equal(dat[1,1,2,1:12,,], dat_2018_chunk1[1,1,,,]) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 7dfbc2c..43651ab 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. 
360_day, daily, unit = 'days since 1850-01-01'", { path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast//HadGEM3-GC31-MM/', @@ -317,3 +318,4 @@ test_that("8. gregorian, 3hrly, unit = 'days since 1850-1-1'", { }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R index 9cccc2d..ec77c7f 100644 --- a/tests/testthat/test-Start-depends_values.R +++ b/tests/testthat/test-Start-depends_values.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests the case that using values() to define the depended # and depending dimensions. The depending dimension should be a list with # names that are the values of depended dimensions. @@ -80,3 +81,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-first_file_missing.R b/tests/testthat/test-Start-first_file_missing.R index fecbd7c..070ae53 100644 --- a/tests/testthat/test-Start-first_file_missing.R +++ b/tests/testthat/test-Start-first_file_missing.R @@ -1,3 +1,4 @@ +suppressMessages({ # When some of the files are missing, Start() still can retrieve the data and # put NA in those missing positions. However, when the first file is missing, # Start() returned error before because of failing to find metadata. 
The bug is @@ -182,3 +183,4 @@ data <- Start(dat = file, ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-global-lon-across_meridian.R b/tests/testthat/test-Start-global-lon-across_meridian.R index 921c331..429db5c 100644 --- a/tests/testthat/test-Start-global-lon-across_meridian.R +++ b/tests/testthat/test-Start-global-lon-across_meridian.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("first test", { @@ -54,3 +55,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index d493a87..995310a 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -1,3 +1,4 @@ +suppressMessages({ # Similar as usecase ex1_13. # Use a value array as the inner dimension selector to express dependency on a # file dimension. By this means, we don't need to specify the *_across parameter @@ -156,3 +157,4 @@ c(memb = 2, sdate = 3, region = 1) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index 7e0264c..9c46975 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # The unit test is for the implicit inner dimension. If the inner dimension length is 1, # startR allows it not to be specified in the call. Users can still define it in # 'return_vars'. 
@@ -44,3 +45,4 @@ as.POSIXct('2013-11-15', tz = 'UTC') }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-indices_list_vector.R b/tests/testthat/test-Start-indices_list_vector.R index 2effede..76ec511 100644 --- a/tests/testthat/test-Start-indices_list_vector.R +++ b/tests/testthat/test-Start-indices_list_vector.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests the consistence between list of indices and vector of indices. # 1. transform # 2. no transform @@ -241,3 +242,4 @@ as.vector(exp2) ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-largest_dims_length.R b/tests/testthat/test-Start-largest_dims_length.R index 211c132..3585576 100644 --- a/tests/testthat/test-Start-largest_dims_length.R +++ b/tests/testthat/test-Start-largest_dims_length.R @@ -1,3 +1,4 @@ +suppressMessages({ # When certain inner dim of files is not consistent, the parameter 'largest_dims_length' can # be used to ensure the returned array has the largest length of inner dimensions. @@ -299,3 +300,4 @@ as.vector(data5)[-c(5:24)] ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-line_order-consistency.R b/tests/testthat/test-Start-line_order-consistency.R index 11be109..d7f5095 100644 --- a/tests/testthat/test-Start-line_order-consistency.R +++ b/tests/testthat/test-Start-line_order-consistency.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') variable <- "tas" @@ -144,3 +145,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index 2a2e735..3a68a53 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. 
One data set, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) @@ -983,3 +984,4 @@ dataF <- Start(dataset = path_list, ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index 227383b..aea6f1f 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -1,3 +1,4 @@ +suppressMessages({ # When inner dimension selector is an array with filedim dimension name (e.g., time = [sdate = 2, time = 4], # or *_across is used, the inner dim has dependency on file dim. In this case, return_vars must # specify this relationship, i.e., return_vars = list(time = 'sdate'). @@ -197,3 +198,4 @@ expect_equal( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index b143268..1346dc6 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -1,3 +1,4 @@ +suppressMessages({ # When data is reshaping (e.g., time_across = 'sdate'), the corresponding attribute should be reshaped too. test_that("1. 
time across fyear, fyear depends on sdate", { @@ -805,3 +806,4 @@ dates }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-multiple-sdates.R b/tests/testthat/test-Start-multiple-sdates.R index e16f2bf..49cc003 100644 --- a/tests/testthat/test-Start-multiple-sdates.R +++ b/tests/testthat/test-Start-multiple-sdates.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') # When certain values in one observation file are required more than once, @@ -163,3 +164,4 @@ obs <- Start(dat = obs_path, 0 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-path_glob_permissive.R b/tests/testthat/test-Start-path_glob_permissive.R index 75f28d4..2ff6278 100644 --- a/tests/testthat/test-Start-path_glob_permissive.R +++ b/tests/testthat/test-Start-path_glob_permissive.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. expid/member/version", { years <- paste0(c(1960:1961), '01-', c(1960:1961), '12') @@ -159,3 +160,4 @@ list("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_ }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lat.R b/tests/testthat/test-Start-reorder-lat.R index c87792e..3487484 100644 --- a/tests/testthat/test-Start-reorder-lat.R +++ b/tests/testthat/test-Start-reorder-lat.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -1053,3 +1054,4 @@ as.vector(attr(exp1_3, 'Variables')$common$latitude) }) ############################################## +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-latCoarse.R b/tests/testthat/test-Start-reorder-latCoarse.R index 34a766f..1cb3b8d 100644 --- a/tests/testthat/test-Start-reorder-latCoarse.R +++ b/tests/testthat/test-Start-reorder-latCoarse.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 
selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -887,3 +888,4 @@ res <- Start(dat = list(list(path=path_exp)), # #}) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-180to180.R b/tests/testthat/test-Start-reorder-lon-180to180.R index 0f71f0a..5dfced4 100644 --- a/tests/testthat/test-Start-reorder-lon-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-180to180.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -817,3 +818,4 @@ res <- Start(dat = list(list(path=path_exp)), tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R index 5e7701a..c36b29e 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -957,3 +958,4 @@ res <- Start(dat = list(list(path=path_exp)), 21 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360.R b/tests/testthat/test-Start-reorder-lon-transform_0to360.R index 86ad5e7..a47b707 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -1040,3 +1041,4 @@ res <- Start(dat = list(list(path=path_exp)), 21 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R index 
c18d34a..4185cca 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -1045,3 +1046,4 @@ res <- Start(dat = list(list(path=path_exp)), 21 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon0to360.R b/tests/testthat/test-Start-reorder-lon0to360.R index 1e946d9..8c717b3 100644 --- a/tests/testthat/test-Start-reorder-lon0to360.R +++ b/tests/testthat/test-Start-reorder-lon0to360.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -658,3 +659,4 @@ res <- Start(dat = list(list(path=path_exp)), tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon0to360Coarse.R b/tests/testthat/test-Start-reorder-lon0to360Coarse.R index 71361d9..3de49a6 100644 --- a/tests/testthat/test-Start-reorder-lon0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon0to360Coarse.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -658,3 +659,4 @@ res <- Start(dat = list(list(path=path_exp)), tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-metadata.R b/tests/testthat/test-Start-reorder-metadata.R index ea727e5..501873d 100644 --- a/tests/testthat/test-Start-reorder-metadata.R +++ b/tests/testthat/test-Start-reorder-metadata.R @@ -1,3 +1,4 @@ +suppressMessages({ # Ensure returns_vars = NULL or 'dat' have the same metadata test_that("1. 
Sort() and CircularSort(0, 360)", { @@ -277,3 +278,4 @@ res_dat <- Start(dat = list(list(path = path_exp)), ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-retrieve.R b/tests/testthat/test-Start-reorder-retrieve.R index 25efcfc..3b8016f 100644 --- a/tests/testthat/test-Start-reorder-retrieve.R +++ b/tests/testthat/test-Start-reorder-retrieve.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') ############################################## @@ -155,3 +156,4 @@ res2 <- Start(dat = path_exp, }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder_all.R b/tests/testthat/test-Start-reorder_all.R index 87a4416..fadee52 100644 --- a/tests/testthat/test-Start-reorder_all.R +++ b/tests/testthat/test-Start-reorder_all.R @@ -1,3 +1,4 @@ +suppressMessages({ # No transform, test reorder function Sort() and CircularSort() with selector 'all'. #--------------------------------------------------------------- @@ -143,3 +144,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder_indices.R b/tests/testthat/test-Start-reorder_indices.R index 59d00d4..5f50c49 100644 --- a/tests/testthat/test-Start-reorder_indices.R +++ b/tests/testthat/test-Start-reorder_indices.R @@ -1,3 +1,4 @@ +suppressMessages({ # No transform, test reorder function Sort() and CircularSort() with selector indices(). 
#--------------------------------------------------------------- @@ -143,3 +144,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reshape.R b/tests/testthat/test-Start-reshape.R index 480a3bc..af8e89f 100644 --- a/tests/testthat/test-Start-reshape.R +++ b/tests/testthat/test-Start-reshape.R @@ -1,3 +1,4 @@ +suppressMessages({ # This one is more comprehensive than test-Start-split-merge.R path_exp <- '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc' @@ -515,3 +516,4 @@ easy_array[31:61 ,1] ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-return_vars_name.R b/tests/testthat/test-Start-return_vars_name.R index e3ff876..384bd63 100644 --- a/tests/testthat/test-Start-return_vars_name.R +++ b/tests/testthat/test-Start-return_vars_name.R @@ -1,3 +1,4 @@ +suppressMessages({ # The name of return_vars should be one of the inner dimension names. The synonims can # be used but will be changed back to the inner dim names. @@ -237,3 +238,4 @@ c(-19.5, -14.5) ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index 699c01c..8e4d2e1 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -1,3 +1,4 @@ +suppressMessages({ var_name <- 'tas' path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc' @@ -188,3 +189,4 @@ as.POSIXct('2013-11-15', tz = 'UTC') }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-time_unit.R b/tests/testthat/test-Start-time_unit.R index 0c499d3..f15b3bd 100644 --- a/tests/testthat/test-Start-time_unit.R +++ b/tests/testthat/test-Start-time_unit.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. The data has units like time", { @@ -88,3 +89,4 @@ as.POSIXct(c("2018-04-08", "2018-04-15", "2018-04-22 UTC", "2018-04-29 UTC"), tz #test_that("3. 
Time dimension is implicit", { # See test-Start-implicit_inner_dim.R #}) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-all.R b/tests/testthat/test-Start-transform-all.R index a8290a6..e21f6c1 100644 --- a/tests/testthat/test-Start-transform-all.R +++ b/tests/testthat/test-Start-transform-all.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses 'all' to do the transformation and tests the output grid. # The results should be identical and consistent with cdo result (with precision difference). # The test contains three calls with different target grids: @@ -141,3 +142,4 @@ test_that("2. test path 2", { +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-border.R b/tests/testthat/test-Start-transform-border.R index 9b3cc6a..34a33a2 100644 --- a/tests/testthat/test-Start-transform-border.R +++ b/tests/testthat/test-Start-transform-border.R @@ -1,3 +1,4 @@ +suppressMessages({ ############################################## # This unit test checks different border situations: normal regional that doesn't touch the borders, @@ -712,3 +713,4 @@ expect_equal( # [9,] 299.4723 299.9515 299.4566 299.0601 299.5071 # [10,] 299.5299 299.7573 299.0317 299.1104 300.0644 ############################################## +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lat-Sort-all.R b/tests/testthat/test-Start-transform-lat-Sort-all.R index d7d895e..2aa8e39 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-all.R +++ b/tests/testthat/test-Start-transform-lat-Sort-all.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses 'all' to do the transformation and tests "lat_reorder". # The results should be identical and consistent with cdo result (with precision difference). # "lon_reorder = CircularSort(0, 360)" are used in all the tests. 
@@ -124,3 +125,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lat-Sort-indices.R b/tests/testthat/test-Start-transform-lat-Sort-indices.R index 16daa79..8d7312b 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-indices.R +++ b/tests/testthat/test-Start-transform-lat-Sort-indices.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses indices() to do the transformation and tests "lat_reorder". # The results should be identical and consistent with cdo result (with precision difference). # The lat/lon range is all the grids here. @@ -230,3 +231,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lat-Sort-values.R b/tests/testthat/test-Start-transform-lat-Sort-values.R index b70b637..f69d551 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-values.R +++ b/tests/testthat/test-Start-transform-lat-Sort-values.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses values() to do the transformation and tests "lat_reorder". # The results should be identical and consistent with cdo result (with precision difference). # The lon range is all the grids here. 
@@ -430,3 +431,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lon-across_meridian.R b/tests/testthat/test-Start-transform-lon-across_meridian.R index d07388e..a8df42b 100644 --- a/tests/testthat/test-Start-transform-lon-across_meridian.R +++ b/tests/testthat/test-Start-transform-lon-across_meridian.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("first test", { @@ -80,3 +81,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-metadata.R b/tests/testthat/test-Start-transform-metadata.R index 227f09d..6010b6f 100644 --- a/tests/testthat/test-Start-transform-metadata.R +++ b/tests/testthat/test-Start-transform-metadata.R @@ -1,3 +1,4 @@ +suppressMessages({ # Ensure returns_vars = NULL or 'dat' have the same metadata test_that("1. Sort() and CircularSort(0, 360)", { @@ -275,3 +276,4 @@ res_dat <- Start(dat = list(list(path = path_exp)), ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-three-selectors.R b/tests/testthat/test-Start-transform-three-selectors.R index 95e7c2b..3fe4824 100644 --- a/tests/testthat/test-Start-transform-three-selectors.R +++ b/tests/testthat/test-Start-transform-three-selectors.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses three different selector forms: indices(), values(), and 'all', to do # the transformation. "lat_reorder" is also tested. # Their results should be all identical and consistent with cdo result (with precision difference). @@ -194,3 +195,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index 46b57d8..640b478 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -1,3 +1,4 @@ +suppressMessages({ # ex1_8 test_that("1. 
ex1_8, case 1", { @@ -99,3 +100,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-values_list_vector.R b/tests/testthat/test-Start-values_list_vector.R index 1a6288b..2b5cda0 100644 --- a/tests/testthat/test-Start-values_list_vector.R +++ b/tests/testthat/test-Start-values_list_vector.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests the consistence between list of values and vector of values. # 1. transform # 2. no transform @@ -246,3 +247,4 @@ as.vector(exp2) ) }) +}) #suppressMessages -- GitLab From 5c06ad9165199f1b6a2e8ecf1dada1f6f7c2e0d6 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 17 Oct 2023 18:51:07 +0200 Subject: [PATCH 08/27] Correct dat name of work_pieces (internal usage only); Add message when multiple datasets are requested and return_vars has common dimensions --- R/Start.R | 12 ++++++++++++ R/zzz.R | 7 ++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/R/Start.R b/R/Start.R index b0ad40d..89f87e9 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1777,6 +1777,18 @@ Start <- function(..., # dim = indices/selectors, } } } + + # Return info about return_vars when dat > 1 + if (length(dat) > 1 & length(common_return_vars) > 0) { + .message("\n", "[ATTENTION]", + paste0("According to parameter 'return_vars', the inner dimensions: ", + paste(names(common_return_vars), collapse = ', '), + ", are common among all the datasets. 
Please be sure that ", + "this is expected to avoid potential wrong results, and ", + "verify the outputs carefully."), + "\n", indent = 1) + } + #//////////////////////////////////////////// # This part was above where return_vars is seperated into return_vars and common_return_vars diff --git a/R/zzz.R b/R/zzz.R index 1e56e29..f098a3b 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -980,7 +980,12 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, if (x %in% names(depending_file_dims)) { vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]] } - selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]] + if (x != found_pattern_dim) { + selectors[[x]][[vector_to_pick]][file_to_load_indices[x]] + } else { + # dat_dim only has one value in each work_piece + selectors[[x]][[vector_to_pick]] + } }) names(file_selectors) <- file_dims work_piece[['file_selectors']] <- file_selectors -- GitLab From 0d93bf916754769539f149d330c50b1ef3ecbc87 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 18 Oct 2023 12:57:45 +0200 Subject: [PATCH 09/27] Don't create pipeline when it is draft --- .gitlab-ci.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c7deb1a..db7b631 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,13 @@ stages: - build + +workflow: + rules: + - if: $CI_COMMIT_TITLE =~ /-draft$/ + when: never +# - if: $CI_PIPELINE_SOURCE == "merge_request_event" +# - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH + build: stage: build script: -- GitLab From 64eed86f53d49b141a6e4894cb15323e9cfc1393 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 19 Oct 2023 14:50:30 +0200 Subject: [PATCH 10/27] Update path, add explanation --- .../PATC2023/handson_1-data-loading.md | 73 +++++++++++-------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 69da163..8a986f8 100644 --- 
a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -1,7 +1,7 @@ # Hands-on 1: Load data by startR ## Goal -Use startR to load the data and learn how to adjust data while loading data. +Use startR to load the data and learn how to adjust data structure while loading data. ## 0. Load required packages @@ -11,32 +11,28 @@ rm(list = ls()) library(startR) ``` -## 1. Load data from data repository (esarchive/) - -#TODO: update it **Data description**: -This sample data set contains a small cutout of gridded seasonal precipitation -forecast data from the Copernicus Climate Change ECMWF-System 5 forecast system. -Specifically, for the 'prlr' (precipitation) variable, for the first 6 forecast -ensemble members, daily values, for all 31 days in March following the forecast -starting dates in November of years 2010 to 2012, for a small 4x4 pixel cutout in -a region in the North-Western Italian Alps (44N-47N, 6E-9E). The data resolution is 1 degree. +We will use two datasets in the hands-on. The experiment data are Meteo-France System 7 from ECMWF, and the observation ones are ERA5 from ECMWF. The data have been first processed into monthly mean data and stored in our data archive (esarchive). + +We're going to analyze the near-surface temperature (short name: tas) for seasonal forecast. We will focus on the Europe region (roughly 20W-40E, 20N-80N). The hindcast years are 1993 to 2016, and the forecast year is 2020. The initial month is November. To speed up the practice, we will only load the first two forecast time steps, but all the ensemble members are used to give a less biased result. + +## 1. Load experimental data from data repository + +### 1.1 Hindcast data -Use the above information to define the variable, start dates, longitude and latitude. +Understand the following script, run it, and check the result. 
```r -#TODO: update the path # Use this one if on workstation or nord3 (have access to /esarchive) path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" - - # Use this one if on Marenostrum4 and log in with PATC2021 account - path_exp <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', - 'exp/ecmwf/system5c3s/daily_mean/', - '$var$_s0-24h/$var$_$sdate$.nc') + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + path_exp <- paste0(prefix, path_exp) + #---------------------------------------------------------------------- var <- 'tas' sdate_hcst <- paste0(1993:2016, '1101') - sdate_fcst <- '20201101' lon.min <- -20 lon.max <- 40 lat.min <- 20 @@ -63,7 +59,15 @@ Use Start() to load the data. retrieve = TRUE) ``` +### 1.2 Forecast data + +The forecast data are from the same dataset as hindcast, but with different years. +Therefore, they share the same data path and strucutre. +Try to take the Start() call above and modify it to load the forecast data (hint: the start year is 2020.) + ```r + sdate_fcst <- '20201101' + fcst <- Start(dat = path_exp, var = var, syear = sdate_fcst, @@ -81,29 +85,42 @@ Use Start() to load the data. retrieve = TRUE) ``` +### 1.3 Observational data -# Create an date array from hcst dates -Check the time attributes of hcst: Is it correct? +We need the corresponding observational data to compare with the experimental data. +So, the observational data should be loaded as the same dimensions as the experimental ones. +To achieve this, we can use the metadata of the experimental data as the selectors for observational data. But be careful with the usage! We must verify the correctness and applicability first. -To load the corresponding obs data, we can use these time values as the selectors in obs Start() call. 
+**Get the time values from hindcast data** +Check the time attributes of `hcst`: Is it correct? ```r dim(attributes(hcst)$Variables$common$time) -syear time - 24 2 +#syear time +# 24 2 str(attributes(hcst)$Variables$common$time) - POSIXct[1:48], format: "1993-12-01" "1994-12-01" "1995-12-01" "1996-12-01" "1997-12-01" ... +# POSIXct[1:48], format: "1993-12-01" "1994-12-01" "1995-12-01" "1996-12-01" "1997-12-01" ... +``` -# Adjust the day to the correct month -attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) +The values are not correct since they should start from November, not December. +But the array has the correct dimensions and we can take advantage of it. +What we're going to do here is to tune the values one month ahead so we can have the correct dates. +(ps., `lubridate` is a useful R package for time value manipulation!) +```r +attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) ``` ```r path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + path_obs <- paste0(prefix, path_obs) + #---------------------------------------------------------------------- obs <- Start(dat = path_obs, var = var, @@ -114,9 +131,7 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) longitude = values(list(lon.min, lon.max)), longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, -#TODO: Change to relative path -# transform_params = list(grid = './griddes_system7c3s.txt', - transform_params = list(grid = 
'/esarchive/scratch/aho/git/startR/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt', + transform_params = list(grid = '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/griddes_system7c3s.txt', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), -- GitLab From 822f8ff38654bc2b06268f8581c3e8ae869d2001 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 19 Oct 2023 16:31:30 +0200 Subject: [PATCH 11/27] Change grid to r360x181 --- .../PATC2023/handson_1-data-loading.md | 116 ++++++++++++++---- 1 file changed, 89 insertions(+), 27 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 8a986f8..eb9a310 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -8,7 +8,7 @@ Use startR to load the data and learn how to adjust data structure while loading ```r # Clean the session rm(list = ls()) - +# Load package library(startR) ``` **Data description**: @@ -18,7 +18,7 @@ We're going to analyze the near-surface temperature (short name: tas) for season ## 1. Load experimental data from data repository -### 1.1 Hindcast data +### 1.a Hindcast data Understand the following script, run it, and check the result. @@ -31,26 +31,20 @@ Understand the following script, run it, and check the result. path_exp <- paste0(prefix, path_exp) #---------------------------------------------------------------------- - var <- 'tas' sdate_hcst <- paste0(1993:2016, '1101') - lon.min <- -20 - lon.max <- 40 - lat.min <- 20 - lat.max <- 80 -``` - -Use Start() to load the data. 
-```r hcst <- Start(dat = path_exp, - var = var, + var = 'tas', syear = sdate_hcst, ensemble = 'all', time = 1:2, - latitude = values(list(lat.min, lat.max)), + latitude = values(list(20, 80)), latitude_reorder = Sort(), - longitude = values(list(lon.min, lon.max)), + longitude = values(list(-20, 40)), longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), @@ -58,8 +52,34 @@ Use Start() to load the data. longitude = NULL, latitude = NULL), retrieve = TRUE) ``` + +**Questions** + +(1) What are the dimensions of hcst? Use `dim()` to check. + +```r +dim(hcst) +# dat var syear ensemble time latitude longitude +# 1 1 24 25 2 61 61 +``` -### 1.2 Forecast data +(2) What is the structure of hcst? Use `str()` to check. +```r +str(hcst, max.level = 1) +str(hcst, max.level = 2) +str(hcst, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(hcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. +```r +metadata_attr <- attr(hcst, 'Variables') +str(metadata_attr) +metadata_attr$common$time +metadata_attr$common$latitude +metadata_attr$common$longitude +``` + +### 1.b Forecast data The forecast data are from the same dataset as hindcast, but with different years. Therefore, they share the same data path and strucutre. 
@@ -69,14 +89,17 @@ Try to take the Start() call above and modify it to load the forecast data (hint sdate_fcst <- '20201101' fcst <- Start(dat = path_exp, - var = var, + var = 'tas', syear = sdate_fcst, ensemble = 'all', time = 1:2, - latitude = values(list(lat.min, lat.max)), + latitude = values(list(20, 80)), latitude_reorder = Sort(), - longitude = values(list(lon.min, lon.max)), + longitude = values(list(-20, 40)), longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), @@ -85,13 +108,42 @@ Try to take the Start() call above and modify it to load the forecast data (hint retrieve = TRUE) ``` -### 1.3 Observational data +**Questions** + +Check the forecast data by the same methods for hindcast data. + +(1) What are the dimensions of fcst? Use `dim()` to check. + +```r +dim(fcst) +# dat var syear ensemble time latitude longitude +# 1 1 1 51 2 61 61 +``` + +(2) What is the structure of hcst? Use `str()` to check. +```r +str(fcst, max.level = 1) +str(fcst, max.level = 2) +str(fcst, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(fcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. +```r +metadata_attr <- attr(fcst, 'Variables') +str(metadata_attr) +metadata_attr$common$time +metadata_attr$common$latitude +metadata_attr$common$longitude +``` + +### 1.c Observational data We need the corresponding observational data to compare with the experimental data. So, the observational data should be loaded as the same dimensions as the experimental ones. To achieve this, we can use the metadata of the experimental data as the selectors for observational data. But be careful with the usage! 
We must verify the correctness and applicability first. **Get the time values from hindcast data** + Check the time attributes of `hcst`: Is it correct? ```r @@ -106,14 +158,19 @@ str(attributes(hcst)$Variables$common$time) The values are not correct since they should start from November, not December. But the array has the correct dimensions and we can take advantage of it. What we're going to do here is to tune the values one month ahead so we can have the correct dates. -(ps., `lubridate` is a useful R package for time value manipulation!) +(p.s. `lubridate` is a useful R package for time value manipulation!) ```r attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) +print(sdate_obs) ``` +Now we have the correct date values, we can use them as the selectors of `syear` in the Start() call. In addition, we will use the reshaping feature in startR to get the desired dimensions. 
+ +#TODO: Explain split_multiselected_dims + ```r path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' #---------------------------------------------------------------------- @@ -123,16 +180,15 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) #---------------------------------------------------------------------- obs <- Start(dat = path_obs, - var = var, + var = 'tas', syear = sdate_obs, split_multiselected_dims = TRUE, - latitude = values(list(lat.min, lat.max)), + latitude = values(list(20, 80)), latitude_reorder = Sort(), - longitude = values(list(lon.min, lon.max)), + longitude = values(list(-20, 40)), longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, - transform_params = list(grid = '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/griddes_system7c3s.txt', - method = 'bilinear'), + transform_params = list(grid = 'r360x181', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), latitude = c('lat', 'latitude'), @@ -144,15 +200,21 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) ## 2. 
Check if the datasets are consistent +```r +dim(hcst) +dim(fcst) +dim(obs) +``` + ```r lat_hcst <- attributes(hcst)$Variables$common$latitude lon_hcst <- attributes(hcst)$Variables$common$longitude lat_obs <- attributes(obs)$Variables$common$latitude lon_obs <- attributes(obs)$Variables$common$longitude -identical(c(lat_obs), c(lat_hcst)) +identical(lat_obs, lat_hcst) [1] TRUE -identical(c(lon_obs), c(lon_hcst)) +identical(lon_obs, lon_hcst) [1] TRUE time_hcst <- attributes(hcst)$Variables$common$time -- GitLab From 8f18e513f5976e06bfc3433b17f4d12c64f7f4cb Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 20 Oct 2023 14:50:01 +0200 Subject: [PATCH 12/27] Finish the questions --- .../PATC2023/handson_1-data-loading.md | 136 +++++---- .../PATC2023/handson_1-data-loading_ans.md | 272 ++++++++++++++++++ 2 files changed, 352 insertions(+), 56 deletions(-) create mode 100644 inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index eb9a310..bcd171c 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -11,6 +11,7 @@ rm(list = ls()) # Load package library(startR) ``` + **Data description**: We will use two datasets in the hands-on. The experiment data are Meteo-France System 7 from ECMWF, and the observation ones are ERA5 from ECMWF. The data have been first processed into monthly mean data and stored in our data archive (esarchive). @@ -55,28 +56,26 @@ Understand the following script, run it, and check the result. **Questions** -(1) What are the dimensions of hcst? Use `dim()` to check. +(1) What are the dimensions of `hcst`? Use `dim()` to check. ```r -dim(hcst) -# dat var syear ensemble time latitude longitude -# 1 1 24 25 2 61 61 +dim(____) ``` -(2) What is the structure of hcst? Use `str()` to check. +(2) What is the structure of `hcst`? Use `str()` to check. 
```r -str(hcst, max.level = 1) -str(hcst, max.level = 2) -str(hcst, max.level = 3) +str(hcst, max.level = _____) # try 1, 2, 3 ``` (3) The metadata variables are stored in `attr(hcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. ```r metadata_attr <- attr(hcst, 'Variables') str(metadata_attr) -metadata_attr$common$time -metadata_attr$common$latitude -metadata_attr$common$longitude +names(metadata_attr$common) + +hcst_time <- metadata_attr$common$time +hcst_lat <- __________ +hcst_lon <- __________ ``` ### 1.b Forecast data @@ -86,24 +85,24 @@ Therefore, they share the same data path and strucutre. Try to take the Start() call above and modify it to load the forecast data (hint: the start year is 2020.) ```r - sdate_fcst <- '20201101' + sdate_fcst <- ____________ fcst <- Start(dat = path_exp, - var = 'tas', + var = _____, syear = sdate_fcst, ensemble = 'all', - time = 1:2, - latitude = values(list(20, 80)), + time = _____, + latitude = values(list(____, ____)), latitude_reorder = Sort(), - longitude = values(list(-20, 40)), + longitude = values(list(____, ____)), longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, - transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_params = list(grid = _____, method = 'bilinear'), transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), - return_vars = list(time = 'syear', + return_vars = list(time = _____, longitude = NULL, latitude = NULL), retrieve = TRUE) ``` @@ -112,28 +111,26 @@ Try to take the Start() call above and modify it to load the forecast data (hint Check the forecast data by the same methods for hindcast data. -(1) What are the dimensions of fcst? Use `dim()` to check. +(1) What are the dimensions of `fcst`? Use `dim()` to check. 
```r -dim(fcst) -# dat var syear ensemble time latitude longitude -# 1 1 1 51 2 61 61 +dim(____) ``` -(2) What is the structure of hcst? Use `str()` to check. +(2) What is the structure of `fcst`? Use `str()` to check. ```r -str(fcst, max.level = 1) -str(fcst, max.level = 2) -str(fcst, max.level = 3) +str(fcst, max.level = _____) # try 1, 2, 3 ``` (3) The metadata variables are stored in `attr(fcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. ```r -metadata_attr <- attr(fcst, 'Variables') +metadata_attr <- attr(_____, 'Variables') str(metadata_attr) -metadata_attr$common$time -metadata_attr$common$latitude -metadata_attr$common$longitude +names(metadata_attr$common) + +fcst_time <- __________ +fcst_lat <- __________ +fcst_lon <- __________ ``` ### 1.c Observational data @@ -148,11 +145,7 @@ Check the time attributes of `hcst`: Is it correct? ```r dim(attributes(hcst)$Variables$common$time) -#syear time -# 24 2 - str(attributes(hcst)$Variables$common$time) -# POSIXct[1:48], format: "1993-12-01" "1994-12-01" "1995-12-01" "1996-12-01" "1997-12-01" ... ``` The values are not correct since they should start from November, not December. @@ -169,7 +162,11 @@ print(sdate_obs) Now we have the correct date values, we can use them as the selectors of `syear` in the Start() call. In addition, we will use the reshaping feature in startR to get the desired dimensions. -#TODO: Explain split_multiselected_dims +If the selector is an array, the parameter `split_multiselected_dims` of Start() splits the array by dimensions and we will get those dimensions in the output. +For example, we will use `sdate_obs` as the selector of "syear" dimension below. +`sdate_obs` has two dimensions, "syear" and "time"; +so, by `split_multiselected_dims`, the output `obs` will have these two dimensions, +even though "time" is not explicitly specified in the Start() call.
```r path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' @@ -180,46 +177,73 @@ Now we have the correct date values, we can use them as the selectors of `syear` #---------------------------------------------------------------------- obs <- Start(dat = path_obs, - var = 'tas', + var = _____, syear = sdate_obs, split_multiselected_dims = TRUE, - latitude = values(list(20, 80)), + latitude = values(list(_____, _____)), latitude_reorder = Sort(), - longitude = values(list(-20, 40)), + longitude = values(list(_____, _____)), longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, - transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_params = list(grid = ______, method = 'bilinear'), transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), - return_vars = list(time = 'syear', + return_vars = list(time = ______, longitude = NULL, latitude = NULL), retrieve = TRUE) ``` -## 2. Check if the datasets are consistent +**Questions** + +Check the obsercational data by the same methods above. + +(1) What are the dimensions of `obs`? Use `dim()` to check. ```r -dim(hcst) -dim(fcst) -dim(obs) +dim(____) ``` + +(2) What is the structure of `obs`? Use `str()` to check. +```r +str(obs, max.level = ____) # try 1, 2, 3 +``` + +(3) The metadata variables are stored in `attr(obs, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. +```r +metadata_attr <- attr(____, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +obs_time <- __________ +obs_lat <- __________ +obs_lon <- __________ +``` + +## 2. Check if the datasets are consistent + +Wrong data, wrong everything afterward. It is important to examine the data and metadata after we load them. + +(1) Compare the dimensions of the three data by `dim()`. 
```r -lat_hcst <- attributes(hcst)$Variables$common$latitude -lon_hcst <- attributes(hcst)$Variables$common$longitude -lat_obs <- attributes(obs)$Variables$common$latitude -lon_obs <- attributes(obs)$Variables$common$longitude -identical(lat_obs, lat_hcst) -[1] TRUE -identical(lon_obs, lon_hcst) -[1] TRUE +``` +(2) Check the summary of the data by `summary()`. +```r +summary(hcst) +summary(fcst) +summary(obs) +``` -time_hcst <- attributes(hcst)$Variables$common$time -time_obs <- attributes(obs)$Variables$common$time +(3) Compare metadata. We have saved the latitude, longitude, and time attributes above after loading each data. +Use `identical()` or `all.equal()` to check if the values are consistent. +```r +# lat and lon +identical(____, ____) +all.equal(____, ____) -identical(format(time_hcst, '%Y%m'), format(time_obs, '%Y%m')) -[1] TRUE +# time: only compare year and month +identical(format(hcst_time, '%Y%m'), format(obs_time, '%Y%m')) ``` diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md new file mode 100644 index 0000000..635be93 --- /dev/null +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md @@ -0,0 +1,272 @@ +# Hands-on 1: Load data by startR + +## Goal +Use startR to load the data and learn how to adjust data structure while loading data. + +## 0. Load required packages + +```r +# Clean the session +rm(list = ls()) +# Load package +library(startR) +``` + +**Data description**: +We will use two datasets in the hands-on. The experiment data are Meteo-France System 7 from ECMWF, and the observation ones are ERA5 from ECMWF. The data have been first processed into monthly mean data and stored in our data archive (esarchive). + +We're going to analyze the near-surface temperature (short name: tas) for seasonal forecast. We will focus on the Europe region (roughly 20W-40E, 20N-80N). The hindcast years are 1993 to 2016, and the forecast year is 2020. 
The initial month is November. To speed up the practice, we will only load the first two forecast time steps, but all the ensemble members are used to give a less biased result. + +## 1. Load experimental data from data repository + +### 1.a Hindcast data + +Understand the following script, run it, and check the result. + +```r + # Use this one if on workstation or nord3 (have access to /esarchive) + path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + path_exp <- paste0(prefix, path_exp) + #---------------------------------------------------------------------- + + sdate_hcst <- paste0(1993:2016, '1101') + + hcst <- Start(dat = path_exp, + var = 'tas', + syear = sdate_hcst, + ensemble = 'all', + time = 1:2, + latitude = values(list(20, 80)), + latitude_reorder = Sort(), + longitude = values(list(-20, 40)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +(1) What are the dimensions of `hcst`? Use `dim()` to check. + +```r +dim(hcst) +# dat var syear ensemble time latitude longitude +# 1 1 24 25 2 61 61 +``` + +(2) What is the structure of `hcst`? Use `str()` to check. +```r +str(hcst, max.level = 1) +str(hcst, max.level = 2) +str(hcst, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(hcst, 'Variables')`. What variables do we have? 
Use `str()` to check the structure first, then try to access the variable values. +```r +metadata_attr <- attr(hcst, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +hcst_time <- metadata_attr$common$time +hcst_lat <- metadata_attr$common$latitude +hcst_lon <- metadata_attr$common$longitude +``` + +### 1.b Forecast data + +The forecast data are from the same dataset as hindcast, but with different years. +Therefore, they share the same data path and strucutre. +Try to take the Start() call above and modify it to load the forecast data (hint: the start year is 2020.) + +```r + sdate_fcst <- '20201101' + + fcst <- Start(dat = path_exp, + var = 'tas', + syear = sdate_fcst, + ensemble = 'all', + time = 1:2, + latitude = values(list(20, 80)), + latitude_reorder = Sort(), + longitude = values(list(-20, 40)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +Check the forecast data by the same methods for hindcast data. + +(1) What are the dimensions of `fcst`? Use `dim()` to check. + +```r +dim(fcst) +# dat var syear ensemble time latitude longitude +# 1 1 1 51 2 61 61 +``` + +(2) What is the structure of `fcst`? Use `str()` to check. +```r +str(fcst, max.level = 1) +str(fcst, max.level = 2) +str(fcst, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(fcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. 
+```r +metadata_attr <- attr(fcst, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +fcst_time <- metadata_attr$common$time +fcst_lat <- metadata_attr$common$latitude +fcst_lon <- metadata_attr$common$longitude +``` + +### 1.c Observational data + +We need the corresponding observational data to compare with the experimental data. +So, the observational data should be loaded as the same dimensions as the experimental ones. +To achieve this, we can use the metadata of the experimental data as the selectors for observational data. But be careful with the usage! We must verify the correctness and applicability first. + +**Get the time values from hindcast data** + +Check the time attributes of `hcst`: Is it correct? + +```r +dim(attributes(hcst)$Variables$common$time) +#syear time +# 24 2 + +str(attributes(hcst)$Variables$common$time) +# POSIXct[1:48], format: "1993-12-01" "1994-12-01" "1995-12-01" "1996-12-01" "1997-12-01" ... +``` + +The values are not correct since they should start from November, not December. +But the array has the correct dimensions and we can take advantage of it. +What we're going to do here is to tune the values one month ahead so we can have the correct dates. +(p.s. `lubridate` is a useful R package for time value manipulation!) + +```r +attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) +date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') +sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) +print(sdate_obs) +``` + +Now we have the correct date values, we can use them as the selectors of `syear` in the Start() call. In addition, we will use the reshaping feature in startR to get the desired dimensions. + +If the selector is an array, the parameter `split_multiselected_dims` of Start() splits the array by dimensions and we will get those dimensions in the output. +For example, we will use `sdate_obs` as the selector of "syear" dimension below. 
+`sdate_obs` has two dimensions, "syear" and "time"; +so, by `split_multiselected_dims`, the output `obs` will have these two dimensions, +even "time" is not explicitly specified in the Start() call. + +```r + path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + path_obs <- paste0(prefix, path_obs) + #---------------------------------------------------------------------- + + obs <- Start(dat = path_obs, + var = 'tas', + syear = sdate_obs, + split_multiselected_dims = TRUE, + latitude = values(list(20, 80)), + latitude_reorder = Sort(), + longitude = values(list(-20, 40)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +Check the obsercational data by the same methods above. + +(1) What are the dimensions of `obs`? Use `dim()` to check. + +```r +dim(obs) +# dat var syear time latitude longitude +# 1 1 24 2 61 61 +``` + +(2) What is the structure of `obs`? Use `str()` to check. +```r +str(obs, max.level = 1) +str(obs, max.level = 2) +str(obs, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(obs, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. 
+```r +metadata_attr <- attr(obs, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +obs_time <- metadata_attr$common$time +obs_lat <- metadata_attr$common$latitude +obs_lon <- metadata_attr$common$longitude +``` + + +## 2. Check if the datasets are consistent + +Wrong data, wrong everything afterward. It is important to examine the data and metadata after we load them. + +(1) Compare the dimensions of the three data by `dim()`. +```r +dim(hcst) +dim(fcst) +dim(obs) +``` +(2) Check the summary of the data by `summary()`. +```r +summary(hcst) +summary(fcst) +summary(obs) +``` + +(3) Compare metadata. We have saved the latitude, longitude, and time attributes above after loading each data. +Use `identical()` or `all.equal()` to check if the values are consistent. +```r +identical(obs_lat, hcst_lat) +[1] TRUE +identical(obs_lon, hcst_lon) +[1] TRUE +identical(fcst_lat, hcst_lat) +[1] TRUE +identical(fcst_lon, hcst_lon) +[1] TRUE + +identical(format(hcst_time, '%Y%m'), format(obs_time, '%Y%m')) +[1] TRUE +``` -- GitLab From b9de2964ddb6ad83a8f92cea589a9d8d72e58c6c Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 20 Oct 2023 14:52:09 +0200 Subject: [PATCH 13/27] Run if the commit doesn't end with -draft --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index db7b631..7706518 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -5,6 +5,7 @@ workflow: rules: - if: $CI_COMMIT_TITLE =~ /-draft$/ when: never + - when: always # - if: $CI_PIPELINE_SOURCE == "merge_request_event" # - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH -- GitLab From 71396fca4c8ff5863fa9daeb7f84cbf31608960d Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 23 Oct 2023 12:29:47 +0200 Subject: [PATCH 14/27] remove unecessary elements --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 9 +++------ inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git 
a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index bcd171c..1a61862 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -46,8 +46,7 @@ Understand the following script, run it, and check the result. transform = CDORemapper, transform_params = list(grid = 'r360x181', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = 'syear', longitude = NULL, latitude = NULL), @@ -99,8 +98,7 @@ Try to take the Start() call above and modify it to load the forecast data (hint transform = CDORemapper, transform_params = list(grid = _____, method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = _____, longitude = NULL, latitude = NULL), @@ -187,8 +185,7 @@ even "time" is not explicitly specified in the Start() call. transform = CDORemapper, transform_params = list(grid = ______, method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = ______, longitude = NULL, latitude = NULL), diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md index 635be93..d0d4b07 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md @@ -46,8 +46,7 @@ Understand the following script, run it, and check the result. 
transform = CDORemapper, transform_params = list(grid = 'r360x181', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = 'syear', longitude = NULL, latitude = NULL), @@ -103,8 +102,7 @@ Try to take the Start() call above and modify it to load the forecast data (hint transform = CDORemapper, transform_params = list(grid = 'r360x181', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = 'syear', longitude = NULL, latitude = NULL), @@ -199,8 +197,7 @@ even "time" is not explicitly specified in the Start() call. transform = CDORemapper, transform_params = list(grid = 'r360x181', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = 'syear', longitude = NULL, latitude = NULL), -- GitLab From 3a2b89dd5508d0c16c80736d507f340336c512b1 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 26 Oct 2023 17:20:17 +0200 Subject: [PATCH 15/27] hide the potential changes --- .gitlab-ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7706518..200b32d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,11 +1,11 @@ stages: - build -workflow: - rules: - - if: $CI_COMMIT_TITLE =~ /-draft$/ - when: never - - when: always +#workflow: +# rules: +# - if: $CI_COMMIT_TITLE =~ /-draft$/ +# when: never +# - when: always # - if: $CI_PIPELINE_SOURCE == "merge_request_event" # - if: $CI_COMMIT_BRANCH == 
$CI_DEFAULT_BRANCH -- GitLab From d0fe41ac3ee78cb4e879eb055bc422d27846f09f Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 27 Oct 2023 17:35:48 +0200 Subject: [PATCH 16/27] Update the data path --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 2 +- inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 1a61862..48bd86a 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -170,7 +170,7 @@ even "time" is not explicitly specified in the Start() call. path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' #---------------------------------------------------------------------- # Run these two lines if you're on Marenostrum4 and log in with training account - prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' path_obs <- paste0(prefix, path_obs) #---------------------------------------------------------------------- diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md index d0d4b07..39e5451 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md @@ -28,7 +28,7 @@ Understand the following script, run it, and check the result. 
path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" #---------------------------------------------------------------------- # Run these two lines if you're on Marenostrum4 and log in with training account - prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' path_exp <- paste0(prefix, path_exp) #---------------------------------------------------------------------- -- GitLab From 9318c7f1f17e96375a3d9204dc0b8e71f3d0beda Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 30 Oct 2023 13:02:36 +0100 Subject: [PATCH 17/27] correct obs path and last time comparison --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 5 +++-- inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md | 7 +++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 48bd86a..149f036 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -28,7 +28,7 @@ Understand the following script, run it, and check the result. 
 path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" #---------------------------------------------------------------------- # Run these two lines if you're on Marenostrum4 and log in with training account - prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' path_exp <- paste0(prefix, path_exp) #---------------------------------------------------------------------- @@ -242,5 +242,6 @@ identical(____, ____) all.equal(____, ____) # time: only compare year and month -identical(format(hcst_time, '%Y%m'), format(obs_time, '%Y%m')) +hcst_time_corrected <- attributes(hcst)$Variables$common$time +identical(format(hcst_time_corrected, '%Y%m'), format(obs_time, '%Y%m')) ``` diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md index 39e5451..84099cd 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md @@ -182,7 +182,7 @@ even "time" is not explicitly specified in the Start() call. path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' #---------------------------------------------------------------------- # Run these two lines if you're on Marenostrum4 and log in with training account - prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' path_obs <- paste0(prefix, path_obs) #---------------------------------------------------------------------- @@ -257,6 +257,8 @@ Use `identical()` or `all.equal()` to check if the values are consistent.
```r identical(obs_lat, hcst_lat) [1] TRUE +all.equal(obs_lat, hcst_lat) +[1] TRUE identical(obs_lon, hcst_lon) [1] TRUE identical(fcst_lat, hcst_lat) @@ -264,6 +266,7 @@ identical(fcst_lat, hcst_lat) identical(fcst_lon, hcst_lon) [1] TRUE -identical(format(hcst_time, '%Y%m'), format(obs_time, '%Y%m')) +hcst_time_corrected <- attributes(hcst)$Variables$common$time +identical(format(hcst_time_corrected, '%Y%m'), format(obs_time, '%Y%m')) [1] TRUE ``` -- GitLab From c67746c16a9beb4d86135329070c2cd3990e882f Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 30 Oct 2023 14:05:39 +0100 Subject: [PATCH 18/27] check file by ncdump before Start() call --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 5 +++++ inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 149f036..0741780 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -21,6 +21,11 @@ We're going to analyze the near-surface temperature (short name: tas) for season ### 1.a Hindcast data +Check one netCDF file to see the data structure. +``` +ncdump -h /gpfs/scratch/nct01/nct01001/d2_handson_R/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_19931101.nc |less +``` + Understand the following script, run it, and check the result. ```r diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md index 84099cd..41d4a8b 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md @@ -21,6 +21,11 @@ We're going to analyze the near-surface temperature (short name: tas) for season ### 1.a Hindcast data +Check one netCDF file to see the data structure. 
+``` +ncdump -h /gpfs/scratch/nct01/nct01001/d2_handson_R/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_19931101.nc |less +``` + Understand the following script, run it, and check the result. ```r -- GitLab From 40136fd927d64dd74a42c01d06dd57e6cbf3633f Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 22 Nov 2023 16:16:43 +0100 Subject: [PATCH 19/27] Allow to run on not only WS and AS machine --- R/ByChunks_autosubmit.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 65ab36e..ec33632 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -611,14 +611,15 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', #NOTE: If we ssh to AS VM and run everything there, we don't need to ssh here system(sys_commands) - } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { + } else { +# } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { # ssh from WS to AS VM to run exp as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"') system(sys_commands) - } else { - stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") +# } else { +# stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") } # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. 
-- GitLab From 37f9e7a5c5b37bef2a4339d5cab07af3b56079ab Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 1 Dec 2023 14:59:13 +0100 Subject: [PATCH 20/27] Correct the update code --- R/Collect.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/Collect.R b/R/Collect.R index 6d752f5..62e105e 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -386,19 +386,18 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { run_dir <- startr_exec$cluster[['run_dir']] done <- FALSE - sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) while (!done) { # If wait, try until it is done + sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { done <- TRUE } else if (!wait) { stop("Computation in progress...") } else { - Sys.sleep(startr_exec$cluster[['polling_period']]) message("Computation in progress, ", sum_received_chunks, " of ", prod(unlist(chunks)), " chunks are done...\n", "Check status on Autosubmit GUI: https://earth.bsc.es/autosubmitapp/experiment/", suite_id) -# Sys.sleep(min(sqrt(attempt), 5)) + Sys.sleep(startr_exec$cluster[['polling_period']]) } } # while !done -- GitLab From 2e82b56fff5a539630e93290a8f6128c79f017b9 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 1 Dec 2023 16:49:50 +0100 Subject: [PATCH 21/27] Enable Collect() to run on HPCs and return combined array there --- DESCRIPTION | 2 +- R/Collect.R | 90 +++++++++++++++++++++++++++++++++++++++----------- man/Collect.Rd | 6 +++- 3 files changed, 76 insertions(+), 22 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 60fa08c..90b03a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,5 +42,5 @@ URL: https://earth.bsc.es/gitlab/es/startR/ BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues SystemRequirements: cdo ecFlow Encoding: UTF-8 -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 Config/testthat/edition: 3 diff --git 
a/R/Collect.R b/R/Collect.R index 6d752f5..9baa264 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -22,6 +22,9 @@ #' folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the #' data and Collect() them as many times as desired, set remove to FALSE. The #' default value is TRUE. +#' @param on_remote A logical value deciding to the function is run locally and +#' sync the outputs back from HPC (FALSE, default), or it is run on HPC +#' (TRUE). #'@return A list of merged data array. #' #'@examples @@ -72,8 +75,9 @@ #' } #' #'@export -Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { - +#--------NEW------- +Collect <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { +#-------NEW_END----------- # Parameter checks if (!is(startr_exec, 'startR_exec')) { stop("Parameter 'startr_exec' must be an object of the class ", @@ -88,23 +92,29 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { if (!is.logical(remove)) { stop("Parameter 'remove' must be logical.") } +#------NEW--------- + if (!is.logical(on_remote)) { + stop("Parameter 'on_remote' must be logical.") + } if (tolower(startr_exec$workflow_manager) == 'ecflow') { - res <- Collect_ecflow(startr_exec, wait = wait, remove = remove) + res <- Collect_ecflow(startr_exec, wait = wait, remove = remove, on_remote = on_remote) } else if (tolower(startr_exec$workflow_manager) == 'autosubmit') { - res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove) + res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove, on_remote = on_remote) } +#-------NEW_END---------- return(res) } +#------NEW--------- +Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { -Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { - - if (Sys.which('ecflow_client') == '') { + if (!on_remote && Sys.which('ecflow_client') == '') { stop("ecFlow must be installed in order to collect results from a ", "Compute() execution.") } 
+#-------NEW_END----------- cluster <- startr_exec[['cluster']] ecflow_server <- startr_exec[['ecflow_server']] suite_id <- startr_exec[['suite_id']] @@ -114,7 +124,9 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { timings <- startr_exec[['timings']] ecflow_suite_dir_suite <- paste0(ecflow_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') - if (!is.null(cluster[['temp_dir']])) { + if (!is.null(cluster[['temp_dir']])) { #NOTE: Which case doesn't have temp_dir? +#-------NEW--------- + remote_ecflow_suite_dir <- cluster[['temp_dir']] remote_ecflow_suite_dir_suite <- paste0(cluster[['temp_dir']], '/STARTR_CHUNKING_', suite_id, '/') @@ -141,8 +153,13 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { } done <- FALSE attempt <- 1 - sum_received_chunks <- sum(grepl('output.*\\.Rds', - list.files(ecflow_suite_dir_suite))) +#--------NEW----------- + if (!on_remote) { + #TODO: Is it correct? Not all the cases have "output" as beginning + sum_received_chunks <- sum(grepl('output.*\\.Rds', + list.files(ecflow_suite_dir_suite))) + } +#---------NEW_END-------- if (cluster[['bidirectional']]) { t_transfer_back <- NA } else { @@ -156,7 +173,9 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { writeLines(rsync_petition_file_lines, rsync_petition_file) Sys.sleep(2) while (!done) { - failed <- FALSE +#-------NEW----------- + if (!on_remote) { +#------NEW_END---------- if (cluster[['bidirectional']]) { status <- system(paste0("ecflow_client --get_state=STARTR_CHUNKING_", suite_id, " --host=", @@ -197,6 +216,7 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { # received_chunks <- received_files[grepl('Rds$', # received_files)] #} + failed <- FALSE t_begin_transfer_back <- Sys.time() rsync_output <- tryCatch({ system(paste0("rsync -rav --include-from=", rsync_petition_file, " '", @@ -268,11 +288,30 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { } 
Sys.sleep(cluster[['polling_period']]) } +#--------NEW---------- + } else { # on_remote + + sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_ecflow_suite_dir_suite ))) + + if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { + done <- TRUE + } else if (!wait) { + stop("Computation in progress...") + } else { + message("Computation in progress, ", sum_received_chunks, " of ", prod(unlist(chunks)), " chunks are done.") + message("Will try again after polling_period...") + Sys.sleep(cluster[['polling_period']]) + } + + } +#-------NEW_END--------- attempt <- attempt + 1 } file.remove(rsync_petition_file) timings[['transfer_back']] <- t_transfer_back - if (!is.null(cluster[['temp_dir']])) { + #------NEW-------- + if (!on_remote && !is.null(cluster[['temp_dir']])) { + #-------NEW_END-------- system(paste0('ssh ', cluster[['queue_host']], ' "rm -rf ', remote_ecflow_suite_dir_suite, '"')) } @@ -280,11 +319,19 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { .warning("ATTENTION: The source chunks will be removed from the ", "system. 
Store the result after Collect() ends if needed.") } +#-------NEW------- + if (!on_remote) { + target_folder <- ecflow_suite_dir + target_folder_suite <- ecflow_suite_dir_suite + } else { + target_folder <- remote_ecflow_suite_dir + target_folder_suite <- remote_ecflow_suite_dir_suite + } t_begin_merge <- Sys.time() - result <- .MergeChunks(ecflow_suite_dir, suite_id, remove) + result <- .MergeChunks(target_folder, suite_id, remove) t_end_merge <- Sys.time() timings[['merge']] <- as.numeric(difftime(t_end_merge, t_begin_merge, units = 'secs')) - received_files <- list.files(ecflow_suite_dir_suite, full.names = TRUE) + received_files <- list.files(target_folder_suite, full.names = TRUE) received_timings_files <- received_files[grepl('timings$', received_files)] for (timings_file in received_timings_files) { times <- readRDS(timings_file) @@ -294,11 +341,14 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { timings[['compute']] <- c(timings[['compute']], times['compute']) } if (remove) { - system(paste0("ecflow_client --delete=force yes /STARTR_CHUNKING_", - suite_id, " --host=", ecflow_server[['host']], - " --port=", ecflow_server[['port']])) - unlink(paste0(ecflow_suite_dir_suite), - recursive = TRUE) +#--------NEW-------------- + if (!on_remote) { + system(paste0("ecflow_client --delete=force yes /STARTR_CHUNKING_", + suite_id, " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + } + unlink(target_folder_suite, recursive = TRUE) +#---------NEW_END----------- } if (attempt > 2) { t_end_total <- Sys.time() @@ -374,7 +424,7 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { -Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { +Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { suite_id <- startr_exec[['suite_id']] chunks <- startr_exec[['chunks']] diff --git a/man/Collect.Rd b/man/Collect.Rd index d90caca..e701a00 100644 --- 
a/man/Collect.Rd +++ b/man/Collect.Rd @@ -4,7 +4,7 @@ \alias{Collect} \title{Collect and merge the computation results} \usage{ -Collect(startr_exec, wait = TRUE, remove = TRUE) +Collect(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) } \arguments{ \item{startr_exec}{An R object returned by Compute() when the parameter 'wait' @@ -25,6 +25,10 @@ received from the HPC after data being collected, as well as the local job folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the data and Collect() them as many times as desired, set remove to FALSE. The default value is TRUE.} + +\item{on_remote}{A logical value deciding to the function is run locally and +sync the outputs back from HPC (FALSE, default), or it is run on HPC +(TRUE).} } \value{ A list of merged data array. -- GitLab From c32d9286ec9f217bb0dd0b34cc4446a8342f6406 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 11 Dec 2023 14:32:08 +0100 Subject: [PATCH 22/27] Clean code --- R/Collect.R | 206 ++++++++++++++++++++++++---------------------------- 1 file changed, 95 insertions(+), 111 deletions(-) diff --git a/R/Collect.R b/R/Collect.R index 05ed1b4..1bb8648 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -75,9 +75,7 @@ #' } #' #'@export -#--------NEW------- Collect <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { -#-------NEW_END----------- # Parameter checks if (!is(startr_exec, 'startR_exec')) { stop("Parameter 'startr_exec' must be an object of the class ", @@ -92,7 +90,6 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) if (!is.logical(remove)) { stop("Parameter 'remove' must be logical.") } -#------NEW--------- if (!is.logical(on_remote)) { stop("Parameter 'on_remote' must be logical.") } @@ -102,19 +99,16 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) } else if (tolower(startr_exec$workflow_manager) == 'autosubmit') { res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove, 
on_remote = on_remote) } -#-------NEW_END---------- return(res) } -#------NEW--------- Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { if (!on_remote && Sys.which('ecflow_client') == '') { stop("ecFlow must be installed in order to collect results from a ", "Compute() execution.") } -#-------NEW_END----------- cluster <- startr_exec[['cluster']] ecflow_server <- startr_exec[['ecflow_server']] suite_id <- startr_exec[['suite_id']] @@ -125,7 +119,6 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = ecflow_suite_dir_suite <- paste0(ecflow_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') if (!is.null(cluster[['temp_dir']])) { #NOTE: Which case doesn't have temp_dir? -#-------NEW--------- remote_ecflow_suite_dir <- cluster[['temp_dir']] remote_ecflow_suite_dir_suite <- paste0(cluster[['temp_dir']], '/STARTR_CHUNKING_', @@ -153,13 +146,12 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = } done <- FALSE attempt <- 1 -#--------NEW----------- if (!on_remote) { #TODO: Is it correct? 
Not all the cases have "output" as beginning sum_received_chunks <- sum(grepl('output.*\\.Rds', list.files(ecflow_suite_dir_suite))) } -#---------NEW_END-------- + if (cluster[['bidirectional']]) { t_transfer_back <- NA } else { @@ -173,95 +165,19 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = writeLines(rsync_petition_file_lines, rsync_petition_file) Sys.sleep(2) while (!done) { -#-------NEW----------- if (!on_remote) { -#------NEW_END---------- - if (cluster[['bidirectional']]) { - status <- system(paste0("ecflow_client --get_state=STARTR_CHUNKING_", - suite_id, " --host=", - ecflow_server[['host']], " --port=", ecflow_server[['port']]), - intern = TRUE) - if (any(grepl(paste0("suite STARTR_CHUNKING_", suite_id, " #.* state:complete"), status))) { - done <- TRUE - } else if (!wait) { - stop("Computation in progress...") - } - if (!first_chunk_received) { - if (any(grepl('state:complete', status))) { - if (!is.null(time_before_first_chunk)) { - time_after_first_chunk <- Sys.time() - estimate <- (time_after_first_chunk - - time_before_first_chunk) * - ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / - cluster[['max_jobs']]) - units(estimate) <- 'mins' - .message( - paste0('Remaining time estimate (neglecting queue and ', - 'merge time) (at ', format(time_after_first_chunk), - '): ', format(estimate), ' (', - format(time_after_first_chunk - - time_before_first_chunk), ' per chunk)') - ) - } - first_chunk_received <- TRUE - } - } - Sys.sleep(min(sqrt(attempt), 5)) - } else { - #if (sum_received_chunks == 0) { - # # Accounting for the fist chunk received in ByChunks and - # # setting it to complete - # # ByChunks needs the first chunk to calculate remaining time - # received_files <- list.files(ecflow_suite_dir_suite) - # received_chunks <- received_files[grepl('Rds$', - # received_files)] - #} - failed <- FALSE - t_begin_transfer_back <- Sys.time() - rsync_output <- tryCatch({ - system(paste0("rsync -rav --include-from=", 
rsync_petition_file, " '", - cluster[['queue_host']], ":", remote_ecflow_suite_dir_suite, "' ", - ecflow_suite_dir_suite, "/"), intern = TRUE) - }, error = function(e) { - message("Warning: rsync from remote server to collect results failed. ", - "Retrying soon.") - failed <- TRUE - }) - t_end_transfer_back <- Sys.time() - t_transfer_back <- t_transfer_back + as.numeric(difftime(t_end_transfer_back, - t_begin_transfer_back, units = 'secs')) - if (!failed) { - #if (sum_received_chunks == 0) { - # rsync_output <- c(rsync_output, received_chunks) - #} - received_running <- grepl('running$', rsync_output) - for (received_chunk_index in which(received_running)) { - file_name <- rsync_output[received_chunk_index] - task_name <- find_task_name(file_name) - system(paste0('ecflow_client --force=active recursive ', - task_name, - " --host=", ecflow_server[['host']], - " --port=", ecflow_server[['port']])) - } - received_crashed <- grepl('crashed$', rsync_output) - for (received_chunk_index in which(received_crashed)) { - file_name <- rsync_output[received_chunk_index] - task_name <- find_task_name(file_name) - system(paste0('ecflow_client --force=aborted recursive ', - task_name, - " --host=", ecflow_server[['host']], - " --port=", ecflow_server[['port']])) + if (cluster[['bidirectional']]) { + status <- system(paste0("ecflow_client --get_state=STARTR_CHUNKING_", + suite_id, " --host=", + ecflow_server[['host']], " --port=", ecflow_server[['port']]), + intern = TRUE) + if (any(grepl(paste0("suite STARTR_CHUNKING_", suite_id, " #.* state:complete"), status))) { + done <- TRUE + } else if (!wait) { + stop("Computation in progress...") } - received_chunks <- grepl('Rds$', rsync_output) - for (received_chunk_index in which(received_chunks)) { - file_name <- rsync_output[received_chunk_index] - task_name <- find_task_name(file_name) - system(paste0('ecflow_client --force=complete recursive ', - task_name, - " --host=", ecflow_server[['host']], - " --port=", 
ecflow_server[['port']])) - sum_received_chunks <- sum_received_chunks + 1 - if (!first_chunk_received) { + if (!first_chunk_received) { + if (any(grepl('state:complete', status))) { if (!is.null(time_before_first_chunk)) { time_after_first_chunk <- Sys.time() estimate <- (time_after_first_chunk - @@ -272,23 +188,97 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = .message( paste0('Remaining time estimate (neglecting queue and ', 'merge time) (at ', format(time_after_first_chunk), - '): ', format(estimate), ' (', - format(time_after_first_chunk - + '): ', format(estimate), ' (', + format(time_after_first_chunk - time_before_first_chunk), ' per chunk)') ) } first_chunk_received <- TRUE } } - if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { - done <- TRUE - } else if (!wait) { - stop("Computation in progress...") + Sys.sleep(min(sqrt(attempt), 5)) + } else { + #if (sum_received_chunks == 0) { + # # Accounting for the fist chunk received in ByChunks and + # # setting it to complete + # # ByChunks needs the first chunk to calculate remaining time + # received_files <- list.files(ecflow_suite_dir_suite) + # received_chunks <- received_files[grepl('Rds$', + # received_files)] + #} + failed <- FALSE + t_begin_transfer_back <- Sys.time() + rsync_output <- tryCatch({ + system(paste0("rsync -rav --include-from=", rsync_petition_file, " '", + cluster[['queue_host']], ":", remote_ecflow_suite_dir_suite, "' ", + ecflow_suite_dir_suite, "/"), intern = TRUE) + }, error = function(e) { + message("Warning: rsync from remote server to collect results failed. 
", + "Retrying soon.") + failed <- TRUE + }) + t_end_transfer_back <- Sys.time() + t_transfer_back <- t_transfer_back + as.numeric(difftime(t_end_transfer_back, + t_begin_transfer_back, units = 'secs')) + if (!failed) { + #if (sum_received_chunks == 0) { + # rsync_output <- c(rsync_output, received_chunks) + #} + received_running <- grepl('running$', rsync_output) + for (received_chunk_index in which(received_running)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=active recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + } + received_crashed <- grepl('crashed$', rsync_output) + for (received_chunk_index in which(received_crashed)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=aborted recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + } + received_chunks <- grepl('Rds$', rsync_output) + for (received_chunk_index in which(received_chunks)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=complete recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + sum_received_chunks <- sum_received_chunks + 1 + if (!first_chunk_received) { + if (!is.null(time_before_first_chunk)) { + time_after_first_chunk <- Sys.time() + estimate <- (time_after_first_chunk - + time_before_first_chunk) * + ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / + cluster[['max_jobs']]) + units(estimate) <- 'mins' + .message( + paste0('Remaining time estimate (neglecting queue and ', + 'merge time) (at ', format(time_after_first_chunk), + '): ', format(estimate), ' (', + format(time_after_first_chunk - + time_before_first_chunk), ' per chunk)') + ) + } + first_chunk_received <- TRUE + 
} + } + if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { + done <- TRUE + } else if (!wait) { + stop("Computation in progress...") + } } + Sys.sleep(cluster[['polling_period']]) } - Sys.sleep(cluster[['polling_period']]) - } -#--------NEW---------- + } else { # on_remote sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_ecflow_suite_dir_suite ))) @@ -304,14 +294,11 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = } } -#-------NEW_END--------- attempt <- attempt + 1 } file.remove(rsync_petition_file) timings[['transfer_back']] <- t_transfer_back - #------NEW-------- if (!on_remote && !is.null(cluster[['temp_dir']])) { - #-------NEW_END-------- system(paste0('ssh ', cluster[['queue_host']], ' "rm -rf ', remote_ecflow_suite_dir_suite, '"')) } @@ -319,7 +306,6 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = .warning("ATTENTION: The source chunks will be removed from the ", "system. Store the result after Collect() ends if needed.") } -#-------NEW------- if (!on_remote) { target_folder <- ecflow_suite_dir target_folder_suite <- ecflow_suite_dir_suite @@ -341,14 +327,12 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = timings[['compute']] <- c(timings[['compute']], times['compute']) } if (remove) { -#--------NEW-------------- if (!on_remote) { system(paste0("ecflow_client --delete=force yes /STARTR_CHUNKING_", suite_id, " --host=", ecflow_server[['host']], " --port=", ecflow_server[['port']])) } unlink(target_folder_suite, recursive = TRUE) -#---------NEW_END----------- } if (attempt > 2) { t_end_total <- Sys.time() -- GitLab From 6c0342a65341af4a60ed06a53c0f86317895aa48 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 11 Dec 2023 15:23:08 +0100 Subject: [PATCH 23/27] Improve doc for use_libraries --- DESCRIPTION | 2 +- R/Step.R | 4 +++- man/Step.Rd | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION 
b/DESCRIPTION index 60fa08c..90b03a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,5 +42,5 @@ URL: https://earth.bsc.es/gitlab/es/startR/ BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues SystemRequirements: cdo ecFlow Encoding: UTF-8 -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 Config/testthat/edition: 3 diff --git a/R/Step.R b/R/Step.R index e5d7304..3c997f0 100644 --- a/R/Step.R +++ b/R/Step.R @@ -19,7 +19,9 @@ #' for multiple returned arrays indicating the dimension names of the function #' output. #'@param use_libraries A vector of character string indicating the R library -#' names to be used in 'fun'. The default value is NULL. +#' names to be used in 'fun'. Only used when the jobs are run on HPCs; if the +#' jobs are run locally, load the necessary libraries by \code{library()} +#' directly. The default value is NULL. #'@param use_attributes One or more lists of vectors of character string #' indicating the data attributes to be used in 'fun'. The list name should be #' consistent with the list name of 'data' in AddStep(). The default value is diff --git a/man/Step.Rd b/man/Step.Rd index c473ccb..283c555 100644 --- a/man/Step.Rd +++ b/man/Step.Rd @@ -28,7 +28,9 @@ for multiple returned arrays indicating the dimension names of the function output.} \item{use_libraries}{A vector of character string indicating the R library -names to be used in 'fun'. The default value is NULL.} +names to be used in 'fun'. Only used when the jobs are run on HPCs; if the +jobs are run locally, load the necessary libraries by \code{library()} +directly. The default value is NULL.} \item{use_attributes}{One or more lists of vectors of character string indicating the data attributes to be used in 'fun'. 
The list name should be -- GitLab From 85c806d6058ba8eb7dec93e22fd39fb29728ceab Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 20 Dec 2023 17:07:54 +0100 Subject: [PATCH 24/27] Fix .Rds files recognization --- R/Collect.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/Collect.R b/R/Collect.R index 1bb8648..5ae8b15 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -147,8 +147,7 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = done <- FALSE attempt <- 1 if (!on_remote) { - #TODO: Is it correct? Not all the cases have "output" as beginning - sum_received_chunks <- sum(grepl('output.*\\.Rds', + sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(ecflow_suite_dir_suite))) } -- GitLab From d5d4d564e799e0a6bd790a661cc6a535d151f519 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 21 Dec 2023 15:07:23 +0100 Subject: [PATCH 25/27] faq for Collect() --- inst/doc/faq.md | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index ffe91a5..7ff7604 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -31,6 +31,8 @@ This document intends to be the first reference for any doubts that you may have 25. [What to do if your function has too many target dimensions](#25-what-to-do-if-your-function-has-too-many-target-dimensions) 26. [Use merge_across_dims_narm to remove NAs](#26-use-merge_across_dims_narm-to-remove-nas) 27. [Utilize chunk number in the function](#27-utilize-chunk-number-in-the-function) + 28. [Run startR in the background](#28-run-startr-in-the-background) + 29. [Collect result on HPCs](#29-collect-result-on-hpcs) 2. **Something goes wrong...** @@ -1008,6 +1010,38 @@ shows how to get start date for each chunk using chunk number; (2) [ex2_14](inst There are many other possible applications of this parameter. Please share with us other uses cases you may create. +### 28. 
Run startR in the background + +For heavy execution, we usually launch the jobs on HPCs with parallel computation. Sometimes, it takes a lot of time (days, weeks) to finish all the jobs. +It'd be very handy to let the jobs run in the background, so we don't need to keep the R session on the workstation open during the whole process. +To do this: + +(1) Use parameter `wait = FALSE` in Compute() call. The execution therefore won't block the R session. + +(2) Save the object as a .Rds file by saveRDS(). In this file, you have all the information needed for collecting the result later. You can close the R session and turn off the workstation now. + +(3) When you want to collect the result, use Collect() with the saved .Rds file. +You can choose to use parameter `wait = TRUE` and the command will keep running until all the jobs are finished and can be collected. +Or, by `wait = FALSE`, it will tell you the jobs are still running and you can try again later. + +Note that if you use ecFlow as job manager and with Compute(wait = FALSE), the ecFlow-UI won't be updated due to uni-directional connection. +Check [ecFlow UI remains blue and does not update status](#2-ecflow-ui-remains-blue-and-does-not-update-status) for details. + +### 29. Collect result on HPCs +After using Compute() to run execution on HPCs, you can choose to collect the result on local workstation or on HPCs. Here are the instructions for doing it on HPCs. + +(1) Run the startR workflow as usual on workstation until Compute(). + +(2) In Compute(), use `wait = FALSE`. The execution therefore won't block the R session. + +(3) Save the object as a .Rds file somewhere that can be found on HPCs. E.g. `saveRDS(res, "/esarchive/scratch//res_startR_Collect.rds")` + +(4) ssh to HPCs (e.g., Nord3), open an R session. + +(5) Read the saved .Rds file. E.g. `obj_startR <- readRDS("/esarchive/scratch//res_startR_Collect.rds")` + +(6) Collect() the result with parameter `on_remote = TRUE`. E.g.
`res <- Collect(obj_startR, on_remote = TRUE)` + # Something goes wrong... @@ -1042,9 +1076,15 @@ To solve this problem, use `Collect()` in the R terminal after running Compute() ### 3. Compute() successfully but then killed on R session -When Compute() on HPCs, the machines are able to process data which are much larger than the local workstation, so the computation works fine (i.e., on ec-Flow UI, the chunks show yellow in the end.) However, after the computation, the output will be sent back to local workstation. **If the returned data is larger than the available local memory space, your R session will be killed.** Therefore, always pre-check if the returned data will fit in your workstation free memory or not. If not, subset the input data or reduce the output size through more computation. +When we use Compute() and run jobs on HPCs, each job/chunk is finished and the result is saved as a .Rds file individually. +When all the jobs are finished, the next step is to merge all the chunks into one array and return to workstation. +**If the returned data is larger than the available local memory space on your workstation, +your R session will be killed.** Therefore, it is better to always pre-check if the returned data will fit in your workstation free memory or not. + +If the result can fit on HPCs, you can also choose to collect the data there. Check [How-to-29](#29-collect-result-on-hpcs) for details. -Further explanation: though the complete output (i.e., merging all the chunks into one returned array) cannot be sent back to workstation, but the chunking results (.Rds file) are completed and saved in the directory '/STARTR_CHUNKING_'. If you still want to use the chunking results, you can find them there.
+Note that even though the complete output (i.e., merging all the chunks into one returned array) cannot be sent back to workstation and the R session is killed, +the chunking results (.Rds files) are completed and saved in the local directory '/STARTR_CHUNKING_', and you can still utilize the chunk files. ### 4. My jobs work well in workstation and fatnodes but not on Power9 (or vice versa) -- GitLab From c81fc57121b14c6188f1445b0b3feaa8b522403a Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 21 Dec 2023 15:44:03 +0100 Subject: [PATCH 26/27] version bump --- .Rbuildignore | 2 +- DESCRIPTION | 10 ++++++---- NEWS.md | 6 ++++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 98316cc..aa7059a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,7 +9,7 @@ ^inst/doc$ ^\.gitlab-ci\.yml$ ## unit tests should be ignored when building the package for CRAN -#^tests$ +^tests$ ^inst/PlotProfiling\.R$ ^.gitlab$ # Suggested by http://r-pkgs.had.co.nz/package.html diff --git a/DESCRIPTION b/DESCRIPTION index 90b03a7..8fd5ee1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,16 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.3.0 +Version: 2.3.1 Authors@R: c( person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), - person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), + person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-4182-5258")), person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = c("aut"), comment = c(ORCID = "0000-0001-8568-3071")), + person("Eva", "Rifa", , "eva.rifarovira@bsc.es", role = "ctb"), + person("Victoria", "Agudetse", , "victoria.agudetse@bsc.es", role = "ctb"), + person("Bruno", "de Paula Kinoshita", , "bruno.depaulakinoshita@bsc.es", role = "ctb"), person("Javier", "Vegas", , "javier.vegas@bsc.es", role = c("ctb")), person("Pierre-Antoine", "Bretonniere", , 
"pierre-antoine.bretonniere@bsc.es", role = c("ctb")), - person("Roberto", "Serrano", , "rsnotivoli@gmal.com", role = c("ctb")), - person("Eva", "Rifa", , "eva.rifarovira@bsc.es", role = "ctb"), + person("Roberto", "Serrano", , "rsnotivoli@gmail.com", role = c("ctb")), person("BSC-CNS", role = c("aut", "cph"))) Description: Tool to automatically fetch, transform and arrange subsets of multi- dimensional data sets (collections of files) stored in local and/or diff --git a/NEWS.md b/NEWS.md index 9219f96..c19d7a3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# startR v2.3.1 (Release date: 2023-12-22) +- Use Autosubmit as workflow manager on hub +- New feature: Collect result by Collect() on HPCs +- Bugfix: Correct Collect_autosubmit() .Rds files update +- Bugfix: Collect() correctly recognize the finished chunk (.Rds file) in local ecFlow folder. Prevent neverending Collect() when using `wait = F` in Compute() and Collect() the result later on + # startR v2.3.0 (Release date: 2023-08-31) - Load variable metadata when retreive = F - Change Compute() "threads_load" to 1 to be consistent with documentation -- GitLab From 254ced13d8e2635351561d67cd679a00607d73c7 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 21 Dec 2023 16:23:55 +0100 Subject: [PATCH 27/27] fix syntax error --- R/Start.R | 2 +- man/Start.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Start.R b/R/Start.R index 89f87e9..5bfb3bf 100644 --- a/R/Start.R +++ b/R/Start.R @@ -674,7 +674,7 @@ #' to recognize files such as \cr #' \code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'}).\cr\cr #'Note that each glob expression can only represent one possibility (Start() -#'chooses the first). Because /code{*} is not the tag, which means it cannot +#'chooses the first). Because \code{*} is not the tag, which means it cannot #'be a dimension of the output array. Therefore, only one possibility can be #'adopted. 
For example, if \cr #'\code{'/path/to/dataset/precipitation_*/19901101_*_foo.nc'}\cr diff --git a/man/Start.Rd b/man/Start.Rd index 25eb8d7..640c5a9 100644 --- a/man/Start.Rd +++ b/man/Start.Rd @@ -651,7 +651,7 @@ For example, a path pattern could be as follows: \cr to recognize files such as \cr \code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'}).\cr\cr Note that each glob expression can only represent one possibility (Start() -chooses the first). Because /code{*} is not the tag, which means it cannot +chooses the first). Because \code{*} is not the tag, which means it cannot be a dimension of the output array. Therefore, only one possibility can be adopted. For example, if \cr \code{'/path/to/dataset/precipitation_*/19901101_*_foo.nc'}\cr -- GitLab