From ea9400cc581c06227741dec2bde93d7c7884aa94 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 28 Sep 2023 18:22:06 +0200 Subject: [PATCH 01/27] Add run_dir document; add Autosubmit use case --- inst/doc/practical_guide.md | 1 + inst/doc/usecase/ex2_1_timedim.R | 44 +++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index b22c629..91c11ee 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -746,6 +746,7 @@ To have the good practice, note down the expid if it is automatically created by - `hpc_user`: Your user ID on the HPC (i.e., "bsc32xxx"). It is required if "queue_host" is not 'local'. - `data_dir`: The path to the data repository if the data is not shared. - `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module. +- `run_dir`: The directory to run the startR jobs. It is useful when the self-defined function has a relative path. - `init_commands`: The initial commands in bash script before R script runs. For example, the modules required by computation can be loaded here. - `r_module`: Name of the UNIX environment module to be used for R. If not specified, `module load R` will be used. - `CDO_module`: Name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `tranform` is required in Start(). 
diff --git a/inst/doc/usecase/ex2_1_timedim.R b/inst/doc/usecase/ex2_1_timedim.R index 15ef37d..0c0f36c 100644 --- a/inst/doc/usecase/ex2_1_timedim.R +++ b/inst/doc/usecase/ex2_1_timedim.R @@ -45,9 +45,9 @@ library(startR) ## on Power9 #-----------modify according to your personal info--------- - queue_host = 'cte-power' #your own host name for power9 - temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' - ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory + queue_host <- 'cte-power' #your own host name for power9 + temp_dir <- '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' + ecflow_suite_dir <- '/home/Earth/nperez/startR_local/' #your own local directory #------------------------------------------------------------ res <- Compute(wf1, chunks = list(ensemble = 20, @@ -66,11 +66,12 @@ library(startR) ecflow_suite_dir = ecflow_suite_dir, wait = TRUE) -## on Nord3 + +## on Nord3 with ecFlow #-----------modify according to your personal info--------- - queue_host = 'nord4' - temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' - ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory + queue_host <- 'nord4' + temp_dir <- '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' + ecflow_suite_dir <- '/home/Earth/nperez/startR_local/' #your own local directory #------------------------------------------------------------ res <- Compute(wf1, chunks = list(ensemble = 20, @@ -88,3 +89,32 @@ library(startR) ecflow_suite_dir = ecflow_suite_dir, wait = TRUE) + + +## on Nord3 with Autosubmit +#-----------modify according to your personal info--------- + hpc_user <- "bsc32xxx" + expid <- "xxxx" # autosubmit exp id; can be NULL + autosubmit_suite_dir <- "/home/Earth//startR_local_autosubmit/" +#------------------------------------------------------------ + res <- Compute(wf1, + chunks = list(ensemble = 20, sdate = 2), + threads_load = 2, + threads_compute = 4, + cluster = list( + queue_host = 'nord3', + r_module = "R/4.1.2-foss-2019b", + 
autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3', + cores_per_job = 2, + job_wallclock = '01:00:00', + max_jobs = 40, + polling_period = 10, + extra_queue_params = list('#SBATCH --constraint=medmem', '#SBATCH --exclusive'), + expid = NULL, + hpc_user = hpc_user + ), + workflow_manager = 'autosubmit', + autosubmit_suite_dir = autosubmit_suite_dir, + autosubmit_server = NULL, #'bscesautosubmit01', + wait = TRUE + ) -- GitLab From 00a0f58ea97515a4b2adf73a74cb778a5a029f5d Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 11 Oct 2023 16:21:09 +0200 Subject: [PATCH 02/27] first draft --- .../tutorial/PATC2023/griddes_system7c3s.txt | 19 +++ .../PATC2023/handson_1-data-loading.md | 124 ++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 inst/doc/tutorial/PATC2023/griddes_system7c3s.txt create mode 100644 inst/doc/tutorial/PATC2023/handson_1-data-loading.md diff --git a/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt b/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt new file mode 100644 index 0000000..b6f1847 --- /dev/null +++ b/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt @@ -0,0 +1,19 @@ +# Grid description file for Meteofrance System 7 (C3S) +# Serves as reference_grid for archive.ym +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = longitude +xlongname = "longitude" +xunits = "degrees_east" +yname = latitude +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md new file mode 100644 index 0000000..0eb4b0c --- /dev/null +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -0,0 +1,124 @@ +# Hands-on 1: Load data by startR + +## Goal +Use startR to load the data and learn how to adjust data while loading data. + +## 0. Load required packages + +```r +# Clean the session +rm(list = ls()) + +library(startR) +``` +## 1. 
Load data from data repository (esarchive/) + +#TODO: update it +**Data description**: +This sample data set contains a small cutout of gridded seasonal precipitation +forecast data from the Copernicus Climate Change ECMWF-System 5 forecast system. +Specifically, for the 'prlr' (precipitation) variable, for the first 6 forecast +ensemble members, daily values, for all 31 days in March following the forecast +starting dates in November of years 2010 to 2012, for a small 4x4 pixel cutout in +a region in the North-Western Italian Alps (44N-47N, 6E-9E). The data resolution is 1 degree. + +Use the above information to define the variable, start dates, longitude and latitude. + +```r +#TODO: update the path + # Use this one if on workstation or nord3 (have access to /esarchive) + path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" + + # Use this one if on Marenostrum4 and log in with PATC2021 account + path_exp <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', + 'exp/ecmwf/system5c3s/daily_mean/', + '$var$_s0-24h/$var$_$sdate$.nc') + + var <- 'tas' + sdate_hsct <- paste0(1993:2016, '1101') + sdate_fcst <- '20201101' + lon.min <- -20 + lon.max <- 40 + lat.min <- 20 + lat.max <- 80 +``` + +Use Start() to load the data. 
+ +```r + hcst <- Start(dat = path_exp, + var = var, + syear = sdate_fcst, + ensemble = 'all', + time = 1:2, + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(-180, 180), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +```r + fcst <- Start(dat = path_exp, + var = var, + syear = sdate_fcst, + ensemble = 'all', + time = 1:2, + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(-180, 180), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + + +path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' + +# Create an date array from hcst dates +Check the time attributes of hcst: Is it correct? +To load the corresponding obs data, we can use these time values as the selectors in obs Start() call. + +dim(attributes(hcst)$Variables$common$time) +syear time + 24 2 + +str(attributes(hcst)$Variables$common$time) + POSIXct[1:48], format: "1993-12-01" "1994-12-01" "1995-12-01" "1996-12-01" "1997-12-01" ... 
+ +# Adjust the day to the correct month +attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) + +date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') +sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) + +```r + obs <- Start(dat = path_obs, + var = var, + syear = sdate_obs, + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = './griddes_system7c3s.txt', + method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + + -- GitLab From f80f6fc46ba65e86cb6d9b5539b1ac4892a6c917 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 11 Oct 2023 16:31:18 +0200 Subject: [PATCH 03/27] several fixes --- .../tutorial/PATC2023/handson_1-data-loading.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 0eb4b0c..0daf8eb 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -35,7 +35,7 @@ Use the above information to define the variable, start dates, longitude and lat '$var$_s0-24h/$var$_$sdate$.nc') var <- 'tas' - sdate_hsct <- paste0(1993:2016, '1101') + sdate_hcst <- paste0(1993:2016, '1101') sdate_fcst <- '20201101' lon.min <- -20 lon.max <- 40 @@ -48,7 +48,7 @@ Use Start() to load the data. 
```r hcst <- Start(dat = path_exp, var = var, - syear = sdate_fcst, + syear = sdate_hcst, ensemble = 'all', time = 1:2, latitude = values(list(lat.min, lat.max)), @@ -82,12 +82,12 @@ Use Start() to load the data. ``` -path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' - # Create an date array from hcst dates Check the time attributes of hcst: Is it correct? + To load the corresponding obs data, we can use these time values as the selectors in obs Start() call. +```r dim(attributes(hcst)$Variables$common$time) syear time 24 2 @@ -100,8 +100,11 @@ attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) +``` ```r + path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' + obs <- Start(dat = path_obs, var = var, syear = sdate_obs, @@ -110,7 +113,9 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) longitude = values(list(lon.min, lon.max)), longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, - transform_params = list(grid = './griddes_system7c3s.txt', +#TODO: Change to relative path +# transform_params = list(grid = './griddes_system7c3s.txt', + transform_params = list(grid = '/esarchive/scratch/aho/git/startR/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), -- GitLab From e4d467e0b2e21a990db624813b68590df425396e Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 11 Oct 2023 16:40:57 +0200 Subject: [PATCH 04/27] Turn data into s2dv_cube --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 0daf8eb..8aeb5fb 100644 --- 
a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -126,4 +126,12 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) retrieve = TRUE) ``` +# Turn the data into s2dv_cube +```r +library(CSTools) + +hcst <- as.s2dv_cube(hcst) +fcst <- as.s2dv_cube(fcst) +obs <- as.s2dv_cube(obs) +``` -- GitLab From d9dc000e4527dc0d9426c5a9957784bccb25882a Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 11 Oct 2023 16:58:34 +0200 Subject: [PATCH 05/27] Remove as.s2dv_cube --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 8aeb5fb..6b280a4 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -125,13 +125,3 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) longitude = NULL, latitude = NULL), retrieve = TRUE) ``` - -# Turn the data into s2dv_cube - -```r -library(CSTools) - -hcst <- as.s2dv_cube(hcst) -fcst <- as.s2dv_cube(fcst) -obs <- as.s2dv_cube(obs) -``` -- GitLab From d5c5edd156ae06c5aa415877c3cd123d9587298c Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 16 Oct 2023 09:47:15 +0200 Subject: [PATCH 06/27] Add split_multiselected_dims = T in obs call. 
Add checks for dataset consistency --- .../PATC2023/handson_1-data-loading.md | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 6b280a4..69da163 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -108,6 +108,7 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) obs <- Start(dat = path_obs, var = var, syear = sdate_obs, + split_multiselected_dims = TRUE, latitude = values(list(lat.min, lat.max)), latitude_reorder = Sort(), longitude = values(list(lon.min, lon.max)), @@ -125,3 +126,23 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) longitude = NULL, latitude = NULL), retrieve = TRUE) ``` + +## 2. Check if the datasets are consistent + +```r +lat_hcst <- attributes(hcst)$Variables$common$latitude +lon_hcst <- attributes(hcst)$Variables$common$longitude +lat_obs <- attributes(obs)$Variables$common$latitude +lon_obs <- attributes(obs)$Variables$common$longitude + +identical(c(lat_obs), c(lat_hcst)) +[1] TRUE +identical(c(lon_obs), c(lon_hcst)) +[1] TRUE + +time_hcst <- attributes(hcst)$Variables$common$time +time_obs <- attributes(obs)$Variables$common$time + +identical(format(time_hcst, '%Y%m'), format(time_obs, '%Y%m')) +[1] TRUE +``` -- GitLab From d81063e1f1129dba7ba221654109cc0921ccfbf3 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 17 Oct 2023 14:28:07 +0200 Subject: [PATCH 07/27] Suppress message of all unit tests because testthat edition 3 shows the messages. Run unit test on GitLab. 
--- .Rbuildignore | 2 +- tests/testthat/test-AddStep-DimNames.R | 2 ++ tests/testthat/test-Compute-CDORemap.R | 2 ++ tests/testthat/test-Compute-NumChunks.R | 2 ++ tests/testthat/test-Compute-chunk_depend_dim.R | 2 ++ tests/testthat/test-Compute-chunk_split_dim.R | 2 ++ tests/testthat/test-Compute-extra_params.R | 2 ++ tests/testthat/test-Compute-inconsistent_target_dim.R | 2 ++ tests/testthat/test-Compute-irregular_regrid.R | 2 ++ tests/testthat/test-Compute-timedim.R | 2 ++ tests/testthat/test-Compute-transform_all.R | 2 ++ tests/testthat/test-Compute-transform_indices.R | 2 ++ tests/testthat/test-Compute-transform_values.R | 2 ++ tests/testthat/test-Compute-two_data.R | 2 ++ tests/testthat/test-Compute-use_attribute.R | 2 ++ tests/testthat/test-Start-DCPP-across-depends.R | 2 ++ tests/testthat/test-Start-calendar.R | 2 ++ tests/testthat/test-Start-depends_values.R | 2 ++ tests/testthat/test-Start-first_file_missing.R | 2 ++ tests/testthat/test-Start-global-lon-across_meridian.R | 2 ++ tests/testthat/test-Start-implicit_dependency_by_selector.R | 2 ++ tests/testthat/test-Start-implicit_inner_dim.R | 2 ++ tests/testthat/test-Start-indices_list_vector.R | 2 ++ tests/testthat/test-Start-largest_dims_length.R | 2 ++ tests/testthat/test-Start-line_order-consistency.R | 2 ++ tests/testthat/test-Start-metadata_dims.R | 2 ++ tests/testthat/test-Start-metadata_filedim_dependency.R | 2 ++ tests/testthat/test-Start-metadata_reshaping.R | 2 ++ tests/testthat/test-Start-multiple-sdates.R | 2 ++ tests/testthat/test-Start-path_glob_permissive.R | 2 ++ tests/testthat/test-Start-reorder-lat.R | 2 ++ tests/testthat/test-Start-reorder-latCoarse.R | 2 ++ tests/testthat/test-Start-reorder-lon-180to180.R | 2 ++ tests/testthat/test-Start-reorder-lon-transform_-180to180.R | 2 ++ tests/testthat/test-Start-reorder-lon-transform_0to360.R | 2 ++ tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R | 2 ++ tests/testthat/test-Start-reorder-lon0to360.R | 2 ++ 
tests/testthat/test-Start-reorder-lon0to360Coarse.R | 2 ++ tests/testthat/test-Start-reorder-metadata.R | 2 ++ tests/testthat/test-Start-reorder-retrieve.R | 2 ++ tests/testthat/test-Start-reorder_all.R | 2 ++ tests/testthat/test-Start-reorder_indices.R | 2 ++ tests/testthat/test-Start-reshape.R | 2 ++ tests/testthat/test-Start-return_vars_name.R | 2 ++ tests/testthat/test-Start-split-merge.R | 2 ++ tests/testthat/test-Start-time_unit.R | 2 ++ tests/testthat/test-Start-transform-all.R | 2 ++ tests/testthat/test-Start-transform-border.R | 2 ++ tests/testthat/test-Start-transform-lat-Sort-all.R | 2 ++ tests/testthat/test-Start-transform-lat-Sort-indices.R | 2 ++ tests/testthat/test-Start-transform-lat-Sort-values.R | 2 ++ tests/testthat/test-Start-transform-lon-across_meridian.R | 2 ++ tests/testthat/test-Start-transform-metadata.R | 2 ++ tests/testthat/test-Start-transform-three-selectors.R | 2 ++ tests/testthat/test-Start-two_dats.R | 2 ++ tests/testthat/test-Start-values_list_vector.R | 2 ++ 56 files changed, 111 insertions(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index aa7059a..98316cc 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,7 +9,7 @@ ^inst/doc$ ^\.gitlab-ci\.yml$ ## unit tests should be ignored when building the package for CRAN -^tests$ +#^tests$ ^inst/PlotProfiling\.R$ ^.gitlab$ # Suggested by http://r-pkgs.had.co.nz/package.html diff --git a/tests/testthat/test-AddStep-DimNames.R b/tests/testthat/test-AddStep-DimNames.R index 5e1fe9c..e20ecfa 100644 --- a/tests/testthat/test-AddStep-DimNames.R +++ b/tests/testthat/test-AddStep-DimNames.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("Single File - Local execution", { @@ -28,3 +29,4 @@ suppressWarnings( "The target dimensions required by 'step_fun' for the input 1 are not present in the corresponding provided object in 'inputs'.") }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-CDORemap.R 
b/tests/testthat/test-Compute-CDORemap.R index fb31d00..580bf6e 100644 --- a/tests/testthat/test-Compute-CDORemap.R +++ b/tests/testthat/test-Compute-CDORemap.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_3", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -54,3 +55,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-NumChunks.R b/tests/testthat/test-Compute-NumChunks.R index ffce880..d47b0f7 100644 --- a/tests/testthat/test-Compute-NumChunks.R +++ b/tests/testthat/test-Compute-NumChunks.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("Single File - Local execution", { @@ -51,3 +52,4 @@ ignore_attr = TRUE ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-chunk_depend_dim.R b/tests/testthat/test-Compute-chunk_depend_dim.R index 101bfb5..80f407b 100644 --- a/tests/testthat/test-Compute-chunk_depend_dim.R +++ b/tests/testthat/test-Compute-chunk_depend_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests the chunking over depended and depending dimension. # ex1_14 # 1. depending dim is values() @@ -221,3 +222,4 @@ Start(dat = path, ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-chunk_split_dim.R b/tests/testthat/test-Compute-chunk_split_dim.R index 0c1da4a..5e43067 100644 --- a/tests/testthat/test-Compute-chunk_split_dim.R +++ b/tests/testthat/test-Compute-chunk_split_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test is to check chunking over the split dim. It involves # how to arrange the chunks in a correct order even when chunking is happening. 
@@ -224,3 +225,4 @@ c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 46 }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-extra_params.R b/tests/testthat/test-Compute-extra_params.R index f055e96..49c36da 100644 --- a/tests/testthat/test-Compute-extra_params.R +++ b/tests/testthat/test-Compute-extra_params.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_6", { @@ -125,3 +126,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-inconsistent_target_dim.R b/tests/testthat/test-Compute-inconsistent_target_dim.R index 58f96a9..5a816ef 100644 --- a/tests/testthat/test-Compute-inconsistent_target_dim.R +++ b/tests/testthat/test-Compute-inconsistent_target_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # If dataset are more than 1 (e.g., exp and obs), ByChunks() checks if # they have consistent dimensions in favor of Apply() computation. However, # only margin dimensions need to be identical. Target dimensions can have @@ -138,3 +139,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-irregular_regrid.R b/tests/testthat/test-Compute-irregular_regrid.R index 7de1471..ba07d71 100644 --- a/tests/testthat/test-Compute-irregular_regrid.R +++ b/tests/testthat/test-Compute-irregular_regrid.R @@ -1,3 +1,4 @@ +suppressMessages({ library(s2dv) test_that("1. 
ex2_13", { @@ -73,3 +74,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-timedim.R b/tests/testthat/test-Compute-timedim.R index fbc5af0..922edfb 100644 --- a/tests/testthat/test-Compute-timedim.R +++ b/tests/testthat/test-Compute-timedim.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_1", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -54,3 +55,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-transform_all.R b/tests/testthat/test-Compute-transform_all.R index 05d5de6..785070c 100644 --- a/tests/testthat/test-Compute-transform_all.R +++ b/tests/testthat/test-Compute-transform_all.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. Chunk along non-lat/lon dim", { #skip_on_cran() @@ -119,3 +120,4 @@ res4 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-transform_indices.R b/tests/testthat/test-Compute-transform_indices.R index c2d3e35..9c8cc39 100644 --- a/tests/testthat/test-Compute-transform_indices.R +++ b/tests/testthat/test-Compute-transform_indices.R @@ -1,3 +1,4 @@ +suppressMessages({ # Using indinces() to assign lat and lon, and transform the data. # Also test transform + chunk along lat/lon. @@ -376,3 +377,4 @@ as.vector(drop(res4$output1)) ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-transform_values.R b/tests/testthat/test-Compute-transform_values.R index 25a803f..32a544e 100644 --- a/tests/testthat/test-Compute-transform_values.R +++ b/tests/testthat/test-Compute-transform_values.R @@ -1,3 +1,4 @@ +suppressMessages({ # Using values() to assign lat and lon, and transform the data. # Also test transform + chunk along lat/lon. 
@@ -603,3 +604,4 @@ res3_180 #================================================================ }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-two_data.R b/tests/testthat/test-Compute-two_data.R index dfa579a..33d6631 100644 --- a/tests/testthat/test-Compute-two_data.R +++ b/tests/testthat/test-Compute-two_data.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_7", { # exp data @@ -81,3 +82,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Compute-use_attribute.R b/tests/testthat/test-Compute-use_attribute.R index 6f218e6..07ecd13 100644 --- a/tests/testthat/test-Compute-use_attribute.R +++ b/tests/testthat/test-Compute-use_attribute.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("ex2_2", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -53,3 +54,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-DCPP-across-depends.R b/tests/testthat/test-Start-DCPP-across-depends.R index bfe44b1..0f49a38 100644 --- a/tests/testthat/test-Start-DCPP-across-depends.R +++ b/tests/testthat/test-Start-DCPP-across-depends.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("Chunks of DCPP files- Local execution", { path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) @@ -52,3 +53,4 @@ expect_equal(dat[1,1,2,1:12,,], dat_2018_chunk1[1,1,,,]) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 7dfbc2c..43651ab 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. 
360_day, daily, unit = 'days since 1850-01-01'", { path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast//HadGEM3-GC31-MM/', @@ -317,3 +318,4 @@ test_that("8. gregorian, 3hrly, unit = 'days since 1850-1-1'", { }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R index 9cccc2d..ec77c7f 100644 --- a/tests/testthat/test-Start-depends_values.R +++ b/tests/testthat/test-Start-depends_values.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests the case that using values() to define the depended # and depending dimensions. The depending dimension should be a list with # names that are the values of depended dimensions. @@ -80,3 +81,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-first_file_missing.R b/tests/testthat/test-Start-first_file_missing.R index fecbd7c..070ae53 100644 --- a/tests/testthat/test-Start-first_file_missing.R +++ b/tests/testthat/test-Start-first_file_missing.R @@ -1,3 +1,4 @@ +suppressMessages({ # When some of the files are missing, Start() still can retrieve the data and # put NA in those missing positions. However, when the first file is missing, # Start() returned error before because of failing to find metadata. 
The bug is @@ -182,3 +183,4 @@ data <- Start(dat = file, ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-global-lon-across_meridian.R b/tests/testthat/test-Start-global-lon-across_meridian.R index 921c331..429db5c 100644 --- a/tests/testthat/test-Start-global-lon-across_meridian.R +++ b/tests/testthat/test-Start-global-lon-across_meridian.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("first test", { @@ -54,3 +55,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index d493a87..995310a 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -1,3 +1,4 @@ +suppressMessages({ # Similar as usecase ex1_13. # Use a value array as the inner dimension selector to express dependency on a # file dimension. By this means, we don't need to specify the *_across parameter @@ -156,3 +157,4 @@ c(memb = 2, sdate = 3, region = 1) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index 7e0264c..9c46975 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -1,3 +1,4 @@ +suppressMessages({ # The unit test is for the implicit inner dimension. If the inner dimension length is 1, # startR allows it not to be specified in the call. Users can still define it in # 'return_vars'. 
@@ -44,3 +45,4 @@ as.POSIXct('2013-11-15', tz = 'UTC') }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-indices_list_vector.R b/tests/testthat/test-Start-indices_list_vector.R index 2effede..76ec511 100644 --- a/tests/testthat/test-Start-indices_list_vector.R +++ b/tests/testthat/test-Start-indices_list_vector.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests the consistence between list of indices and vector of indices. # 1. transform # 2. no transform @@ -241,3 +242,4 @@ as.vector(exp2) ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-largest_dims_length.R b/tests/testthat/test-Start-largest_dims_length.R index 211c132..3585576 100644 --- a/tests/testthat/test-Start-largest_dims_length.R +++ b/tests/testthat/test-Start-largest_dims_length.R @@ -1,3 +1,4 @@ +suppressMessages({ # When certain inner dim of files is not consistent, the parameter 'largest_dims_length' can # be used to ensure the returned array has the largest length of inner dimensions. @@ -299,3 +300,4 @@ as.vector(data5)[-c(5:24)] ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-line_order-consistency.R b/tests/testthat/test-Start-line_order-consistency.R index 11be109..d7f5095 100644 --- a/tests/testthat/test-Start-line_order-consistency.R +++ b/tests/testthat/test-Start-line_order-consistency.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') variable <- "tas" @@ -144,3 +145,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index 2a2e735..3a68a53 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. 
One data set, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) @@ -983,3 +984,4 @@ dataF <- Start(dataset = path_list, ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index 227383b..aea6f1f 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -1,3 +1,4 @@ +suppressMessages({ # When inner dimension selector is an array with filedim dimension name (e.g., time = [sdate = 2, time = 4], # or *_across is used, the inner dim has dependency on file dim. In this case, return_vars must # specify this relationship, i.e., return_vars = list(time = 'sdate'). @@ -197,3 +198,4 @@ expect_equal( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index b143268..1346dc6 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -1,3 +1,4 @@ +suppressMessages({ # When data is reshaping (e.g., time_across = 'sdate'), the corresponding attribute should be reshaped too. test_that("1. 
time across fyear, fyear depends on sdate", { @@ -805,3 +806,4 @@ dates }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-multiple-sdates.R b/tests/testthat/test-Start-multiple-sdates.R index e16f2bf..49cc003 100644 --- a/tests/testthat/test-Start-multiple-sdates.R +++ b/tests/testthat/test-Start-multiple-sdates.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') # When certain values in one observation file are required more than once, @@ -163,3 +164,4 @@ obs <- Start(dat = obs_path, 0 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-path_glob_permissive.R b/tests/testthat/test-Start-path_glob_permissive.R index 75f28d4..2ff6278 100644 --- a/tests/testthat/test-Start-path_glob_permissive.R +++ b/tests/testthat/test-Start-path_glob_permissive.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. expid/member/version", { years <- paste0(c(1960:1961), '01-', c(1960:1961), '12') @@ -159,3 +160,4 @@ list("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_ }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lat.R b/tests/testthat/test-Start-reorder-lat.R index c87792e..3487484 100644 --- a/tests/testthat/test-Start-reorder-lat.R +++ b/tests/testthat/test-Start-reorder-lat.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -1053,3 +1054,4 @@ as.vector(attr(exp1_3, 'Variables')$common$latitude) }) ############################################## +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-latCoarse.R b/tests/testthat/test-Start-reorder-latCoarse.R index 34a766f..1cb3b8d 100644 --- a/tests/testthat/test-Start-reorder-latCoarse.R +++ b/tests/testthat/test-Start-reorder-latCoarse.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 
selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -887,3 +888,4 @@ res <- Start(dat = list(list(path=path_exp)), # #}) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-180to180.R b/tests/testthat/test-Start-reorder-lon-180to180.R index 0f71f0a..5dfced4 100644 --- a/tests/testthat/test-Start-reorder-lon-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-180to180.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -817,3 +818,4 @@ res <- Start(dat = list(list(path=path_exp)), tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R index 5e7701a..c36b29e 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -957,3 +958,4 @@ res <- Start(dat = list(list(path=path_exp)), 21 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360.R b/tests/testthat/test-Start-reorder-lon-transform_0to360.R index 86ad5e7..a47b707 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -1040,3 +1041,4 @@ res <- Start(dat = list(list(path=path_exp)), 21 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R index 
c18d34a..4185cca 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -1045,3 +1046,4 @@ res <- Start(dat = list(list(path=path_exp)), 21 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon0to360.R b/tests/testthat/test-Start-reorder-lon0to360.R index 1e946d9..8c717b3 100644 --- a/tests/testthat/test-Start-reorder-lon0to360.R +++ b/tests/testthat/test-Start-reorder-lon0to360.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -658,3 +659,4 @@ res <- Start(dat = list(list(path=path_exp)), tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-lon0to360Coarse.R b/tests/testthat/test-Start-reorder-lon0to360Coarse.R index 71361d9..3de49a6 100644 --- a/tests/testthat/test-Start-reorder-lon0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon0to360Coarse.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -658,3 +659,4 @@ res <- Start(dat = list(list(path=path_exp)), tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-metadata.R b/tests/testthat/test-Start-reorder-metadata.R index ea727e5..501873d 100644 --- a/tests/testthat/test-Start-reorder-metadata.R +++ b/tests/testthat/test-Start-reorder-metadata.R @@ -1,3 +1,4 @@ +suppressMessages({ # Ensure returns_vars = NULL or 'dat' have the same metadata test_that("1. 
Sort() and CircularSort(0, 360)", { @@ -277,3 +278,4 @@ res_dat <- Start(dat = list(list(path = path_exp)), ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder-retrieve.R b/tests/testthat/test-Start-reorder-retrieve.R index 25efcfc..3b8016f 100644 --- a/tests/testthat/test-Start-reorder-retrieve.R +++ b/tests/testthat/test-Start-reorder-retrieve.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') ############################################## @@ -155,3 +156,4 @@ res2 <- Start(dat = path_exp, }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder_all.R b/tests/testthat/test-Start-reorder_all.R index 87a4416..fadee52 100644 --- a/tests/testthat/test-Start-reorder_all.R +++ b/tests/testthat/test-Start-reorder_all.R @@ -1,3 +1,4 @@ +suppressMessages({ # No transform, test reorder function Sort() and CircularSort() with selector 'all'. #--------------------------------------------------------------- @@ -143,3 +144,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reorder_indices.R b/tests/testthat/test-Start-reorder_indices.R index 59d00d4..5f50c49 100644 --- a/tests/testthat/test-Start-reorder_indices.R +++ b/tests/testthat/test-Start-reorder_indices.R @@ -1,3 +1,4 @@ +suppressMessages({ # No transform, test reorder function Sort() and CircularSort() with selector indices(). 
#--------------------------------------------------------------- @@ -143,3 +144,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-reshape.R b/tests/testthat/test-Start-reshape.R index 480a3bc..af8e89f 100644 --- a/tests/testthat/test-Start-reshape.R +++ b/tests/testthat/test-Start-reshape.R @@ -1,3 +1,4 @@ +suppressMessages({ # This one is more comprehensive than test-Start-split-merge.R path_exp <- '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc' @@ -515,3 +516,4 @@ easy_array[31:61 ,1] ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-return_vars_name.R b/tests/testthat/test-Start-return_vars_name.R index e3ff876..384bd63 100644 --- a/tests/testthat/test-Start-return_vars_name.R +++ b/tests/testthat/test-Start-return_vars_name.R @@ -1,3 +1,4 @@ +suppressMessages({ # The name of return_vars should be one of the inner dimension names. The synonims can # be used but will be changed back to the inner dim names. @@ -237,3 +238,4 @@ c(-19.5, -14.5) ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index 699c01c..8e4d2e1 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -1,3 +1,4 @@ +suppressMessages({ var_name <- 'tas' path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc' @@ -188,3 +189,4 @@ as.POSIXct('2013-11-15', tz = 'UTC') }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-time_unit.R b/tests/testthat/test-Start-time_unit.R index 0c499d3..f15b3bd 100644 --- a/tests/testthat/test-Start-time_unit.R +++ b/tests/testthat/test-Start-time_unit.R @@ -1,3 +1,4 @@ +suppressMessages({ test_that("1. The data has units like time", { @@ -88,3 +89,4 @@ as.POSIXct(c("2018-04-08", "2018-04-15", "2018-04-22 UTC", "2018-04-29 UTC"), tz #test_that("3. 
Time dimension is implicit", { # See test-Start-implicit_inner_dim.R #}) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-all.R b/tests/testthat/test-Start-transform-all.R index a8290a6..e21f6c1 100644 --- a/tests/testthat/test-Start-transform-all.R +++ b/tests/testthat/test-Start-transform-all.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses 'all' to do the transformation and tests the output grid. # The results should be identical and consistent with cdo result (with precision difference). # The test contains three calls with different target grids: @@ -141,3 +142,4 @@ test_that("2. test path 2", { +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-border.R b/tests/testthat/test-Start-transform-border.R index 9b3cc6a..34a33a2 100644 --- a/tests/testthat/test-Start-transform-border.R +++ b/tests/testthat/test-Start-transform-border.R @@ -1,3 +1,4 @@ +suppressMessages({ ############################################## # This unit test checks different border situations: normal regional that doesn't touch the borders, @@ -712,3 +713,4 @@ expect_equal( # [9,] 299.4723 299.9515 299.4566 299.0601 299.5071 # [10,] 299.5299 299.7573 299.0317 299.1104 300.0644 ############################################## +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lat-Sort-all.R b/tests/testthat/test-Start-transform-lat-Sort-all.R index d7d895e..2aa8e39 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-all.R +++ b/tests/testthat/test-Start-transform-lat-Sort-all.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses 'all' to do the transformation and tests "lat_reorder". # The results should be identical and consistent with cdo result (with precision difference). # "lon_reorder = CircularSort(0, 360)" are used in all the tests. 
@@ -124,3 +125,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lat-Sort-indices.R b/tests/testthat/test-Start-transform-lat-Sort-indices.R index 16daa79..8d7312b 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-indices.R +++ b/tests/testthat/test-Start-transform-lat-Sort-indices.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses indices() to do the transformation and tests "lat_reorder". # The results should be identical and consistent with cdo result (with precision difference). # The lat/lon range is all the grids here. @@ -230,3 +231,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lat-Sort-values.R b/tests/testthat/test-Start-transform-lat-Sort-values.R index b70b637..f69d551 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-values.R +++ b/tests/testthat/test-Start-transform-lat-Sort-values.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses values() to do the transformation and tests "lat_reorder". # The results should be identical and consistent with cdo result (with precision difference). # The lon range is all the grids here. 
@@ -430,3 +431,4 @@ tolerance = 0.0001 }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-lon-across_meridian.R b/tests/testthat/test-Start-transform-lon-across_meridian.R index d07388e..a8df42b 100644 --- a/tests/testthat/test-Start-transform-lon-across_meridian.R +++ b/tests/testthat/test-Start-transform-lon-across_meridian.R @@ -1,3 +1,4 @@ +suppressMessages({ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') test_that("first test", { @@ -80,3 +81,4 @@ suppressWarnings( ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-metadata.R b/tests/testthat/test-Start-transform-metadata.R index 227f09d..6010b6f 100644 --- a/tests/testthat/test-Start-transform-metadata.R +++ b/tests/testthat/test-Start-transform-metadata.R @@ -1,3 +1,4 @@ +suppressMessages({ # Ensure returns_vars = NULL or 'dat' have the same metadata test_that("1. Sort() and CircularSort(0, 360)", { @@ -275,3 +276,4 @@ res_dat <- Start(dat = list(list(path = path_exp)), ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-transform-three-selectors.R b/tests/testthat/test-Start-transform-three-selectors.R index 95e7c2b..3fe4824 100644 --- a/tests/testthat/test-Start-transform-three-selectors.R +++ b/tests/testthat/test-Start-transform-three-selectors.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test uses three different selector forms: indices(), values(), and 'all', to do # the transformation. "lat_reorder" is also tested. # Their results should be all identical and consistent with cdo result (with precision difference). @@ -194,3 +195,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index 46b57d8..640b478 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -1,3 +1,4 @@ +suppressMessages({ # ex1_8 test_that("1. 
ex1_8, case 1", { @@ -99,3 +100,4 @@ tolerance = 0.0001 ) }) +}) #suppressMessages diff --git a/tests/testthat/test-Start-values_list_vector.R b/tests/testthat/test-Start-values_list_vector.R index 1a6288b..2b5cda0 100644 --- a/tests/testthat/test-Start-values_list_vector.R +++ b/tests/testthat/test-Start-values_list_vector.R @@ -1,3 +1,4 @@ +suppressMessages({ # This unit test tests the consistence between list of values and vector of values. # 1. transform # 2. no transform @@ -246,3 +247,4 @@ as.vector(exp2) ) }) +}) #suppressMessages -- GitLab From 5c06ad9165199f1b6a2e8ecf1dada1f6f7c2e0d6 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 17 Oct 2023 18:51:07 +0200 Subject: [PATCH 08/27] Correct dat name of work_pieces (internal usage only); Add message when multiple datasets are requested and return_vars has common dimensions --- R/Start.R | 12 ++++++++++++ R/zzz.R | 7 ++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/R/Start.R b/R/Start.R index b0ad40d..89f87e9 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1777,6 +1777,18 @@ Start <- function(..., # dim = indices/selectors, } } } + + # Return info about return_vars when dat > 1 + if (length(dat) > 1 & length(common_return_vars) > 0) { + .message("\n", "[ATTENTION]", + paste0("According to parameter 'return_vars', the inner dimensions: ", + paste(names(common_return_vars), collapse = ', '), + ", are common among all the datasets. 
Please be sure that ", + "this is expected to avoid potential wrong results, and ", + "verify the outputs carefully."), + "\n", indent = 1) + } + #//////////////////////////////////////////// # This part was above where return_vars is seperated into return_vars and common_return_vars diff --git a/R/zzz.R b/R/zzz.R index 1e56e29..f098a3b 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -980,7 +980,12 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, if (x %in% names(depending_file_dims)) { vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]] } - selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]] + if (x != found_pattern_dim) { + selectors[[x]][[vector_to_pick]][file_to_load_indices[x]] + } else { + # dat_dim only has one value in each work_piece + selectors[[x]][[vector_to_pick]] + } }) names(file_selectors) <- file_dims work_piece[['file_selectors']] <- file_selectors -- GitLab From 0d93bf916754769539f149d330c50b1ef3ecbc87 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 18 Oct 2023 12:57:45 +0200 Subject: [PATCH 09/27] Don't create pipeline when it is draft --- .gitlab-ci.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c7deb1a..db7b631 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,13 @@ stages: - build + +workflow: + rules: + - if: $CI_COMMIT_TITLE =~ /-draft$/ + when: never +# - if: $CI_PIPELINE_SOURCE == "merge_request_event" +# - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH + build: stage: build script: -- GitLab From 64eed86f53d49b141a6e4894cb15323e9cfc1393 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 19 Oct 2023 14:50:30 +0200 Subject: [PATCH 10/27] Update path, add explanation --- .../PATC2023/handson_1-data-loading.md | 73 +++++++++++-------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 69da163..8a986f8 100644 --- 
a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -1,7 +1,7 @@ # Hands-on 1: Load data by startR ## Goal -Use startR to load the data and learn how to adjust data while loading data. +Use startR to load the data and learn how to adjust data structure while loading data. ## 0. Load required packages @@ -11,32 +11,28 @@ rm(list = ls()) library(startR) ``` -## 1. Load data from data repository (esarchive/) - -#TODO: update it **Data description**: -This sample data set contains a small cutout of gridded seasonal precipitation -forecast data from the Copernicus Climate Change ECMWF-System 5 forecast system. -Specifically, for the 'prlr' (precipitation) variable, for the first 6 forecast -ensemble members, daily values, for all 31 days in March following the forecast -starting dates in November of years 2010 to 2012, for a small 4x4 pixel cutout in -a region in the North-Western Italian Alps (44N-47N, 6E-9E). The data resolution is 1 degree. +We will use two datasets in the hands-on. The experiment data are Meteo-France System 7 from ECMWF, and the observation ones are ERA5 from ECMWF. The data have been first processed into monthly mean data and stored in our data archive (esarchive). + +We're going to analyze the near-surface temperature (short name: tas) for seasonal forecast. We will focus on the Europe region (roughly 20W-40E, 20N-80N). The hindcast years are 1993 to 2016, and the forecast year is 2020. The initial month is November. To speed up the practice, we will only load the first two forecast time steps, but all the ensemble members are used to give a less biased result. + +## 1. Load experimental data from data repository + +### 1.1 Hindcast data -Use the above information to define the variable, start dates, longitude and latitude. +Understand the following script, run it, and check the result. 
```r -#TODO: update the path # Use this one if on workstation or nord3 (have access to /esarchive) path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" - - # Use this one if on Marenostrum4 and log in with PATC2021 account - path_exp <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', - 'exp/ecmwf/system5c3s/daily_mean/', - '$var$_s0-24h/$var$_$sdate$.nc') + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + path_exp <- paste0(prefix, path_exp) + #---------------------------------------------------------------------- var <- 'tas' sdate_hcst <- paste0(1993:2016, '1101') - sdate_fcst <- '20201101' lon.min <- -20 lon.max <- 40 lat.min <- 20 @@ -63,7 +59,15 @@ Use Start() to load the data. retrieve = TRUE) ``` +### 1.2 Forecast data + +The forecast data are from the same dataset as hindcast, but with different years. +Therefore, they share the same data path and strucutre. +Try to take the Start() call above and modify it to load the forecast data (hint: the start year is 2020.) + ```r + sdate_fcst <- '20201101' + fcst <- Start(dat = path_exp, var = var, syear = sdate_fcst, @@ -81,29 +85,42 @@ Use Start() to load the data. retrieve = TRUE) ``` +### 1.3 Observational data -# Create an date array from hcst dates -Check the time attributes of hcst: Is it correct? +We need the corresponding observational data to compare with the experimental data. +So, the observational data should be loaded as the same dimensions as the experimental ones. +To achieve this, we can use the metadata of the experimental data as the selectors for observational data. But be careful with the usage! We must verify the correctness and applicability first. -To load the corresponding obs data, we can use these time values as the selectors in obs Start() call. 
+**Get the time values from hindcast data** +Check the time attributes of `hcst`: Is it correct? ```r dim(attributes(hcst)$Variables$common$time) -syear time - 24 2 +#syear time +# 24 2 str(attributes(hcst)$Variables$common$time) - POSIXct[1:48], format: "1993-12-01" "1994-12-01" "1995-12-01" "1996-12-01" "1997-12-01" ... +# POSIXct[1:48], format: "1993-12-01" "1994-12-01" "1995-12-01" "1996-12-01" "1997-12-01" ... +``` -# Adjust the day to the correct month -attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) +The values are not correct since they should start from November, not December. +But the array has the correct dimensions and we can take advantage of it. +What we're going to do here is to tune the values one month ahead so we can have the correct dates. +(ps., `lubridate` is a useful R package for time value manipulation!) +```r +attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) ``` ```r path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + path_obs <- paste0(prefix, path_obs) + #---------------------------------------------------------------------- obs <- Start(dat = path_obs, var = var, @@ -114,9 +131,7 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) longitude = values(list(lon.min, lon.max)), longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, -#TODO: Change to relative path -# transform_params = list(grid = './griddes_system7c3s.txt', - transform_params = list(grid = 
'/esarchive/scratch/aho/git/startR/inst/doc/tutorial/PATC2023/griddes_system7c3s.txt', + transform_params = list(grid = '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/griddes_system7c3s.txt', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), -- GitLab From 822f8ff38654bc2b06268f8581c3e8ae869d2001 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 19 Oct 2023 16:31:30 +0200 Subject: [PATCH 11/27] Change grid to r360x181 --- .../PATC2023/handson_1-data-loading.md | 116 ++++++++++++++---- 1 file changed, 89 insertions(+), 27 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 8a986f8..eb9a310 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -8,7 +8,7 @@ Use startR to load the data and learn how to adjust data structure while loading ```r # Clean the session rm(list = ls()) - +# Load package library(startR) ``` **Data description**: @@ -18,7 +18,7 @@ We're going to analyze the near-surface temperature (short name: tas) for season ## 1. Load experimental data from data repository -### 1.1 Hindcast data +### 1.a Hindcast data Understand the following script, run it, and check the result. @@ -31,26 +31,20 @@ Understand the following script, run it, and check the result. path_exp <- paste0(prefix, path_exp) #---------------------------------------------------------------------- - var <- 'tas' sdate_hcst <- paste0(1993:2016, '1101') - lon.min <- -20 - lon.max <- 40 - lat.min <- 20 - lat.max <- 80 -``` - -Use Start() to load the data. 
-```r hcst <- Start(dat = path_exp, - var = var, + var = 'tas', syear = sdate_hcst, ensemble = 'all', time = 1:2, - latitude = values(list(lat.min, lat.max)), + latitude = values(list(20, 80)), latitude_reorder = Sort(), - longitude = values(list(lon.min, lon.max)), + longitude = values(list(-20, 40)), longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), @@ -58,8 +52,34 @@ Use Start() to load the data. longitude = NULL, latitude = NULL), retrieve = TRUE) ``` + +**Questions** + +(1) What are the dimensions of hcst? Use `dim()` to check. + +```r +dim(hcst) +# dat var syear ensemble time latitude longitude +# 1 1 24 25 2 61 61 +``` -### 1.2 Forecast data +(2) What is the structure of hcst? Use `str()` to check. +```r +str(hcst, max.level = 1) +str(hcst, max.level = 2) +str(hcst, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(hcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. +```r +metadata_attr <- attr(hcst, 'Variables') +str(metadata_attr) +metadata_attr$common$time +metadata_attr$common$latitude +metadata_attr$common$longitude +``` + +### 1.b Forecast data The forecast data are from the same dataset as hindcast, but with different years. Therefore, they share the same data path and strucutre. 
@@ -69,14 +89,17 @@ Try to take the Start() call above and modify it to load the forecast data (hint sdate_fcst <- '20201101' fcst <- Start(dat = path_exp, - var = var, + var = 'tas', syear = sdate_fcst, ensemble = 'all', time = 1:2, - latitude = values(list(lat.min, lat.max)), + latitude = values(list(20, 80)), latitude_reorder = Sort(), - longitude = values(list(lon.min, lon.max)), + longitude = values(list(-20, 40)), longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), @@ -85,13 +108,42 @@ Try to take the Start() call above and modify it to load the forecast data (hint retrieve = TRUE) ``` -### 1.3 Observational data +**Questions** + +Check the forecast data by the same methods for hindcast data. + +(1) What are the dimensions of fcst? Use `dim()` to check. + +```r +dim(fcst) +# dat var syear ensemble time latitude longitude +# 1 1 1 51 2 61 61 +``` + +(2) What is the structure of hcst? Use `str()` to check. +```r +str(fcst, max.level = 1) +str(fcst, max.level = 2) +str(fcst, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(fcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. +```r +metadata_attr <- attr(fcst, 'Variables') +str(metadata_attr) +metadata_attr$common$time +metadata_attr$common$latitude +metadata_attr$common$longitude +``` + +### 1.c Observational data We need the corresponding observational data to compare with the experimental data. So, the observational data should be loaded as the same dimensions as the experimental ones. To achieve this, we can use the metadata of the experimental data as the selectors for observational data. But be careful with the usage! 
We must verify the correctness and applicability first. **Get the time values from hindcast data** + Check the time attributes of `hcst`: Is it correct? ```r @@ -106,14 +158,19 @@ str(attributes(hcst)$Variables$common$time) The values are not correct since they should start from November, not December. But the array has the correct dimensions and we can take advantage of it. What we're going to do here is to tune the values one month ahead so we can have the correct dates. -(ps., `lubridate` is a useful R package for time value manipulation!) +(p.s. `lubridate` is a useful R package for time value manipulation!) ```r attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) +print(sdate_obs) ``` +Now we have the correct date values, we can use them as the selectors of `syear` in the Start() call. In addition, we will use the reshaping feature in startR to get the desired dimensions. 
+ +#TODO: Explain split_multiselected_dims + ```r path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' #---------------------------------------------------------------------- @@ -123,16 +180,15 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) #---------------------------------------------------------------------- obs <- Start(dat = path_obs, - var = var, + var = 'tas', syear = sdate_obs, split_multiselected_dims = TRUE, - latitude = values(list(lat.min, lat.max)), + latitude = values(list(20, 80)), latitude_reorder = Sort(), - longitude = values(list(lon.min, lon.max)), + longitude = values(list(-20, 40)), longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, - transform_params = list(grid = '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/griddes_system7c3s.txt', - method = 'bilinear'), + transform_params = list(grid = 'r360x181', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), latitude = c('lat', 'latitude'), @@ -144,15 +200,21 @@ sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) ## 2. 
Check if the datasets are consistent +```r +dim(hcst) +dim(fcst) +dim(obs) +``` + ```r lat_hcst <- attributes(hcst)$Variables$common$latitude lon_hcst <- attributes(hcst)$Variables$common$longitude lat_obs <- attributes(obs)$Variables$common$latitude lon_obs <- attributes(obs)$Variables$common$longitude -identical(c(lat_obs), c(lat_hcst)) +identical(lat_obs, lat_hcst) [1] TRUE -identical(c(lon_obs), c(lon_hcst)) +identical(lon_obs, lon_hcst) [1] TRUE time_hcst <- attributes(hcst)$Variables$common$time -- GitLab From 8f18e513f5976e06bfc3433b17f4d12c64f7f4cb Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 20 Oct 2023 14:50:01 +0200 Subject: [PATCH 12/27] Finish the questions --- .../PATC2023/handson_1-data-loading.md | 136 +++++---- .../PATC2023/handson_1-data-loading_ans.md | 272 ++++++++++++++++++ 2 files changed, 352 insertions(+), 56 deletions(-) create mode 100644 inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index eb9a310..bcd171c 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -11,6 +11,7 @@ rm(list = ls()) # Load package library(startR) ``` + **Data description**: We will use two datasets in the hands-on. The experiment data are Meteo-France System 7 from ECMWF, and the observation ones are ERA5 from ECMWF. The data have been first processed into monthly mean data and stored in our data archive (esarchive). @@ -55,28 +56,26 @@ Understand the following script, run it, and check the result. **Questions** -(1) What are the dimensions of hcst? Use `dim()` to check. +(1) What are the dimensions of `hcst`? Use `dim()` to check. ```r -dim(hcst) -# dat var syear ensemble time latitude longitude -# 1 1 24 25 2 61 61 +dim(____) ``` -(2) What is the structure of hcst? Use `str()` to check. +(2) What is the structure of `hcst`? Use `str()` to check. 
```r -str(hcst, max.level = 1) -str(hcst, max.level = 2) -str(hcst, max.level = 3) +str(hcst, max.level = _____) # try 1, 2, 3 ``` (3) The metadata variables are stored in `attr(hcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. ```r metadata_attr <- attr(hcst, 'Variables') str(metadata_attr) -metadata_attr$common$time -metadata_attr$common$latitude -metadata_attr$common$longitude +names(metadata_attr$common) + +hcst_time <- metadata_attr$common$time +hcst_lat <- __________ +hcst_lon <- __________ ``` ### 1.b Forecast data @@ -86,24 +85,24 @@ Therefore, they share the same data path and strucutre. Try to take the Start() call above and modify it to load the forecast data (hint: the start year is 2020.) ```r - sdate_fcst <- '20201101' + sdate_fcst <- ____________ fcst <- Start(dat = path_exp, - var = 'tas', + var = _____, syear = sdate_fcst, ensemble = 'all', - time = 1:2, - latitude = values(list(20, 80)), + time = _____, + latitude = values(list(____, ____)), latitude_reorder = Sort(), - longitude = values(list(-20, 40)), + longitude = values(list(____, ____)), longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, - transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_params = list(grid = _____, method = 'bilinear'), transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), - return_vars = list(time = 'syear', + return_vars = list(time = _____, longitude = NULL, latitude = NULL), retrieve = TRUE) ``` @@ -112,28 +111,26 @@ Try to take the Start() call above and modify it to load the forecast data (hint Check the forecast data by the same methods for hindcast data. -(1) What are the dimensions of fcst? Use `dim()` to check. +(1) What are the dimensions of `fcst`? Use `dim()` to check. 
```r -dim(fcst) -# dat var syear ensemble time latitude longitude -# 1 1 1 51 2 61 61 +dim(____) ``` -(2) What is the structure of hcst? Use `str()` to check. +(2) What is the structure of `fcst`? Use `str()` to check. ```r -str(fcst, max.level = 1) -str(fcst, max.level = 2) -str(fcst, max.level = 3) +str(fcst, max.level = _____) # try 1, 2, 3 ``` (3) The metadata variables are stored in `attr(fcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. ```r -metadata_attr <- attr(fcst, 'Variables') +metadata_attr <- attr(_____, 'Variables') str(metadata_attr) -metadata_attr$common$time -metadata_attr$common$latitude -metadata_attr$common$longitude +names(metadata_attr$common) + +fcst_time <- __________ +fcst_lat <- __________ +fcst_lon <- __________ ``` ### 1.c Observational data @@ -148,11 +145,7 @@ Check the time attributes of `hcst`: Is it correct? ```r dim(attributes(hcst)$Variables$common$time) -#syear time -# 24 2 - str(attributes(hcst)$Variables$common$time) -# POSIXct[1:48], format: "1993-12-01" "1994-12-01" "1995-12-01" "1996-12-01" "1997-12-01" ... ``` The values are not correct since they should start from November, not December. @@ -169,7 +162,11 @@ print(sdate_obs) Now we have the correct date values, we can use them as the selectors of `syear` in the Start() call. In addition, we will use the reshaping feature in startR to get the desired dimensions. -#TODO: Explain split_multiselected_dims +If the selector is an array, the parameter `split_multiselected_dims` of Start() splits the array by dimensions and we will get those dimensions in the output. +For example, we will use `sdate_obs` as the selector of "syear" dimension below. +`sdate_obs` has two dimensions, "syear" and "time"; +so, by `split_multiselected_dims`, the output `obs` will have these two dimensions, +even though "time" is not explicitly specified in the Start() call.
```r path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' @@ -180,46 +177,73 @@ Now we have the correct date values, we can use them as the selectors of `syear` #---------------------------------------------------------------------- obs <- Start(dat = path_obs, - var = 'tas', + var = _____, syear = sdate_obs, split_multiselected_dims = TRUE, - latitude = values(list(20, 80)), + latitude = values(list(_____, _____)), latitude_reorder = Sort(), - longitude = values(list(-20, 40)), + longitude = values(list(_____, _____)), longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, - transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_params = list(grid = ______, method = 'bilinear'), transform_vars = c('latitude', 'longitude'), synonims = list(syear = c('syear', 'sdate'), latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), - return_vars = list(time = 'syear', + return_vars = list(time = ______, longitude = NULL, latitude = NULL), retrieve = TRUE) ``` -## 2. Check if the datasets are consistent +**Questions** + +Check the obsercational data by the same methods above. + +(1) What are the dimensions of `obs`? Use `dim()` to check. ```r -dim(hcst) -dim(fcst) -dim(obs) +dim(____) ``` + +(2) What is the structure of `obs`? Use `str()` to check. +```r +str(obs, max.level = ____) # try 1, 2, 3 +``` + +(3) The metadata variables are stored in `attr(obs, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. +```r +metadata_attr <- attr(____, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +obs_time <- __________ +obs_lat <- __________ +obs_lon <- __________ +``` + +## 2. Check if the datasets are consistent + +Wrong data, wrong everything afterward. It is important to examine the data and metadata after we load them. + +(1) Compare the dimensions of the three data by `dim()`. 
```r -lat_hcst <- attributes(hcst)$Variables$common$latitude -lon_hcst <- attributes(hcst)$Variables$common$longitude -lat_obs <- attributes(obs)$Variables$common$latitude -lon_obs <- attributes(obs)$Variables$common$longitude -identical(lat_obs, lat_hcst) -[1] TRUE -identical(lon_obs, lon_hcst) -[1] TRUE +``` +(2) Check the summary of the data by `summary()`. +```r +summary(hcst) +summary(fcst) +summary(obs) +``` -time_hcst <- attributes(hcst)$Variables$common$time -time_obs <- attributes(obs)$Variables$common$time +(3) Compare metadata. We have saved the latitude, longitude, and time attributes above after loading each data. +Use `identical()` or `all.equal()` to check if the values are consistent. +```r +# lat and lon +identical(____, ____) +all.equal(____, ____) -identical(format(time_hcst, '%Y%m'), format(time_obs, '%Y%m')) -[1] TRUE +# time: only compare year and month +identical(format(hcst_time, '%Y%m'), format(obs_time, '%Y%m')) ``` diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md new file mode 100644 index 0000000..635be93 --- /dev/null +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md @@ -0,0 +1,272 @@ +# Hands-on 1: Load data by startR + +## Goal +Use startR to load the data and learn how to adjust data structure while loading data. + +## 0. Load required packages + +```r +# Clean the session +rm(list = ls()) +# Load package +library(startR) +``` + +**Data description**: +We will use two datasets in the hands-on. The experiment data are Meteo-France System 7 from ECMWF, and the observation ones are ERA5 from ECMWF. The data have been first processed into monthly mean data and stored in our data archive (esarchive). + +We're going to analyze the near-surface temperature (short name: tas) for seasonal forecast. We will focus on the Europe region (roughly 20W-40E, 20N-80N). The hindcast years are 1993 to 2016, and the forecast year is 2020. 
The initial month is November. To speed up the practice, we will only load the first two forecast time steps, but all the ensemble members are used to give a less biased result. + +## 1. Load experimental data from data repository + +### 1.a Hindcast data + +Understand the following script, run it, and check the result. + +```r + # Use this one if on workstation or nord3 (have access to /esarchive) + path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + path_exp <- paste0(prefix, path_exp) + #---------------------------------------------------------------------- + + sdate_hcst <- paste0(1993:2016, '1101') + + hcst <- Start(dat = path_exp, + var = 'tas', + syear = sdate_hcst, + ensemble = 'all', + time = 1:2, + latitude = values(list(20, 80)), + latitude_reorder = Sort(), + longitude = values(list(-20, 40)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +(1) What are the dimensions of `hcst`? Use `dim()` to check. + +```r +dim(hcst) +# dat var syear ensemble time latitude longitude +# 1 1 24 25 2 61 61 +``` + +(2) What is the structure of `hcst`? Use `str()` to check. +```r +str(hcst, max.level = 1) +str(hcst, max.level = 2) +str(hcst, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(hcst, 'Variables')`. What variables do we have? 
Use `str()` to check the structure first, then try to access the variable values. +```r +metadata_attr <- attr(hcst, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +hcst_time <- metadata_attr$common$time +hcst_lat <- metadata_attr$common$latitude +hcst_lon <- metadata_attr$common$longitude +``` + +### 1.b Forecast data + +The forecast data are from the same dataset as hindcast, but with different years. +Therefore, they share the same data path and strucutre. +Try to take the Start() call above and modify it to load the forecast data (hint: the start year is 2020.) + +```r + sdate_fcst <- '20201101' + + fcst <- Start(dat = path_exp, + var = 'tas', + syear = sdate_fcst, + ensemble = 'all', + time = 1:2, + latitude = values(list(20, 80)), + latitude_reorder = Sort(), + longitude = values(list(-20, 40)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +Check the forecast data by the same methods for hindcast data. + +(1) What are the dimensions of `fcst`? Use `dim()` to check. + +```r +dim(fcst) +# dat var syear ensemble time latitude longitude +# 1 1 1 51 2 61 61 +``` + +(2) What is the structure of `fcst`? Use `str()` to check. +```r +str(fcst, max.level = 1) +str(fcst, max.level = 2) +str(fcst, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(fcst, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. 
+```r +metadata_attr <- attr(fcst, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +fcst_time <- metadata_attr$common$time +fcst_lat <- metadata_attr$common$latitude +fcst_lon <- metadata_attr$common$longitude +``` + +### 1.c Observational data + +We need the corresponding observational data to compare with the experimental data. +So, the observational data should be loaded as the same dimensions as the experimental ones. +To achieve this, we can use the metadata of the experimental data as the selectors for observational data. But be careful with the usage! We must verify the correctness and applicability first. + +**Get the time values from hindcast data** + +Check the time attributes of `hcst`: Is it correct? + +```r +dim(attributes(hcst)$Variables$common$time) +#syear time +# 24 2 + +str(attributes(hcst)$Variables$common$time) +# POSIXct[1:48], format: "1993-12-01" "1994-12-01" "1995-12-01" "1996-12-01" "1997-12-01" ... +``` + +The values are not correct since they should start from November, not December. +But the array has the correct dimensions and we can take advantage of it. +What we're going to do here is to tune the values one month ahead so we can have the correct dates. +(p.s. `lubridate` is a useful R package for time value manipulation!) + +```r +attributes(hcst)$Variables$common$time <- attributes(hcst)$Variables$common$time - lubridate::days(1) +date_string <- format(attributes(hcst)$Variables$common$time, '%Y%m') +sdate_obs <- array(date_string, dim = c(syear = 24, time = 2)) +print(sdate_obs) +``` + +Now we have the correct date values, we can use them as the selectors of `syear` in the Start() call. In addition, we will use the reshaping feature in startR to get the desired dimensions. + +If the selector is an array, the parameter `split_multiselected_dims` of Start() splits the array by dimensions and we will get those dimensions in the output. +For example, we will use `sdate_obs` as the selector of "syear" dimension below. 
+`sdate_obs` has two dimensions, "syear" and "time"; +so, by `split_multiselected_dims`, the output `obs` will have these two dimensions, +even "time" is not explicitly specified in the Start() call. + +```r + path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' + #---------------------------------------------------------------------- + # Run these two lines if you're on Marenostrum4 and log in with training account + prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + path_obs <- paste0(prefix, path_obs) + #---------------------------------------------------------------------- + + obs <- Start(dat = path_obs, + var = 'tas', + syear = sdate_obs, + split_multiselected_dims = TRUE, + latitude = values(list(20, 80)), + latitude_reorder = Sort(), + longitude = values(list(-20, 40)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(syear = c('syear', 'sdate'), + latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'syear', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + +**Questions** + +Check the obsercational data by the same methods above. + +(1) What are the dimensions of `obs`? Use `dim()` to check. + +```r +dim(obs) +# dat var syear time latitude longitude +# 1 1 24 2 61 61 +``` + +(2) What is the structure of `obs`? Use `str()` to check. +```r +str(obs, max.level = 1) +str(obs, max.level = 2) +str(obs, max.level = 3) +``` + +(3) The metadata variables are stored in `attr(obs, 'Variables')`. What variables do we have? Use `str()` to check the structure first, then try to access the variable values. 
+```r +metadata_attr <- attr(obs, 'Variables') +str(metadata_attr) +names(metadata_attr$common) + +obs_time <- metadata_attr$common$time +obs_lat <- metadata_attr$common$latitude +obs_lon <- metadata_attr$common$longitude +``` + + +## 2. Check if the datasets are consistent + +Wrong data, wrong everything afterward. It is important to examine the data and metadata after we load them. + +(1) Compare the dimensions of the three data by `dim()`. +```r +dim(hcst) +dim(fcst) +dim(obs) +``` +(2) Check the summary of the data by `summary()`. +```r +summary(hcst) +summary(fcst) +summary(obs) +``` + +(3) Compare metadata. We have saved the latitude, longitude, and time attributes above after loading each data. +Use `identical()` or `all.equal()` to check if the values are consistent. +```r +identical(obs_lat, hcst_lat) +[1] TRUE +identical(obs_lon, hcst_lon) +[1] TRUE +identical(fcst_lat, hcst_lat) +[1] TRUE +identical(fcst_lon, hcst_lon) +[1] TRUE + +identical(format(hcst_time, '%Y%m'), format(obs_time, '%Y%m')) +[1] TRUE +``` -- GitLab From b9de2964ddb6ad83a8f92cea589a9d8d72e58c6c Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 20 Oct 2023 14:52:09 +0200 Subject: [PATCH 13/27] Run if the commit doesn't end with -draft --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index db7b631..7706518 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -5,6 +5,7 @@ workflow: rules: - if: $CI_COMMIT_TITLE =~ /-draft$/ when: never + - when: always # - if: $CI_PIPELINE_SOURCE == "merge_request_event" # - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH -- GitLab From 71396fca4c8ff5863fa9daeb7f84cbf31608960d Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 23 Oct 2023 12:29:47 +0200 Subject: [PATCH 14/27] remove unecessary elements --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 9 +++------ inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git 
a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index bcd171c..1a61862 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -46,8 +46,7 @@ Understand the following script, run it, and check the result. transform = CDORemapper, transform_params = list(grid = 'r360x181', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = 'syear', longitude = NULL, latitude = NULL), @@ -99,8 +98,7 @@ Try to take the Start() call above and modify it to load the forecast data (hint transform = CDORemapper, transform_params = list(grid = _____, method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = _____, longitude = NULL, latitude = NULL), @@ -187,8 +185,7 @@ even "time" is not explicitly specified in the Start() call. transform = CDORemapper, transform_params = list(grid = ______, method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = ______, longitude = NULL, latitude = NULL), diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md index 635be93..d0d4b07 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md @@ -46,8 +46,7 @@ Understand the following script, run it, and check the result. 
transform = CDORemapper, transform_params = list(grid = 'r360x181', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = 'syear', longitude = NULL, latitude = NULL), @@ -103,8 +102,7 @@ Try to take the Start() call above and modify it to load the forecast data (hint transform = CDORemapper, transform_params = list(grid = 'r360x181', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = 'syear', longitude = NULL, latitude = NULL), @@ -199,8 +197,7 @@ even "time" is not explicitly specified in the Start() call. transform = CDORemapper, transform_params = list(grid = 'r360x181', method = 'bilinear'), transform_vars = c('latitude', 'longitude'), - synonims = list(syear = c('syear', 'sdate'), - latitude = c('lat', 'latitude'), + synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), return_vars = list(time = 'syear', longitude = NULL, latitude = NULL), -- GitLab From 3a2b89dd5508d0c16c80736d507f340336c512b1 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 26 Oct 2023 17:20:17 +0200 Subject: [PATCH 15/27] hide the potential changes --- .gitlab-ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7706518..200b32d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,11 +1,11 @@ stages: - build -workflow: - rules: - - if: $CI_COMMIT_TITLE =~ /-draft$/ - when: never - - when: always +#workflow: +# rules: +# - if: $CI_COMMIT_TITLE =~ /-draft$/ +# when: never +# - when: always # - if: $CI_PIPELINE_SOURCE == "merge_request_event" # - if: $CI_COMMIT_BRANCH == 
$CI_DEFAULT_BRANCH -- GitLab From d0fe41ac3ee78cb4e879eb055bc422d27846f09f Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 27 Oct 2023 17:35:48 +0200 Subject: [PATCH 16/27] Update the data path --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 2 +- inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 1a61862..48bd86a 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -170,7 +170,7 @@ even "time" is not explicitly specified in the Start() call. path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' #---------------------------------------------------------------------- # Run these two lines if you're on Marenostrum4 and log in with training account - prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' path_obs <- paste0(prefix, path_obs) #---------------------------------------------------------------------- diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md index d0d4b07..39e5451 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md @@ -28,7 +28,7 @@ Understand the following script, run it, and check the result. 
path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" #---------------------------------------------------------------------- # Run these two lines if you're on Marenostrum4 and log in with training account - prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' path_exp <- paste0(prefix, path_exp) #---------------------------------------------------------------------- -- GitLab From 9318c7f1f17e96375a3d9204dc0b8e71f3d0beda Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 30 Oct 2023 13:02:36 +0100 Subject: [PATCH 17/27] correct obs path and last time comparison --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 5 +++-- inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md | 7 +++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 48bd86a..149f036 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -28,7 +28,7 @@ Understand the following script, run it, and check the result. 
 path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc" #---------------------------------------------------------------------- # Run these two lines if you're on Marenostrum4 and log in with training account - prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' path_exp <- paste0(prefix, path_exp) #---------------------------------------------------------------------- @@ -242,5 +242,6 @@ identical(____, ____) all.equal(____, ____) # time: only compare year and month -identical(format(hcst_time, '%Y%m'), format(obs_time, '%Y%m')) +hcst_time_corrected <- attributes(hcst)$Variables$common$time +identical(format(hcst_time_corrected, '%Y%m'), format(obs_time, '%Y%m')) ``` diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md index 39e5451..84099cd 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md @@ -182,7 +182,7 @@ even "time" is not explicitly specified in the Start() call. path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc' #---------------------------------------------------------------------- # Run these two lines if you're on Marenostrum4 and log in with training account - prefix <- '/gpfs/scratch/bsc32/bsc32734/bsc_training_2023/R_handson/' + prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/' path_obs <- paste0(prefix, path_obs) #---------------------------------------------------------------------- @@ -257,6 +257,8 @@ Use `identical()` or `all.equal()` to check if the values are consistent.
```r identical(obs_lat, hcst_lat) [1] TRUE +all.equal(obs_lat, hcst_lat) +[1] TRUE identical(obs_lon, hcst_lon) [1] TRUE identical(fcst_lat, hcst_lat) @@ -264,6 +266,7 @@ identical(fcst_lat, hcst_lat) identical(fcst_lon, hcst_lon) [1] TRUE -identical(format(hcst_time, '%Y%m'), format(obs_time, '%Y%m')) +hcst_time_corrected <- attributes(hcst)$Variables$common$time +identical(format(hcst_time_corrected, '%Y%m'), format(obs_time, '%Y%m')) [1] TRUE ``` -- GitLab From c67746c16a9beb4d86135329070c2cd3990e882f Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 30 Oct 2023 14:05:39 +0100 Subject: [PATCH 18/27] check file by ncdump before Start() call --- inst/doc/tutorial/PATC2023/handson_1-data-loading.md | 5 +++++ inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md index 149f036..0741780 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading.md @@ -21,6 +21,11 @@ We're going to analyze the near-surface temperature (short name: tas) for season ### 1.a Hindcast data +Check one netCDF file to see the data structure. +``` +ncdump -h /gpfs/scratch/nct01/nct01001/d2_handson_R/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_19931101.nc |less +``` + Understand the following script, run it, and check the result. ```r diff --git a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md index 84099cd..41d4a8b 100644 --- a/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md +++ b/inst/doc/tutorial/PATC2023/handson_1-data-loading_ans.md @@ -21,6 +21,11 @@ We're going to analyze the near-surface temperature (short name: tas) for season ### 1.a Hindcast data +Check one netCDF file to see the data structure. 
+``` +ncdump -h /gpfs/scratch/nct01/nct01001/d2_handson_R/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_19931101.nc |less +``` + Understand the following script, run it, and check the result. ```r -- GitLab From 40136fd927d64dd74a42c01d06dd57e6cbf3633f Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 22 Nov 2023 16:16:43 +0100 Subject: [PATCH 19/27] Allow to run on not only WS and AS machine --- R/ByChunks_autosubmit.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 65ab36e..ec33632 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -611,14 +611,15 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', #NOTE: If we ssh to AS VM and run everything there, we don't need to ssh here system(sys_commands) - } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { + } else { +# } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { # ssh from WS to AS VM to run exp as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"') system(sys_commands) - } else { - stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") +# } else { +# stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") } # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. 
-- GitLab From 37f9e7a5c5b37bef2a4339d5cab07af3b56079ab Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 1 Dec 2023 14:59:13 +0100 Subject: [PATCH 20/27] Correct the update code --- R/Collect.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/Collect.R b/R/Collect.R index 6d752f5..62e105e 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -386,19 +386,18 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { run_dir <- startr_exec$cluster[['run_dir']] done <- FALSE - sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) while (!done) { # If wait, try until it is done + sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { done <- TRUE } else if (!wait) { stop("Computation in progress...") } else { - Sys.sleep(startr_exec$cluster[['polling_period']]) message("Computation in progress, ", sum_received_chunks, " of ", prod(unlist(chunks)), " chunks are done...\n", "Check status on Autosubmit GUI: https://earth.bsc.es/autosubmitapp/experiment/", suite_id) -# Sys.sleep(min(sqrt(attempt), 5)) + Sys.sleep(startr_exec$cluster[['polling_period']]) } } # while !done -- GitLab From 2e82b56fff5a539630e93290a8f6128c79f017b9 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 1 Dec 2023 16:49:50 +0100 Subject: [PATCH 21/27] Enable Collect() to run on HPCs and return combined array there --- DESCRIPTION | 2 +- R/Collect.R | 90 +++++++++++++++++++++++++++++++++++++++----------- man/Collect.Rd | 6 +++- 3 files changed, 76 insertions(+), 22 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 60fa08c..90b03a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,5 +42,5 @@ URL: https://earth.bsc.es/gitlab/es/startR/ BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues SystemRequirements: cdo ecFlow Encoding: UTF-8 -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 Config/testthat/edition: 3 diff --git 
a/R/Collect.R b/R/Collect.R index 6d752f5..9baa264 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -22,6 +22,9 @@ #' folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the #' data and Collect() them as many times as desired, set remove to FALSE. The #' default value is TRUE. +#' @param on_remote A logical value deciding to the function is run locally and +#' sync the outputs back from HPC (FALSE, default), or it is run on HPC +#' (TRUE). #'@return A list of merged data array. #' #'@examples @@ -72,8 +75,9 @@ #' } #' #'@export -Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { - +#--------NEW------- +Collect <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { +#-------NEW_END----------- # Parameter checks if (!is(startr_exec, 'startR_exec')) { stop("Parameter 'startr_exec' must be an object of the class ", @@ -88,23 +92,29 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { if (!is.logical(remove)) { stop("Parameter 'remove' must be logical.") } +#------NEW--------- + if (!is.logical(on_remote)) { + stop("Parameter 'on_remote' must be logical.") + } if (tolower(startr_exec$workflow_manager) == 'ecflow') { - res <- Collect_ecflow(startr_exec, wait = wait, remove = remove) + res <- Collect_ecflow(startr_exec, wait = wait, remove = remove, on_remote = on_remote) } else if (tolower(startr_exec$workflow_manager) == 'autosubmit') { - res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove) + res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove, on_remote = on_remote) } +#-------NEW_END---------- return(res) } +#------NEW--------- +Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { -Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { - - if (Sys.which('ecflow_client') == '') { + if (!on_remote && Sys.which('ecflow_client') == '') { stop("ecFlow must be installed in order to collect results from a ", "Compute() execution.") } 
+#-------NEW_END----------- cluster <- startr_exec[['cluster']] ecflow_server <- startr_exec[['ecflow_server']] suite_id <- startr_exec[['suite_id']] @@ -114,7 +124,9 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { timings <- startr_exec[['timings']] ecflow_suite_dir_suite <- paste0(ecflow_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') - if (!is.null(cluster[['temp_dir']])) { + if (!is.null(cluster[['temp_dir']])) { #NOTE: Which case doesn't have temp_dir? +#-------NEW--------- + remote_ecflow_suite_dir <- cluster[['temp_dir']] remote_ecflow_suite_dir_suite <- paste0(cluster[['temp_dir']], '/STARTR_CHUNKING_', suite_id, '/') @@ -141,8 +153,13 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { } done <- FALSE attempt <- 1 - sum_received_chunks <- sum(grepl('output.*\\.Rds', - list.files(ecflow_suite_dir_suite))) +#--------NEW----------- + if (!on_remote) { + #TODO: Is it correct? Not all the cases have "output" as beginning + sum_received_chunks <- sum(grepl('output.*\\.Rds', + list.files(ecflow_suite_dir_suite))) + } +#---------NEW_END-------- if (cluster[['bidirectional']]) { t_transfer_back <- NA } else { @@ -156,7 +173,9 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { writeLines(rsync_petition_file_lines, rsync_petition_file) Sys.sleep(2) while (!done) { - failed <- FALSE +#-------NEW----------- + if (!on_remote) { +#------NEW_END---------- if (cluster[['bidirectional']]) { status <- system(paste0("ecflow_client --get_state=STARTR_CHUNKING_", suite_id, " --host=", @@ -197,6 +216,7 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { # received_chunks <- received_files[grepl('Rds$', # received_files)] #} + failed <- FALSE t_begin_transfer_back <- Sys.time() rsync_output <- tryCatch({ system(paste0("rsync -rav --include-from=", rsync_petition_file, " '", @@ -268,11 +288,30 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { } 
Sys.sleep(cluster[['polling_period']]) } +#--------NEW---------- + } else { # on_remote + + sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_ecflow_suite_dir_suite ))) + + if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { + done <- TRUE + } else if (!wait) { + stop("Computation in progress...") + } else { + message("Computation in progress, ", sum_received_chunks, " of ", prod(unlist(chunks)), " chunks are done.") + message("Will try again after polling_period...") + Sys.sleep(cluster[['polling_period']]) + } + + } +#-------NEW_END--------- attempt <- attempt + 1 } file.remove(rsync_petition_file) timings[['transfer_back']] <- t_transfer_back - if (!is.null(cluster[['temp_dir']])) { + #------NEW-------- + if (!on_remote && !is.null(cluster[['temp_dir']])) { + #-------NEW_END-------- system(paste0('ssh ', cluster[['queue_host']], ' "rm -rf ', remote_ecflow_suite_dir_suite, '"')) } @@ -280,11 +319,19 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { .warning("ATTENTION: The source chunks will be removed from the ", "system. 
Store the result after Collect() ends if needed.") } +#-------NEW------- + if (!on_remote) { + target_folder <- ecflow_suite_dir + target_folder_suite <- ecflow_suite_dir_suite + } else { + target_folder <- remote_ecflow_suite_dir + target_folder_suite <- remote_ecflow_suite_dir_suite + } t_begin_merge <- Sys.time() - result <- .MergeChunks(ecflow_suite_dir, suite_id, remove) + result <- .MergeChunks(target_folder, suite_id, remove) t_end_merge <- Sys.time() timings[['merge']] <- as.numeric(difftime(t_end_merge, t_begin_merge, units = 'secs')) - received_files <- list.files(ecflow_suite_dir_suite, full.names = TRUE) + received_files <- list.files(target_folder_suite, full.names = TRUE) received_timings_files <- received_files[grepl('timings$', received_files)] for (timings_file in received_timings_files) { times <- readRDS(timings_file) @@ -294,11 +341,14 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { timings[['compute']] <- c(timings[['compute']], times['compute']) } if (remove) { - system(paste0("ecflow_client --delete=force yes /STARTR_CHUNKING_", - suite_id, " --host=", ecflow_server[['host']], - " --port=", ecflow_server[['port']])) - unlink(paste0(ecflow_suite_dir_suite), - recursive = TRUE) +#--------NEW-------------- + if (!on_remote) { + system(paste0("ecflow_client --delete=force yes /STARTR_CHUNKING_", + suite_id, " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + } + unlink(target_folder_suite, recursive = TRUE) +#---------NEW_END----------- } if (attempt > 2) { t_end_total <- Sys.time() @@ -374,7 +424,7 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { -Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { +Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { suite_id <- startr_exec[['suite_id']] chunks <- startr_exec[['chunks']] diff --git a/man/Collect.Rd b/man/Collect.Rd index d90caca..e701a00 100644 --- 
a/man/Collect.Rd +++ b/man/Collect.Rd @@ -4,7 +4,7 @@ \alias{Collect} \title{Collect and merge the computation results} \usage{ -Collect(startr_exec, wait = TRUE, remove = TRUE) +Collect(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) } \arguments{ \item{startr_exec}{An R object returned by Compute() when the parameter 'wait' @@ -25,6 +25,10 @@ received from the HPC after data being collected, as well as the local job folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the data and Collect() them as many times as desired, set remove to FALSE. The default value is TRUE.} + +\item{on_remote}{A logical value deciding to the function is run locally and +sync the outputs back from HPC (FALSE, default), or it is run on HPC +(TRUE).} } \value{ A list of merged data array. -- GitLab From c32d9286ec9f217bb0dd0b34cc4446a8342f6406 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 11 Dec 2023 14:32:08 +0100 Subject: [PATCH 22/27] Clean code --- R/Collect.R | 206 ++++++++++++++++++++++++---------------------------- 1 file changed, 95 insertions(+), 111 deletions(-) diff --git a/R/Collect.R b/R/Collect.R index 05ed1b4..1bb8648 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -75,9 +75,7 @@ #' } #' #'@export -#--------NEW------- Collect <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { -#-------NEW_END----------- # Parameter checks if (!is(startr_exec, 'startR_exec')) { stop("Parameter 'startr_exec' must be an object of the class ", @@ -92,7 +90,6 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) if (!is.logical(remove)) { stop("Parameter 'remove' must be logical.") } -#------NEW--------- if (!is.logical(on_remote)) { stop("Parameter 'on_remote' must be logical.") } @@ -102,19 +99,16 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) } else if (tolower(startr_exec$workflow_manager) == 'autosubmit') { res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove, 
on_remote = on_remote) } -#-------NEW_END---------- return(res) } -#------NEW--------- Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = FALSE) { if (!on_remote && Sys.which('ecflow_client') == '') { stop("ecFlow must be installed in order to collect results from a ", "Compute() execution.") } -#-------NEW_END----------- cluster <- startr_exec[['cluster']] ecflow_server <- startr_exec[['ecflow_server']] suite_id <- startr_exec[['suite_id']] @@ -125,7 +119,6 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = ecflow_suite_dir_suite <- paste0(ecflow_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') if (!is.null(cluster[['temp_dir']])) { #NOTE: Which case doesn't have temp_dir? -#-------NEW--------- remote_ecflow_suite_dir <- cluster[['temp_dir']] remote_ecflow_suite_dir_suite <- paste0(cluster[['temp_dir']], '/STARTR_CHUNKING_', @@ -153,13 +146,12 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = } done <- FALSE attempt <- 1 -#--------NEW----------- if (!on_remote) { #TODO: Is it correct? 
Not all the cases have "output" as beginning sum_received_chunks <- sum(grepl('output.*\\.Rds', list.files(ecflow_suite_dir_suite))) } -#---------NEW_END-------- + if (cluster[['bidirectional']]) { t_transfer_back <- NA } else { @@ -173,95 +165,19 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = writeLines(rsync_petition_file_lines, rsync_petition_file) Sys.sleep(2) while (!done) { -#-------NEW----------- if (!on_remote) { -#------NEW_END---------- - if (cluster[['bidirectional']]) { - status <- system(paste0("ecflow_client --get_state=STARTR_CHUNKING_", - suite_id, " --host=", - ecflow_server[['host']], " --port=", ecflow_server[['port']]), - intern = TRUE) - if (any(grepl(paste0("suite STARTR_CHUNKING_", suite_id, " #.* state:complete"), status))) { - done <- TRUE - } else if (!wait) { - stop("Computation in progress...") - } - if (!first_chunk_received) { - if (any(grepl('state:complete', status))) { - if (!is.null(time_before_first_chunk)) { - time_after_first_chunk <- Sys.time() - estimate <- (time_after_first_chunk - - time_before_first_chunk) * - ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / - cluster[['max_jobs']]) - units(estimate) <- 'mins' - .message( - paste0('Remaining time estimate (neglecting queue and ', - 'merge time) (at ', format(time_after_first_chunk), - '): ', format(estimate), ' (', - format(time_after_first_chunk - - time_before_first_chunk), ' per chunk)') - ) - } - first_chunk_received <- TRUE - } - } - Sys.sleep(min(sqrt(attempt), 5)) - } else { - #if (sum_received_chunks == 0) { - # # Accounting for the fist chunk received in ByChunks and - # # setting it to complete - # # ByChunks needs the first chunk to calculate remaining time - # received_files <- list.files(ecflow_suite_dir_suite) - # received_chunks <- received_files[grepl('Rds$', - # received_files)] - #} - failed <- FALSE - t_begin_transfer_back <- Sys.time() - rsync_output <- tryCatch({ - system(paste0("rsync -rav --include-from=", 
rsync_petition_file, " '", - cluster[['queue_host']], ":", remote_ecflow_suite_dir_suite, "' ", - ecflow_suite_dir_suite, "/"), intern = TRUE) - }, error = function(e) { - message("Warning: rsync from remote server to collect results failed. ", - "Retrying soon.") - failed <- TRUE - }) - t_end_transfer_back <- Sys.time() - t_transfer_back <- t_transfer_back + as.numeric(difftime(t_end_transfer_back, - t_begin_transfer_back, units = 'secs')) - if (!failed) { - #if (sum_received_chunks == 0) { - # rsync_output <- c(rsync_output, received_chunks) - #} - received_running <- grepl('running$', rsync_output) - for (received_chunk_index in which(received_running)) { - file_name <- rsync_output[received_chunk_index] - task_name <- find_task_name(file_name) - system(paste0('ecflow_client --force=active recursive ', - task_name, - " --host=", ecflow_server[['host']], - " --port=", ecflow_server[['port']])) - } - received_crashed <- grepl('crashed$', rsync_output) - for (received_chunk_index in which(received_crashed)) { - file_name <- rsync_output[received_chunk_index] - task_name <- find_task_name(file_name) - system(paste0('ecflow_client --force=aborted recursive ', - task_name, - " --host=", ecflow_server[['host']], - " --port=", ecflow_server[['port']])) + if (cluster[['bidirectional']]) { + status <- system(paste0("ecflow_client --get_state=STARTR_CHUNKING_", + suite_id, " --host=", + ecflow_server[['host']], " --port=", ecflow_server[['port']]), + intern = TRUE) + if (any(grepl(paste0("suite STARTR_CHUNKING_", suite_id, " #.* state:complete"), status))) { + done <- TRUE + } else if (!wait) { + stop("Computation in progress...") } - received_chunks <- grepl('Rds$', rsync_output) - for (received_chunk_index in which(received_chunks)) { - file_name <- rsync_output[received_chunk_index] - task_name <- find_task_name(file_name) - system(paste0('ecflow_client --force=complete recursive ', - task_name, - " --host=", ecflow_server[['host']], - " --port=", 
ecflow_server[['port']])) - sum_received_chunks <- sum_received_chunks + 1 - if (!first_chunk_received) { + if (!first_chunk_received) { + if (any(grepl('state:complete', status))) { if (!is.null(time_before_first_chunk)) { time_after_first_chunk <- Sys.time() estimate <- (time_after_first_chunk - @@ -272,23 +188,97 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = .message( paste0('Remaining time estimate (neglecting queue and ', 'merge time) (at ', format(time_after_first_chunk), - '): ', format(estimate), ' (', - format(time_after_first_chunk - + '): ', format(estimate), ' (', + format(time_after_first_chunk - time_before_first_chunk), ' per chunk)') ) } first_chunk_received <- TRUE } } - if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { - done <- TRUE - } else if (!wait) { - stop("Computation in progress...") + Sys.sleep(min(sqrt(attempt), 5)) + } else { + #if (sum_received_chunks == 0) { + # # Accounting for the fist chunk received in ByChunks and + # # setting it to complete + # # ByChunks needs the first chunk to calculate remaining time + # received_files <- list.files(ecflow_suite_dir_suite) + # received_chunks <- received_files[grepl('Rds$', + # received_files)] + #} + failed <- FALSE + t_begin_transfer_back <- Sys.time() + rsync_output <- tryCatch({ + system(paste0("rsync -rav --include-from=", rsync_petition_file, " '", + cluster[['queue_host']], ":", remote_ecflow_suite_dir_suite, "' ", + ecflow_suite_dir_suite, "/"), intern = TRUE) + }, error = function(e) { + message("Warning: rsync from remote server to collect results failed. 
", + "Retrying soon.") + failed <- TRUE + }) + t_end_transfer_back <- Sys.time() + t_transfer_back <- t_transfer_back + as.numeric(difftime(t_end_transfer_back, + t_begin_transfer_back, units = 'secs')) + if (!failed) { + #if (sum_received_chunks == 0) { + # rsync_output <- c(rsync_output, received_chunks) + #} + received_running <- grepl('running$', rsync_output) + for (received_chunk_index in which(received_running)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=active recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + } + received_crashed <- grepl('crashed$', rsync_output) + for (received_chunk_index in which(received_crashed)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=aborted recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + } + received_chunks <- grepl('Rds$', rsync_output) + for (received_chunk_index in which(received_chunks)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=complete recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + sum_received_chunks <- sum_received_chunks + 1 + if (!first_chunk_received) { + if (!is.null(time_before_first_chunk)) { + time_after_first_chunk <- Sys.time() + estimate <- (time_after_first_chunk - + time_before_first_chunk) * + ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / + cluster[['max_jobs']]) + units(estimate) <- 'mins' + .message( + paste0('Remaining time estimate (neglecting queue and ', + 'merge time) (at ', format(time_after_first_chunk), + '): ', format(estimate), ' (', + format(time_after_first_chunk - + time_before_first_chunk), ' per chunk)') + ) + } + first_chunk_received <- TRUE + 
} + } + if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { + done <- TRUE + } else if (!wait) { + stop("Computation in progress...") + } } + Sys.sleep(cluster[['polling_period']]) } - Sys.sleep(cluster[['polling_period']]) - } -#--------NEW---------- + } else { # on_remote sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_ecflow_suite_dir_suite ))) @@ -304,14 +294,11 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = } } -#-------NEW_END--------- attempt <- attempt + 1 } file.remove(rsync_petition_file) timings[['transfer_back']] <- t_transfer_back - #------NEW-------- if (!on_remote && !is.null(cluster[['temp_dir']])) { - #-------NEW_END-------- system(paste0('ssh ', cluster[['queue_host']], ' "rm -rf ', remote_ecflow_suite_dir_suite, '"')) } @@ -319,7 +306,6 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = .warning("ATTENTION: The source chunks will be removed from the ", "system. Store the result after Collect() ends if needed.") } -#-------NEW------- if (!on_remote) { target_folder <- ecflow_suite_dir target_folder_suite <- ecflow_suite_dir_suite @@ -341,14 +327,12 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = timings[['compute']] <- c(timings[['compute']], times['compute']) } if (remove) { -#--------NEW-------------- if (!on_remote) { system(paste0("ecflow_client --delete=force yes /STARTR_CHUNKING_", suite_id, " --host=", ecflow_server[['host']], " --port=", ecflow_server[['port']])) } unlink(target_folder_suite, recursive = TRUE) -#---------NEW_END----------- } if (attempt > 2) { t_end_total <- Sys.time() -- GitLab From 6c0342a65341af4a60ed06a53c0f86317895aa48 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 11 Dec 2023 15:23:08 +0100 Subject: [PATCH 23/27] Improve doc for use_libraries --- DESCRIPTION | 2 +- R/Step.R | 4 +++- man/Step.Rd | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION 
b/DESCRIPTION index 60fa08c..90b03a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,5 +42,5 @@ URL: https://earth.bsc.es/gitlab/es/startR/ BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues SystemRequirements: cdo ecFlow Encoding: UTF-8 -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 Config/testthat/edition: 3 diff --git a/R/Step.R b/R/Step.R index e5d7304..3c997f0 100644 --- a/R/Step.R +++ b/R/Step.R @@ -19,7 +19,9 @@ #' for multiple returned arrays indicating the dimension names of the function #' output. #'@param use_libraries A vector of character string indicating the R library -#' names to be used in 'fun'. The default value is NULL. +#' names to be used in 'fun'. Only used when the jobs are run on HPCs; if the +#' jobs are run locally, load the necessary libraries by \code{library()} +#' directly. The default value is NULL. #'@param use_attributes One or more lists of vectors of character string #' indicating the data attributes to be used in 'fun'. The list name should be #' consistent with the list name of 'data' in AddStep(). The default value is diff --git a/man/Step.Rd b/man/Step.Rd index c473ccb..283c555 100644 --- a/man/Step.Rd +++ b/man/Step.Rd @@ -28,7 +28,9 @@ for multiple returned arrays indicating the dimension names of the function output.} \item{use_libraries}{A vector of character string indicating the R library -names to be used in 'fun'. The default value is NULL.} +names to be used in 'fun'. Only used when the jobs are run on HPCs; if the +jobs are run locally, load the necessary libraries by \code{library()} +directly. The default value is NULL.} \item{use_attributes}{One or more lists of vectors of character string indicating the data attributes to be used in 'fun'. 
The list name should be -- GitLab From 85c806d6058ba8eb7dec93e22fd39fb29728ceab Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 20 Dec 2023 17:07:54 +0100 Subject: [PATCH 24/27] Fix .Rds files recognization --- R/Collect.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/Collect.R b/R/Collect.R index 1bb8648..5ae8b15 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -147,8 +147,7 @@ Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE, on_remote = done <- FALSE attempt <- 1 if (!on_remote) { - #TODO: Is it correct? Not all the cases have "output" as beginning - sum_received_chunks <- sum(grepl('output.*\\.Rds', + sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(ecflow_suite_dir_suite))) } -- GitLab From d5d4d564e799e0a6bd790a661cc6a535d151f519 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 21 Dec 2023 15:07:23 +0100 Subject: [PATCH 25/27] faq for Collect() --- inst/doc/faq.md | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index ffe91a5..7ff7604 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -31,6 +31,8 @@ This document intends to be the first reference for any doubts that you may have 25. [What to do if your function has too many target dimensions](#25-what-to-do-if-your-function-has-too-many-target-dimensions) 26. [Use merge_across_dims_narm to remove NAs](#26-use-merge_across_dims_narm-to-remove-nas) 27. [Utilize chunk number in the function](#27-utilize-chunk-number-in-the-function) + 28. [Run startR in the background](#28-run-startr-in-the-background) + 29. [Collect result on HPCs](#29-collect-result-on-hpcs) 2. **Something goes wrong...** @@ -1008,6 +1010,38 @@ shows how to get start date for each chunk using chunk number; (2) [ex2_14](inst There are many other possible applications of this parameter. Please share with us other uses cases you may create. +### 28. 
Run startR in the background + +For heavy execution, we usually launch the jobs on HPCs with parallel computation. Sometimes, it takes a lot of time (days, weeks) to finish all the jobs. +It'd be very handy to let the jobs run in the background, so we don't need to keep the R session on the workstation open during the whole process. +To do this: + +(1) Use parameter `wait = FALSE` in Compute() call. The execution therefore won't block the R session. + +(2) Save the object as a .Rds file by saveRDS(). In this file, you have all the information needed for collecting the result later. You can close the R session and turn off the workstation now. + +(3) When you want to collect the result, use Collect() with the saved .Rds file. +You can choose to use parameter `wait = TRUE` and the command will keep running until all the jobs are finished and can be collected. +Or, by `wait = FALSE`, it will tell you the jobs are still running and you can try again later. + +Note that if you use ecFlow as job manager and with Compute(wait = FALSE), the ecFlow-UI won't be updated due to uni-directional connection. +Check [ecFlow UI remains blue and does not update status](#2-ecflow-ui-remains-blue-and-does-not-update-status) for details. + +### 29. Collect result on HPCs +After using Compute() to run execution on HPCs, you can choose to collect the result on local workstation or on HPCs. Here are the instructions for doing it on HPCs. + +(1) Run the startR workflow as usual on workstation until Compute(). + +(2) In Compute(), use `wait = FALSE`. The execution therefore won't block the R session. + +(3) Save the object as a .Rds file somewhere that can be found on HPCs. E.g. `saveRDS(res, "/esarchive/scratch//res_startR_Collect.rds")` + +(4) ssh to HPCs (e.g., Nord3), open an R session. + +(5) Read the saved .Rds file. E.g. `obj_startR <- readRDS("/esarchive/scratch//res_startR_Collect.rds")` + +(6) Collect() the result with parameter `on_remote = TRUE`. E.g.
`res <- Collect(obj_startR, on_remote = TRUE)` + # Something goes wrong... @@ -1042,9 +1076,15 @@ To solve this problem, use `Collect()` in the R terminal after running Compute() ### 3. Compute() successfully but then killed on R session -When Compute() on HPCs, the machines are able to process data which are much larger than the local workstation, so the computation works fine (i.e., on ec-Flow UI, the chunks show yellow in the end.) However, after the computation, the output will be sent back to local workstation. **If the returned data is larger than the available local memory space, your R session will be killed.** Therefore, always pre-check if the returned data will fit in your workstation free memory or not. If not, subset the input data or reduce the output size through more computation. +When we use Compute() and run jobs on HPCs, each job/chunk is finished and the result is saved as a .Rds file individually. +When all the jobs are finished, the next step is to merge all the chunks into one array and return to workstation. +**If the returned data is larger than the available local memory space on your workstation, +your R session will be killed.** Therefore, it is better to always pre-check if the returned data will fit in your workstation free memory or not. + +If the result can fit on HPCs, you can also choose to collect the data there. Check [How-to-29](#29-collect-result-on-hpcs) for details. -Further explanation: though the complete output (i.e., merging all the chunks into one returned array) cannot be sent back to workstation, but the chunking results (.Rds file) are completed and saved in the directory '/STARTR_CHUNKING_'. If you still want to use the chunking results, you can find them there.
+Note that even though the complete output (i.e., merging all the chunks into one returned array) cannot be sent back to workstation and the R session is killed, +the chunking results (.Rds files) are completed and saved in the local directory '/STARTR_CHUNKING_', and you can still utilize the chunk files. ### 4. My jobs work well in workstation and fatnodes but not on Power9 (or vice versa) -- GitLab From c81fc57121b14c6188f1445b0b3feaa8b522403a Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 21 Dec 2023 15:44:03 +0100 Subject: [PATCH 26/27] version bump --- .Rbuildignore | 2 +- DESCRIPTION | 10 ++++++---- NEWS.md | 6 ++++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 98316cc..aa7059a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,7 +9,7 @@ ^inst/doc$ ^\.gitlab-ci\.yml$ ## unit tests should be ignored when building the package for CRAN -#^tests$ +^tests$ ^inst/PlotProfiling\.R$ ^.gitlab$ # Suggested by http://r-pkgs.had.co.nz/package.html diff --git a/DESCRIPTION b/DESCRIPTION index 90b03a7..8fd5ee1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,16 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.3.0 +Version: 2.3.1 Authors@R: c( person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), - person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), + person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-4182-5258")), person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = c("aut"), comment = c(ORCID = "0000-0001-8568-3071")), + person("Eva", "Rifa", , "eva.rifarovira@bsc.es", role = "ctb"), + person("Victoria", "Agudetse", , "victoria.agudetse@bsc.es", role = "ctb"), + person("Bruno", "de Paula Kinoshita", , "bruno.depaulakinoshita@bsc.es", role = "ctb"), person("Javier", "Vegas", , "javier.vegas@bsc.es", role = c("ctb")), person("Pierre-Antoine", "Bretonniere", , 
"pierre-antoine.bretonniere@bsc.es", role = c("ctb")), - person("Roberto", "Serrano", , "rsnotivoli@gmal.com", role = c("ctb")), - person("Eva", "Rifa", , "eva.rifarovira@bsc.es", role = "ctb"), + person("Roberto", "Serrano", , "rsnotivoli@gmail.com", role = c("ctb")), person("BSC-CNS", role = c("aut", "cph"))) Description: Tool to automatically fetch, transform and arrange subsets of multi- dimensional data sets (collections of files) stored in local and/or diff --git a/NEWS.md b/NEWS.md index 9219f96..c19d7a3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# startR v2.3.1 (Release date: 2023-12-22) +- Use Autosubmit as workflow manager on hub +- New feature: Collect result by Collect() on HPCs +- Bugfix: Correct Collect_autosubmit() .Rds files update +- Bugfix: Collect() correctly recognize the finished chunk (.Rds file) in local ecFlow folder. Prevent neverending Collect() when using `wait = F` in Compute() and Collect() the result later on + # startR v2.3.0 (Release date: 2023-08-31) - Load variable metadata when retreive = F - Change Compute() "threads_load" to 1 to be consistent with documentation -- GitLab From 254ced13d8e2635351561d67cd679a00607d73c7 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 21 Dec 2023 16:23:55 +0100 Subject: [PATCH 27/27] fix syntax error --- R/Start.R | 2 +- man/Start.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Start.R b/R/Start.R index 89f87e9..5bfb3bf 100644 --- a/R/Start.R +++ b/R/Start.R @@ -674,7 +674,7 @@ #' to recognize files such as \cr #' \code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'}).\cr\cr #'Note that each glob expression can only represent one possibility (Start() -#'chooses the first). Because /code{*} is not the tag, which means it cannot +#'chooses the first). Because \code{*} is not the tag, which means it cannot #'be a dimension of the output array. Therefore, only one possibility can be #'adopted. 
For example, if \cr #'\code{'/path/to/dataset/precipitation_*/19901101_*_foo.nc'}\cr diff --git a/man/Start.Rd b/man/Start.Rd index 25eb8d7..640c5a9 100644 --- a/man/Start.Rd +++ b/man/Start.Rd @@ -651,7 +651,7 @@ For example, a path pattern could be as follows: \cr to recognize files such as \cr \code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'}).\cr\cr Note that each glob expression can only represent one possibility (Start() -chooses the first). Because /code{*} is not the tag, which means it cannot +chooses the first). Because \code{*} is not the tag, which means it cannot be a dimension of the output array. Therefore, only one possibility can be adopted. For example, if \cr \code{'/path/to/dataset/precipitation_*/19901101_*_foo.nc'}\cr -- GitLab