From e9905f51d3bdf9ca2271f75eec898b215a396c6d Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Wed, 6 Jul 2022 17:58:41 +0200
Subject: [PATCH 1/7] Improve daily loading if dataset is not ec-earth

---
 modules/Loading/Loading_decadal.R             | 59 +++++++++++++++++--
 modules/Loading/get_daily_time_ind.R          |  5 +-
 .../testing_recipes/recipe_decadal_daily.yml  |  4 +-
 tools/libs.R                                  |  1 +
 4 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R
index fe615594..35b95b67 100644
--- a/modules/Loading/Loading_decadal.R
+++ b/modules/Loading/Loading_decadal.R
@@ -9,6 +9,7 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R")
 source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R")
 # Load required libraries/funs
 source("modules/Loading/get_daily_time_ind.R")
+source("modules/Loading/dates2load.R")
 source("modules/Loading/check_latlon.R")
 source("tools/libs.R")
 
@@ -44,11 +45,25 @@ load_datasets <- function(recipe_file) {
   if (store.freq == "monthly_mean") {
     time <- (as.numeric(recipe$Analysis$Time$leadtimemin):as.numeric(recipe$Analysis$Time$leadtimemax)) + 1
   } else if (store.freq == "daily_mean") {
-    time <- get_daily_time_ind(leadtimemin = as.numeric(recipe$Analysis$Time$leadtimemin),
-                               leadtimemax = as.numeric(recipe$Analysis$Time$leadtimemax),
-                               initial_month = archive$System[[exp.name]]$initial_month,
-                               sdates = sdates_hcst,
-                               calendar = archive$System[[exp.name]][[store.freq]]$calendar)
+    if (exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4')) {
+      # time selector uses indices, and tune the data afterward to remove 0229
+      time <- get_daily_time_ind(leadtimemin = as.numeric(recipe$Analysis$Time$leadtimemin),
+                                 leadtimemax = as.numeric(recipe$Analysis$Time$leadtimemax),
+                                 initial_month = archive$System[[exp.name]]$initial_month,
+                                 sdates = sdates_hcst,
+                                 calendar = archive$System[[exp.name]][[store.freq]]$calendar)
+    } else {
+      # use an array [syear, time] as time selector since time_across = 'chunk' is not needed
+      initial_month_char <- sprintf('%02d', archive$System[[exp.name]]$initial_month)
+
+      sdates_hcst_full <- paste0(sdates_hcst, initial_month_char, '01')
+      time_arr <- get_timeidx(sdates = sdates_hcst_full, 
+                              ltmin = as.numeric(recipe$Analysis$Time$leadtimemin), 
+                              ltmax = as.numeric(recipe$Analysis$Time$leadtimemax), 
+                              time_freq = "daily_mean")
+      names(dim(time_arr)) <- c('syear', 'time')
+
+    }
   }
 
 #NOTE: May be used in the future
@@ -92,7 +107,38 @@ load_datasets <- function(recipe_file) {
                          '$ensemble$', table, '$var$', grid, version)
   hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
 
-  # monthly & daily
+  # daily non-EC-Earth data
+  if (store.freq == "daily_mean" & !exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4')) {
+    hcst <- Start(dat = file.path(hcst.path, hcst.files),
+                  var = variable,
+                  ensemble = member,
+                  syear = paste0(sdates_hcst),
+#split_multiselected_dims = T,
+                  chunk = 'all',
+                  chunk_depends = 'syear',
+                  time = time_arr, #indices(time),
+#                  time_across = 'chunk',
+#                  merge_across_dims = TRUE,
+                  largest_dims_length = need_largest_dims_length,
+                  latitude = values(list(lats.min, lats.max)),
+                  latitude_reorder = Sort(decreasing = TRUE),
+                  longitude = values(list(lons.min, lons.max)),
+                  longitude_reorder = circularsort,
+                  transform = regrid_params$fcst.transform,
+                  transform_extra_cells = 2,
+                  transform_params = list(grid = regrid_params$fcst.gridtype, #nc file
+                                          method = regrid_params$fcst.gridmethod),
+                  transform_vars = c('latitude', 'longitude'),
+                  synonims = list(longitude = c('lon', 'longitude'),
+                                  latitude = c('lat', 'latitude')),
+                  return_vars = list(latitude = NULL, longitude = NULL,
+                                     time = c('syear')), #, 'chunk')),
+                  retrieve = T)
+    if (dim(hcst)['chunk'] != 1) stop("hcst loading has problem. The chunk number is > 1")
+    dim(hcst) <- dim(hcst)[-5] #remove chunk dim
+
+  } else {
+  # monthly & daily if EC-Earth
   hcst <- Start(dat = file.path(hcst.path, hcst.files),
                 var = variable,
                 ensemble = member,
@@ -118,6 +164,7 @@ load_datasets <- function(recipe_file) {
                 return_vars = list(latitude = NULL, longitude = NULL, 
                                    time = c('syear', 'chunk')),
                 retrieve = T)
+  }
 
   tmp_time_attr <- attr(hcst, 'Variables')$common$time
 
diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/get_daily_time_ind.R
index 3e41cddb..d8bf9a37 100644
--- a/modules/Loading/get_daily_time_ind.R
+++ b/modules/Loading/get_daily_time_ind.R
@@ -3,6 +3,9 @@
 # leadtimemin = 0 means the first month, and leadtimemax = 2 means the 3rd month.
 # For daily data, if initial month is November, leadtimemin = 0 equals to c(1:30), 
 # and leadtimemax = 2, which means January next year, equals to c(31+31:31+31+30).
+#
+#NOTE: For the datasets not EC-Earth, we can use array [syear, time] to define time since
+# the Start() call doesn't need time_across = 'chunk' (there is only one chunk)
 get_daily_time_ind <- function(leadtimemin, leadtimemax, initial_month, sdates, calendar) {
   #TODO: calendar & leap year
   # E.g., time = 1:4 means Dec to March, then indices will be c(31:(31+30+31+28+31)) for non-leap yr and c(31:(31+30+31+28), (31+30+31+28+2):(31+30+31+28+2+30)) for leap yr. Calendar needs to be considered too.
@@ -37,7 +40,7 @@ get_daily_time_ind <- function(leadtimemin, leadtimemax, initial_month, sdates,
     } else if (calendar %in% c('standard', 'proleptic_gregorian', 'gregorian')) {
 
 #      leap <- leap_year(sdates)
-      sdates_full <- ymd(paste0(sdates, sprintf('%02d',initial_month), '01'))
+      sdates_full <- ymd(paste0(sdates, sprintf('%02d', initial_month), '01'))
       idx_min <- sdates_full + months(leadtimemin)
       idx_max <- sdates_full + months(leadtimemax + 1) - days(1)
 
diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml
index a025dca3..565ce6a8 100644
--- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml
+++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml
@@ -8,8 +8,8 @@ Analysis:
     freq: daily_mean
   Datasets:
     System: 
-      name: EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 
-      member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all'
+      name: MIROC6 #EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 
+      member: r1i1p1f1,r2i1p1f1,r3i1p1f1 #'all'
     Multimodel: no
     Reference:
       name: ERA5
diff --git a/tools/libs.R b/tools/libs.R
index 31728650..248ada45 100644
--- a/tools/libs.R
+++ b/tools/libs.R
@@ -10,6 +10,7 @@ library(abind)
 #  library(easyVerification)
 library(easyNCDF)
 library(CSTools)
+library(lubridate)
 #
 #  library(parallel)
 #  library(pryr) # To check mem usage.
-- 
GitLab


From 679265fcbc06de8372286287353f2a7fbdcadf0c Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Mon, 11 Jul 2022 18:07:32 +0200
Subject: [PATCH 2/7] Allow more than one fcst_syear; change leadtime to ftime.

---
 conf/archive_decadal.yml                      | 10 +--
 modules/Loading/Loading_decadal.R             | 82 +++++++++++++++++--
 modules/Loading/get_daily_time_ind.R          |  2 +-
 .../testing_recipes/recipe_decadal.yml        | 14 ++--
 4 files changed, 85 insertions(+), 23 deletions(-)

diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml
index e41d46a0..aff4330a 100644
--- a/conf/archive_decadal.yml
+++ b/conf/archive_decadal.yml
@@ -41,9 +41,10 @@ archive:
 
 # ---- 
       EC-Earth3-i4:
-        src: 
-          hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/"
-          fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/"
+        src: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/"
+        first_dcppB_syear: 2021 
+#          hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/"
+#          fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/"
 #        src: {"1960:2020": "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/",
 #              "2021:2021": "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/"}
         monthly_mean:
@@ -56,8 +57,7 @@ archive:
           grid: {"tas":"gr", "pr":"gr", "psl":"gr", "clt":"gr", "hfls":"gr",
                  "hurs":"gr", "huss":"gr", "rsds":"gr", "rsut":"gr", "ta":"gr", 
                   "tasmax":"gr", "tosa":"gr", "ua":"gr", "va":"gr", "zg":"gr", 
-                  "evspsbl":"gr", "hfss":"gr", "hursmin":"gr", "rlut":"gr", 
-                  "rsdt":"gr", "sfcWind":"gr", "tasmin":"gr", "ts":"gr", "uas":"gr", 
+
                   "vas":"gr"}
           version: {"tas":"v20210910", "pr":"v20210910", "psl":"v20210910", "clt":"v20210910",
                     "hurs":"v20210910", "huss":"v20210910", "rsds":"v20210910", "rsut":"v20210910", "ta":"v20210910", 
diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R
index 35b95b67..6daaf1d5 100644
--- a/modules/Loading/Loading_decadal.R
+++ b/modules/Loading/Loading_decadal.R
@@ -39,16 +39,16 @@ load_datasets <- function(recipe_file) {
 
   # change to: sdates <- dates2load(recipe, logger)
   sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015
-  sdates_fcst <- as.numeric(recipe$Analysis$Time$fcst)
+  sdates_fcst <- as.numeric(strsplit(recipe$Analysis$Time$fcst, ' ')[[1]])
  
   #NOTE: time in recipe starts from 0, but in Start() it starts from 1 
   if (store.freq == "monthly_mean") {
-    time <- (as.numeric(recipe$Analysis$Time$leadtimemin):as.numeric(recipe$Analysis$Time$leadtimemax)) + 1
+    time <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + 1
   } else if (store.freq == "daily_mean") {
     if (exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4')) {
       # time selector uses indices, and tune the data afterward to remove 0229
-      time <- get_daily_time_ind(leadtimemin = as.numeric(recipe$Analysis$Time$leadtimemin),
-                                 leadtimemax = as.numeric(recipe$Analysis$Time$leadtimemax),
+      time <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min),
+                                 ftimemax = as.numeric(recipe$Analysis$Time$ftime_max),
                                  initial_month = archive$System[[exp.name]]$initial_month,
                                  sdates = sdates_hcst,
                                  calendar = archive$System[[exp.name]][[store.freq]]$calendar)
@@ -58,8 +58,8 @@ load_datasets <- function(recipe_file) {
 
       sdates_hcst_full <- paste0(sdates_hcst, initial_month_char, '01')
       time_arr <- get_timeidx(sdates = sdates_hcst_full, 
-                              ltmin = as.numeric(recipe$Analysis$Time$leadtimemin), 
-                              ltmax = as.numeric(recipe$Analysis$Time$leadtimemax), 
+                              ltmin = as.numeric(recipe$Analysis$Time$ftime_min), 
+                              ltmax = as.numeric(recipe$Analysis$Time$ftime_max), 
                               time_freq = "daily_mean")
       names(dim(time_arr)) <- c('syear', 'time')
 
@@ -203,11 +203,44 @@ load_datasets <- function(recipe_file) {
   if (!is.null(recipe$Analysis$Time$fcst)) {
 
   #monthly and daily
-  fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst,
-                         '$ensemble$', table, '$var$', grid, version)
-  fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+  multi_path <- FALSE
+  if (is.null(archive$System[[exp.name]]$first_dcppB_syear)) { # only dcppA
+    fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src, "dcppA-hindcast",
+                           '$ensemble$', table, '$var$', grid, version)
+    fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+  } else {
+    if (all(sdates_fcst >= archive$System[[exp.name]]$first_dcppB_syear)) { # only dcppB
+      fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src, "dcppB-forecast",
+                             '$ensemble$', table, '$var$', grid, version)
+      fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+    } else {
+      # Create one path for each sdate
+      #TODO: When *_depends = 'syear' can be more than one, use one path with $dcpp$
+      multi_path <- TRUE
+      path_list <- vector('list', length = length(sdates_fcst))
+      for (i_sdate in 1:length(sdates_fcst)) {
+        if (sdates_fcst[i_sdate] >= archive$System[[exp.name]]$first_dcppB_syear) {
+          path_list[[i_sdate]] <- 
+            list(path = file.path(archive$src, archive$System[[exp.name]]$src, "dcppB-forecast",
+                                  '$ensemble$', table, '$var$', grid, version,
+                                  paste0('$var$_', table, '_*_dcppB-forecast_s', sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc')))
+        } else {
+          path_list[[i_sdate]] <- 
+            list(path = file.path(archive$src, archive$System[[exp.name]]$src, "dcppA-hindcast",
+                                  '$ensemble$', table, '$var$', grid, version,
+                                  paste0('$var$_', table, '_*_dcppA-hindcast_s', sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc')))
+        }
+      }
+    }
+  }
+
+#  fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst,
+#                         '$ensemble$', table, '$var$', grid, version)
+#  fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
 
   # monthly & daily
+  if (!multi_path) {
+
   fcst <- Start(dat = file.path(fcst.path, fcst.files),
                 var = variable,
                 ensemble = member,
@@ -234,6 +267,37 @@ load_datasets <- function(recipe_file) {
                                    time = c('syear', 'chunk')),
                 retrieve = T)
 
+  } else { # multi_path
+  #TODO: time attribute is not correct
+  fcst <- Start(dat = path_list,
+                var = variable,
+                ensemble = member,
+                chunk = 'all',
+                time = indices(time),
+                time_across = 'chunk',
+                merge_across_dims = TRUE,
+                largest_dims_length = need_largest_dims_length,
+                latitude = values(list(lats.min, lats.max)),
+                latitude_reorder = Sort(decreasing = TRUE),
+                longitude = values(list(lons.min, lons.max)),
+                longitude_reorder = circularsort,
+                transform = regrid_params$fcst.transform,
+                transform_extra_cells = 2,
+                transform_params = list(grid = regrid_params$fcst.gridtype, #nc file
+                                        method = regrid_params$fcst.gridmethod),
+                transform_vars = c('latitude', 'longitude'),
+                synonims = list(longitude = c('lon', 'longitude'),
+                                latitude = c('lat', 'latitude')),
+                return_vars = list(latitude = NULL, longitude = NULL,
+                                   time = c('chunk')),
+                retrieve = T)
+  # Reshape dimensions
+  ## Insert syear at 3rd in [dat, var, ensemble, time, latitude, longitude]
+  ## dat should be 1, syear should be length of dat
+  dim(fcst) <- c(dat = 1, var = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[3:6])
+
+  }
+
   # change syear to c(sday, sweek, syear)
   dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7])
   tmp_time_attr <- attr(fcst, 'Variables')$common$time
diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/get_daily_time_ind.R
index d8bf9a37..b574544f 100644
--- a/modules/Loading/get_daily_time_ind.R
+++ b/modules/Loading/get_daily_time_ind.R
@@ -6,7 +6,7 @@
 #
 #NOTE: For the datasets not EC-Earth, we can use array [syear, time] to define time since
 # the Start() call doesn't need time_across = 'chunk' (there is only one chunk)
-get_daily_time_ind <- function(leadtimemin, leadtimemax, initial_month, sdates, calendar) {
+get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calendar) {
   #TODO: calendar & leap year
   # E.g., time = 1:4 means Dec to March, then indices will be c(31:(31+30+31+28+31)) for non-leap yr and c(31:(31+30+31+28), (31+30+31+28+2):(31+30+31+28+2+30)) for leap yr. Calendar needs to be considered too.
 
diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml
index 0661fb7b..2ec20856 100644
--- a/modules/Loading/testing_recipes/recipe_decadal.yml
+++ b/modules/Loading/testing_recipes/recipe_decadal.yml
@@ -9,19 +9,17 @@ Analysis:
   Datasets:
     System: 
       name: EC-Earth3-i4 #CanESM5 
-      member: 'all' #r1i1p1f1,r2i1p1f1,r3i1p1f1
+      member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all'
     Multimodel: no
     Reference:
       name: ERA5 #JRA-55
   Time:
-    sdate:
-    fcst:  2021
-#      fcst_sday: '1101'
-    hcst_start: 1990 #'1993'
-    hcst_end: 1992 #'2016'
+    fcst: 2020 2021
+    hcst_start: 1990 
+    hcst_end: 1992 
 #    season: 'Annual'
-    leadtimemin: 0
-    leadtimemax: 13
+    ftimemin: 0
+    ftimemax: 13
   Region:
     latmin: 10 #-90
     latmax: 20 #90
-- 
GitLab


From da31596562fb82b4938c304e071758d161229da6 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Tue, 12 Jul 2022 22:24:13 +0200
Subject: [PATCH 3/7] Deal with 0229 version (going to remove it later)

---
 modules/Loading/Loading_decadal.R             | 265 ++++++++++--------
 modules/Loading/get_daily_time_ind.R          |  23 +-
 .../testing_recipes/recipe_decadal.yml        |   6 +-
 .../testing_recipes/recipe_decadal_daily.yml  |  16 +-
 4 files changed, 166 insertions(+), 144 deletions(-)

diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R
index e2ce9809..b29b30c5 100644
--- a/modules/Loading/Loading_decadal.R
+++ b/modules/Loading/Loading_decadal.R
@@ -39,13 +39,13 @@ load_datasets <- function(recipe_file) {
 
   # change to: sdates <- dates2load(recipe, logger)
   sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015
-  sdates_fcst <- as.numeric(strsplit(recipe$Analysis$Time$fcst, ' ')[[1]])
+  sdates_fcst <- recipe$Analysis$Time$fcst
  
   #NOTE: time in recipe starts from 0, but in Start() it starts from 1 
   if (store.freq == "monthly_mean") {
     time <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + 1
   } else if (store.freq == "daily_mean") {
-    if (exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4')) {
+    if (exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4', 'HadGEM3-GC31-MM')) {
       # time selector uses indices, and tune the data afterward to remove 0229
       time <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min),
                                  ftimemax = as.numeric(recipe$Analysis$Time$ftime_max),
@@ -55,7 +55,7 @@ load_datasets <- function(recipe_file) {
     } else {
       # use an array [syear, time] as time selector since time_across = 'chunk' is not needed
       initial_month_char <- sprintf('%02d', archive$System[[exp.name]]$initial_month)
-
+      # hcst
       sdates_hcst_full <- paste0(sdates_hcst, initial_month_char, '01')
       time_arr <- get_timeidx(sdates = sdates_hcst_full, 
                               ltmin = as.numeric(recipe$Analysis$Time$ftime_min), 
@@ -63,6 +63,16 @@ load_datasets <- function(recipe_file) {
                               time_freq = "daily_mean")
       names(dim(time_arr)) <- c('syear', 'time')
 
+      # fcst
+      if (!is.null(recipe$Analysis$Time$fcst)) {
+        sdates_fcst_full <- paste0(sdates_fcst, initial_month_char, '01')
+        time_arr_fcst <- get_timeidx(sdates = sdates_fcst_full,
+                                     ltmin = as.numeric(recipe$Analysis$Time$ftime_min),
+                                     ltmax = as.numeric(recipe$Analysis$Time$ftime_max),
+                                     time_freq = "daily_mean")
+        names(dim(time_arr_fcst)) <- c('syear', 'time')
+      }
+
     }
   }
 
@@ -95,10 +105,7 @@ load_datasets <- function(recipe_file) {
   # Only if the time length in each chunk may differ that we need largest_dims_length to be TRUE. Otherwise, set FALSE to increase efficiency.
   need_largest_dims_length <- ifelse(exp.name == 'EC-Earth3-i2', TRUE, FALSE) 
 
-  #time for daily data. If recipe asks for time step, we don't need to calculate here
-#  fdays <- get_days_dcpp(fyears = fyears, season = season, init = 'Nov', calendar = '360-days')
 
-  
 #-------------------------------------------
 # Step 1: Load the hcst
 #-------------------------------------------
@@ -106,71 +113,60 @@ load_datasets <- function(recipe_file) {
   hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst,
                          '$ensemble$', table, '$var$', grid, version)
   hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
-
-  # daily non-EC-Earth data
-  if (store.freq == "daily_mean" & !exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4')) {
-    hcst <- Start(dat = file.path(hcst.path, hcst.files),
-                  var = variable,
-                  ensemble = member,
-                  syear = paste0(sdates_hcst),
-#split_multiselected_dims = T,
-                  chunk = 'all',
-                  chunk_depends = 'syear',
-                  time = time_arr, #indices(time),
-#                  time_across = 'chunk',
-#                  merge_across_dims = TRUE,
-                  largest_dims_length = need_largest_dims_length,
-                  latitude = values(list(lats.min, lats.max)),
-                  latitude_reorder = Sort(decreasing = TRUE),
-                  longitude = values(list(lons.min, lons.max)),
-                  longitude_reorder = circularsort,
-                  transform = regrid_params$fcst.transform,
-                  transform_extra_cells = 2,
-                  transform_params = list(grid = regrid_params$fcst.gridtype, #nc file
-                                          method = regrid_params$fcst.gridmethod),
-                  transform_vars = c('latitude', 'longitude'),
-                  synonims = list(longitude = c('lon', 'longitude'),
-                                  latitude = c('lat', 'latitude')),
-                  return_vars = list(latitude = NULL, longitude = NULL,
-                                     time = c('syear')), #, 'chunk')),
-                  retrieve = T)
-    if (dim(hcst)['chunk'] != 1) stop("hcst loading has problem. The chunk number is > 1")
-    dim(hcst) <- dim(hcst)[-5] #remove chunk dim
+  Start_hcst_arg_list <- list(
+    dat = file.path(hcst.path, hcst.files),
+    var = variable,
+    ensemble = member,
+    syear = paste0(sdates_hcst),
+    chunk = 'all',
+    chunk_depends = 'syear',
+    time = indices(time),
+    time_across = 'chunk',
+    merge_across_dims = TRUE,
+    largest_dims_length = need_largest_dims_length,
+    latitude = values(list(lats.min, lats.max)),
+    latitude_reorder = Sort(decreasing = TRUE),
+    longitude = values(list(lons.min, lons.max)),
+    longitude_reorder = circularsort,
+    transform = regrid_params$fcst.transform,
+    transform_extra_cells = 2,
+    transform_params = list(grid = regrid_params$fcst.gridtype,
+                            method = regrid_params$fcst.gridmethod),
+    transform_vars = c('latitude', 'longitude'),
+    synonims = list(longitude = c('lon', 'longitude'),
+                    latitude = c('lat', 'latitude')),
+    return_vars = list(latitude = NULL, longitude = NULL,
+                       time = c('syear', 'chunk')),
+    retrieve = T)
+
+  # daily & multiple files per sdate
+  if (store.freq == "daily_mean" & !exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4', 'HadGEM3-GC31-MM')) {
+    Start_hcst_arg_list[['time_across']] <- NULL
+    Start_hcst_arg_list[['merge_across_dims']] <- NULL
+    Start_hcst_arg_list[['time']] <- time_arr
+    Start_hcst_arg_list[['return_vars']][['time']] <- 'syear'
+
+    hcst <- do.call(Start, Start_hcst_arg_list)
+
+    if (dim(hcst)['chunk'] != 1) {
+      stop("hcst loading has problem. The chunk number is > 1")
+    } else {
+      dim(hcst) <- dim(hcst)[-5] #remove chunk dim
+    }
 
   } else {
-  # monthly & daily if EC-Earth
-  hcst <- Start(dat = file.path(hcst.path, hcst.files),
-                var = variable,
-                ensemble = member,
-                syear = paste0(sdates_hcst),
-#split_multiselected_dims = T,
-                chunk = 'all',
-                chunk_depends = 'syear',
-                time = indices(time),
-                time_across = 'chunk',
-                merge_across_dims = TRUE,
-                largest_dims_length = need_largest_dims_length,
-                latitude = values(list(lats.min, lats.max)),
-                latitude_reorder = Sort(decreasing = TRUE),
-                longitude = values(list(lons.min, lons.max)),
-                longitude_reorder = circularsort,
-                transform = regrid_params$fcst.transform,
-                transform_extra_cells = 2,
-                transform_params = list(grid = regrid_params$fcst.gridtype, #nc file
-                                        method = regrid_params$fcst.gridmethod),
-                transform_vars = c('latitude', 'longitude'),
-                synonims = list(longitude = c('lon', 'longitude'),
-                                latitude = c('lat', 'latitude')),
-                return_vars = list(latitude = NULL, longitude = NULL, 
-                                   time = c('syear', 'chunk')),
-                retrieve = T)
+    # monthly & daily if EC-Earth
+    hcst <- do.call(Start, Start_hcst_arg_list)
   }
 
   tmp_time_attr <- attr(hcst, 'Variables')$common$time
 
+#------------------------------------------------------------------
   # If daily data with 0229, need to remove 0229 and extra time
-  #TODO: Make Start() able to read the correct time indices for each sdate, so we 
-  #      don't need this awkward correction.
+  #TODO: Make Start() able to read the correct time indices for each sdate (for case1),
+  #      so we don't need this awkward correction.
+  #NOTE: It is not critical though. Having one more or one less day is acceptable.
+  #      If we need to use Compute(), just ignore this part.
   if (store.freq == "daily_mean" & any(format(tmp_time_attr, "%m%d") == "0229")) {
     # Save hcst attributes
     hcst_attr <- attributes(hcst)[-1]  # without $dim
@@ -184,6 +180,7 @@ load_datasets <- function(recipe_file) {
     tmp_time_attr <- correct_daily_for_leap(data = NULL, time_attr = tmp_time_attr, return_time = T)$time_attr
     attr(hcst, 'Variables')$common$time <- tmp_time_attr
   }
+#------------------------------------------------------------------
 
   # change syear to c(sday, sweek, syear)
   dim(hcst) <- c(dim(hcst)[1:3], sday = 1, sweek = 1, dim(hcst)[4:7])
@@ -202,31 +199,33 @@ load_datasets <- function(recipe_file) {
 #-------------------------------------------
   if (!is.null(recipe$Analysis$Time$fcst)) {
 
-  #monthly and daily
+  # Define path (monthly and daily)
   multi_path <- FALSE
-  if (is.null(archive$System[[exp.name]]$first_dcppB_syear)) { # only dcppA
-    fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src, "dcppA-hindcast",
+  if (is.null(archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppA
+    fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst,
                            '$ensemble$', table, '$var$', grid, version)
     fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+
   } else {
-    if (all(sdates_fcst >= archive$System[[exp.name]]$first_dcppB_syear)) { # only dcppB
-      fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src, "dcppB-forecast",
+    if (all(sdates_fcst >= archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppB
+      fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst,
                              '$ensemble$', table, '$var$', grid, version)
       fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+
     } else {
       # Create one path for each sdate
       #TODO: When *_depends = 'syear' can be more than one, use one path with $dcpp$
       multi_path <- TRUE
       path_list <- vector('list', length = length(sdates_fcst))
       for (i_sdate in 1:length(sdates_fcst)) {
-        if (sdates_fcst[i_sdate] >= archive$System[[exp.name]]$first_dcppB_syear) {
+        if (sdates_fcst[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear) {
           path_list[[i_sdate]] <- 
-            list(path = file.path(archive$src, archive$System[[exp.name]]$src, "dcppB-forecast",
+            list(path = file.path(archive$src, archive$System[[exp.name]]$src$fcst,
                                   '$ensemble$', table, '$var$', grid, version,
                                   paste0('$var$_', table, '_*_dcppB-forecast_s', sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc')))
         } else {
           path_list[[i_sdate]] <- 
-            list(path = file.path(archive$src, archive$System[[exp.name]]$src, "dcppA-hindcast",
+            list(path = file.path(archive$src, archive$System[[exp.name]]$src$hcst,
                                   '$ensemble$', table, '$var$', grid, version,
                                   paste0('$var$_', table, '_*_dcppA-hindcast_s', sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc')))
         }
@@ -238,66 +237,92 @@ load_datasets <- function(recipe_file) {
 #                         '$ensemble$', table, '$var$', grid, version)
 #  fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
 
+
   # monthly & daily
   if (!multi_path) {
+    #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst
+    Start_fcst_arg_list <- Start_hcst_arg_list
+    Start_fcst_arg_list[['dat']] <- file.path(fcst.path, fcst.files)
+    Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst)
+    if (is.array(Start_fcst_arg_list[['time']])) {
+      Start_fcst_arg_list[['time']] <- time_arr_fcst
+    }
+    fcst <- do.call(Start, Start_fcst_arg_list)
 
-  fcst <- Start(dat = file.path(fcst.path, fcst.files),
-                var = variable,
-                ensemble = member,
-                syear = paste0(sdates_fcst),
-#split_multiselected_dims = T,
-                chunk = 'all',
-                chunk_depends = 'syear',
-                time = indices(time),
-                time_across = 'chunk',
-                merge_across_dims = TRUE,
-                largest_dims_length = need_largest_dims_length,
-                latitude = values(list(lats.min, lats.max)),
-                latitude_reorder = Sort(decreasing = TRUE),
-                longitude = values(list(lons.min, lons.max)),
-                longitude_reorder = circularsort,
-                transform = regrid_params$fcst.transform,
-                transform_extra_cells = 2,
-                transform_params = list(grid = regrid_params$fcst.gridtype, #nc file
-                                        method = regrid_params$fcst.gridmethod),
-                transform_vars = c('latitude', 'longitude'),
-                synonims = list(longitude = c('lon', 'longitude'),
-                                latitude = c('lat', 'latitude')),
-                return_vars = list(latitude = NULL, longitude = NULL,
-                                   time = c('syear', 'chunk')),
-                retrieve = T)
+    if ('chunk' %in% names(dim(fcst))) {
+      if (dim(fcst)['chunk'] != 1) {
+        stop("fcst loading has problem. The chunk number is > 1")
+      } else {
+        dim(fcst) <- dim(fcst)[-5] #remove chunk dim
+      }
+    }
 
   } else { # multi_path
   #TODO: time attribute is not correct
-  fcst <- Start(dat = path_list,
-                var = variable,
-                ensemble = member,
-                chunk = 'all',
-                time = indices(time),
-                time_across = 'chunk',
-                merge_across_dims = TRUE,
-                largest_dims_length = need_largest_dims_length,
-                latitude = values(list(lats.min, lats.max)),
-                latitude_reorder = Sort(decreasing = TRUE),
-                longitude = values(list(lons.min, lons.max)),
-                longitude_reorder = circularsort,
-                transform = regrid_params$fcst.transform,
-                transform_extra_cells = 2,
-                transform_params = list(grid = regrid_params$fcst.gridtype, #nc file
-                                        method = regrid_params$fcst.gridmethod),
-                transform_vars = c('latitude', 'longitude'),
-                synonims = list(longitude = c('lon', 'longitude'),
-                                latitude = c('lat', 'latitude')),
-                return_vars = list(latitude = NULL, longitude = NULL,
-                                   time = c('chunk')),
-                retrieve = T)
+
+    Start_fcst_arg_list <- Start_hcst_arg_list
+    Start_fcst_arg_list[['dat']] <- path_list
+    Start_fcst_arg_list[['syear']] <- NULL
+    Start_fcst_arg_list[['chunk_depends']] <- NULL
+    if ('syear' %in% Start_hcst_arg_list[['return_vars']][['time']]) {
+      remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear')
+      Start_fcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind]
+    }
+
+    fcst <- do.call(Start, Start_fcst_arg_list)
+
+#  fcst <- Start(dat = path_list,
+#                var = variable,
+#                ensemble = member,
+#                chunk = 'all',
+#                time = indices(time),
+#                time_across = 'chunk',
+#                merge_across_dims = TRUE,
+#                largest_dims_length = need_largest_dims_length,
+#                latitude = values(list(lats.min, lats.max)),
+#                latitude_reorder = Sort(decreasing = TRUE),
+#                longitude = values(list(lons.min, lons.max)),
+#                longitude_reorder = circularsort,
+#                transform = regrid_params$fcst.transform,
+#                transform_extra_cells = 2,
+#                transform_params = list(grid = regrid_params$fcst.gridtype, #nc file
+#                                        method = regrid_params$fcst.gridmethod),
+#                transform_vars = c('latitude', 'longitude'),
+#                synonims = list(longitude = c('lon', 'longitude'),
+#                                latitude = c('lat', 'latitude')),
+#                return_vars = list(latitude = NULL, longitude = NULL,
+#                                   time = c('chunk')),
+#                retrieve = T)
+
   # Reshape dimensions
   ## Insert syear at 3rd in [dat, var, ensemble, time, latitude, longitude]
   ## dat should be 1, syear should be length of dat
-  dim(fcst) <- c(dat = 1, var = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[3:6])
+    dim(fcst) <- c(dat = 1, var = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[3:6])
 
   }
 
+  tmp_time_attr <- attr(fcst, 'Variables')$common$time
+#------------------------------------------------------------------
+  # If daily data with 0229, need to remove 0229 and extra time
+  #TODO: Make Start() able to read the correct time indices for each sdate (for case1),
+  #      so we don't need this awkward correction.
+  #NOTE: It is not critical though. Having one more or one less day is acceptable.
+  #      If we need to use Compute(), just ignore this part.
+  if (store.freq == "daily_mean" & any(format(tmp_time_attr, "%m%d") == "0229")) {
+    # Save fcst attributes
+    fcst_attr <- attributes(fcst)[-1]  # without $dim
+    new_fcst <- Apply(fcst,
+                      target_dims = c('syear', 'time'),
+                      output_dims = list(data = c('syear', 'time')),
+                      fun = correct_daily_for_leap,
+                      time_attr = tmp_time_attr, return_time = F)$data
+    fcst <- Reorder(new_fcst, names(dim(fcst)))
+    attributes(fcst) <- c(attributes(fcst), fcst_attr)
+    tmp_time_attr <- correct_daily_for_leap(data = NULL, time_attr = tmp_time_attr, return_time = T)$time_attr
+    attr(fcst, 'Variables')$common$time <- tmp_time_attr
+  }
+#------------------------------------------------------------------
+
   # change syear to c(sday, sweek, syear)
   dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7])
   tmp_time_attr <- attr(fcst, 'Variables')$common$time
diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/get_daily_time_ind.R
index b574544f..59f60268 100644
--- a/modules/Loading/get_daily_time_ind.R
+++ b/modules/Loading/get_daily_time_ind.R
@@ -1,33 +1,32 @@
 # This function is for decadal-daily data loading.
-# The time input (leadtimemin & leadtimemax) units in the recipe is month. For example,
-# leadtimemin = 0 means the first month, and leadtimemax = 2 means the 3rd month.
-# For daily data, if initial month is November, leadtimemin = 0 equals to c(1:30), 
-# and leadtimemax = 2, which means January next year, equals to c(31+31:31+31+30).
+# The time input (ftimemin & ftimemax) units in the recipe is month. For example,
+# ftimemin = 0 means the first month, and ftimemax = 2 means the 3rd month.
+# For daily data, if initial month is November, ftimemin = 0 equals to c(1:30), 
+# and ftimemax = 2, which means January next year, equals to c(31+31:31+31+30).
 #
-#NOTE: For the datasets not EC-Earth, we can use array [syear, time] to define time since
+#NOTE: For the datasets that having one file per sdate, we can use array [syear, time] to define time since
 # the Start() call doesn't need time_across = 'chunk' (there is only one chunk)
 get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calendar) {
-  #TODO: calendar & leap year
   # E.g., time = 1:4 means Dec to March, then indices will be c(31:(31+30+31+28+31)) for non-leap yr and c(31:(31+30+31+28), (31+30+31+28+2):(31+30+31+28+2+30)) for leap yr. Calendar needs to be considered too.
 
   if (!calendar %in% c('360-day', '365_day', 'noleap', 'standard', 'proleptic_gregorian',
                        'gregorian')) 
     stop("The calendar is not recognized. Please contact maintainers.")
 
-  total_months <- (initial_month + leadtimemin):(initial_month + leadtimemax)
+  total_months <- (initial_month + ftimemin):(initial_month + ftimemax)
   total_months <- ifelse(total_months > 12, total_months %% 12, total_months)
   total_months <- ifelse(total_months == 0, 12, total_months)
 
   if (calendar == '360-day') {
-    daily_time_ind <- 1:(30 * length(total_months))
+    daily_time_ind <- (ftimemin * 30 + 1): ((ftimemin * 30) + (ftimemax - ftimemin + 1) * 30)
 
   } else {
 
     total_days_sum <- sum(days_in_month(total_months))
-    if (leadtimemin == 0) {
+    if (ftimemin == 0) {
       daily_time_ind <- 1:total_days_sum
     } else {
-      before_start_months <- initial_month:(initial_month + leadtimemin - 1)
+      before_start_months <- initial_month:(initial_month + ftimemin - 1)
       before_start_months <- ifelse(before_start_months > 12, before_start_months %% 12, before_start_months)
       before_start_months <- ifelse(before_start_months == 0, 12, before_start_months)
       before_start_total_days_sum <- sum(days_in_month(before_start_months))
@@ -41,8 +40,8 @@ get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calend
 
 #      leap <- leap_year(sdates)
       sdates_full <- ymd(paste0(sdates, sprintf('%02d', initial_month), '01'))
-      idx_min <- sdates_full + months(leadtimemin)
-      idx_max <- sdates_full + months(leadtimemax + 1) - days(1)
+      idx_min <- sdates_full + months(ftimemin)
+      idx_max <- sdates_full + months(ftimemax + 1) - days(1)
 
       day0229 <- rep(NA, length(sdates))
       for (i_sdate in 1:length(sdates_full)) {
diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml
index 2ec20856..d4e568d1 100644
--- a/modules/Loading/testing_recipes/recipe_decadal.yml
+++ b/modules/Loading/testing_recipes/recipe_decadal.yml
@@ -14,12 +14,12 @@ Analysis:
     Reference:
       name: ERA5 #JRA-55
   Time:
-    fcst: 2020 2021
+    fcst: [2020,2021]
     hcst_start: 1990 
     hcst_end: 1992 
 #    season: 'Annual'
-    ftimemin: 0
-    ftimemax: 13
+    ftime_min: 0
+    ftime_max: 13
   Region:
     latmin: 10 #-90
     latmax: 20 #90
diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml
index 565ce6a8..91a957df 100644
--- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml
+++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml
@@ -8,20 +8,18 @@ Analysis:
     freq: daily_mean
   Datasets:
     System: 
-      name: MIROC6 #EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 
-      member: r1i1p1f1,r2i1p1f1,r3i1p1f1 #'all'
+      name: EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 
+      member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all'
     Multimodel: no
     Reference:
       name: ERA5
   Time:
-    sdate:
-    fcst:
-#      fcst_sday: '1101'
-    hcst_start: 1990
-    hcst_end: 1992
+    fcst: [2020,2021]
+    hcst_start: 2016
+    hcst_end: 2019
     season: 'Annual'
-    leadtimemin: 2
-    leadtimemax: 4
+    ftime_min: 2
+    ftime_max: 4
   Region:
     latmin: 10 #-90
     latmax: 20 #90
-- 
GitLab


From 1800a477c4c040c2bf89cc09382727db22853404 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Tue, 12 Jul 2022 22:43:00 +0200
Subject: [PATCH 4/7] decadal monthly loading done (modification for multiple
 fcst years and flexibility in dcppA & dcppB

---
 conf/archive_decadal.yml                   |  13 +-
 modules/Loading/Loading_decadal.R          |  13 +-
 tests/recipes/recipe-decadal_daily_1.yml   |  44 +++++
 tests/recipes/recipe-decadal_monthly_1.yml |  17 +-
 tests/testthat/test-decadal_daily.R        | 197 +++++++++++++++++++++
 5 files changed, 267 insertions(+), 17 deletions(-)
 create mode 100644 tests/recipes/recipe-decadal_daily_1.yml
 create mode 100644 tests/testthat/test-decadal_daily.R

diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml
index aff4330a..28ff6028 100644
--- a/conf/archive_decadal.yml
+++ b/conf/archive_decadal.yml
@@ -41,8 +41,10 @@ archive:
 
 # ---- 
       EC-Earth3-i4:
-        src: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/"
-        first_dcppB_syear: 2021 
+        src: 
+          hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/"
+          fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/"
+          first_dcppB_syear: 2021 
 #          hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/"
 #          fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/"
 #        src: {"1960:2020": "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/",
@@ -78,6 +80,7 @@ archive:
         src: 
           hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/"
           fcst: "exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast/"
+          first_dcppB_syear: 2019
         monthly_mean:
           table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"}
           grid: {"tas":"gr", "psl":"gr", "pr":"gr", "ts":"gr", "tos":"gr"}
@@ -115,6 +118,7 @@ archive:
         src: 
           hcst: "exp/canesm5/cmip6-dcppA-hindcast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/"
           fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/"
+          first_dcppB_syear: 2020
         monthly_mean:
           table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"}
 
@@ -150,6 +154,7 @@ archive:
         src: 
           hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/"
           fcst: "exp/CMIP6/dcppB-forecast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppB-forecast/"
+          first_dcppB_syear: 2020
         monthly_mean:
           table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "prc":"Amon", "ts":"Amon"}
           grid: {"tas":"gn", "pr":"gn", "psl":"gn", "prc":"gn", "ts":"gn"}
@@ -283,7 +288,9 @@ archive:
 
 # ----
       NorCPM1-i2:
-        src: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/"
+        src:
+          hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/"
+          fcst:
         monthly_mean:
           table: {"pr":"Amon", "psl":"Amon"}
           grid: {"pr":"gn", "psl":"gn"}
diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R
index b29b30c5..d5ec1fcf 100644
--- a/modules/Loading/Loading_decadal.R
+++ b/modules/Loading/Loading_decadal.R
@@ -295,9 +295,9 @@ load_datasets <- function(recipe_file) {
 #                retrieve = T)
 
   # Reshape dimensions
-  ## Insert syear at 3rd in [dat, var, ensemble, time, latitude, longitude]
+  ## Insert syear at 4th in [dat, var, ensemble, time, latitude, longitude]
   ## dat should be 1, syear should be length of dat
-    dim(fcst) <- c(dat = 1, var = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[3:6])
+    dim(fcst) <- c(dat = 1, dim(fcst)[2:3], syear = as.numeric(dim(fcst))[1], dim(fcst)[4:6])
 
   }
 
@@ -326,10 +326,13 @@ load_datasets <- function(recipe_file) {
   # change syear to c(sday, sweek, syear)
   dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7])
   tmp_time_attr <- attr(fcst, 'Variables')$common$time
-  if (!identical(dim(tmp_time_attr), dim(fcst)[6:7])) {
-    stop("fcst has problem in matching data and time attr dimension.")
+  #TODO: This check cannot pass if multi_path
+  if (!multi_path) {
+    if (!identical(dim(tmp_time_attr), dim(fcst)[6:7])) {
+      stop("fcst has problem in matching data and time attr dimension.")
+    }
+    dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(fcst)[6:7])
   }
-  dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(fcst)[6:7])
 
   # Change class from startR_array to s2dv_cube
   suppressWarnings(
diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml
new file mode 100644
index 00000000..ed81146f
--- /dev/null
+++ b/tests/recipes/recipe-decadal_daily_1.yml
@@ -0,0 +1,44 @@
+Description:
+  Author: An-Chi Ho
+  '': split version
+Analysis:
+  Horizon: Decadal
+  Variables:
+    name: tas
+    freq: daily_mean
+  Datasets:
+    System: 
+      name: MIROC6 #EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 
+      member: r1i1p1f1,r2i1p1f1,r3i1p1f1 #'all'
+    Multimodel: no
+    Reference:
+      name: ERA5
+  Time:
+    fcst: [2017,2018]
+    hcst_start: 1990
+    hcst_end: 1992
+    season: 'Annual'
+    ftime_min: 2
+    ftime_max: 4
+  Region:
+    latmin: 10 #-90
+    latmax: 20 #90
+    lonmin: 0
+    lonmax: 15 #359.9
+  Regrid:
+    method: bilinear
+    type: to_system #to_reference
+  Workflow:
+    Calibration:
+      method: qmap
+    Skill:
+      metric: RPSS
+    Indicators:
+      index: FALSE
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/
+  code_dir: /esarchive/scratch/aho/git/auto-s2s/
+
diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml
index 5acd642d..8312f81a 100644
--- a/tests/recipes/recipe-decadal_monthly_1.yml
+++ b/tests/recipes/recipe-decadal_monthly_1.yml
@@ -8,7 +8,7 @@ Analysis:
     freq: monthly_mean
   Datasets:
     System: 
-      name: EC-Earth3-i4 #CanESM5 
+      name: EC-Earth3-i4
       member: r1i4p1f1,r2i4p1f1
     Multimodel: no
     Reference:
@@ -16,17 +16,16 @@ Analysis:
   Time:
     sdate:
     fcst:  2021
-#      fcst_sday: '1101'
-    hcst_start: 1991 #'1993'
-    hcst_end: 1994 #'2016'
+    hcst_start: 1991 
+    hcst_end: 1994 
 #    season: 'Annual'
-    leadtimemin: 0
-    leadtimemax: 2
+    ftime_min: 0
+    ftime_max: 2
   Region:
-    latmin: 17 #-90
-    latmax: 20 #90
+    latmin: 17 
+    latmax: 20 
     lonmin: 12
-    lonmax: 15 #359.9
+    lonmax: 15 
   Regrid:
     method: bilinear
     type: to_system #to_reference
diff --git a/tests/testthat/test-decadal_daily.R b/tests/testthat/test-decadal_daily.R
new file mode 100644
index 00000000..27e29d6c
--- /dev/null
+++ b/tests/testthat/test-decadal_daily.R
@@ -0,0 +1,197 @@
+context("Decadal daily data - 1")
+
+###########################################
+
+source("modules/Loading/Loading_decadal.R")
+source("modules/Calibration/Calibration.R")
+source("modules/Skill/Skill.R")
+source("modules/Saving/Save_data.R")
+
+recipe_file <- "tests/recipes/recipe-decadal_daily_1.yml"
+
+# Load datasets
+suppressWarnings({invisible(capture.output(
+data <- load_datasets(recipe_file)
+))})
+
+#recipe <- read_yaml(recipe_file)
+## Calibrate datasets
+#suppressWarnings({invisible(capture.output(
+#  calibrated_data <- calibrate_datasets(data, recipe)
+#))})
+#
+## Compute skill metrics
+#suppressWarnings({invisible(capture.output(
+#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs,
+#                                       recipe, na.rm = T, ncores = 4)
+#))})
+
+#======================================
+
+test_that("1. Loading", {
+
+expect_equal(
+is.list(data),
+TRUE
+)
+expect_equal(
+names(data),
+c("hcst", "fcst", "obs")
+)
+expect_equal(
+class(data$hcst),
+"s2dv_cube"
+)
+expect_equal(
+class(data$fcst),
+"s2dv_cube"
+)
+expect_equal(
+class(data$obs),
+"s2dv_cube"
+)
+expect_equal(
+names(data$hcst),
+c("data", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters")
+)
+expect_equal(
+names(data$hcst),
+names(data$fcst)
+)
+expect_equal(
+names(data$hcst),
+names(data$obs)
+)
+expect_equal(
+dim(data$hcst$data),
+c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 90, latitude = 7, longitude = 11)
+)
+expect_equal(
+dim(data$fcst$data),
+c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 90, latitude = 7, longitude = 11)
+)
+expect_equal(
+dim(data$hcst$Dates$start),
+c(sday = 1, sweek = 1, syear = 3, time = 90)
+)
+expect_equal(
+as.vector(drop(data$hcst$data)[1, 2:3, 1:3, 1, 1]),
+c(298.5787, 293.6479, 298.5042, 293.7802, 297.8072, 293.0764),
+tolerance = 0.0001
+)
+expect_equal(
+mean(data$hcst$data),
+301.2666,
+tolerance = 0.0001
+)
+expect_equal(
+range(data$hcst$data),
+c(285.9326, 314.9579),
+tolerance = 0.0001
+)
+expect_equal(
+(data$hcst$Dates$start)[1],
+as.POSIXct("1991-01-01 12:00:00", tz = 'UTC')
+)
+expect_equal(
+(data$hcst$Dates$start)[2],
+as.POSIXct("1992-01-01 12:00:00", tz = 'UTC')
+)
+expect_equal(
+(data$hcst$Dates$start)[5],
+as.POSIXct("1992-01-02 12:00:00", tz = 'UTC')
+)
+expect_equal(
+(data$hcst$Dates$start)[1, 1, 3, 90],
+as.POSIXct("1993-03-31 12:00:00", tz = 'UTC')
+)
+
+})
+
+##======================================
+#test_that("2. Calibration", {
+#
+#expect_equal(
+#is.list(calibrated_data),
+#TRUE
+#)
+#expect_equal(
+#names(calibrated_data),
+#c("hcst", "fcst")
+#)
+#expect_equal(
+#class(calibrated_data$hcst),
+#"s2dv_cube"
+#)
+#expect_equal(
+#class(calibrated_data$fcst),
+#"s2dv_cube"
+#)
+#expect_equal(
+#dim(calibrated_data$hcst$data),
+#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4)
+#)
+#expect_equal(
+#dim(calibrated_data$fcst$data),
+#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4)
+#)
+#expect_equal(
+#mean(calibrated_data$fcst$data),
+#291.8375,
+#tolerance = 0.0001
+#)
+#expect_equal(
+#mean(calibrated_data$hcst$data),
+#289.6679,
+#tolerance = 0.0001
+#)
+#expect_equal(
+#as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]),
+#c(286.3895, 286.6408, 290.6652, 288.3759),
+#tolerance = 0.0001
+#)
+#expect_equal(
+#range(calibrated_data$fcst$data),
+#c(287.2173, 297.4578),
+#tolerance = 0.0001
+#)
+#
+#})
+#
+#
+##======================================
+#test_that("3. Metrics", {
+#
+#expect_equal(
+#is.list(skill_metrics),
+#TRUE
+#)
+#expect_equal(
+#names(skill_metrics),
+#c("rpss", "rpss_significance")
+#)
+#expect_equal(
+#class(skill_metrics$rpss[[1]]),
+#"array"
+#)
+#expect_equal(
+#dim(skill_metrics$rpss[[1]]),
+#c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4)
+#)
+#expect_equal(
+#dim(skill_metrics$rpss_significance[[1]]),
+#dim(skill_metrics$rpss[[1]])
+#)
+#expect_equal(
+#as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]),
+#c(-0.2857143, -1.2500000, -1.8928571),
+#tolerance = 0.0001
+#)
+#expect_equal(
+#as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]),
+#rep(FALSE, 3)
+#)
+#
+#})
+
+
-- 
GitLab


From 2cacb4ff481d0ee6478fd373e65b23f2734325b6 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Tue, 12 Jul 2022 23:18:24 +0200
Subject: [PATCH 5/7] Add one monthly case

---
 tests/recipes/recipe-decadal_monthly_2.yml |  44 +++++
 tests/testthat/test-decadal_monthly_2.R    | 214 +++++++++++++++++++++
 2 files changed, 258 insertions(+)
 create mode 100644 tests/recipes/recipe-decadal_monthly_2.yml
 create mode 100644 tests/testthat/test-decadal_monthly_2.R

diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml
new file mode 100644
index 00000000..040bbe3e
--- /dev/null
+++ b/tests/recipes/recipe-decadal_monthly_2.yml
@@ -0,0 +1,44 @@
+Description:
+  Author: An-Chi Ho
+  '': split version
+Analysis:
+  Horizon: Decadal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System: 
+      name: EC-Earth3-i4 #CanESM5 
+      member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all'
+    Multimodel: no
+    Reference:
+      name: ERA5 #JRA-55
+  Time:
+    fcst: [2020,2021]
+    hcst_start: 1990 
+    hcst_end: 1992 
+#    season: 'Annual'
+    ftime_min: 0
+    ftime_max: 13
+  Region:
+    latmin: -60 #-90
+    latmax: -55 #90
+    lonmin: -2
+    lonmax: 2 #359.9
+  Regrid:
+    method: bilinear
+    type: to_system #to_reference
+  Workflow:
+    Calibration:
+      method: bias
+    Skill:
+      metric: RPSS
+    Indicators:
+      index: FALSE
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/
+  code_dir: /esarchive/scratch/aho/git/auto-s2s/
+
diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R
new file mode 100644
index 00000000..24aeac6b
--- /dev/null
+++ b/tests/testthat/test-decadal_monthly_2.R
@@ -0,0 +1,214 @@
+context("Decadal monthly data - 2")
+
+###########################################
+
+source("modules/Loading/Loading_decadal.R")
+source("modules/Calibration/Calibration.R")
+source("modules/Skill/Skill.R")
+source("modules/Saving/Save_data.R")
+
+recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml"
+
+# Load datasets
+suppressWarnings({invisible(capture.output(
+data <- load_datasets(recipe_file)
+))})
+
+#recipe <- read_yaml(recipe_file)
+## Calibrate datasets
+#suppressWarnings({invisible(capture.output(
+#  calibrated_data <- calibrate_datasets(data, recipe)
+#))})
+#
+## Compute skill metrics
+#suppressWarnings({invisible(capture.output(
+#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs,
+#                                       recipe, na.rm = T, ncores = 4)
+#))})
+
+#======================================
+
+test_that("1. Loading", {
+
+expect_equal(
+is.list(data),
+TRUE
+)
+expect_equal(
+names(data),
+c("hcst", "fcst", "obs")
+)
+expect_equal(
+class(data$hcst),
+"s2dv_cube"
+)
+expect_equal(
+class(data$fcst),
+"s2dv_cube"
+)
+expect_equal(
+class(data$obs),
+"s2dv_cube"
+)
+expect_equal(
+names(data$hcst),
+c("data", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters")
+)
+expect_equal(
+names(data$hcst),
+names(data$fcst)
+)
+expect_equal(
+names(data$hcst),
+names(data$obs)
+)
+expect_equal(
+dim(data$hcst$data),
+c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 14, latitude = 8, longitude = 5)
+)
+expect_equal(
+dim(data$fcst$data),
+c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 14, latitude = 8, longitude = 5)
+)
+expect_equal(
+dim(data$hcst$Dates$start),
+c(sday = 1, sweek = 1, syear = 3, time = 14)
+)
+#expect_equal(
+#dim(data$fcst$Dates$start),
+#c(time = 14)
+#)
+# hcst data
+expect_equal(
+as.vector(drop(data$hcst$data)[1, , 1:2, 2, 2]),
+c(272.8613, 271.0689, 270.8007, 273.5594, 272.1561, 272.8729),
+tolerance = 0.0001
+)
+expect_equal(
+mean(data$hcst$data),
+269.8822,
+tolerance = 0.0001
+)
+expect_equal(
+range(data$hcst$data),
+c(253.8541, 276.6805),
+tolerance = 0.0001
+)
+# fcst data
+expect_equal(
+as.vector(drop(data$fcst$data)[1, , 1:2, 2, 2]),
+c(271.7708, 272.2929, 272.4980, 273.3616),
+tolerance = 0.0001
+)
+expect_equal(
+mean(data$fcst$data),
+271.2158,
+tolerance = 0.0001
+)
+
+expect_equal(
+(data$hcst$Dates$start)[1],
+as.POSIXct("1990-11-16", tz = 'UTC')
+)
+expect_equal(
+(data$hcst$Dates$start)[2],
+as.POSIXct("1991-11-16", tz = 'UTC')
+)
+expect_equal(
+(data$hcst$Dates$start)[5],
+as.POSIXct("1991-12-16 12:00:00", tz = 'UTC')
+)
+expect_equal(
+(data$hcst$Dates$start)[10],
+as.POSIXct("1991-02-15", tz = 'UTC')
+)
+
+})
+
+#======================================
+#test_that("2. Calibration", {
+#
+#expect_equal(
+#is.list(calibrated_data),
+#TRUE
+#)
+#expect_equal(
+#names(calibrated_data),
+#c("hcst", "fcst")
+#)
+#expect_equal(
+#class(calibrated_data$hcst),
+#"s2dv_cube"
+#)
+#expect_equal(
+#class(calibrated_data$fcst),
+#"s2dv_cube"
+#)
+#expect_equal(
+#dim(calibrated_data$hcst$data),
+#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4)
+#)
+#expect_equal(
+#dim(calibrated_data$fcst$data),
+#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4)
+#)
+#expect_equal(
+#mean(calibrated_data$fcst$data),
+#291.8375,
+#tolerance = 0.0001
+#)
+#expect_equal(
+#mean(calibrated_data$hcst$data),
+#289.6679,
+#tolerance = 0.0001
+#)
+#expect_equal(
+#as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]),
+#c(286.3895, 286.6408, 290.6652, 288.3759),
+#tolerance = 0.0001
+#)
+#expect_equal(
+#range(calibrated_data$fcst$data),
+#c(287.2173, 297.4578),
+#tolerance = 0.0001
+#)
+#
+#})
+#
+#
+##======================================
+#test_that("3. Metrics", {
+#
+#expect_equal(
+#is.list(skill_metrics),
+#TRUE
+#)
+#expect_equal(
+#names(skill_metrics),
+#c("rpss", "rpss_significance")
+#)
+#expect_equal(
+#class(skill_metrics$rpss[[1]]),
+#"array"
+#)
+#expect_equal(
+#dim(skill_metrics$rpss[[1]]),
+#c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4)
+#)
+#expect_equal(
+#dim(skill_metrics$rpss_significance[[1]]),
+#dim(skill_metrics$rpss[[1]])
+#)
+#expect_equal(
+#as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]),
+#c(-0.2857143, -1.2500000, -1.8928571),
+#tolerance = 0.0001
+#)
+#expect_equal(
+#as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]),
+#rep(FALSE, 3)
+#)
+#
+#})
+#
+#
-- 
GitLab


From 6ffacf542a45e557480a66725a279a9281239f20 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Wed, 13 Jul 2022 11:00:02 +0200
Subject: [PATCH 6/7] Remove leap year control and refine unit tests

---
 modules/Loading/Loading_decadal.R             | 155 +++---------------
 modules/Loading/get_daily_time_ind.R          |  61 +++----
 ...decadal_daily.R => test-decadal_daily_1.R} |  23 +++
 ...dal_monthly.R => test-decadal_monthly_1.R} |   0
 tests/testthat/test-decadal_monthly_2.R       |   2 +-
 5 files changed, 77 insertions(+), 164 deletions(-)
 rename tests/testthat/{test-decadal_daily.R => test-decadal_daily_1.R} (88%)
 rename tests/testthat/{test-decadal_monthly.R => test-decadal_monthly_1.R} (100%)

diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R
index d5ec1fcf..6ff9023a 100644
--- a/modules/Loading/Loading_decadal.R
+++ b/modules/Loading/Loading_decadal.R
@@ -43,37 +43,14 @@ load_datasets <- function(recipe_file) {
  
   #NOTE: time in recipe starts from 0, but in Start() it starts from 1 
   if (store.freq == "monthly_mean") {
-    time <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + 1
-  } else if (store.freq == "daily_mean") {
-    if (exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4', 'HadGEM3-GC31-MM')) {
-      # time selector uses indices, and tune the data afterward to remove 0229
-      time <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min),
-                                 ftimemax = as.numeric(recipe$Analysis$Time$ftime_max),
-                                 initial_month = archive$System[[exp.name]]$initial_month,
-                                 sdates = sdates_hcst,
-                                 calendar = archive$System[[exp.name]][[store.freq]]$calendar)
-    } else {
-      # use an array [syear, time] as time selector since time_across = 'chunk' is not needed
-      initial_month_char <- sprintf('%02d', archive$System[[exp.name]]$initial_month)
-      # hcst
-      sdates_hcst_full <- paste0(sdates_hcst, initial_month_char, '01')
-      time_arr <- get_timeidx(sdates = sdates_hcst_full, 
-                              ltmin = as.numeric(recipe$Analysis$Time$ftime_min), 
-                              ltmax = as.numeric(recipe$Analysis$Time$ftime_max), 
-                              time_freq = "daily_mean")
-      names(dim(time_arr)) <- c('syear', 'time')
-
-      # fcst
-      if (!is.null(recipe$Analysis$Time$fcst)) {
-        sdates_fcst_full <- paste0(sdates_fcst, initial_month_char, '01')
-        time_arr_fcst <- get_timeidx(sdates = sdates_fcst_full,
-                                     ltmin = as.numeric(recipe$Analysis$Time$ftime_min),
-                                     ltmax = as.numeric(recipe$Analysis$Time$ftime_max),
-                                     time_freq = "daily_mean")
-        names(dim(time_arr_fcst)) <- c('syear', 'time')
-      }
+    time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + 1
 
-    }
+  } else if (store.freq == "daily_mean") {
+    time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min),
+                                   ftimemax = as.numeric(recipe$Analysis$Time$ftime_max),
+                                   initial_month = archive$System[[exp.name]]$initial_month,
+                                   sdates = sdates_hcst,
+                                   calendar = archive$System[[exp.name]][[store.freq]]$calendar)
   }
 
 #NOTE: May be used in the future
@@ -106,13 +83,14 @@ load_datasets <- function(recipe_file) {
   need_largest_dims_length <- ifelse(exp.name == 'EC-Earth3-i2', TRUE, FALSE) 
 
 
-#-------------------------------------------
-# Step 1: Load the hcst
-#-------------------------------------------
+  #-------------------------------------------
+  # Step 1: Load the hcst
+  #-------------------------------------------
   #monthly and daily
   hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst,
                          '$ensemble$', table, '$var$', grid, version)
   hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+
   Start_hcst_arg_list <- list(
     dat = file.path(hcst.path, hcst.files),
     var = variable,
@@ -120,7 +98,7 @@ load_datasets <- function(recipe_file) {
     syear = paste0(sdates_hcst),
     chunk = 'all',
     chunk_depends = 'syear',
-    time = indices(time),
+    time = indices(time_ind),
     time_across = 'chunk',
     merge_across_dims = TRUE,
     largest_dims_length = need_largest_dims_length,
@@ -139,48 +117,10 @@ load_datasets <- function(recipe_file) {
                        time = c('syear', 'chunk')),
     retrieve = T)
 
-  # daily & multiple files per sdate
-  if (store.freq == "daily_mean" & !exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4', 'HadGEM3-GC31-MM')) {
-    Start_hcst_arg_list[['time_across']] <- NULL
-    Start_hcst_arg_list[['merge_across_dims']] <- NULL
-    Start_hcst_arg_list[['time']] <- time_arr
-    Start_hcst_arg_list[['return_vars']][['time']] <- 'syear'
-
-    hcst <- do.call(Start, Start_hcst_arg_list)
-
-    if (dim(hcst)['chunk'] != 1) {
-      stop("hcst loading has problem. The chunk number is > 1")
-    } else {
-      dim(hcst) <- dim(hcst)[-5] #remove chunk dim
-    }
-
-  } else {
-    # monthly & daily if EC-Earth
-    hcst <- do.call(Start, Start_hcst_arg_list)
-  }
+  hcst <- do.call(Start, Start_hcst_arg_list)
 
   tmp_time_attr <- attr(hcst, 'Variables')$common$time
 
-#------------------------------------------------------------------
-  # If daily data with 0229, need to remove 0229 and extra time
-  #TODO: Make Start() able to read the correct time indices for each sdate (for case1),
-  #      so we don't need this awkward correction.
-  #NOTE: It is not critical though. Having one more or one less day is acceptable.
-  #      If we need to use Compute(), just ignore this part.
-  if (store.freq == "daily_mean" & any(format(tmp_time_attr, "%m%d") == "0229")) {
-    # Save hcst attributes
-    hcst_attr <- attributes(hcst)[-1]  # without $dim
-    new_hcst <- Apply(hcst, 
-                      target_dims = c('syear', 'time'), 
-                      output_dims = list(data = c('syear', 'time')),
-                      fun = correct_daily_for_leap,
-                      time_attr = tmp_time_attr, return_time = F)$data
-    hcst <- Reorder(new_hcst, names(dim(hcst)))
-    attributes(hcst) <- c(attributes(hcst), hcst_attr)
-    tmp_time_attr <- correct_daily_for_leap(data = NULL, time_attr = tmp_time_attr, return_time = T)$time_attr
-    attr(hcst, 'Variables')$common$time <- tmp_time_attr
-  }
-#------------------------------------------------------------------
 
   # change syear to c(sday, sweek, syear)
   dim(hcst) <- c(dim(hcst)[1:3], sday = 1, sweek = 1, dim(hcst)[4:7])
@@ -244,84 +184,31 @@ load_datasets <- function(recipe_file) {
     Start_fcst_arg_list <- Start_hcst_arg_list
     Start_fcst_arg_list[['dat']] <- file.path(fcst.path, fcst.files)
     Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst)
-    if (is.array(Start_fcst_arg_list[['time']])) {
-      Start_fcst_arg_list[['time']] <- time_arr_fcst
-    }
     fcst <- do.call(Start, Start_fcst_arg_list)
 
-    if ('chunk' %in% names(dim(fcst))) {
-      if (dim(fcst)['chunk'] != 1) {
-        stop("fcst loading has problem. The chunk number is > 1")
-      } else {
-        dim(fcst) <- dim(fcst)[-5] #remove chunk dim
-      }
-    }
 
   } else { # multi_path
-  #TODO: time attribute is not correct
 
+    #TODO: time attribute is not correct
     Start_fcst_arg_list <- Start_hcst_arg_list
     Start_fcst_arg_list[['dat']] <- path_list
     Start_fcst_arg_list[['syear']] <- NULL
     Start_fcst_arg_list[['chunk_depends']] <- NULL
-    if ('syear' %in% Start_hcst_arg_list[['return_vars']][['time']]) {
-      remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear')
-      Start_fcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind]
+    if ('syear' %in% Start_fcst_arg_list[['return_vars']][['time']]) {
+      remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear')
+      Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind]
     }
 
     fcst <- do.call(Start, Start_fcst_arg_list)
 
-#  fcst <- Start(dat = path_list,
-#                var = variable,
-#                ensemble = member,
-#                chunk = 'all',
-#                time = indices(time),
-#                time_across = 'chunk',
-#                merge_across_dims = TRUE,
-#                largest_dims_length = need_largest_dims_length,
-#                latitude = values(list(lats.min, lats.max)),
-#                latitude_reorder = Sort(decreasing = TRUE),
-#                longitude = values(list(lons.min, lons.max)),
-#                longitude_reorder = circularsort,
-#                transform = regrid_params$fcst.transform,
-#                transform_extra_cells = 2,
-#                transform_params = list(grid = regrid_params$fcst.gridtype, #nc file
-#                                        method = regrid_params$fcst.gridmethod),
-#                transform_vars = c('latitude', 'longitude'),
-#                synonims = list(longitude = c('lon', 'longitude'),
-#                                latitude = c('lat', 'latitude')),
-#                return_vars = list(latitude = NULL, longitude = NULL,
-#                                   time = c('chunk')),
-#                retrieve = T)
-
-  # Reshape dimensions
-  ## Insert syear at 4th in [dat, var, ensemble, time, latitude, longitude]
-  ## dat should be 1, syear should be length of dat
-    dim(fcst) <- c(dat = 1, dim(fcst)[2:3], syear = as.numeric(dim(fcst))[1], dim(fcst)[4:6])
+    # Reshape and reorder dimensions
+    ## dat should be 1, syear should be length of dat; reorder dimensions
+    dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6])
+    fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'ensemble', 'syear', 'time', 'latitude', 'longitude'))
 
   }
 
   tmp_time_attr <- attr(fcst, 'Variables')$common$time
-#------------------------------------------------------------------
-  # If daily data with 0229, need to remove 0229 and extra time
-  #TODO: Make Start() able to read the correct time indices for each sdate (for case1),
-  #      so we don't need this awkward correction.
-  #NOTE: It is not critical though. Having one more or one less day is acceptable.
-  #      If we need to use Compute(), just ignore this part.
-  if (store.freq == "daily_mean" & any(format(tmp_time_attr, "%m%d") == "0229")) {
-    # Save fcst attributes
-    fcst_attr <- attributes(fcst)[-1]  # without $dim
-    new_fcst <- Apply(fcst,
-                      target_dims = c('syear', 'time'),
-                      output_dims = list(data = c('syear', 'time')),
-                      fun = correct_daily_for_leap,
-                      time_attr = tmp_time_attr, return_time = F)$data
-    fcst <- Reorder(new_fcst, names(dim(fcst)))
-    attributes(fcst) <- c(attributes(fcst), fcst_attr)
-    tmp_time_attr <- correct_daily_for_leap(data = NULL, time_attr = tmp_time_attr, return_time = T)$time_attr
-    attr(fcst, 'Variables')$common$time <- tmp_time_attr
-  }
-#------------------------------------------------------------------
 
   # change syear to c(sday, sweek, syear)
   dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7])
diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/get_daily_time_ind.R
index 59f60268..f7625690 100644
--- a/modules/Loading/get_daily_time_ind.R
+++ b/modules/Loading/get_daily_time_ind.R
@@ -5,9 +5,12 @@
 # and ftimemax = 2, which means January next year, equals to c(31+31:31+31+30).
 #
 #NOTE: For the datasets that having one file per sdate, we can use array [syear, time] to define time since
-# the Start() call doesn't need time_across = 'chunk' (there is only one chunk)
+# the Start() call doesn't need time_across = 'chunk' (there is only one chunk). BUT to 
+# keep the script simple, we use indices always and ignore leap year difference.
+# Only distinguish 360-day and non 360-day calendar.
+
 get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calendar) {
-  # E.g., time = 1:4 means Dec to March, then indices will be c(31:(31+30+31+28+31)) for non-leap yr and c(31:(31+30+31+28), (31+30+31+28+2):(31+30+31+28+2+30)) for leap yr. Calendar needs to be considered too.
+#NOTE: "sdates" is not needed if leap year is not considered
 
   if (!calendar %in% c('360-day', '365_day', 'noleap', 'standard', 'proleptic_gregorian',
                        'gregorian')) 
@@ -33,38 +36,38 @@ get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calend
       daily_time_ind <- (before_start_total_days_sum + 1):(before_start_total_days_sum + total_days_sum)
     }
 
-
-    if (calendar %in% c('365_day', 'noleap')) {
-      # daily_time_ind
-    } else if (calendar %in% c('standard', 'proleptic_gregorian', 'gregorian')) {
-
-#      leap <- leap_year(sdates)
-      sdates_full <- ymd(paste0(sdates, sprintf('%02d', initial_month), '01'))
-      idx_min <- sdates_full + months(ftimemin)
-      idx_max <- sdates_full + months(ftimemax + 1) - days(1)
-
-      day0229 <- rep(NA, length(sdates))
-      for (i_sdate in 1:length(sdates_full)) {
-        days <- seq(idx_min[i_sdate],idx_max[i_sdate], by = 'days')
-        day0229[i_sdate] <- sum(format(days, "%m%d") == "0229")
-      }
-
-      if (all(day0229 == 0)) {  # no 0229
-        # daily_time_ind
-      } else {
-        # get extra indices and remove 0229 & extra indices after retrieval
-        #TODO: Make Start() can handle time as a list or array for this case
-        daily_time_ind <- c(daily_time_ind, 
-                            (tail(daily_time_ind, 1) + 1):((tail(daily_time_ind, 1) + 1) + (max(day0229) - 1)))
-      }
-  
-    }
+#    if (calendar %in% c('365_day', 'noleap')) {
+#      # daily_time_ind
+#    } else if (calendar %in% c('standard', 'proleptic_gregorian', 'gregorian')) {
+#
+##      leap <- leap_year(sdates)
+#      sdates_full <- ymd(paste0(sdates, sprintf('%02d', initial_month), '01'))
+#      idx_min <- sdates_full + months(ftimemin)
+#      idx_max <- sdates_full + months(ftimemax + 1) - days(1)
+#
+#      day0229 <- rep(NA, length(sdates))
+#      for (i_sdate in 1:length(sdates_full)) {
+#        days <- seq(idx_min[i_sdate],idx_max[i_sdate], by = 'days')
+#        day0229[i_sdate] <- sum(format(days, "%m%d") == "0229")
+#      }
+#
+#      if (all(day0229 == 0)) {  # no 0229
+#        # daily_time_ind
+#      } else {
+#        # get extra indices and remove 0229 & extra indices after retrieval
+#        #TODO: Make Start() can handle time as a list or array for this case
+#        daily_time_ind <- c(daily_time_ind, 
+#                            (tail(daily_time_ind, 1) + 1):((tail(daily_time_ind, 1) + 1) + (max(day0229) - 1)))
+#      }
+#  
+#    }
   }
 
   return(daily_time_ind)
 }
 
-
+# This function is used to pick out 0229 after loading data. But it is not used in the 
+# script now.
 correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) {
   # data & time_attr: [syear, time]
   #NOTE: return_time = TRUE, only return time_attr; FALSE, only return data
diff --git a/tests/testthat/test-decadal_daily.R b/tests/testthat/test-decadal_daily_1.R
similarity index 88%
rename from tests/testthat/test-decadal_daily.R
rename to tests/testthat/test-decadal_daily_1.R
index 27e29d6c..42da0401 100644
--- a/tests/testthat/test-decadal_daily.R
+++ b/tests/testthat/test-decadal_daily_1.R
@@ -74,12 +74,18 @@ expect_equal(
 dim(data$hcst$Dates$start),
 c(sday = 1, sweek = 1, syear = 3, time = 90)
 )
+# hcst data
 expect_equal(
 as.vector(drop(data$hcst$data)[1, 2:3, 1:3, 1, 1]),
 c(298.5787, 293.6479, 298.5042, 293.7802, 297.8072, 293.0764),
 tolerance = 0.0001
 )
 expect_equal(
+as.vector(drop(data$hcst$data)[2, , 89:90, 1, 1]),
+c(301.6978, 308.9792, 308.4501, 302.1620, 307.6034, 307.6388),
+tolerance = 0.0001
+)
+expect_equal(
 mean(data$hcst$data),
 301.2666,
 tolerance = 0.0001
@@ -89,6 +95,19 @@ range(data$hcst$data),
 c(285.9326, 314.9579),
 tolerance = 0.0001
 )
+
+# fcst data
+as.vector(drop(data$fcst$data)[1, , 1:3, 1, 1]),
+c(295.0745, 291.1006, 296.2279, 291.6309, 295.3123, 290.8995),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(drop(data$fcst$data)[2, , 89:90, 1, 1]),
+c(305.3428, 305.0657, 305.5445, 305.5681),
+tolerance = 0.0001
+)
+
+# time value
 expect_equal(
 (data$hcst$Dates$start)[1],
 as.POSIXct("1991-01-01 12:00:00", tz = 'UTC')
@@ -105,6 +124,10 @@ expect_equal(
 (data$hcst$Dates$start)[1, 1, 3, 90],
 as.POSIXct("1993-03-31 12:00:00", tz = 'UTC')
 )
+expect_equal(
+(data$hcst$Dates$start)[1, 1, 2, 90],
+as.POSIXct("1992-03-30 12:00:00", tz = 'UTC')
+)
 
 })
 
diff --git a/tests/testthat/test-decadal_monthly.R b/tests/testthat/test-decadal_monthly_1.R
similarity index 100%
rename from tests/testthat/test-decadal_monthly.R
rename to tests/testthat/test-decadal_monthly_1.R
diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R
index 24aeac6b..ac80fe77 100644
--- a/tests/testthat/test-decadal_monthly_2.R
+++ b/tests/testthat/test-decadal_monthly_2.R
@@ -97,7 +97,7 @@ tolerance = 0.0001
 # fcst data
 expect_equal(
 as.vector(drop(data$fcst$data)[1, , 1:2, 2, 2]),
-c(271.7708, 272.2929, 272.4980, 273.3616),
+c(271.7708, 271.8424, 272.4980, 273.5842),
 tolerance = 0.0001
 )
 expect_equal(
-- 
GitLab


From c7cc2738180051dfdcc44483dac3a3fc130ce1f3 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Wed, 13 Jul 2022 12:10:24 +0200
Subject: [PATCH 7/7] Fix typo

---
 tests/testthat/test-decadal_daily_1.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R
index 42da0401..15020591 100644
--- a/tests/testthat/test-decadal_daily_1.R
+++ b/tests/testthat/test-decadal_daily_1.R
@@ -97,6 +97,7 @@ tolerance = 0.0001
 )
 
 # fcst data
+expect_equal(
 as.vector(drop(data$fcst$data)[1, , 1:3, 1, 1]),
 c(295.0745, 291.1006, 296.2279, 291.6309, 295.3123, 290.8995),
 tolerance = 0.0001
-- 
GitLab