From 26f765a3540e375eca14acc40b824ec31cdbaa03 Mon Sep 17 00:00:00 2001
From: Jost von Hardenberg <j.vonhardenberg@isac.cnr.it>
Date: Tue, 21 Apr 2020 22:22:56 +0200
Subject: [PATCH 1/9] expand closest member

---
 R/CST_EnsClustering.R | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/R/CST_EnsClustering.R b/R/CST_EnsClustering.R
index 62d29244..de0733b3 100644
--- a/R/CST_EnsClustering.R
+++ b/R/CST_EnsClustering.R
@@ -186,6 +186,19 @@ EnsClustering <- function(data, lat, lon, time_moment = "mean", numclus = NULL,
                   lon_lim = lon_lim, lat_lim = lat_lim,
                   variance_explained = variance_explained,
                   numpcs = numpcs, verbose = verbose)
+
+  cm=result$closest_member
+  cml <- vector(mode = "list", length = length(cluster_dim))
+  cum <- cm * 0
+  dim_cd <- dim(exp)[cluster_dim]
+  for (i in rev(seq_along(cluster_dim))) {
+    cml[[i]] <- floor((cm - cum) / prod(dim_cd[-i]))
+    cum <- cum + cml[[i]] * prod(dim_cd[-i])
+    dim_cd <- dim_cd[-i]
+  }
+  names(cml) <- cluster_dim
+  result$closest_member <- cml
+
   return(append(result, list(lat = lat, lon = lon)))
 }
 
-- 
GitLab


From 8e7ce8f493dacb1236e54e51713ec8b8ee6ced0a Mon Sep 17 00:00:00 2001
From: Jost von Hardenberg <j.vonhardenberg@isac.cnr.it>
Date: Tue, 21 Apr 2020 23:33:21 +0200
Subject: [PATCH 2/9] automatic selection of time dim

---
 R/CST_EnsClustering.R | 43 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/R/CST_EnsClustering.R b/R/CST_EnsClustering.R
index de0733b3..3a539c62 100644
--- a/R/CST_EnsClustering.R
+++ b/R/CST_EnsClustering.R
@@ -61,7 +61,10 @@
 #' @param variance_explained variance (percentage) to be explained by the set of EOFs.
 #'        Defaults to 80. Not used if numpcs is specified.
 #' @param numpcs Number of EOFs retained in the analysis (optional).
-#' @param cluster_dim Dimension along which to cluster. Typically "member" or "sdate". This can also be a list like c("member", "sdate").
+#' @param cluster_dim Dimension along which to cluster. Typically "member" or "sdate".
+#'        This can also be a list like c("member", "sdate").
+#' @param time_dim String or character array with name(s) of dimension(s) over which to compute statistics.
+#'        If omitted c("ftime", "sdate", "time") are searched in this order.
 #' @param verbose Logical for verbose output
 #' @return A list with elements \code{$cluster} (cluster assigned for each member),
 #'         \code{$freq} (relative frequency of each cluster), \code{$closest_member}
@@ -100,7 +103,7 @@
 #'@export
 CST_EnsClustering <- function(exp, time_moment = "mean", numclus = NULL,
                         lon_lim = NULL, lat_lim = NULL,
-                        variance_explained = 80, numpcs = NULL,
+                        variance_explained = 80, numpcs = NULL, time_dim = NULL,
                         time_percentile = 90, cluster_dim = "member",
                         verbose = F) {
   if (!inherits(exp, "s2dv_cube")) {
@@ -112,7 +115,7 @@ CST_EnsClustering <- function(exp, time_moment = "mean", numclus = NULL,
                     time_moment = time_moment, numclus = numclus,
                     lon_lim = lon_lim, lat_lim = lat_lim,
                     variance_explained = variance_explained, numpcs = numpcs,
-                    time_percentile = time_percentile,
+                    time_percentile = time_percentile, time_dim = time_dim,
                     cluster_dim = cluster_dim, verbose = verbose)
 
   return(result)
@@ -145,7 +148,10 @@ CST_EnsClustering <- function(exp, time_moment = "mean", numclus = NULL,
 #' @param variance_explained variance (percentage) to be explained by the set of EOFs.
 #'        Defaults to 80. Not used if numpcs is specified.
 #' @param numpcs Number of EOFs retained in the analysis (optional).
-#' @param cluster_dim Dimension along which to cluster. Typically "member" or "sdate". This can also be a list like c("member", "sdate").
+#' @param cluster_dim Dimension along which to cluster. Typically "member" or "sdate".
+#'        This can also be a list like c("member", "sdate").
+#' @param time_dim String or character array with name(s) of dimension(s) over which to compute statistics.
+#'        If omitted c("ftime", "sdate", "time") are searched in this order.
 #' @param verbose Logical for verbose output
 #' @return A list with elements \code{$cluster} (cluster assigned for each member),
 #'         \code{$freq} (relative frequency of each cluster), \code{$closest_member}
@@ -162,20 +168,40 @@ CST_EnsClustering <- function(exp, time_moment = "mean", numclus = NULL,
 
 EnsClustering <- function(data, lat, lon, time_moment = "mean", numclus = NULL,
                     lon_lim = NULL, lat_lim = NULL, variance_explained = 80,
-                    numpcs = NULL, time_percentile = 90,
+                    numpcs = NULL, time_percentile = 90, time_dim = NULL,
                     cluster_dim = "member", verbose = T) {
 
+  # Check/detect time_dim
+  if (is.null(time_dim)) {
+    time_dim_names <- c("ftime", "sdate", "time")
+    time_dim_num <- which(time_dim_names %in% names(dim(data)))
+    if (length(time_dim_num) > 0) {
+      # Find time dimension with length > 1
+      ilong <- which(dim(data)[time_dim_names[time_dim_num]] > 1)
+      if (length(ilong) > 0) {
+        time_dim <- time_dim_names[time_dim_num[ilong[1]]]
+      } else {
+        stop("No time dimension longer than one found.")
+      }
+    } else {
+      stop("Could not automatically detect a target time dimension ",
+           "in the provided data in 'data'.")
+    }
+    .printv(paste("Selected time dim:", time_dim))
+  }
+
   # Apply time_moment
   if (time_moment == "mean") {
     .printv("Considering the time_moment: mean", verbose)
-    exp <- apply(data, c(1, 2, 3, 5, 6), mean)
+    exp <- Apply(data, target_dims = time_dim, mean)$output1
   } else if (time_moment == "sd") {
     .printv("Considering the time_moment: sd", verbose)
-    exp <- apply(data, c(1, 2, 3, 5, 6), sd)
+    exp <- Apply(data, target_dims = time_dim, sd)$output1
   } else if (time_moment == "perc") {
     .printv(paste0("Considering the time_moment: percentile ",
                  sprintf("%5f", time_percentile)), verbose)
-    exp <- apply(data, c(1, 2, 3, 5, 6), quantile, time_percentile / 100.)
+    exp <- Apply(data, target_dims = time_dim, quantile,
+                 time_percentile / 100.)$output1
   } else {
     stop(paste0("Invalid time_moment '", time_moment, "' specified!"))
   }
@@ -187,6 +213,7 @@ EnsClustering <- function(data, lat, lon, time_moment = "mean", numclus = NULL,
                   variance_explained = variance_explained,
                   numpcs = numpcs, verbose = verbose)
 
+  # Expand result$closest_member into indices in cluster_dim dimensions
   cm=result$closest_member
   cml <- vector(mode = "list", length = length(cluster_dim))
   cum <- cm * 0
-- 
GitLab


From a34660bc0a8d314f5097c63037e8a9385a1d3b1a Mon Sep 17 00:00:00 2001
From: Jost von Hardenberg <j.vonhardenberg@isac.cnr.it>
Date: Wed, 22 Apr 2020 17:43:40 +0200
Subject: [PATCH 3/9] correct calculation of indices

---
 R/CST_EnsClustering.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/CST_EnsClustering.R b/R/CST_EnsClustering.R
index 3a539c62..171f5711 100644
--- a/R/CST_EnsClustering.R
+++ b/R/CST_EnsClustering.R
@@ -219,8 +219,8 @@ EnsClustering <- function(data, lat, lon, time_moment = "mean", numclus = NULL,
   cum <- cm * 0
   dim_cd <- dim(exp)[cluster_dim]
   for (i in rev(seq_along(cluster_dim))) {
-    cml[[i]] <- floor((cm - cum) / prod(dim_cd[-i]))
-    cum <- cum + cml[[i]] * prod(dim_cd[-i])
+    cml[[i]] <- floor((cm - cum - 1) / prod(dim_cd[-i])) + 1
+    cum <- cum + (cml[[i]] - 1) * prod(dim_cd[-i])
     dim_cd <- dim_cd[-i]
   }
   names(cml) <- cluster_dim
-- 
GitLab


From ee4954a3307b76423b2297526731276a93306ab4 Mon Sep 17 00:00:00 2001
From: Jost von Hardenberg <j.vonhardenberg@isac.cnr.it>
Date: Thu, 23 Apr 2020 14:47:54 +0200
Subject: [PATCH 4/9] verbose printing of time selection

---
 R/CST_EnsClustering.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/CST_EnsClustering.R b/R/CST_EnsClustering.R
index 171f5711..35a4f375 100644
--- a/R/CST_EnsClustering.R
+++ b/R/CST_EnsClustering.R
@@ -187,7 +187,7 @@ EnsClustering <- function(data, lat, lon, time_moment = "mean", numclus = NULL,
       stop("Could not automatically detect a target time dimension ",
            "in the provided data in 'data'.")
     }
-    .printv(paste("Selected time dim:", time_dim))
+    .printv(paste("Selected time dim:", time_dim), verbose)
   }
 
   # Apply time_moment
-- 
GitLab


From 380d384b7e9eeed1fac50ca0a445950f1e70aac6 Mon Sep 17 00:00:00 2001
From: Jost von Hardenberg <j.vonhardenberg@isac.cnr.it>
Date: Thu, 23 Apr 2020 14:50:14 +0200
Subject: [PATCH 5/9] doc fix

---
 R/CST_EnsClustering.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/CST_EnsClustering.R b/R/CST_EnsClustering.R
index 35a4f375..8a7d2c60 100644
--- a/R/CST_EnsClustering.R
+++ b/R/CST_EnsClustering.R
@@ -52,7 +52,7 @@
 #' spatial dimensions named "lon" and "lat", and dimensions "dataset", "member", "ftime", "sdate".
 #' @param time_moment Decides the moment to be applied to the time dimension. Can be either 'mean' (time mean),
 #'        'sd' (standard deviation along time) or 'perc' (a selected percentile on time).
-#'        If 'perc' the keyword 'time_percentile' is also needed.
+#'        If 'perc' the keyword 'time_percentile' is also used.
 #' @param time_percentile Set the percentile in time you want to analyse (used for `time_moment = "perc").
 #' @param numclus Number of clusters (scenarios) to be calculated.
 #'        If set to NULL the number of ensemble members divided by 10 is used, with a minimum of 2 and a maximum of 8.
@@ -139,7 +139,7 @@ CST_EnsClustering <- function(exp, time_moment = "mean", numclus = NULL,
 #' @param lon Vector of longitudes.
 #' @param time_moment Decides the moment to be applied to the time dimension. Can be either 'mean' (time mean),
 #'        'sd' (standard deviation along time) or 'perc' (a selected percentile on time).
-#'        If 'perc' the keyword 'time_percentile' is also needed.
+#'        If 'perc' the keyword 'time_percentile' is also used.
 #' @param time_percentile Set the percentile in time you want to analyse (used for `time_moment = "perc").
 #' @param numclus Number of clusters (scenarios) to be calculated.
 #'        If set to NULL the number of ensemble members divided by 10 is used, with a minimum of 2 and a maximum of 8.
-- 
GitLab


From 301d5fc246c6f9e2364cc8a0a528a927acdc3c48 Mon Sep 17 00:00:00 2001
From: Jost von Hardenberg <j.vonhardenberg@isac.cnr.it>
Date: Thu, 23 Apr 2020 15:08:51 +0200
Subject: [PATCH 6/9] fix test

---
 tests/testthat/test-CST_EnsClustering.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/testthat/test-CST_EnsClustering.R b/tests/testthat/test-CST_EnsClustering.R
index 8c507db3..2e7364d9 100644
--- a/tests/testthat/test-CST_EnsClustering.R
+++ b/tests/testthat/test-CST_EnsClustering.R
@@ -37,7 +37,8 @@ test_that("Sanity and Functionality tests", {
   res <- CST_EnsClustering(exp, numclus = 3, cluster_dim = c("member", "sdate"))
   expect_equivalent(dim(res$cluster), dim(exp$data)[c(2, 3, 1)])
   expect_equivalent(dim(res$freq), c(cluster = 3, dim(exp$data)[1]))
-  expect_equivalent(dim(res$closest_member), c(cluster = 3, dim(exp$data)[1]))
+  expect_equivalent(dim(res$closest_member$sdate), c(cluster = 3,
+                    dim(exp$data)[1]))
   expect_equivalent(dim(res$repr_field), c(cluster = 3,
                     dim(exp$data)[c(5, 6)], dim(exp$data)[1]))
   expect_equivalent(dim(res$composites), c(cluster = 3,
@@ -48,13 +49,13 @@ test_that("Sanity and Functionality tests", {
                      cluster_dim = "member")
   # The closest member of each cluster should be member of that cluster
   for (i in 1:3) {
-    expect_equivalent(res$cluster[res$closest_member[i, 1, 1], 1, 1], i)
+    expect_equivalent(res$cluster[res$closest_member$member[i, 1, 1], 1, 1], i)
   }
 
   res <- CST_EnsClustering(exp, numclus = 3, numpcs = 8,
                      cluster_dim = c("member"))
   for (i in 1:3) {
-    expect_equivalent(res$cluster[res$closest_member[i, 1, 1], 1, 1], i)
+    expect_equivalent(res$cluster[res$closest_member$member[i, 1, 1], 1, 1], i)
   }
 
   res <- CST_EnsClustering(exp, numclus = 3, variance_explained = 80,
-- 
GitLab


From b2729c998586338d3c3eb106bb6ef31297518f7f Mon Sep 17 00:00:00 2001
From: jhardenberg <hardenberg@gmail.com>
Date: Fri, 24 Apr 2020 13:57:09 +0200
Subject: [PATCH 7/9] as.vector for multiApply and quantile

---
 R/CST_EnsClustering.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/CST_EnsClustering.R b/R/CST_EnsClustering.R
index 8a7d2c60..53bf013a 100644
--- a/R/CST_EnsClustering.R
+++ b/R/CST_EnsClustering.R
@@ -200,8 +200,9 @@ EnsClustering <- function(data, lat, lon, time_moment = "mean", numclus = NULL,
   } else if (time_moment == "perc") {
     .printv(paste0("Considering the time_moment: percentile ",
                  sprintf("%5f", time_percentile)), verbose)
-    exp <- Apply(data, target_dims = time_dim, quantile,
-                 time_percentile / 100.)$output1
+    exp <- Apply(data, target_dims = time_dim,
+                 function(x) {quantile(as.vector(x),
+                                       time_percentile / 100.)})$output1
   } else {
     stop(paste0("Invalid time_moment '", time_moment, "' specified!"))
   }
-- 
GitLab


From f313d00fa76cc66feb0571d2f253bd9c6a5cfdca Mon Sep 17 00:00:00 2001
From: nperez <nperez@bscearth372.int.bsc.es>
Date: Wed, 3 Jun 2020 19:58:30 +0200
Subject: [PATCH 8/9] functions documentation automatically generated with
 devtools

---
 man/CST_EnsClustering.Rd | 9 +++++++--
 man/EnsClustering.Rd     | 9 +++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/man/CST_EnsClustering.Rd b/man/CST_EnsClustering.Rd
index 154541d5..6ee79b7c 100644
--- a/man/CST_EnsClustering.Rd
+++ b/man/CST_EnsClustering.Rd
@@ -12,6 +12,7 @@ CST_EnsClustering(
   lat_lim = NULL,
   variance_explained = 80,
   numpcs = NULL,
+  time_dim = NULL,
   time_percentile = 90,
   cluster_dim = "member",
   verbose = F
@@ -24,7 +25,7 @@ spatial dimensions named "lon" and "lat", and dimensions "dataset", "member", "f
 
 \item{time_moment}{Decides the moment to be applied to the time dimension. Can be either 'mean' (time mean),
 'sd' (standard deviation along time) or 'perc' (a selected percentile on time).
-If 'perc' the keyword 'time_percentile' is also needed.}
+If 'perc' the keyword 'time_percentile' is also used.}
 
 \item{numclus}{Number of clusters (scenarios) to be calculated.
 If set to NULL the number of ensemble members divided by 10 is used, with a minimum of 2 and a maximum of 8.}
@@ -38,9 +39,13 @@ Defaults to 80. Not used if numpcs is specified.}
 
 \item{numpcs}{Number of EOFs retained in the analysis (optional).}
 
+\item{time_dim}{String or character array with name(s) of dimension(s) over which to compute statistics.
+If omitted c("ftime", "sdate", "time") are searched in this order.}
+
 \item{time_percentile}{Set the percentile in time you want to analyse (used for `time_moment = "perc").}
 
-\item{cluster_dim}{Dimension along which to cluster. Typically "member" or "sdate". This can also be a list like c("member", "sdate").}
+\item{cluster_dim}{Dimension along which to cluster. Typically "member" or "sdate".
+This can also be a list like c("member", "sdate").}
 
 \item{verbose}{Logical for verbose output}
 }
diff --git a/man/EnsClustering.Rd b/man/EnsClustering.Rd
index 2fd8a3f1..30d81f87 100644
--- a/man/EnsClustering.Rd
+++ b/man/EnsClustering.Rd
@@ -15,6 +15,7 @@ EnsClustering(
   variance_explained = 80,
   numpcs = NULL,
   time_percentile = 90,
+  time_dim = NULL,
   cluster_dim = "member",
   verbose = T
 )
@@ -28,7 +29,7 @@ EnsClustering(
 
 \item{time_moment}{Decides the moment to be applied to the time dimension. Can be either 'mean' (time mean),
 'sd' (standard deviation along time) or 'perc' (a selected percentile on time).
-If 'perc' the keyword 'time_percentile' is also needed.}
+If 'perc' the keyword 'time_percentile' is also used.}
 
 \item{numclus}{Number of clusters (scenarios) to be calculated.
 If set to NULL the number of ensemble members divided by 10 is used, with a minimum of 2 and a maximum of 8.}
@@ -44,7 +45,11 @@ Defaults to 80. Not used if numpcs is specified.}
 
 \item{time_percentile}{Set the percentile in time you want to analyse (used for `time_moment = "perc").}
 
-\item{cluster_dim}{Dimension along which to cluster. Typically "member" or "sdate". This can also be a list like c("member", "sdate").}
+\item{time_dim}{String or character array with name(s) of dimension(s) over which to compute statistics.
+If omitted c("ftime", "sdate", "time") are searched in this order.}
+
+\item{cluster_dim}{Dimension along which to cluster. Typically "member" or "sdate".
+This can also be a list like c("member", "sdate").}
 
 \item{verbose}{Logical for verbose output}
 }
-- 
GitLab


From 2fcb83dbbc3477b3415553297f0fee61113e5ed1 Mon Sep 17 00:00:00 2001
From: nperez <nperez@bscearth372.int.bsc.es>
Date: Thu, 4 Jun 2020 10:22:51 +0200
Subject: [PATCH 9/9] Changes in EnsClustering listed in NEWS

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 4144a707..aa6de232 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,7 +8,7 @@
     + MultiMetric vignette fixed typo text description
     + RainFARM checks 'slope' is not a vector
     + DESCRIPTION specifies the minimum multiApply version required
-
+    + EnsClustering has a new parameter 'time_dim' and fixed 'closest_member' output
 
 ### CSTools 3.0.0
 **Submission date to CRAN: 10-02-2020**
-- 
GitLab