From 917a58d05e0a54a9904295d0cf8fac8bf1807edb Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Tue, 9 Apr 2024 17:19:11 +0200
Subject: [PATCH 1/4] daily data downscaling bug is fixed for linear and
 logistic regression. Also, find.nn function was fixed

---
 R/Intlr.R       |  93 ++++++++++++++++++++++++++++++++---------
 R/LogisticReg.R | 109 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 157 insertions(+), 45 deletions(-)

diff --git a/R/Intlr.R b/R/Intlr.R
index 953031f..32490f1 100644
--- a/R/Intlr.R
+++ b/R/Intlr.R
@@ -610,19 +610,33 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
   }
   
   # Apply the linear regressions
-  # case hindcast - forecast
+  ## case hindcast - forecast
   if (!is.null(exp_cor)) {
+    if (  time_dim %in% names(dim(predictor)) ) {
+      if (dim(predictor)[time_dim] > 1) {
+        target_dims_predictor <- c(target_dims_predictor, time_dim) 
+        target_dims_predictand <- c(target_dims_predictand, time_dim)
+        target_dims_forecast <- c(target_dims_forecast, time_dim)
+      }
+    }
     res <- Apply(list(predictor, predictand, forecast),
                  target_dims = list(target_dims_predictor, target_dims_predictand, 
                                     target_dims_forecast),
-                 fun = .intlr, loocv = loocv, aux_dim = aux_dim, ncores = ncores)$output1
+                 fun = .intlr, loocv = loocv, aux_dim = aux_dim, ncores = ncores, 
+                               sdate_dim = sdate_dim, time_dim = time_dim)$output1
   } 
-  # case hindcast only
+  ## case hindcast only
   else {
+    if ( time_dim %in% names(dim(predictor)) ) {
+      if (dim(predictor)[time_dim] > 1) {
+        target_dims_predictor <- c(target_dims_predictor, time_dim) 
+        target_dims_predictand <- c(target_dims_predictand, time_dim)
+      }
+    }
     res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, 
                                                                  target_dims_predictand),
-                 fun = .intlr, loocv = loocv, ncores = ncores)$output1
-    names(dim(res))[1] <- sdate_dim
+                 fun = .intlr, loocv = loocv, ncores = ncores, sdate_dim = sdate_dim, 
+                               time_dim = time_dim)$output1
   }
   
   # restore ensemble dimension in observations if it existed originally
@@ -652,19 +666,39 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
 #-----------------------------------
 # Atomic function to generate and apply the linear regressions
 #-----------------------------------
-.intlr <- function(x, y, f = NULL, loocv, aux_dim = NULL) {
+.intlr <- function(x, y, f = NULL, loocv, aux_dim = NULL, sdate_dim = "sdate", time_dim = "time") {
 
   require (plyr)
 
+  k_out <- 1 ##	 leave ONE out
+  daily <- FALSE
+  if (any(names(dim(x)) == time_dim)) {  ## IF DAILY DATA IS PROVIDED
+    daily <- TRUE  ## in order to arrange the res dimension, when loocv is not TRUE 
+    k_out <- as.numeric (dim(x)[time_dim])  ## for daily, to omit the data of the corresponding year.
+    x <- MergeDims (x, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    y <- MergeDims (y, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    if (!is.null(f)) {
+      f <- MergeDims (f, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    }
+  }
+
   if (!is.null(f)) {
     if (!is.null(aux_dim)) {
+      if (length(dim(x)) > 1 & daily) {
+        x <- aperm (x, c(3,1,2)) ## if 9nn or large scale, dimensions: sdate, nn, member
+        f <- aperm (f, c(3,1,2)) ## if 9nn or large scale, dimensions: sdate, nn, member
+      }
       tmp_df <- data.frame(x = adply(x,.margins = 3, .id = NULL), y = y)
     } else {
       tmp_df <- data.frame(x = as.vector(x), y = y)
     }
   } else {
+    if (length(dim(x)) > 1 & daily) {
+      x <- aperm (x, c(2,1))  ## if 9nn or large scale, dimensions: sdate, nn
+    }
     tmp_df <- data.frame(x = x, y = y)
   }
+
   # if the data is all NA, force return return NA
   if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
     if (is.null(f)) {  
@@ -677,7 +711,6 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
       }
     }
   } else {
-    # training
     
     ## apply principle components if method is 9nn
     if (any(names(dim(x)) == "nn")) { 
@@ -696,11 +729,27 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
       colnames(tmp_df) <- c(paste0("x.",1:(ncol(tmp_df)-1)), "y")
     } 
 
-    lm1 <- .train_lm(df = tmp_df, loocv = loocv)  
+    # training
+    lm1 <- .train_lm(df = tmp_df, loocv = loocv, k_out = k_out)  
  
     # prediction
-    res <- .pred_lm(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, aux_dim = aux_dim)
+    res <- .pred_lm(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, aux_dim = aux_dim, k_out = k_out)
   }
+
+  ## dimension naming
+  if (daily & is.null(f)) {            
+    if (!loocv) {             
+      dim (res) <- c(k_out, length(res)/k_out)
+    }
+    names(dim(res)) <- c(time_dim, sdate_dim)
+  } else if (daily & !is.null (f)) { 
+    sdate_num <- as.numeric (dim(res)[sdate_dim])/k_out
+    res <- SplitDim (res, indices = rep(1:k_out, sdate_num), 
+                     new_dim_name = time_dim, split_dim = sdate_dim) 
+  } else if (!daily & is.null(f)) {
+    dim(res) <- length(res)
+    names(dim(res)) <- sdate_dim
+  } ## if !daily & !is.null(f), the function already correctly assign the dimension names 
  
    return(res)
 }
@@ -709,18 +758,20 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
 # Function to generate the linear regressions.
 # Returns a list
 #-----------------------------------
-.train_lm <- function(df, loocv) {
+.train_lm <- function(df, loocv, k_out = 1) {
   
   # Remove predictor columns containing only NA's
-  df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, function(x) !all(is.na(x)))]
+  df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, 
+                   function(x) !all(is.na(x)))]
   
   if (loocv) {
-    
-    lm1 <- lapply(1:nrow(df), function(j) {
-      if (all(is.na(df[-j,]$y))) {
+
+    lm1 <- lapply(1:(nrow(df)/k_out), function(j) {
+      window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # omit the data of the year including corresponding day
+      if (all(is.na(df[-window ,]$y))) {
         return(NA)
       } else {
-        return(lm(df[-j,], formula = y ~ .)) 
+        return(lm(df[-window,], formula = y ~ .)) 
       }})
   } else {
     
@@ -733,16 +784,18 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
 #-----------------------------------
 # Function to apply the linear regressions.
 #-----------------------------------
-.pred_lm <- function(df, lm1, f, loocv, aux_dim) {
+.pred_lm <- function(df, lm1, f, loocv, aux_dim, k_out = 1) {
 
   require (plyr)
+
   
   if (loocv) {
-    pred_vals <- sapply(1:nrow(df), function(j) {
+    pred_vals <- sapply(1:length(lm1), function(j) {
       if (all(is.na(lm1[[j]]))) {
-        return(NA)
+        return(rep(NA, k_out))
       } else {
-        return(predict(lm1[[j]], df[j,]))
+        window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # test the daily data of the corresponding year
+        return(predict(lm1[[j]], df[window,]))
       }})
   } else {
     if (!is.na(lm1)) {
@@ -799,7 +852,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
   names(dim(dist_id)) <- c("nn", lon_dim)
 
   idh <- SplitDim(dist_id, split_dim = lon_dim,
-                  indices = rep(1:(length(lons_hres)),length(lats_hres)),
+                  indices = rep(1:(length(lats_hres)),length(lons_hres)),
                   new_dim_name = lat_dim)
 
   idh <- aperm (idh, c(1,3,2))  # dimension: nn, lat_dim, lon_dim
diff --git a/R/LogisticReg.R b/R/LogisticReg.R
index f74f2ee..91fde87 100644
--- a/R/LogisticReg.R
+++ b/R/LogisticReg.R
@@ -56,7 +56,9 @@
 #'@param sdate_dim a character vector indicating the start date dimension name in the element 
 #''data' in exp and obs. Default set to "sdate".
 #'@param member_dim a character vector indicating the member dimension name in the element
-#''data' in exp and obs. Default set to "member". 
+#''data' in exp and obs. Default set to "member".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".  
 #'@param region a numeric vector indicating the borders of the region defined in obs.
 #'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
 #'to the left border, while lonmax refers to the right border. latmin indicates the lower
@@ -94,8 +96,8 @@ CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method =
                             log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), 
                             return_most_likely_cat = FALSE, points = NULL, 
                             method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", 
-                            sdate_dim = "sdate", member_dim = "member", region = NULL, 
-                            loocv = TRUE, ncores = NULL) {
+                            sdate_dim = "sdate", member_dim = "member", time_dim = "time", 
+                            region = NULL, loocv = TRUE, ncores = NULL) {
 
   if (!inherits(exp,'s2dv_cube')) {
     stop("Parameter 'exp' must be of the class 's2dv_cube'")
@@ -105,14 +107,16 @@ CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method =
     stop("Parameter 'obs' must be of the class 's2dv_cube'")
   }
 
-  res <- LogisticReg(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, exp_lats = exp$coords[[lat_dim]], 
-                     exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]], 
-                     obs_lons = obs$coords[[lon_dim]], target_grid = target_grid,
-                     probs_cat = probs_cat, return_most_likely_cat = return_most_likely_cat,
+  res <- LogisticReg(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, 
+                     exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], 
+                     obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], 
+                     target_grid = target_grid, probs_cat = probs_cat, 
+                     return_most_likely_cat = return_most_likely_cat,
                      int_method = int_method, log_reg_method = log_reg_method, points = points, 
                      method_point_interp = method_point_interp, lat_dim = lat_dim, 
                      lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, 
-                     source_file_exp = exp$attrs$source_files[1], source_file_obs = obs$attrs$source_files[1],
+                     time_dim = time_dim, source_file_exp = exp$attrs$source_files[1], 
+                     source_file_obs = obs$attrs$source_files[1],
                      region = region, loocv = loocv, ncores = ncores)
 
   # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data
@@ -206,7 +210,9 @@ CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method =
 #'@param sdate_dim a character vector indicating the start date dimension name in the element 
 #''data' in exp and obs. Default set to "sdate".
 #'@param member_dim a character vector indicating the member dimension name in the element
-#''data' in exp and obs. Default set to "member". 
+#''data' in exp and obs. Default set to "member".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".  
 #'@param source_file_exp a character vector with a path to an example file of the exp data.
 #'Only needed if the downscaling is to a point location.
 #'@param source_file_obs a character vector with a path to an example file of the obs data.
@@ -247,7 +253,7 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
                         target_grid, int_method = NULL, log_reg_method = "ens_mean", 
                         probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, 
                         method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", 
-                        sdate_dim = "sdate", member_dim = "member", 
+                        sdate_dim = "sdate", member_dim = "member", time_dim = "time", 
                         source_file_exp = NULL, source_file_obs = NULL, region = NULL, 
                         loocv = TRUE, ncores = NULL) {
 
@@ -488,21 +494,54 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
                        as.integer(apply(terc, 1, function(r) which (r == 1)))}},          
                    output_dims = sdate_dim, ncores = ncores)$output1
 
+  target_dims_predictand <- sdate_dim
+  # Apply the logistic regressions
+  ## case hindcast only
   if (is.null(exp_cor)) {
-    res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), 
+    if (  time_dim %in% names(dim(predictor)) ) {
+      if (dim(predictor)[time_dim] > 1) {
+        target_dims_predictor <- c(target_dims_predictor, time_dim)
+        target_dims_predictand <- c(target_dims_predictand, time_dim)
+      }
+    }
+    res <- Apply(list(predictor, obs_cat), 
+                 target_dims = list(target_dims_predictor, target_dims_predictand), 
                  fun = function(x, y) .log_reg(x = x, y = y, loocv = loocv,probs_cat = probs_cat), 
                  output_dims = c(sdate_dim, "category"), ncores = ncores)$output1
 
+   if (  time_dim %in% names(dim(predictor)) ) {
+      if (dim(predictor)[time_dim] > 1) {
+        res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim,
+                        indices = rep(1:(dim(predictor)[time_dim]), dim(predictor)[sdate_dim]))
+      }
+    }
+
     if (return_most_likely_cat) {
       res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category,
                    output_dims = sdate_dim, ncores = ncores)$output1
     }
-  } else {
+  } 
+  ## case hindcast - forecast
+  else {
+    if (  time_dim %in% names(dim(predictor)) ) {
+      if (dim(predictor)[time_dim] > 1) {
+        target_dims_predictor <- c(target_dims_predictor, time_dim)
+        target_dims_predictand <- c(target_dims_predictand, time_dim)
+        target_dims_forecast <- c(target_dims_forecast, time_dim)
+      }
+    }
     res <- Apply(list(predictor, obs_cat, forecast), 
-                 target_dims = list(target_dims_predictor, sdate_dim, target_dims_forecast), 
+                 target_dims = list(target_dims_predictor, target_dims_predictand, 
+                                    target_dims_forecast), 
                  fun = function(x, y, f) .log_reg(x = x, y = y, f = f, loocv = loocv, 
                                                   probs_cat = probs_cat),
                  output_dims = c(sdate_dim, "category"), ncores = ncores)$output1
+    if (  time_dim %in% names(dim(predictor)) ) {
+      if (dim(predictor)[time_dim] > 1) {
+        res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim,  
+                        indices = rep(1:(dim(predictor)[time_dim]), dim(forecast)[sdate_dim]))
+      }
+    }
   }
 
   # restore ensemble dimension in observations if it existed originally
@@ -561,7 +600,22 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
 
 # atomic functions for logistic regressions
 .log_reg <- function(x, y, f = NULL, loocv, probs_cat) {
-  
+
+  k_out <- 1 ##  leave ONE out if the data is not daily.
+  daily <- FALSE
+  if (any(names(dim(x)) == time_dim)) {  ## IF DAILY DATA IS PROVIDED
+    daily <- TRUE  ## in order to arrange the res dimension, when loocv is not TRUE 
+    k_out <- as.numeric (dim(x)[time_dim])  ## for daily, to omit the data of the corresponding year.
+    x <- MergeDims (x, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    y <- MergeDims (y, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    if (length(dim(x)) > 1) {
+      x <- aperm (x, c(2,1))  ## if ens_mean_sd or sorted_members, dimensions: sdate, pred or member
+    }
+    if (!is.null(f)) {
+      f <- MergeDims (f, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    }
+  }
+
   tmp_df <- data.frame(x = x, y = y)
   
   # if the data is all NA, force return return NA
@@ -581,10 +635,11 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
 
   } else {
     # training
-    lm1 <- .train_lr(df = tmp_df, loocv = loocv)
+    lm1 <- .train_lr(df = tmp_df, loocv = loocv, k_out)
 
     # prediction
-    res <- .pred_lr(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, probs_cat = probs_cat)
+    res <- .pred_lr(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, 
+                    probs_cat = probs_cat, k_out = k_out)
 
     if ( !(is.null(f)) ) {  ## if forecast is provided, and syear of forecast is 1.
       if ( nrow(f) == 1 ) {
@@ -599,7 +654,7 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
 #-----------------------------------
 # Function to train the logistic regressions.
 #-----------------------------------
-.train_lr <- function(df, loocv) {
+.train_lr <- function(df, loocv, k_out = 1) {
 
   require(nnet)
   
@@ -608,7 +663,9 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
 
   if (loocv) {
 
-    lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ], trace = FALSE)) 
+    lm1 <- lapply(1:(nrow(df)/k_out), function(j) {
+      window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # omit the data of the year including corresponding day
+      multinom(y ~ ., data = df[ -window, ], trace = FALSE)}) 
 
   } else {
 
@@ -622,7 +679,7 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
 #-----------------------------------
 # Function to apply the logistic regressions.
 #-----------------------------------
-.pred_lr <- function(df, lm1, f, loocv, probs_cat) {
+.pred_lr <- function(df, lm1, f, loocv, probs_cat, k_out = 1) {
 
   require(plyr)
 
@@ -631,16 +688,18 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
     # The error: "Error: Results must have the same dimensions." can 
     # appear when the number of sdates is insufficient
    
-   pred_vals_ls <-list()
-   for (j in 1:nrow(df)) {
-    if (all(is.na(df[j, ]))) {
-      pred_vals_ls[[j]] <- rep(NA, length(probs_cat) + 1)
+   pred_vals_ls <- list()
+   for (j in 1:length(lm1)) {
+    window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # test the daily data of the corresponding year
+    if (all(is.na(df[window, ]))) {
+      pred_vals_ls[[j]] <- array(rep(NA, length(window) * (length(probs_cat) + 1)), 
+                                    dim = c(length(window), length(probs_cat) + 1))
     } else {
-      pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") 
+      pred_vals_ls[[j]] <- predict(lm1[[j]], df[window,], type = "probs") 
      }
    }
 	
-   pred_vals <- laply(pred_vals_ls, .fun = as.array)
+   pred_vals <- do.call(rbind, pred_vals_ls)
     
    if( length(probs_cat) + 1 == 2) {
      pred_vals_dum<-array(NA, dim=c(nrow(df), 2))
-- 
GitLab


From ba142910236f3b868ecaed4039692159850d6fb8 Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Fri, 12 Apr 2024 18:36:26 +0200
Subject: [PATCH 2/4] daily dwn version 2

---
 R/Intlr.R | 61 +++++++++++++++++++++++--------------------------------
 1 file changed, 25 insertions(+), 36 deletions(-)

diff --git a/R/Intlr.R b/R/Intlr.R
index 32490f1..c7705a3 100644
--- a/R/Intlr.R
+++ b/R/Intlr.R
@@ -608,16 +608,14 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
 
     stop(paste0(lr_method, " method is not implemented yet"))
   }
-  
+ 
   # Apply the linear regressions
   ## case hindcast - forecast
   if (!is.null(exp_cor)) {
     if (  time_dim %in% names(dim(predictor)) ) {
-      if (dim(predictor)[time_dim] > 1) {
-        target_dims_predictor <- c(target_dims_predictor, time_dim) 
-        target_dims_predictand <- c(target_dims_predictand, time_dim)
-        target_dims_forecast <- c(target_dims_forecast, time_dim)
-      }
+      target_dims_predictor <- c(target_dims_predictor, time_dim) 
+      target_dims_predictand <- c(target_dims_predictand, time_dim)
+      target_dims_forecast <- c(target_dims_forecast, time_dim)
     }
     res <- Apply(list(predictor, predictand, forecast),
                  target_dims = list(target_dims_predictor, target_dims_predictand, 
@@ -628,15 +626,15 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
   ## case hindcast only
   else {
     if ( time_dim %in% names(dim(predictor)) ) {
-      if (dim(predictor)[time_dim] > 1) {
-        target_dims_predictor <- c(target_dims_predictor, time_dim) 
-        target_dims_predictand <- c(target_dims_predictand, time_dim)
-      }
+      output_dims <- c(time_dim, sdate_dim)
+      target_dims_predictor <- c(target_dims_predictor, time_dim) 
+      target_dims_predictand <- c(target_dims_predictand, time_dim)
     }
     res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, 
                                                                  target_dims_predictand),
                  fun = .intlr, loocv = loocv, ncores = ncores, sdate_dim = sdate_dim, 
-                               time_dim = time_dim)$output1
+                               time_dim = time_dim,
+                               output_dims = output_dims)$output1
   }
   
   # restore ensemble dimension in observations if it existed originally
@@ -670,11 +668,11 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
 
   require (plyr)
 
-  k_out <- 1 ##	 leave ONE out
+  k_out <- 1 ##	 number of daily data in a year. It is useful in constructing window for loocv = TRUE case. k_out is also used to order the dimensions of pred.lm output for both loocv = TRUE and loocv = FALSE cases. 
   daily <- FALSE
   if (any(names(dim(x)) == time_dim)) {  ## IF DAILY DATA IS PROVIDED
-    daily <- TRUE  ## in order to arrange the res dimension, when loocv is not TRUE 
-    k_out <- as.numeric (dim(x)[time_dim])  ## for daily, to omit the data of the corresponding year.
+    daily <- TRUE  ## in order to arrange the res dimension of x and f after MergeDims 
+    k_out <- as.numeric (dim(x)[time_dim])  
     x <- MergeDims (x, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
     y <- MergeDims (y, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
     if (!is.null(f)) {
@@ -702,7 +700,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
   # if the data is all NA, force return return NA
   if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
     if (is.null(f)) {  
-      res <- rep(NA, nrow(tmp_df))
+      res <- array(rep(NA, nrow(tmp_df)), c(k_out, nrow(tmp_df)/k_out))
     } else {
       if (!is.null(aux_dim)) {
         res <- array(NA, dim(f)[names(dim(f)) != aux_dim])
@@ -736,22 +734,12 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
     res <- .pred_lm(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, aux_dim = aux_dim, k_out = k_out)
   }
 
-  ## dimension naming
-  if (daily & is.null(f)) {            
-    if (!loocv) {             
-      dim (res) <- c(k_out, length(res)/k_out)
+    if (!is.null(f) & daily) {
+      res <- SplitDim(data = res, split_dim = sdate_dim, new_dim_name = time_dim, 
+                      indices = rep (1:k_out, dim(res)[sdate_dim] / k_out))
     }
-    names(dim(res)) <- c(time_dim, sdate_dim)
-  } else if (daily & !is.null (f)) { 
-    sdate_num <- as.numeric (dim(res)[sdate_dim])/k_out
-    res <- SplitDim (res, indices = rep(1:k_out, sdate_num), 
-                     new_dim_name = time_dim, split_dim = sdate_dim) 
-  } else if (!daily & is.null(f)) {
-    dim(res) <- length(res)
-    names(dim(res)) <- sdate_dim
-  } ## if !daily & !is.null(f), the function already correctly assign the dimension names 
- 
-   return(res)
+
+    return(res)
 }
 
 #-----------------------------------
@@ -792,17 +780,18 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
   if (loocv) {
     pred_vals <- sapply(1:length(lm1), function(j) {
       if (all(is.na(lm1[[j]]))) {
-        return(rep(NA, k_out))
+        return(array(rep(NA, k_out), c(k_out, nrow(df) / k_out)))
       } else {
         window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # test the daily data of the corresponding year
         return(predict(lm1[[j]], df[window,]))
       }})
+    pred_vals <- array (pred_vals, c(k_out, nrow(df) / k_out))
   } else {
     if (!is.na(lm1)) {
       # case to downscale hindcasts
       if (is.null(f)) { 
-        pred_vals_ls <- lapply(lm1, predict, data = df)    
-        pred_vals <- unlist(pred_vals_ls)
+        pred_vals_ls <- lapply(lm1, predict, newdata = df)    
+        pred_vals <- array (unlist(pred_vals_ls), c(k_out, nrow(df) / k_out))
       # case to downscale forecasts
       } else {
         if (!is.null(aux_dim)) {
@@ -816,17 +805,17 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
         # basic
           pred_vals_ls <- lapply(lm1, predict, newdata = data.frame(x = as.vector(f)))
           pred_vals <- unlist(pred_vals_ls)
-          dim(pred_vals) <- dim(f )
+          dim(pred_vals) <- dim(f)
         }
       }
     } else {
       if (is.null(f)) {
-        pred_vals <- rep(NA, nrow(df))
+        pred_vals <- array(rep(NA, k_out), c(k_out, nrow(df) / k_out))
       } else {
         if (!is.null(aux_dim)) {
           pred_vals <- array(NA, dim(f)[names(dim(f)) != aux_dim])
         } else {
-          pred_vals <- array(NA, dim(f))
+          pred_vals <- array(NA,  dim(f))
         }
       }
     }
-- 
GitLab


From c7b8c32f7e07e5261e350c8d0e7cd2106afe1aee Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Sun, 21 Apr 2024 11:59:46 +0200
Subject: [PATCH 3/4] obs_cat is arranged wrt daily data

---
 R/LogisticReg.R | 68 +++++++++++++++++++++++--------------------------
 1 file changed, 32 insertions(+), 36 deletions(-)

diff --git a/R/LogisticReg.R b/R/LogisticReg.R
index 91fde87..d42c39e 100644
--- a/R/LogisticReg.R
+++ b/R/LogisticReg.R
@@ -486,6 +486,20 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
 
   # convert observations to categorical predictands
 
+  k_out <- 1 ##  in case loocv = TRUE and the data is NOT daily, leave one data out.
+  daily <- FALSE
+  if (  time_dim %in% names(dim(obs_ref)) & dim(obs_ref)[time_dim] > 1) {
+    daily <- TRUE
+    sdate_num <- as.numeric (dim(obs_ref)[sdate_dim])
+    k_out <- as.numeric (dim(obs_ref)[time_dim]) ## in case loocv = TRUE and the data is daily, leave one-year data out
+    obs_ref <-  MergeDims (obs_ref, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    predictor <- MergeDims (predictor, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    if (!is.null(exp_cor)) {
+      sdate_num_fr <- as.numeric (dim(forecast)[sdate_dim]) ## sdate_num of forecast
+      forecast <- MergeDims (forecast, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    }
+  }
+  
   obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
                      if (!any(!is.na(x))) {
                        rep(NA,length(x))
@@ -493,28 +507,23 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
 		       terc <- convert2prob(as.vector(x), prob = probs_cat) 
                        as.integer(apply(terc, 1, function(r) which (r == 1)))}},          
                    output_dims = sdate_dim, ncores = ncores)$output1
-
+  
   target_dims_predictand <- sdate_dim
   # Apply the logistic regressions
   ## case hindcast only
   if (is.null(exp_cor)) {
-    if (  time_dim %in% names(dim(predictor)) ) {
-      if (dim(predictor)[time_dim] > 1) {
-        target_dims_predictor <- c(target_dims_predictor, time_dim)
-        target_dims_predictand <- c(target_dims_predictand, time_dim)
-      }
-    }
     res <- Apply(list(predictor, obs_cat), 
                  target_dims = list(target_dims_predictor, target_dims_predictand), 
-                 fun = function(x, y) .log_reg(x = x, y = y, loocv = loocv,probs_cat = probs_cat), 
+                 fun = function(x, y) .log_reg(x = x, y = y, loocv = loocv, probs_cat = probs_cat,
+                                               sdate_dim = sdate_dim, k_out = k_out), 
                  output_dims = c(sdate_dim, "category"), ncores = ncores)$output1
 
-   if (  time_dim %in% names(dim(predictor)) ) {
-      if (dim(predictor)[time_dim] > 1) {
-        res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim,
-                        indices = rep(1:(dim(predictor)[time_dim]), dim(predictor)[sdate_dim]))
-      }
-    }
+   if (  daily ) {
+     res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim,
+                     indices = rep(1:k_out, sdate_num))
+     obs_ref <- SplitDim(obs_ref, split_dim = sdate_dim, new_dim_name = time_dim,
+                         indices = rep(1:k_out, sdate_num))
+   }
 
     if (return_most_likely_cat) {
       res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category,
@@ -534,13 +543,15 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
                  target_dims = list(target_dims_predictor, target_dims_predictand, 
                                     target_dims_forecast), 
                  fun = function(x, y, f) .log_reg(x = x, y = y, f = f, loocv = loocv, 
-                                                  probs_cat = probs_cat),
+                                                  probs_cat = probs_cat, sdate_dim = sdate_dim,
+                                                  k_out = k_out),
                  output_dims = c(sdate_dim, "category"), ncores = ncores)$output1
-    if (  time_dim %in% names(dim(predictor)) ) {
-      if (dim(predictor)[time_dim] > 1) {
-        res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim,  
-                        indices = rep(1:(dim(predictor)[time_dim]), dim(forecast)[sdate_dim]))
-      }
+
+    if (  daily ) {
+     res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim,
+                     indices = rep(1:k_out, sdate_num_fr))
+     obs_ref <- SplitDim(obs_ref, split_dim = sdate_dim, new_dim_name = time_dim,
+                         indices = rep(1:k_out, sdate_num))
     }
   }
 
@@ -599,22 +610,7 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
 }
 
 # atomic functions for logistic regressions
-.log_reg <- function(x, y, f = NULL, loocv, probs_cat) {
-
-  k_out <- 1 ##  leave ONE out if the data is not daily.
-  daily <- FALSE
-  if (any(names(dim(x)) == time_dim)) {  ## IF DAILY DATA IS PROVIDED
-    daily <- TRUE  ## in order to arrange the res dimension, when loocv is not TRUE 
-    k_out <- as.numeric (dim(x)[time_dim])  ## for daily, to omit the data of the corresponding year.
-    x <- MergeDims (x, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
-    y <- MergeDims (y, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
-    if (length(dim(x)) > 1) {
-      x <- aperm (x, c(2,1))  ## if ens_mean_sd or sorted_members, dimensions: sdate, pred or member
-    }
-    if (!is.null(f)) {
-      f <- MergeDims (f, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
-    }
-  }
+.log_reg <- function(x, y, f = NULL, loocv, probs_cat, sdate_dim, k_out = 1) {
 
   tmp_df <- data.frame(x = x, y = y)
   
-- 
GitLab


From f3a71cfc45dc805950a3c15261e3bc83a2e9b94e Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Sun, 21 Apr 2024 16:28:34 +0200
Subject: [PATCH 4/4] Processes with the MergeDims function are moved before
 the wrapper

---
 R/Intlr.R | 63 +++++++++++++++++++++----------------------------------
 1 file changed, 24 insertions(+), 39 deletions(-)

diff --git a/R/Intlr.R b/R/Intlr.R
index c7705a3..6f7e124 100644
--- a/R/Intlr.R
+++ b/R/Intlr.R
@@ -608,32 +608,41 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
 
     stop(paste0(lr_method, " method is not implemented yet"))
   }
- 
+
+  k_out <- 1 ## ##  It is used in constructing window for loocv = TRUE case. k_out is also used to order the dimensions of pred.lm output for both loocv = TRUE and loocv = FALSE cases. In case the data is NOT daily, k_out is 1.
+  daily <- FALSE
+  if (  time_dim %in% names(dim(predictor)) ) {
+    daily <- TRUE
+    k_out <- as.numeric (dim(predictor)[time_dim]) ## in case loocv = TRUE and the data is daily, leave one-year data out
+    predictor <- MergeDims (predictor, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    predictand <- MergeDims (predictand, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    output_dims <- c(time_dim, sdate_dim) # for hindcast dwn 
+    if (!is.null(exp_cor)) {
+      sdate_num_fr <- as.numeric (dim(forecast)[sdate_dim]) ## sdate_num of forecast
+      forecast <- MergeDims (forecast, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
+    }
+  }
+
   # Apply the linear regressions
   ## case hindcast - forecast
   if (!is.null(exp_cor)) {
-    if (  time_dim %in% names(dim(predictor)) ) {
-      target_dims_predictor <- c(target_dims_predictor, time_dim) 
-      target_dims_predictand <- c(target_dims_predictand, time_dim)
-      target_dims_forecast <- c(target_dims_forecast, time_dim)
-    }
     res <- Apply(list(predictor, predictand, forecast),
                  target_dims = list(target_dims_predictor, target_dims_predictand, 
                                     target_dims_forecast),
                  fun = .intlr, loocv = loocv, aux_dim = aux_dim, ncores = ncores, 
-                               sdate_dim = sdate_dim, time_dim = time_dim)$output1
+                               sdate_dim = sdate_dim, k_out = k_out)$output1
+
+    if (daily) {
+      res <- SplitDim(data = res, split_dim = sdate_dim, new_dim_name = time_dim,
+                      indices = rep (1:k_out, sdate_num_fr))
+    }
   } 
   ## case hindcast only
   else {
-    if ( time_dim %in% names(dim(predictor)) ) {
-      output_dims <- c(time_dim, sdate_dim)
-      target_dims_predictor <- c(target_dims_predictor, time_dim) 
-      target_dims_predictand <- c(target_dims_predictand, time_dim)
-    }
     res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, 
                                                                  target_dims_predictand),
                  fun = .intlr, loocv = loocv, ncores = ncores, sdate_dim = sdate_dim, 
-                               time_dim = time_dim,
+                               k_out = k_out,
                                output_dims = output_dims)$output1
   }
   
@@ -660,40 +669,20 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
 
   return(res)
 }
-
 #-----------------------------------
 # Atomic function to generate and apply the linear regressions
 #-----------------------------------
-.intlr <- function(x, y, f = NULL, loocv, aux_dim = NULL, sdate_dim = "sdate", time_dim = "time") {
+.intlr <- function(x, y, f = NULL, loocv, aux_dim = NULL, sdate_dim = "sdate", k_out = 1) {
 
   require (plyr)
 
-  k_out <- 1 ##	 number of daily data in a year. It is useful in constructing window for loocv = TRUE case. k_out is also used to order the dimensions of pred.lm output for both loocv = TRUE and loocv = FALSE cases. 
-  daily <- FALSE
-  if (any(names(dim(x)) == time_dim)) {  ## IF DAILY DATA IS PROVIDED
-    daily <- TRUE  ## in order to arrange the res dimension of x and f after MergeDims 
-    k_out <- as.numeric (dim(x)[time_dim])  
-    x <- MergeDims (x, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
-    y <- MergeDims (y, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
-    if (!is.null(f)) {
-      f <- MergeDims (f, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
-    }
-  }
-
   if (!is.null(f)) {
     if (!is.null(aux_dim)) {
-      if (length(dim(x)) > 1 & daily) {
-        x <- aperm (x, c(3,1,2)) ## if 9nn or large scale, dimensions: sdate, nn, member
-        f <- aperm (f, c(3,1,2)) ## if 9nn or large scale, dimensions: sdate, nn, member
-      }
       tmp_df <- data.frame(x = adply(x,.margins = 3, .id = NULL), y = y)
     } else {
       tmp_df <- data.frame(x = as.vector(x), y = y)
     }
   } else {
-    if (length(dim(x)) > 1 & daily) {
-      x <- aperm (x, c(2,1))  ## if 9nn or large scale, dimensions: sdate, nn
-    }
     tmp_df <- data.frame(x = x, y = y)
   }
 
@@ -711,6 +700,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
   } else {
     
     ## apply principle components if method is 9nn
+    ## TODO 9NN MIGHT GIVE ERROR FOR FORECAST CASE. REVIEW AFTER DEVELOPING FORECAST STRATEGY!! 
     if (any(names(dim(x)) == "nn")) { 
       PR <- prcomp(~ x.1 + x.2 + x.3 + x.4 + x.5 + x.6 + x.7 + x.8 + x.9, 
                    data = tmp_df[, -ncol(tmp_df)], 
@@ -734,11 +724,6 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
     res <- .pred_lm(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, aux_dim = aux_dim, k_out = k_out)
   }
 
-    if (!is.null(f) & daily) {
-      res <- SplitDim(data = res, split_dim = sdate_dim, new_dim_name = time_dim, 
-                      indices = rep (1:k_out, dim(res)[sdate_dim] / k_out))
-    }
-
     return(res)
 }
 
-- 
GitLab