From 814013d3195e144db7d308831ffb38002401a9a0 Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Mon, 17 Apr 2023 12:30:05 +0200
Subject: [PATCH 1/2] 2 categories can be selected- all the obs categories are
 the same problem=assigns the dominant cat

---
 R/LogisticReg.R | 60 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 49 insertions(+), 11 deletions(-)

diff --git a/R/LogisticReg.R b/R/LogisticReg.R
index c514d25..d58e8ab 100644
--- a/R/LogisticReg.R
+++ b/R/LogisticReg.R
@@ -365,7 +365,7 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target
                output_dims = sdate_dim)$output1
 
   res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), fun = function(x, y)
-               .log_reg(x = x, y = y, loocv = loocv), output_dims = c(sdate_dim, "category"))$output1
+               .log_reg(x = x, y = y, loocv = loocv,probs_cat=probs_cat), output_dims = c(sdate_dim, "category"))$output1
 
   if (return_most_likely_cat) {
     res <- Apply(res, target_dims = c(sdate_dim, "category"), most_likely_category,
@@ -425,7 +425,7 @@ get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) {
 }
 
 # atomic functions for logistic regressions
-.log_reg <- function(x, y, loocv) {
+.log_reg <- function(x, y, loocv,probs_cat) {
   
   tmp_df <- data.frame(x = x, y = y)
   
@@ -440,7 +440,7 @@ get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) {
     lm1 <- train_lr(df = tmp_df, loocv = loocv)
 
     # prediction
-    res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv)
+    res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv,probs_cat=probs_cat)
   }
   return(res)
 }
@@ -457,11 +457,11 @@ train_lr <- function(df, loocv) {
 
   if (loocv) {
 
-    lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ]))
+    lm1 <- lapply(1:nrow(df), function(j) ifelse(length(unique(df[-j,]$y))==1,NA,return(multinom(y ~ ., data = df[ -j, ]))))  ## if all the observed categories are the same for the corresponding loocv step, assign NA to the relevant lm1 element.
 
   } else {
 
-    lm1 <- list(multinom(y ~ ., data = df))
+    lm1 <- ifelse(length(unique(df$y))==1,list(NA),list(multinom(y ~ ., data = df)))
 
   }
 
@@ -471,7 +471,7 @@ train_lr <- function(df, loocv) {
 #-----------------------------------
 # Function to apply the logistic regressions.
 #-----------------------------------
-pred_lr <- function(df, lm1, loocv) {
+pred_lr <- function(df, lm1, loocv,probs_cat) {
 
   require(plyr)
 
@@ -479,19 +479,57 @@ pred_lr <- function(df, lm1, loocv) {
 
     # The error: "Error: Results must have the same dimensions." can 
     # appear when the number of sdates is insufficient
-    pred_vals_ls <- list()
-    for (j in 1:nrow(df)) {
-      pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") 
+   
+   pred_vals_ls <-list()
+   for (j in 1:nrow(df)) {
+    if(length(unique(df[-j,]$y))==1)
+    {
+    pred_vals_ls[[j]] <-NA ## if all the observed categories are the same for the corresponding loocv step, assign NA as predicted class (we need this step for the two-class cases. predict.multinom function provides the probability for the second category for two-class examples.we can obtain the prob of the first categort by 1-prob of second category)
+    } else{ 
+    pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") 
     }
- 
+    }
+	
     pred_vals <- laply(pred_vals_ls, .fun = as.array)
+    
+     if( length(probs_cat)+1==2)
+     {
+ 	
+     if (any(is.na(pred_vals)))   ### if all the observed categories are the same for the corresponding loocv step, assign 100% probability to the observed categories for the relevant prediction.
+     {
+     ifelse(names(which.max(table(df$y)))==1,pred_vals[is.na(pred_vals)]<-0,
+     pred_vals[is.na(pred_vals)]<-1)
+     }
+     pred_vals_dum<-array(NA,dim=c(nrow(df),2))
+     pred_vals_dum[,2]<-pred_vals
+     pred_vals_dum[,1]<-1-pred_vals
+     pred_vals<-pred_vals_dum
+     colnames(pred_vals)<-c(1,2)
+    }
 
-  } else {
+    } else {
 
     # type = class, probs
     #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs")
     #pred_vals <- unlist(pred_vals_ls)
     pred_vals <- predict(lm1[[1]], df, type = "probs")
+	
+     if( length(probs_cat)+1==2)
+     {
+	
+     if (any(is.na(pred_vals)))   ### if all the observed categories are the same for the corresponding loocv step, assign 100% probability to the observed categories for the relevant prediction.
+     {
+     ifelse(names(which.max(table(df$y)))==1,pred_vals[is.na(pred_vals)]<-0,
+     pred_vals[is.na(pred_vals)]<-1)
+     }
+	
+     pred_vals_dum<-array(NA,dim=c(nrow(df),2))
+     pred_vals_dum[,2]<-pred_vals
+     pred_vals_dum[,1]<-1-pred_vals
+     pred_vals<-pred_vals_dum
+     colnames(pred_vals)<-c(1,2)
+     }
+
   }
 
   return(pred_vals)
-- 
GitLab


From d00c1f0f735144e9b725c49fe8b7761daff59206 Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Wed, 19 Apr 2023 15:58:11 +0200
Subject: [PATCH 2/2] dimension error is fixed

---
 R/Intlr.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/Intlr.R b/R/Intlr.R
index b8bbf0f..c1e7e06 100644
--- a/R/Intlr.R
+++ b/R/Intlr.R
@@ -470,8 +470,8 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t
 train_lm <- function(df, loocv) {
 
   # Remove predictor columns containing only NA's
-  df <- df[ , apply(df[,colnames(df) != 'y'], 2, function(x) !all(is.na(x)))]
-
+  df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, function(x) !all(is.na(x)))]
+  
   if (loocv) {
    
     lm1 <- lapply(1:nrow(df), function(j) {
-- 
GitLab