From 814013d3195e144db7d308831ffb38002401a9a0 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Mon, 17 Apr 2023 12:30:05 +0200 Subject: [PATCH 1/2] 2 categories can be selected- all the obs categories are the same problem=assigns the dominant cat --- R/LogisticReg.R | 60 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/R/LogisticReg.R b/R/LogisticReg.R index c514d25..d58e8ab 100644 --- a/R/LogisticReg.R +++ b/R/LogisticReg.R @@ -365,7 +365,7 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target output_dims = sdate_dim)$output1 res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), fun = function(x, y) - .log_reg(x = x, y = y, loocv = loocv), output_dims = c(sdate_dim, "category"))$output1 + .log_reg(x = x, y = y, loocv = loocv,probs_cat=probs_cat), output_dims = c(sdate_dim, "category"))$output1 if (return_most_likely_cat) { res <- Apply(res, target_dims = c(sdate_dim, "category"), most_likely_category, @@ -425,7 +425,7 @@ get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) { } # atomic functions for logistic regressions -.log_reg <- function(x, y, loocv) { +.log_reg <- function(x, y, loocv,probs_cat) { tmp_df <- data.frame(x = x, y = y) @@ -440,7 +440,7 @@ get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) { lm1 <- train_lr(df = tmp_df, loocv = loocv) # prediction - res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv) + res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv,probs_cat=probs_cat) } return(res) } @@ -457,11 +457,11 @@ train_lr <- function(df, loocv) { if (loocv) { - lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ])) + lm1 <- lapply(1:nrow(df), function(j) ifelse(length(unique(df[-j,]$y))==1,NA,return(multinom(y ~ ., data = df[ -j, ])))) ## if all the observed categories are the same for the corresponding loocv step, assign NA to the relevant lm1 element. } else { - lm1 <- list(multinom(y ~ ., data = df)) + lm1 <- ifelse(length(unique(df$y))==1,list(NA),list(multinom(y ~ ., data = df))) } @@ -471,7 +471,7 @@ train_lr <- function(df, loocv) { #----------------------------------- # Function to apply the logistic regressions. #----------------------------------- -pred_lr <- function(df, lm1, loocv) { +pred_lr <- function(df, lm1, loocv,probs_cat) { require(plyr) @@ -479,19 +479,57 @@ pred_lr <- function(df, lm1, loocv) { # The error: "Error: Results must have the same dimensions." can # appear when the number of sdates is insufficient - pred_vals_ls <- list() - for (j in 1:nrow(df)) { - pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") + + pred_vals_ls <-list() + for (j in 1:nrow(df)) { + if(length(unique(df[-j,]$y))==1) + { + pred_vals_ls[[j]] <-NA ## if all the observed categories are the same for the corresponding loocv step, assign NA as predicted class (we need this step for the two-class cases. predict.multinom function provides the probability for the second category for two-class examples.we can obtain the prob of the first categort by 1-prob of second category) + } else{ + pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") } - + } + pred_vals <- laply(pred_vals_ls, .fun = as.array) + + if( length(probs_cat)+1==2) + { + + if (any(is.na(pred_vals))) ### if all the observed categories are the same for the corresponding loocv step, assign 100% probability to the observed categories for the relevant prediction. + { + ifelse(names(which.max(table(df$y)))==1,pred_vals[is.na(pred_vals)]<-0, + pred_vals[is.na(pred_vals)]<-1) + } + pred_vals_dum<-array(NA,dim=c(nrow(df),2)) + pred_vals_dum[,2]<-pred_vals + pred_vals_dum[,1]<-1-pred_vals + pred_vals<-pred_vals_dum + colnames(pred_vals)<-c(1,2) + } - } else { + } else { # type = class, probs #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs") #pred_vals <- unlist(pred_vals_ls) pred_vals <- predict(lm1[[1]], df, type = "probs") + + if( length(probs_cat)+1==2) + { + + if (any(is.na(pred_vals))) ### if all the observed categories are the same for the corresponding loocv step, assign 100% probability to the observed categories for the relevant prediction. + { + ifelse(names(which.max(table(df$y)))==1,pred_vals[is.na(pred_vals)]<-0, + pred_vals[is.na(pred_vals)]<-1) + } + + pred_vals_dum<-array(NA,dim=c(nrow(df),2)) + pred_vals_dum[,2]<-pred_vals + pred_vals_dum[,1]<-1-pred_vals + pred_vals<-pred_vals_dum + colnames(pred_vals)<-c(1,2) + } + } return(pred_vals) -- GitLab From d00c1f0f735144e9b725c49fe8b7761daff59206 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Wed, 19 Apr 2023 15:58:11 +0200 Subject: [PATCH 2/2] dimension error is fixed --- R/Intlr.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Intlr.R b/R/Intlr.R index b8bbf0f..c1e7e06 100644 --- a/R/Intlr.R +++ b/R/Intlr.R @@ -470,8 +470,8 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t train_lm <- function(df, loocv) { # Remove predictor columns containing only NA's - df <- df[ , apply(df[,colnames(df) != 'y'], 2, function(x) !all(is.na(x)))] - + df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, function(x) !all(is.na(x)))] + if (loocv) { lm1 <- lapply(1:nrow(df), function(j) { -- GitLab