diff --git a/R/Intlr.R b/R/Intlr.R index eab39ef08cf3486227b70048e37868a6fcd7920e..5ac11c3d7a68bb0e909b35d5adec722e06a239f7 100644 --- a/R/Intlr.R +++ b/R/Intlr.R @@ -477,7 +477,7 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t .train_lm <- function(df, loocv) { # Remove predictor columns containing only NA's - df <- df[ , apply(df[,colnames(df) != 'y'], 2, function(x) !all(is.na(x)))] + df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, function(x) !all(is.na(x)))] if (loocv) { diff --git a/R/LogisticReg.R b/R/LogisticReg.R index c514d2541c1c74c5e53d50b11ea9c1e4775ab6b5..d58e8ab0eeaf47fc088964f04d0ca87aa8e6d710 100644 --- a/R/LogisticReg.R +++ b/R/LogisticReg.R @@ -365,7 +365,7 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target output_dims = sdate_dim)$output1 res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), fun = function(x, y) - .log_reg(x = x, y = y, loocv = loocv), output_dims = c(sdate_dim, "category"))$output1 + .log_reg(x = x, y = y, loocv = loocv,probs_cat=probs_cat), output_dims = c(sdate_dim, "category"))$output1 if (return_most_likely_cat) { res <- Apply(res, target_dims = c(sdate_dim, "category"), most_likely_category, @@ -425,7 +425,7 @@ get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) { } # atomic functions for logistic regressions -.log_reg <- function(x, y, loocv) { +.log_reg <- function(x, y, loocv,probs_cat) { tmp_df <- data.frame(x = x, y = y) @@ -440,7 +440,7 @@ get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) { lm1 <- train_lr(df = tmp_df, loocv = loocv) # prediction - res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv) + res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv,probs_cat=probs_cat) } return(res) } @@ -457,11 +457,11 @@ train_lr <- function(df, loocv) { if (loocv) { - lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ])) + lm1 <- lapply(1:nrow(df), function(j) ifelse(length(unique(df[-j,]$y))==1,NA,return(multinom(y ~ ., data = df[ -j, ])))) ## if all the observed categories are the same for the corresponding loocv step, assign NA to the relevant lm1 element. } else { - lm1 <- list(multinom(y ~ ., data = df)) + lm1 <- ifelse(length(unique(df$y))==1,list(NA),list(multinom(y ~ ., data = df))) } @@ -471,7 +471,7 @@ train_lr <- function(df, loocv) { #----------------------------------- # Function to apply the logistic regressions. #----------------------------------- -pred_lr <- function(df, lm1, loocv) { +pred_lr <- function(df, lm1, loocv,probs_cat) { require(plyr) @@ -479,19 +479,57 @@ pred_lr <- function(df, lm1, loocv) { # The error: "Error: Results must have the same dimensions." can # appear when the number of sdates is insufficient - pred_vals_ls <- list() - for (j in 1:nrow(df)) { - pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") + + pred_vals_ls <-list() + for (j in 1:nrow(df)) { + if(length(unique(df[-j,]$y))==1) + { + pred_vals_ls[[j]] <-NA ## if all the observed categories are the same for the corresponding loocv step, assign NA as predicted class (we need this step for the two-class cases. predict.multinom function provides the probability for the second category for two-class examples.we can obtain the prob of the first categort by 1-prob of second category) + } else{ + pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") } - + } + pred_vals <- laply(pred_vals_ls, .fun = as.array) + + if( length(probs_cat)+1==2) + { + + if (any(is.na(pred_vals))) ### if all the observed categories are the same for the corresponding loocv step, assign 100% probability to the observed categories for the relevant prediction. + { + ifelse(names(which.max(table(df$y)))==1,pred_vals[is.na(pred_vals)]<-0, + pred_vals[is.na(pred_vals)]<-1) + } + pred_vals_dum<-array(NA,dim=c(nrow(df),2)) + pred_vals_dum[,2]<-pred_vals + pred_vals_dum[,1]<-1-pred_vals + pred_vals<-pred_vals_dum + colnames(pred_vals)<-c(1,2) + } - } else { + } else { # type = class, probs #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs") #pred_vals <- unlist(pred_vals_ls) pred_vals <- predict(lm1[[1]], df, type = "probs") + + if( length(probs_cat)+1==2) + { + + if (any(is.na(pred_vals))) ### if all the observed categories are the same for the corresponding loocv step, assign 100% probability to the observed categories for the relevant prediction. + { + ifelse(names(which.max(table(df$y)))==1,pred_vals[is.na(pred_vals)]<-0, + pred_vals[is.na(pred_vals)]<-1) + } + + pred_vals_dum<-array(NA,dim=c(nrow(df),2)) + pred_vals_dum[,2]<-pred_vals + pred_vals_dum[,1]<-1-pred_vals + pred_vals<-pred_vals_dum + colnames(pred_vals)<-c(1,2) + } + } return(pred_vals)