diff --git a/R/Analogs.R b/R/Analogs.R index 1cbea151b6a34824decac424cd9321b43945b928..feb063ef258c1524d1e860843356e0c931798bd3 100644 --- a/R/Analogs.R +++ b/R/Analogs.R @@ -448,53 +448,63 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, # For each element in test, find the indices of the k nearest neigbhors in train .analogs <- function(train, test, obs_hres, k, fun_analog, return_indices = FALSE) { - + require(FNN) - + # train and obs_hres dim: 3 dimensions window, lat and lon (in this order) # test dim: 2 dimensions lat and lon (in this order) # Number of lats/lons of the high-resolution data space_dims_hres <- dim(obs_hres)[c(2,3)] # Reformat train and test as an array with (time, points) - train <- apply(train, 1, as.vector); names(dim(train))[1] <- "space" + train <- apply(train, 1, as.vector); names(dim(train))[1] <- "space" test <- as.vector(test) obs_hres <- apply(obs_hres, 1, as.vector); names(dim(obs_hres))[1] <- "space" - + # Identify and remove NA's - idx_na_tr <- is.na(train[ , 1]) + dum<-which(!apply(train,2,function (x) all(is.na(x))))[1] ## the column in which NA in space will be investigated. it shouldn't be all-NA time-step + idx_na_tr <- is.na(train[ , dum]) # NA in space + idy_na_tr <- is.na(train[1, ]) # NA in time idx_na_te <- is.na(test) idx_na <- idx_na_tr | idx_na_te - tr_wo_na <- t(train[!idx_na , ]) + tr_wo_na <- t(train[!idx_na , !idy_na_tr ]) te_wo_na <- test[!idx_na] te_wo_na <- InsertDim(data = te_wo_na, posdim = 1, lendim = 1, name = "time") - - knn.ind <- get.knnx(tr_wo_na, te_wo_na, k) - - dist <- knn.ind$nn.dist - idx <- knn.ind$nn.index - # Either return only the indices or the analogs - if (return_indices) { - res <- as.numeric(idx) + if (all(is.na(test))) { + res <- array(NA,space_dims_hres) } else { - res <- obs_hres[ , idx] - dim(res) <- c(space_dims_hres, analogs = k) - - if (!is.null(fun_analog)) { - if (fun_analog == "wmean") { - weight <- 1 / dist - res <- apply(res, c(1,2), function(x) weighted.mean(x, weight)) - } else if (fun_analog == "min") { - res<-res[,,which.min(dist)] - } else if (fun_analog == "max") { - res<-res[,,which.max(dist)] - } else { - res <- apply(res, c(1,2), fun_analog) + knn.ind <- get.knnx(tr_wo_na, te_wo_na, k) + + dist <- knn.ind$nn.dist + idx <- knn.ind$nn.index + + if (isTRUE(any(idy_na_tr))) { + dum <-(1:length(idy_na_tr))[!idy_na_tr] + idx <- dum[idx] + } + + # Either return only the indices or the analogs + if (return_indices) { + res <- as.numeric(idx) + } else { + res <- obs_hres[ , idx] + dim(res) <- c(space_dims_hres, analogs = k) + + if (!is.null(fun_analog)) { + if (fun_analog == "wmean") { + weight <- 1 / dist + res <- apply(res, c(1,2), function(x) weighted.mean(x, weight)) + } else if (fun_analog == "min") { + res<-res[,,which.min(dist)] + } else if (fun_analog == "max") { + res<-res[,,which.max(dist)] + } else { + res <- apply(res, c(1,2), fun_analog) + } } } } - return(res) } diff --git a/R/LogisticReg.R b/R/LogisticReg.R index a85a1b3ff2d36741c3371e63be57f8b44d7b57ee..b8b19afe320c4662b9565652e343ec6a9a517020 100644 --- a/R/LogisticReg.R +++ b/R/LogisticReg.R @@ -391,12 +391,14 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { rep(NA,length(x)) } else { terc <- convert2prob(as.vector(x), prob = probs_cat) - apply(terc, 1, function(r) which (r == 1))}}, + as.integer(apply(terc, 1, function(r) which (r == 1)))}}, output_dims = sdate_dim, ncores = ncores)$output1 - res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), fun = function(x, y) - .log_reg(x = x, y = y, loocv = loocv,probs_cat=probs_cat), output_dims = c(sdate_dim, "category"), ncores = ncores)$output1 + res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), + fun = function(x, y) + .log_reg(x = x, y = y, loocv = loocv,probs_cat=probs_cat), + output_dims = c(sdate_dim, "category"), ncores = ncores)$output1 if (return_most_likely_cat) { res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category, @@ -444,7 +446,7 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { require(s2dv) # compute climatology - clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean, ncores = ncores)$output1 + clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean, ncores = ncores, na.rm = TRUE)$output1 # compute ensemble mean ens_mean <- Apply(obj_ens, target_dims = member_dim, mean, na.rm = TRUE, ncores = ncores)$output1 @@ -472,7 +474,7 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { lm1 <- .train_lr(df = tmp_df, loocv = loocv) # prediction - res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv,probs_cat=probs_cat) + res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv, probs_cat=probs_cat) } return(res) } @@ -489,11 +491,11 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { if (loocv) { - lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ])) + lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ], trace = FALSE)) } else { - lm1 <- list(multinom(y ~ ., data = df)) + lm1 <- list(multinom(y ~ ., data = df, trace = FALSE)) } @@ -503,7 +505,7 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { #----------------------------------- # Function to apply the logistic regressions. #----------------------------------- -pred_lr <- function(df, lm1, loocv,probs_cat) { +pred_lr <- function(df, lm1, loocv, probs_cat) { require(plyr) @@ -514,35 +516,37 @@ pred_lr <- function(df, lm1, loocv,probs_cat) { pred_vals_ls <-list() for (j in 1:nrow(df)) { - pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") - } + if (all(is.na(df[j,]))) { + pred_vals_ls[[j]] <- rep(NA, length(probs_cat) + 1) + } else { + pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") + } + } - pred_vals <- laply(pred_vals_ls, .fun = as.array) + pred_vals <- laply(pred_vals_ls, .fun = as.array) - if( length(probs_cat)+1==2) - { + if( length(probs_cat)+1==2) { pred_vals_dum<-array(NA,dim=c(nrow(df),2)) pred_vals_dum[,2]<-pred_vals pred_vals_dum[,1]<-1-pred_vals pred_vals<-pred_vals_dum colnames(pred_vals)<-c(1,2) - } + } - } else { + } else { # type = class, probs #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs") #pred_vals <- unlist(pred_vals_ls) pred_vals <- predict(lm1[[1]], df, type = "probs") - if( length(probs_cat)+1==2) - { - pred_vals_dum<-array(NA,dim=c(nrow(df),2)) - pred_vals_dum[,2]<-pred_vals - pred_vals_dum[,1]<-1-pred_vals - pred_vals<-pred_vals_dum - colnames(pred_vals)<-c(1,2) - } + if( length(probs_cat)+1==2) { + pred_vals_dum<-array(NA,dim=c(nrow(df),2)) + pred_vals_dum[,2]<-pred_vals + pred_vals_dum[,1]<-1-pred_vals + pred_vals<-pred_vals_dum + colnames(pred_vals)<-c(1,2) + } }