PeriodStandardization.R 9.31 KB
Newer Older
PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim = 'syear', 
                                  leadtime_dim = 'time', memb_dim = 'ensemble',
                                  ref_period = NULL, cross_validation = FALSE,  
                                  handle_infinity = FALSE, param_error = -9999, 
                                  method = 'parametric', distribution = 'log-Logistic', 
                                  na.rm = FALSE, ncores = NULL) {
  # Initial checks
  target_dims <- c(leadtime_dim, time_dim, memb_dim)

  if (is.null(ref_period)) {
    ref.start <- NULL
    ref.end <- NULL
  } else {
    ref.start <- ref_period[[1]]
    ref.end <- ref_period[[2]]
  }


  # Standardization
  if (is.null(data_cor)) {
    spei <- Apply(data = list(data), 
                  target_dims = target_dims,
                  fun = .standardization, 
                  leadtime_dim = leadtime_dim, 
                  time_dim = time_dim, memb_dim = memb_dim, 
                  ref_period = ref_period, handle_infinity = handle_infinity,
                  cross_validation = cross_validation, param_error = param_error, 
                  method = method, distribution = distribution, 
                  na.rm = na.rm, ncores = ncores)$output1
  } else {
    spei <- Apply(data = list(data, data_cor), target_dims = target_dims,
                  fun = .standardization,
                  leadtime_dim = leadtime_dim, 
                  time_dim = time_dim, memb_dim = memb_dim, 
                  ref_period = ref_period, handle_infinity = handle_infinity,
                  cross_validation = cross_validation, param_error = param_error, 
                  method = method, distribution = distribution, 
                  na.rm = na.rm, ncores = ncores)$output1
  }

  # add NA
  if (!is.null(accum)) {
    spei <- Apply(data = list(spei), target_dims = leadtime_dim,
                      output_dims = leadtime_dim,
                            fun = function(x, accum, leadtime_dim) {
                              res <- c(rep(NA, accum-1), x)
                              return(res)
                            }, accum = accum, leadtime_dim = leadtime_dim)$output1
  }
  if (is.null(data_cor)) {
    pos <- match(names(dim(data)), names(dim(spei)))
    spei <- aperm(spei, pos)
  } else {
    pos <- match(names(dim(data_cor)), names(dim(spei)))
    spei <- aperm(spei, pos)
  }
  return(spei)
}

.standardization <- function(data, data_cor = NULL, leadtime_dim = 'time', 
                             time_dim = 'syear', memb_dim = 'ensemble', 
                             ref_period = NULL, handle_infinity = FALSE, 
                             cross_validation = FALSE, param_error = -9999, 
                             method = 'parametric', distribution = 'log-Logistic',
                             na.rm = FALSE) {
  # data: [leadtime_dim, time_dim, memb_dim]

  # maximum number of parameters needed
  nleadtime <- as.numeric(dim(data)[leadtime_dim])
  ntime <- as.numeric(dim(data)[time_dim])
  nmemb <- as.numeric(dim(data)[memb_dim])
  fit = 'ub-pwm'

  coef = switch(distribution,
                "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))),
                "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))),
                "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma'))))

  if (is.null(data_cor)) {
    # cross_val = TRUE
    spei_mod <- data*NA
    params_result <- array(data = NA, dim = c(ntime, nleadtime, length(coef)))
    for (ff in 1:dim(data)[leadtime_dim]) {
      data2 <- data[ff, , ]
      dim(data2) <- c(ntime, nmemb)
      if (!is.null(ref.start) && !is.null(ref.end)) {
        data.fit <- window(data2, ref.start, ref.end)	
      } else {
        data.fit <- data2
      }
      for (nsd in 1:dim(data)[time_dim]) {
        acu <- as.vector(data.fit[-nsd, ])
        acu.sorted <- sort.default(acu, method = "quick")
        if (na.rm) {
          acu.sorted <- acu.sorted[!is.na(acu.sorted)]
        }
        if (!any(is.na(acu.sorted))) {
          if (length(acu.sorted) != 0) { 
            acu_sd <- sd(acu.sorted)
            if (!is.na(acu_sd) & acu_sd != 0) {
              if (distribution != "log-Logistic") {
                pze <- sum(acu == 0) / length(acu)
                acu.sorted <- acu.sorted[acu.sorted > 0]
              }
              if (length(acu.sorted) >= 4) {
                pwm = switch(fit,
                            "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3),
                            pwm.ub(acu.sorted, nmom = 3)
                            # TLMoments::PWM(acu.sorted, order = 0:2)
                            )
                lmom <- pwm2lmom(pwm)  
                if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) {
                  fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3])
                  f_params = switch(distribution,
                                    "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), 
                                                              error = function(e){parglo(lmom)$para}),
                                    "Gamma" = tryCatch(lmom::pelgam(fortran_vec), 
                                                        error = function(e){pargam(lmom)$para}),
                                    "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), 
                                                            error = function(e){parpe3(lmom)$para}))
                  if (distribution == 'log-Logistic' && fit == 'max-lik') {
                    f_params = parglo.maxlik(acu.sorted, f_params)$para
                  }
                  params_result[nsd, ff, ] <- f_params
              if (all(is.na(params_result[nsd, ff, ]))) {
                f_params <- params_result[nsd, ff, ]
                f_params <- f_params[which(!is.na(f_params))]
                cdf_res = switch(distribution,
                                "log-Logistic" = lmom::cdfglo(data2, f_params),
                                "Gamma" = lmom::cdfgam(data2, f_params),
                                "PearsonIII" = lmom::cdfpe3(data2, f_params))
              }
              std_index_cv <- array(qnorm(cdf_res), dim = c(ntime, nmemb))
              spei_mod[ff, nsd, ] <- std_index_cv[nsd, ]
            }
          }
        }
      }
    }
  } else {
    # cross_val = FALSE
    spei_mod <- data_cor*NA
    params_result <- array(data = NA, dim = c(1, length(coef)))
    for (ff in 1:dim(data)[leadtime_dim]) {
      data_cor2 <- data_cor[ff, , ]
      dim(data_cor2) <- c(1, nmemb)
      data2 <- data[ff, , ]
      dim(data2) <- c(ntime, nmemb)
      if (!is.null(ref.start) && !is.null(ref.end)) {
        data.fit <- window(data2, ref.start, ref.end)	
      } else {
        data.fit <- data2
      }
      acu <- as.vector(data.fit)
      acu.sorted <- sort.default(acu, method = "quick")
      if (na.rm) {
        acu.sorted <- acu.sorted[!is.na(acu.sorted)]
      }
      if (!any(is.na(acu.sorted))) {
        if (length(acu.sorted) != 0) {
          acu_sd <- sd(acu.sorted)
          if (!is.na(acu_sd) & acu_sd != 0) {
            if (distribution != "log-Logistic") {
              pze <- sum(acu == 0) / length(acu)
              acu.sorted <- acu.sorted[acu.sorted > 0]
            }
            if (length(acu.sorted) >= 4) {
              pwm = switch(fit,
                            "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3),
                            pwm.ub(acu.sorted, nmom = 3)
                            # TLMoments::PWM(acu.sorted, order = 0:2)
                            )
              lmom <- pwm2lmom(pwm) 
              if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) {
                fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3])
                f_params = switch(distribution,
                                    "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), 
                                                              error = function(e){parglo(lmom)$para}),
                                    "Gamma" = tryCatch(lmom::pelgam(fortran_vec), 
                                                        error = function(e){pargam(lmom)$para}),
                                    "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), 
                                                            error = function(e){parpe3(lmom)$para}))
                if (distribution == 'log-Logistic' && fit == 'max-lik') {
                  f_params = parglo.maxlik(acu.sorted, f_params)$para
                }
                params_result[1, ] <- f_params
              }
            }
            if (all(is.na(params_result[1, ]))) {
              cdf_res <- NA
            } else {
              f_params <- params_result[1, ]
                f_params <- f_params[which(!is.na(f_params))]
                cdf_res = switch(distribution,
                                "log-Logistic" = lmom::cdfglo(data_cor2, f_params),
                                "Gamma" = lmom::cdfgam(data_cor2, f_params),
                                "PearsonIII" = lmom::cdfpe3(data_cor2, f_params))
            }
            std_index_cv <- array(qnorm(cdf_res), dim = c(1, nmemb))
            spei_mod[ff, , ] <- std_index_cv
          }
        }
      }
    }