# SPOD.R

#'Compute the South Pacific Ocean Dipole (SPOD) index
#'
#'@description The South Pacific Ocean Dipole (SPOD) index is related to the El Nino-Southern Oscillation (ENSO) and the Interdecadal Pacific Oscillation (IPO).
#'The SPOD index is computed as the difference of weighted-averaged SST anomalies over 20ºS-48ºS, 165ºE-190ºE (NW pole) and the weighted-averaged SST anomalies over 
#'44ºS-65ºS, 220ºE-260ºE (SE pole) (Saurral et al., 2020).
#'@author Carlos Delgado-Torres, \email{carlos.delgado@bsc.es}
#'@author Núria Pérez-Zanón, \email{nuria.perez@bsc.es}
#'@author Roberto Bilbao, \email{roberto.bilbao@bsc.es}
#'
#'@param data Data to be used for the index computation with latitude, longitude, start date, forecast month, and member dimensions (in case of decadal predictions),
#'  with latitude, longitude, year, month and member dimensions (in case of historical simulations), or with latitude, longitude, year and month (in case of observations or reanalyses).
#'  This data has to be provided, at least, over the whole region needed to compute the index.
#'@param data_lats An array with the latitudes of the data.
#'@param data_lons An array with the longitudes of the data.
#'@param type A string with the type of data ('dcpp' for decadal predictions, 'hist' for historical simulations, or 'obs' for observations or reanalyses).
#'@param lat_dim A string with the name of the latitude dimension ('lat' by default).
#'@param lon_dim A string with the name of the longitude dimension ('lon' by default).
#'@param mask An array with a mask (with 0's in the grid points that have to be masked) or NULL (NULL by default, i.e., no mask is used). This parameter allows to remove the values over land in case the dataset is a 
#'  combination of surface air temperature over land and sea surface temperature over the ocean. Also, it can be used to mask those grid points that are missing in the 
#'  observational dataset for a fair comparison between the forecast system and the reference dataset.
#'@param monini Month in which the forecast system is initialized (11 by default, i.e., initialized in November). Only used if type='dcpp'.
#'@param fmonth_dim A string with the name of the forecast month dimension ('fmonth' by default). Only used if type='dcpp'.
#'@param sdate_dim A string with the name of the start date dimension ('sdate' by default). Only used if type='dcpp'.
#'@param indices_for_clim Indices of the years to compute the climatology. If NULL, the climatology is calculated over the whole period (NULL by default). If FALSE, the data are assumed to be already anomalies.
#'  In case of type='dcpp', indices_for_clim must be relative to the first forecast year, and the climatology is automatically computed over the actual common period for the different forecast years.
#'@param year_dim A string with the name of the year dimension ('year' by default). Only used if type='hist' or type='obs'.
#'@param month_dim A string with the name of the month dimension ('month' by default). Only used if type='hist' or type='obs'.
#'@param member_dim A string with the name of the member dimension ('member' by default). Only used if type='dcpp' or type='hist'.
#'
#'@return The SPOD index as function of the sdate, forecast year, and member (in case of decadal predictions);
#'  as function of the year and the member (in case of historical simulations); or as function of the year (in case of observations or reanalyses).
#'
#'@examples
#' ## Observations or reanalyses
#' obs = array(1:100, dim = c(year = 5, lat = 19, lon = 37, month = 12))
#' lat = seq(-90, 90, 10)
#' lon = seq(0, 360, 10)
#' index_obs = SPOD(data = obs, data_lats = lat, data_lons = lon, type = 'obs')
#' 
#' ## Historical simulations
#' hist = array(1:100, dim = c(year = 5, lat = 19, lon = 37, month = 12, member = 5))
#' lat = seq(-90, 90, 10)
#' lon = seq(0, 360, 10)
#' index_hist = SPOD(data = hist, data_lats = lat, data_lons = lon, type = 'hist')
#' 
#' ## Decadal predictions
#' dcpp = array(1:100, dim = c(sdate = 5, lat = 19, lon = 37, fmonth = 24, member = 5))
#' lat = seq(-90, 90, 10)
#' lon = seq(0, 360, 10)
#' index_dcpp = SPOD(data = dcpp, data_lats = lat, data_lons = lon, type = 'dcpp', monini = 1)
#'
#'@import ClimProjDiags
#'@import multiApply
#'@import s2dv
#'@import startR
#'@export
SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lon', 
                 mask = NULL, monini = 11, fmonth_dim = 'fmonth', sdate_dim = 'sdate', 
                 indices_for_clim = NULL, year_dim = 'year', month_dim = 'month', member_dim = 'member'){
  # Computes the SPOD index: the area-weighted mean of `data` over the NW pole
  # minus the area-weighted mean over the SE pole (Saurral et al., 2020), and
  # then hands the resulting series to .Indices() for the type-specific
  # post-processing. All argument validation is done up front so that bad
  # input fails with a clear message before any heavy computation starts.

  ## Local validators
  # A "string" is a single, non-missing character value.
  is_string <- function(x) {
    is.character(x) && length(x) == 1L && !is.na(x)
  }
  # Coordinates may be a plain numeric/integer vector or a 1-D array.
  is_coord <- function(x) {
    is.numeric(x) && length(dim(x)) <= 1L
  }

  ## Checkings
  if (!is.array(data)) {
    stop('data must be an array')
  }
  if (!is_coord(data_lats)) {
    stop('data_lats must be a numeric vector')
  }
  if (!is_coord(data_lons)) {
    stop('data_lons must be a numeric vector')
  }
  # Length is checked first so that a vector-valued `type` yields this message
  # instead of a "condition has length > 1" error.
  if (!(is_string(type) && type %in% c('dcpp', 'hist', 'obs'))) {
    stop("type must be 'dcpp', 'hist' or 'obs'")
  }
  if (!is_string(lat_dim)) {
    stop('lat_dim must be a string')
  }
  if (!is_string(lon_dim)) {
    stop('lon_dim must be a string')
  }
  # Without both spatial dimensions the regional means cannot be located.
  if (!all(c(lat_dim, lon_dim) %in% names(dim(data)))) {
    stop('lat_dim and lon_dim must be names of dimensions of data')
  }
  # monini has to be one whole month number; `%in% 1:12` rejects 0, 13, 1.5, NA.
  if (!(is.numeric(monini) && length(monini) == 1L && monini %in% 1:12)) {
    stop("monini must be an integer from 1 to 12")
  }
  if (!is_string(fmonth_dim)) {
    stop('fmonth_dim must be a string')
  }
  if (!is_string(sdate_dim)) {
    stop('sdate_dim must be a string')
  }
  # Accepted values: NULL (whole period), FALSE (already anomalies) or numeric indices.
  if (!(is.null(indices_for_clim) || isFALSE(indices_for_clim) || is.numeric(indices_for_clim))) {
    stop("indices_for_clim must be a numeric vector, NULL to compute the anomalies based on the whole period, or FALSE if data are already anomalies")
  }
  if (!is_string(year_dim)) {
    stop('year_dim must be a string')
  }
  if (!is_string(month_dim)) {
    stop('month_dim must be a string')
  }
  if (!is_string(member_dim)) {
    stop('member_dim must be a string')
  }

  ## Mask
  if (!is.null(mask)) {
    mask_dims <- dim(mask)
    # The mask may come as (lat, lon) or (lon, lat); sizes are compared by
    # dimension name so both layouts are validated the same way.
    mask_ok <- is.array(mask) && length(mask_dims) == 2L &&
      setequal(names(mask_dims), c(lat_dim, lon_dim)) &&
      mask_dims[[lat_dim]] == length(data_lats) &&
      mask_dims[[lon_dim]] == length(data_lons)
    if (!mask_ok) {
      stop('mask must be NULL (no mask) or an array with c(lat_dim,lon_dim) dimensions and 0 in those grid points that have to be masked')
    }
    ## To mask those grid point that are missing in the observations
    # Bring the mask to (lat, lon) so it lines up with the slices Apply() passes.
    mask <- s2dv::Reorder(data = mask, order = c(lat_dim, lon_dim))
    fun_mask <- function(data, mask) {
      data[mask == 0] <- NA
      return(data)
    }
    data <- multiApply::Apply(data = data, target_dims = c(lat_dim, lon_dim),
                              fun = fun_mask, mask = mask)$output1
  }

  ## Regions for IPO_SPOD (Saurral et al., 2020)
  ## Each region is c(lon_min, lon_max, lat_min, lat_max)
  regions <- list(reg1 = c(165, 190, -48, -20),  # NW pole
                  reg2 = c(220, 260, -65, -44))  # SE pole

  # Dimension positions are looked up after masking because Apply() may have
  # changed the order of the dimensions of `data`.
  lon_pos <- which(names(dim(data)) == lon_dim)
  lat_pos <- which(names(dim(data)) == lat_dim)

  mean_1 <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats,
                                        region = regions$reg1,
                                        londim = lon_pos, latdim = lat_pos)
  mean_2 <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats,
                                        region = regions$reg2,
                                        londim = lon_pos, latdim = lat_pos)

  data <- ClimProjDiags::CombineIndices(indices = list(mean_1, mean_2), weights = NULL,
                                        operation = 'subtract') # (mean_1 - mean_2)

  INDEX <- .Indices(data = data, type = type, monini = monini,
                    indices_for_clim = indices_for_clim, fmonth_dim = fmonth_dim,
                    sdate_dim = sdate_dim, year_dim = year_dim, 
                    month_dim = month_dim, member_dim = member_dim)
  return(INDEX)
}