diff --git a/R/AnoMultiOptions.R b/R/AnoMultiOptions.R new file mode 100644 index 0000000000000000000000000000000000000000..8ec1fb5cdc932f0fb4f4c624415a02e18747c807 --- /dev/null +++ b/R/AnoMultiOptions.R @@ -0,0 +1,207 @@ +#'Anomalies relative to a climatology along selected dimension with or without cross-validation +#' +#'@description This function computes the anomalies relative to a climatology computed along the +#'selected dimension (usually starting dates or forecast time) allowing the application or not of +#'crossvalidated climatologies. The computation is carried out independently for experimental and +#'observational data products. +#' +#'@param var_exp A multidimensional array related to experimental simulations with the specific +#'dimensions required by Clim(): i.e. \code{dim(var_exp)} goes from c(nmod/nexp, nmemb/nparam, nsdates, nltime) +#'up to c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon). +#' +#'@param var_obs A multidimensional array related to observations with the specific dimensions required +#'by Clim(): i.e. \code{dim(var_exp)} goes from c(nmod/nexp, nmemb/nparam, nsdates, nltime) up to +#' c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon) +#' +#'@param cross A logical value indicating whether cross-validation should be applied or not. Default = FALSE. +#' +#'@param memb A logical value indicating whether Clim() computes one climatology for each experimental data +#'product member(TRUE) or it computes one sole climatology for all members (FALSE). Default = TRUE. +#' +#'@param dim_ano An integer indicating the dimension along which the climatology will be computed. It +#'usually corresponds to 3 (sdates) or 4 (ftime). Default = 3. +#' +#' +#'@return A list of length 2: +#'\itemize{ +#'\item\code{$var_exp}{ Array of anomalies for \code{var_exp} with same dimensions input.} +#'\item\code{$var_obs}{ Array of anomalies for \code{var_obs} with same dimensions input.}} +#' +#' +#'@examples +#' +#'# Example 1: +#' +#'dim_exp=c(mod = 1, member = 3, sdates = 2, ftime = 100) +#'dim_obs=c(mod = 1, member = 1, sdates = 2, ftime = 100) +#' +#'var_exp = array(rnorm(prod(dim_exp)), dim=dim_exp) +#'var_obs = array(rnorm(prod(dim_obs)), dim=dim_obs) +#' +#'Computation of anomalies without cross-validation +#'anoM <- AnoMultiOptions(var_exp = var_exp, var_obs = var_obs, +#' cross = FALSE, memb = TRUE) +#'str(anoM) +#' +#' +#'# Example 2: +#' +#'dim_exp=c(mod = 1, member = 3, sdates = 2, ftime = 100, nlon = 100, nlat = 50) +#'dim_obs=c(mod = 1, member = 1, sdates = 2, ftime = 100, nlon = 100, nlat = 50) +#' +#'var_exp = array(rnorm(prod(dim_exp)), dim=dim_exp) +#'var_obs = array(rnorm(prod(dim_obs)), dim=dim_obs) +#' +#'Computation of anomalies with cross-validation using kharin and NDV methods +#'anoM <- AnoMultiOptions(var_exp = var_exp, var_obs = var_obs, +#' cross = TRUE, memb = TRUE) +#'str(anoM) +#'anoM <- AnoMultiOptions2(var_exp = var_exp) +#' +#'@seealso +#'\code{\link{Ano_CrossValid}} +#'\code{\link{Clim}} +#' +#' +#'@export + +AnoMultiOptions <- function(var_exp = NULL, var_obs = NULL, cross = FALSE, memb = TRUE, dim_ano = 3) { + + case_exp = case_obs = 0 + if (is.null(var_exp) & is.null(var_obs)) { + stop("Parameter 'var_exp' and 'var_obs' cannot be NULL.") + } + if (is.null(var_exp)) { + var_exp <- var_obs + case_obs = 1 + warning("Parameter 'var_exp' is not provided and will be recycled.") + } + if (is.null(var_obs)) { + var_obs <- var_exp + case_exp = 1 + warning("Parameter 'var_obs' is not provided and will be recycled.") + } + + dim_exp <- dim(var_exp) + dim_obs <- dim(var_obs) + dimnames_data <- names(dim_exp) + + + if (length(dim_exp) < 4 | length(dim_obs) < 4) { + stop("Invalid input dimensions: At least 4 dimensions needed for var_exp or var_obs, + c(nexp/nobs, nmemb, nsdates, nltime).") + } + if (length(dim_exp) != length(dim_obs)) { + stop('Invalid input dimensions: var_exp and var_obs should have same number of dimensions.') + } + for (jn in 3:max(length(dim_exp), length(dim_obs))) { + if (dim_exp[jn] != dim_obs[jn]) { + stop("Invalid input dimensions: var_exp and var_obs dimensions should have same length + for nsdates and nltime (and for nlevel,nlon and nlat if present).") + } + } + + if (dim_ano == dim_ano & (dim_exp[dim_ano] == 1 | dim_obs[dim_ano] == 1)) { + stop("Invalid input dimensions: selected dim_ano dimension length should be greater than 1") + } + if (!is.logical(cross) | !is.logical(memb) ) { + stop("Invalid flags used: Flags 'cross' and 'memb' should be all logical.") + } + if (length(cross) > 1 | length(memb) > 1 ) { + stop("Invalid flags used: Flags 'cross' and 'memb' should all have length 1.") + } + if ( !(dim_ano == 3 | dim_ano == 4)) { + warning("A not-temporal dimension selected, check 'dim_ano' parameter.") + } + if (dim_exp[dim_ano] < 2) { + stop("Invalid length for selected anomaly time dimension 'dim(var_exp[dim_ano])': length should be greater than 1.") + } + + # Selecting time dimension through dimensions permutation + if (dim_ano != 3) { + dimperm <- 1 : length(dim_exp) + dimperm[3] <- dim_ano + dimperm[dim_ano] <- 3 + + var_exp <- aperm(var_exp, perm = dimperm) + var_obs <- aperm(var_obs, perm = dimperm) + + #Updating permuted dimensions + dim_exp <- dim(var_exp) + dim_obs <- dim(var_obs) + } + + + # Computating anomalies + #---------------------- + + # With cross-validation + if (cross) { + ano <- Ano_CrossValid(var_exp, var_obs, memb = memb) + + if (case_obs == 1) { + ano <- list(ano_obs = ano$ano_obs) + } + else if (case_exp == 1) { + ano <- list(ano_exp = ano$ano_exp) + } + + # Without cross-validation + } else { + tmp <- Clim(var_exp , var_obs , memb ) + + if (memb) { + clim_exp <- tmp$clim_exp + clim_obs <- tmp$clim_obs + } else { + clim_exp <- InsertDim(tmp$clim_exp, 2, dim_exp[2]) + clim_obs <- InsertDim(tmp$clim_obs, 2, dim_obs[2]) + } + + + clim_exp <- InsertDim(clim_exp, 3, dim_exp[3]) + clim_obs <- InsertDim(clim_obs, 3, dim_obs[3]) + + ano_exp <- var_exp - clim_exp + ano_obs <- var_obs - clim_obs + + if (case_obs == 1) { + ano <- list(ano_obs = ano_obs) + } + else if (case_exp == 1) { + ano <- list(ano_exp = ano_exp) + } + else { + ano <- list(ano_exp = ano_exp, ano_obs = ano_obs) + } + } + + # Permuting back dimensions to original order + if (dim_ano != 3) { + + if (case_obs == 0) { + ano$ano_exp <- aperm(ano$ano_exp, perm = dimperm) + } + if (case_exp == 0) { + ano$ano_obs <- aperm(ano$ano_obs, perm = dimperm) + } + + + #Updating back permuted dimensions + dim_exp <- dim(var_exp) + dim_obs <- dim(var_obs) + } + + #Adding dimensions names + if (case_obs == 0) { + attr(ano$ano_exp, 'dimensions') <- dimnames_data + } + if (case_exp == 0) { + attr(ano$ano_obs, 'dimensions') <- dimnames_data + } + # Outputs + # ~~~~~~~~~ + invisible(ano) +} + + diff --git a/man/AnoMultiOptions.Rd b/man/AnoMultiOptions.Rd new file mode 100644 index 0000000000000000000000000000000000000000..8c5fa4f57a282435cc0489fe6475c04ded2972fd --- /dev/null +++ b/man/AnoMultiOptions.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AnoMultiOptions.R +\name{AnoMultiOptions} +\alias{AnoMultiOptions} +\title{Anomalies relative to a climatology along selected dimension with or without cross-validation} +\usage{ +AnoMultiOptions(var_exp = NULL, var_obs = NULL, cross = FALSE, + memb = TRUE, dim_ano = 3) +} +\arguments{ +\item{var_exp}{A multidimensional array related to experimental simulations with the specific +dimensions required by Clim(): i.e. \code{dim(var_exp)} goes from c(nmod/nexp, nmemb/nparam, nsdates, nltime) +up to c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon).} + +\item{var_obs}{A multidimensional array related to observations with the specific dimensions required +by Clim(): i.e. \code{dim(var_exp)} goes from c(nmod/nexp, nmemb/nparam, nsdates, nltime) up to +c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon)} + +\item{cross}{A logical value indicating whether cross-validation should be applied or not. Default = FALSE.} + +\item{memb}{A logical value indicating whether Clim() computes one climatology for each experimental data +product member(TRUE) or it computes one sole climatology for all members (FALSE). Default = TRUE.} + +\item{dim_ano}{An integer indicating the dimension along which the climatology will be computed. It +usually corresponds to 3 (sdates) or 4 (ftime). Default = 3.} +} +\value{ +A list of length 2: +\itemize{ +\item\code{$var_exp}{ Array of anomalies for \code{var_exp} with same dimensions input.} +\item\code{$var_obs}{ Array of anomalies for \code{var_obs} with same dimensions input.}} +} +\description{ +This function computes the anomalies relative to a climatology computed along the +selected dimension (usually starting dates or forecast time) allowing the application or not of +crossvalidated climatologies. The computation is carried out independently for experimental and +observational data products. +} +\examples{ + +# Example 1: + +dim_exp=c(mod = 1, member = 3, sdates = 2, ftime = 100) +dim_obs=c(mod = 1, member = 1, sdates = 2, ftime = 100) + +var_exp = array(rnorm(prod(dim_exp)), dim=dim_exp) +var_obs = array(rnorm(prod(dim_obs)), dim=dim_obs) + +Computation of anomalies without cross-validation +anoM <- AnoMultiOptions(var_exp = var_exp, var_obs = var_obs, + cross = FALSE, memb = TRUE) +str(anoM) + + +# Example 2: + +dim_exp=c(mod = 1, member = 3, sdates = 2, ftime = 100, nlon = 100, nlat = 50) +dim_obs=c(mod = 1, member = 1, sdates = 2, ftime = 100, nlon = 100, nlat = 50) + +var_exp = array(rnorm(prod(dim_exp)), dim=dim_exp) +var_obs = array(rnorm(prod(dim_obs)), dim=dim_obs) + +Computation of anomalies with cross-validation using kharin and NDV methods +anoM <- AnoMultiOptions(var_exp = var_exp, var_obs = var_obs, + cross = TRUE, memb = TRUE) +str(anoM) +anoM <- AnoMultiOptions2(var_exp = var_exp) + +} +\seealso{ +\code{\link{Ano_CrossValid}} +\code{\link{Clim}} +} + diff --git a/vignettes/Anomalies.md b/vignettes/Anomalies.md new file mode 100644 index 0000000000000000000000000000000000000000..84b7a569f4ad22a7ea319103fd55b9fe8d06d279 --- /dev/null +++ b/vignettes/Anomalies.md @@ -0,0 +1,274 @@ +--- +author: +date: +output: +vignette: +--- +Anomalies +======== + +Function `AnoMultiOption()` allows the user different possibilities of computing anomalies for observational and modelled data. This vignette illustrates a complete example of how to load datasets using the `Load()` function following different cases for anomalies computations. + + +### 1- Load dependencies + +This example requires the following system libraries: + +- libssl-dev +- libnecdf-dev +- cdo + + +The **s2dverification R package** should be loaded by running the following lines in R, onces it is integrated into CRAN mirror. + +```r +library(s2dverification) +library(devtools) +``` + +You will need to install specific versions of certain R packages as follows: + +```r +source('https://earth.bsc.es/gitlab/es/s2dverification/blob/develop-anom/R/AnoMultiOptions.R') +``` + + + +### 2- Define the problem and the correspondent data and parameters + +We want to show how well seasonal forecast models perform for up to a 7-months forecast window. The idea is to compare a observational dataset represented by the ERAinterim reanalysis with the EUROSIP multi-model seasonal forecasting system. + +For more information about both datasets see the next documentation: +- [**ERAinterim**](https://www.ecmwf.int/en/forecasts/datasets/archive-datasets/reanalysis-datasets/era-interim). +- [**EUROSIP system**](https://www.ecmwf.int/en/forecasts/documentation-and-support/long-range/seasonal-forecast-documentation/eurosip-user-guide/multi-model). + +Both datasets can be downloaded from the corresponding server. However in this example we will use the BSC esarchive database. Files path parameters for the `Load()` function are defined as follows: + +```r +#Path for experimental data files path +path_exp_EUROSIP <- list(name = 'system3_m1', + path = file.path('/esarchive/exp/EUROSIP/ecmwf', + '$EXP_NAME$/monthly_mean', + '$VAR_NAME$_f6h/$VAR_NAME$_$START_DATE$.nc')) + +#Path for observational data files +path_obs_eraI <- list(name = 'erainterim', + path = file.path('/esarchive/recon/ecmwf', + '$OBS_NAME$/monthly_mean', + '$VAR_NAME$/$VAR_NAME$_$YEAR$$MONTH$.nc')) +``` + +Our case of study will be a proxy of El Niño3.4 index defined here as the 5-month moving average temperature air surface (tas) anomalies averaged for the region comprised between longitudes [170ºW - 120ºW] and latitudes [5ºS - 5ºN]. We will focus on the 1990s and 2000s decades and especially on the strong El Niño signal during 1997/1998 and the strong La Niña during 1999/2000. Note that the accepted definition of El Niño3.4 index uses sea surface temperature (SST) anomalies instead of tas used in our proxy. Although there are some discrepancies between both, our index shows the same general pattern and it therefore suits for this practical example. + +```r +#Predefined domain for El Niño3.4 index +lonNino <- c(360 - 170, 360 - 120) +latNino <- c(-5, 5) + +#Selected domain for map visualization is defined as: +lonMap <- c(-250, -60) +latMap <- c(-30, 30) + +``` + + +### 3- Loading observational data + +As a first step we will explore the observational data for the last decades. + +In the case of using `Load()` for loading only observational data, all available data from the +given starting date will be loaded as a time series along the forecast time dimension. Anomalies +should therefore be computed using along this dimension using parameter `dim_ano = 4`. Parameter `'output'` +should be set to `'areave'` to compute El Niño3.4 domain averages and to `'lonlat'` to load snap-shot maps. + + +```r +#Starting dates and forecast period parameters for observations +sdates_obs <- '19900101' +time_length <- 240 + +#Loading data +data_Nino <- Load(var = 'tas', + obs = list(path_obs_eraI), + sdates = sdates_obs, + leadtimemax = time_length, + output = 'areave', + lonmin = lonNino[1], lonmax = lonNino[2], + latmin = latNino[1], latmax = latNino[2]) + +data_map <- Load(var = 'tas', + obs = list(path_obs_eraI), + sdates = sdates_obs, + leadtimemax = time_length, + output = 'lonlat', + lonmin = lonMap[1], lonmax = lonMap[2], + latmin = latMap[1], latmax = latMap[2]) + + +#Retrieving observational data +obs_Nino <- data_Nino$obs +obs_map <- data_map$obs + + +#Computing anomalies along forecast time dimension +ano_obs_Nino <- AnoMultiOptions(var_obs = obs_Nino, dim_ano = 4)$ano_obs +ano_obs_map <- AnoMultiOptions(var_obs = obs_map, dim_ano = 4)$ano_obs + +#Running 5-months moving window average +ano_obs_Nino_sm <- Smoothing(ano_obs_Nino, runmeanlen = 5, numdimt = 4) + +#Selecting november-1997 and november-1999 snap-shot map (index starting from january 1990) +inov97 <- 12*7 + 11 +inov99 <- 12*9 + 11 + + +#Plotting the nov1997 and nov1999 anomalies maps +taslim <- c(-2, 2) +brks <- seq(taslim[1], taslim[2], length.out = 21) +layout(matrix(c(1, 1, + 2, 3, + 4, 4), + 3, 2, byrow = TRUE), + heights = c(4, 6, 1), + widths = c(1, 1)) + +#Define the date for plotting +dates_obs <- data_Nino$Dates$start + +plot(x = dates_obs, y = ano_obs_Nino[ , , , ], type = 'l', col = 'grey', + xlab = NA, ylab = NA, + xlim= c(dates_obs[1], dates_obs[20*12]), ylim = taslim) +lines(x = dates_obs, y = ano_obs_Nino_sm[ , , , ], col = 'darkgreen') +lines(x = dates_obs, y = rep(0, length(dates_obs)), col = 'black') +lines(rep(dates_obs[inov97], 2), taslim, lwd = 4, col = 'darkred') +lines(rep(dates_obs[inov99], 2), taslim, lwd = 4, col = 'darkblue') +grid() +title('Tas anomaly between 1990 and 2010 for Niño3.4 domain', line = 1.5) + +PlotEquiMap(ano_obs_map[1, 1, 1, inov97, , ], data_map$lon, data_map$lat, + brks = brks, drawleg = F, toptitle = 'Tas anomaly (El Niño nov-97)', sizetit = 0.8) +rect(lonNino[1], latNino[1], lonNino[2], latNino[2]) + +PlotEquiMap(ano_obs_map[1, 1, 1, inov99,,], data_map$lon, data_map$lat, + brks = brks, drawleg = F, + toptitle = 'Tas anomaly (La Niña nov-99)', sizetit = 0.8) +rect(lonNino[1], latNino[1], lonNino[2], latNino[2]) + +ColorBar(brks, vert = FALSE, subsampleg = 5) + +``` + + + + +The strong signal of El Niño / La Niña (positive / negative anomalies) for november 1997 and 1999 can be clearly seen in both time-series and snap-shot maps. However, since the anomaly is computed with the annual climatological mean, the clear seasonality presented in the time series is not removed hence obscuring the real signal of El Niño variability. We can clarify this problem computing monthly anomalies as described in the next section. + + + +### 4- Seasonal forecast versus observational data + + +In this section we will compare the tas anomalies for the Niño3.4 between the observational ERAinterim data and the EUROSIP forecast system. The latter provide a 7-month forecast window for each starting date, therefore the aim of this section will be to show how this forecast performs during the two events seen in section 3, El Niño 1997 and La Niña 1999. + +We will also compare two different methods when calculating anomalies, using or not using cross-validation during the climatology computation. + +```r +#Starting dates parameter for time series +sdates_exp_df <- seq(as.Date("1994/11/01"), as.Date("2002/11/01"), "years") +sdates_exp <- format(sdates_exp_df, '%Y%m%d') + +#Loading data +data <- Load(var = 'tas', + exp = list(path_exp_EUROSIP), + obs = list(path_obs_eraI), + sdates = sdates_exp, + output = 'areave', + lonmin = lonNino[1], lonmax = lonNino[2], + latmin = latNino[1], latmax = latNino[2]) + + +#Retrieving experimental and observational data +obs <- data$obs +exp <- data$mod + +#Retrieving dates +dates_exp <- as.Date(data$Dates$start) + +#Selecting november-1997 and november-1999 snap-shot map (index starting from january 1990) +inov97 <- 12*7 + 11 +inov99 <- 12*9 + 11 + +#Computing anomalies along the starting date dimension without cross-validation +ano_NoCV <- AnoMultiOptions(var_exp = exp, var_obs = obs, dim_ano = 3, cross = FALSE) +ano_obs_NoCV <- ano$ano_obs +ano_exp_NoCV <- ano$ano_exp + +#Computing anomalies along the starting date dimension with cross-validation +ano_CV <- AnoMultiOptions(var_exp = exp, var_obs = obs, dim_ano = 3, cross = TRUE) +ano_obs_CV <- ano$ano_obs +ano_exp_CV <- ano$ano_exp + +#Running 5-months moving window average for obs and averaging across experimental member +ano_exp_NoCV_m <- Mean1Dim(var = ano_exp_NoCV_sm, posdim = 2 ) +ano_exp_CV_m <- Mean1Dim(var = ano_exp_NoCV_sm, posdim = 2 ) + + +#Plotting +taslim <- c(-2, 2) + +layout(matrix(c(1, 1, 2, 2), 4, 1, byrow = TRUE)) + +plot(x = dates_obs, y = rep(0,length(dates_obs)), + type = 'l', col = 'black', + xlab = 'Forecast time', ylab = 'tas anomalies', + xaxt = 'n', + xlim= c(dates_obs[inov97-40], dates_obs[inov99+40]), ylim = taslim) +axis.Date(side = 1, at = sdates_exp_df, format = '%h%Y') + +for (isd in 1 : dim(exp)[3]) { + jsd = (isd-1)*7 + 1 + for (im in 1 : dim(exp)[2]) { + lines(x = dates_exp[jsd : (jsd + 6)], y = ano_exp_NoCV[1, im, isd, ], + col = 'grey') + } + lines(x = dates_exp[jsd : (jsd + 6)], y = ano_obs_NoCV[1, 1, isd, ], + col = 'black', lwd = 2) + lines(x = dates_exp[jsd : (jsd + 6)], y = ano_exp_NoCV_m[1, isd, ], + col = 'green', lwd = 2) +} +grid() +text(sdates_exp_df[5], 2, 'Without CrossValidation', font = 2, adj = 0) +title('Deseasonalized tas anomalies for observations and forecast') + + + +plot(x = dates_obs, y = rep(0,length(dates_obs)), + type = 'l', col = 'black', + xlab = 'Forecast time', ylab = 'tas anomalies', + xaxt = 'n', + xlim= c(dates_obs[inov97-40], dates_obs[inov99+40]), ylim = taslim) +axis.Date(side = 1, at = sdates_exp_df,format = '%h%Y') + +for (isd in 1 : dim(exp)[3]) { + jsd = (isd-1)*7 + 1 + for (im in 1 : dim(exp)[2]) { + lines(x = dates_exp[jsd : (jsd + 6)], y = ano_exp_CV[1, im, isd, ], + col = 'grey') + } + lines(x = dates_exp[jsd : (jsd + 6)], y = ano_obs_CV[1, 1, isd, ], + col = 'black', lwd = 2) + lines(x = dates_exp[jsd : (jsd + 6)], y = ano_exp_CV_m[1, isd, ], + col = 'green', lwd = 2) +} +grid() +text(sdates_exp_df[5], 2, 'With CrossValidation', font = 2, adj = 0) + +``` + + + + +The figure above shows the deseasonalized tas anomalies for El Nino3.4 domain comparing observational ERAinterim data (black colour lines) with 7-months forecasts from the EUROSIP system for several starting dates (different members [grey] and ensemble mean [green]). The lower / upper panels corresponds with the application / not-application of CrossValidation in the computation of the climatology used in the anomaly calculation. It is clearly seen that there is almost no difference in these two cases due to the significantly high number of sdates (9) in the simulation. + +Overall the agreement between observations and simulations is quite good, especially because in opposition to section 3 this anomalies does not include any seasonality. Since the anomaly is computed along the starting dates dimension (`dim_ano = 3`), i.e. from the corresponding monthly means of the two decades, it assures that the seasonality is removed. This means that changes in the deseasonalized tas anomalies represented in the figure are linked to natural variability, like El Niño / La Niña signal. Therefore, the agreement between observations and model confirms that this natural variability is reasonably well reproduced by the EUROSIP system. + diff --git a/vignettes/DeseasonalizedTASanomalies.png b/vignettes/DeseasonalizedTASanomalies.png new file mode 100644 index 0000000000000000000000000000000000000000..b863eaacd5a3bd8113b71f4829a6b4523b61d13e Binary files /dev/null and b/vignettes/DeseasonalizedTASanomalies.png differ diff --git a/vignettes/Nino3.4TimeSeries.png b/vignettes/Nino3.4TimeSeries.png new file mode 100644 index 0000000000000000000000000000000000000000..cda8a329f08a0098f327eaa71d4e608e1e697848 Binary files /dev/null and b/vignettes/Nino3.4TimeSeries.png differ