diff --git a/.gitignore b/.gitignore
index d17d76340a88f111f22792959ec31a0e53d8a4bf..e11ba7d322dd439b07d98ef244a871c11ae75d9e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 out-logs/
 *.swp
 *.swo
-/modules/Calibration/test_victoria.R
 modules/Loading/testing_recipes/recipe_decadal_calendartest.yml
 modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml
 conf/vitigeoss-vars-dict.yml
diff --git a/OperationalCS.R b/OperationalCS.R
index 1e662d1bb0e6bc1b59634f7cd1b2b67d47636658..ec01a30ec482167ee7dbc07d31389086f7548721 100644
--- a/OperationalCS.R
+++ b/OperationalCS.R
@@ -22,7 +22,7 @@ log_file <- logger$logname
 logger <- logger$logger

 # Checks:
-verifications <- check_recipe(recipe, logger)
+verifications <- check_recipe(recipe, file = args[2], conf, logger)
 # Divide recipe into single verifications recipes:
 total_recipes <- divide_recipe(recipe, verifications, folder, logger)
 # Go to verification code:
diff --git a/conf/archive.yml b/conf/archive.yml
index c50909c3ed556d255c81c23370a0322e90694f14..1f226a0745bf9f4f49edd560f5d68a7c70489dce 100644
--- a/conf/archive.yml
+++ b/conf/archive.yml
@@ -1,24 +1,46 @@
-
-
 archive:
   src: "/esarchive/"
   System:
-    system5c3s:
+    ECMWF-SEAS5:
       name: "ECMWF SEAS5"
       institution: "European Centre for Medium-Range Weather Forecasts"
      src: "exp/ecmwf/system5c3s/"
       daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/",
                    "prlr":"_s0-24h/", "sfcWind":"_f6h/",
-                   "tasmin":"_f24h/", "tasmax":"_f24h/"}
+                   "tasmin":"_f24h/", "tasmax":"_f24h/",
+                   "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/",
+                   "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/",
+                   "tdps":"_f6h/", "hurs":"_f6h/"}
       monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/",
                      "prlr":"_s0-24h/", "sfcWind":"_f6h/",
-                     "tasmin":"_f24h/", "tasmax":"_f24h/"}
+                     "tasmin":"_f24h/", "tasmax":"_f24h/",
+                     "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/",
+                     "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/",
+                     "tdps":"_f6h/"}
       nmember:
         fcst: 51
         hcst: 25
       calendar: "proleptic_gregorian"
+      time_stamp_lag: "0"
       reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc"
-    system7c3s:
+    ECMWF-SEAS5.1:
+      name: "ECMWF SEAS5 (v5.1)"
+      institution: "European Centre for Medium-Range Weather Forecasts"
+      src: "exp/ecmwf/system51c3s/"
+      daily_mean: {"tas":"_f6h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/",
+                   "uas":"_f6h/", "vas":"_f6h/", "psl":"_f6h/",
+                   "tdps":"_f6h/"}
+      monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/",
+                     "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/",
+                     "uas":"_f6h/", "vas":"_f6h/", "psl":"_f6h/",
+                     "tdps":"_f6h/"}
+      nmember:
+        fcst: 51
+        hcst: 25
+      calendar: "proleptic_gregorian"
+      time_stamp_lag: "0"
+      reference_grid: "conf/grid_description/griddes_system51c3s.txt"
+    Meteo-France-System7:
       name: "Meteo-France System 7"
       institution: "European Centre for Medium-Range Weather Forecasts"
       src: "exp/meteofrance/system7c3s/"
@@ -28,21 +50,23 @@ archive:
       nmember:
         fcst: 51
         hcst: 25
+      time_stamp_lag: "+1"
       calendar: "proleptic_gregorian"
       reference_grid: "conf/grid_description/griddes_system7c3s.txt"
-    system21_m1:
+    DWD-GCFS2.1:
       name: "DWD GCFS 2.1"
       institution: "European Centre for Medium-Range Weather Forecasts"
       src: "exp/dwd/system21_m1/"
-      monthly_mean: {"tas":"_f6h/", "prlr":"_f24h",
+      monthly_mean: {"tas":"_f6h/", "prlr":"_f24h/",
                      "g500":"_f12h/", "sfcWind":"_f6h/",
                      "tasmin":"_f24h/", "tasmax":"_f24h/"}
       nmember:
         fcst: 50
         hcst: 30
       calendar: "proleptic_gregorian"
+      time_stamp_lag: "+1"
       reference_grid: "conf/grid_description/griddes_system21_m1.txt"
-    system35c3s:
+    CMCC-SPS3.5:
       name: "CMCC-SPS3.5"
       institution: "European Centre for Medium-Range Weather Forecasts"
       src: "exp/cmcc/system35c3s/"
@@ -53,8 +77,9 @@ archive:
         fcst: 50
         hcst: 40
       calendar: "proleptic_gregorian"
+      time_stamp_lag: "+1"
       reference_grid: "conf/grid_description/griddes_system35c3s.txt"
-    system2c3s:
+    JMA-CPS2:
       name: "JMA System 2"
       institution: "European Centre for Medium-Range Weather Forecasts"
       src: "exp/jma/system2c3s/"
@@ -64,8 +89,9 @@ archive:
         fcst: 10
         hcst: 10
       calendar: "proleptic_gregorian"
+      time_stamp_lag: "+1"
       reference_grid: "conf/grid_description/griddes_system2c3s.txt"
-    eccc1:
+    ECCC-CanCM4i:
       name: "ECCC CanCM4i"
       institution: "European Centre for Medium-Range Weather Forecasts"
       src: "exp/eccc/eccc1/"
@@ -75,9 +101,10 @@ archive:
         fcst: 10
         hcst: 10
       calendar: "proleptic_gregorian"
+      time_stamp_lag: "+1"
       reference_grid: "conf/grid_description/griddes_eccc1.txt"
-    glosea6_system600-c3s:
-      name: "UKMO GloSea 6 6.0"
+    UK-MetOffice-Glosea600:
+      name: "UK MetOffice GloSea 6 (v6.0)"
       institution: "European Centre for Medium-Range Weather Forecasts"
       src: "exp/ukmo/glosea6_system600-c3s/"
       monthly_mean: {"tas":"_f6h/", "tasmin":"_f24h/",
@@ -86,8 +113,9 @@ archive:
         fcst: 62
         hcst: 28
       calendar: "proleptic_gregorian"
+      time_stamp_lag: "+1"
       reference_grid: "conf/grid_description/griddes_ukmo600.txt"
-    ncep-cfsv2:
+    NCEP-CFSv2:
       name: "NCEP CFSv2"
       institution: "NOAA NCEP" #?
       src: "exp/ncep/cfs-v2/"
@@ -97,29 +125,41 @@ archive:
         fcst: 20
         hcst: 20
       calendar: "gregorian"
+      time_stamp_lag: "0"
       reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt"
   Reference:
-    era5:
+    ERA5:
       name: "ERA5"
       institution: "European Centre for Medium-Range Weather Forecasts"
       src: "recon/ecmwf/era5/"
       daily_mean: {"tas":"_f1h-r1440x721cds/",
                    "rsds":"_f1h-r1440x721cds/",
                    "prlr":"_f1h-r1440x721cds/",
+                   "g300":"_f1h-r1440x721cds/", "g500":"_f1h-r1440x721cds/",
+                   "g850":"_f1h-r1440x721cds/",
                    "sfcWind":"_f1h-r1440x721cds/",
                    "tasmax":"_f1h-r1440x721cds/",
-                   "tasmin":"_f1h-r1440x721cds/"}
+                   "tasmin":"_f1h-r1440x721cds/",
+                   "ta300":"_f1h-r1440x721cds/",
+                   "ta500":"_f1h-r1440x721cds/",
+                   "ta850":"_f1h-r1440x721cds/",
+                   "hurs":"_f1h-r1440x721cds/"}
       monthly_mean: {"tas":"_f1h-r1440x721cds/",
                      "prlr":"_f1h-r1440x721cds/",
                      "rsds":"_f1h-r1440x721cds/",
+                     "g300":"_f1h-r1440x721cds/", "g500":"_f1h-r1440x721cds/",
+                     "g850":"_f1h-r1440x721cds/",
                      "sfcWind":"_f1h-r1440x721cds/",
                      "tasmax":"_f1h-r1440x721cds/",
-                     "tasmin":"_f1h-r1440x721cds/"}
+                     "tasmin":"_f1h-r1440x721cds/",
+                     "ta300":"_f1h-r1440x721cds/",
+                     "ta500":"_f1h-r1440x721cds/",
+                     "ta850":"_f1h-r1440x721cds/"}
       calendar: "standard"
       reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc"
-    era5land:
+    ERA5-Land:
       name: "ERA5-Land"
       institution: "European Centre for Medium-Range Weather Forecasts"
       src: "recon/ecmwf/era5land/"
@@ -127,10 +167,11 @@ archive:
                    "prlr":"_f1h/", "sfcWind":"_f1h/"}
       monthly_mean: {"tas":"_f1h/","tasmin":"_f24h/",
                      "tasmax":"_f24h/", "prlr":"_f1h/",
-                     "sfcWind":"_f1h/", "rsds":"_f1h/"}
+                     "sfcWind":"_f1h/", "rsds":"_f1h/",
+                     "tdps":"_f1h/"}
       calendar: "proleptic_gregorian"
       reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc"
-    uerra:
+    UERRA:
       name: "ECMWF UERRA"
       institution: "European Centre for Medium-Range Weather Forecasts"
       src: "recon/ecmwf/uerra_mescan/"
diff --git a/conf/grid_description/griddes_system51c3s.txt b/conf/grid_description/griddes_system51c3s.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9610e9ef73e582c2b1ba28eeee14cb1f5cf0dd14
--- /dev/null
+++ b/conf/grid_description/griddes_system51c3s.txt
@@ -0,0 +1,17 @@
+#
+# Grid description file for ECMWF SEAS5 v5.1
+#
+gridtype = lonlat
+gridsize = 64800
+xname = lon
+xlongname = longitude
+xunits = degrees_east
+yname = lat
+ylongname = latitude
+yunits = degrees_north
+xsize = 360
+ysize = 180
+xfirst = 0.5
+xinc = 1
+yfirst = 89.5
+yinc = -1
diff --git a/conf/output_dictionaries/scorecards.yml b/conf/output_dictionaries/scorecards.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fa92042caf218489eecf32febfd9e9e10bb6717d
--- /dev/null
+++ b/conf/output_dictionaries/scorecards.yml
@@ -0,0 +1,38 @@
+System:
+  ECMWF-SEAS5:
+    short_name: "ecmwfs5"
+    display_name: "ECMWF System 5"
+  ECMWF-SEAS5.1:
+    short_name: "ecmwfs51"
+    display_name: "ECMWF System 5.1"
+  Meteo-France-System7:
+    short_name: "meteofrances7"
+    display_name: "Meteo-France System 7"
+  DWD-GCFS2.1:
+    short_name: "dwds21"
+    display_name: "DWD System 21"
+  CMCC-SPS3.5:
+    short_name: "cmccs35"
+    display_name: "CMCC System 35"
+  JMA-CPS2:
+    short_name: "jmas2"
+    display_name: "JMA System 2"
+  ECCC-CanCM4i:
+    short_name: "ecccs1"
+    display_name: "ECCC System 1"
+  UK-MetOffice-Glosea600:
+    short_name: "ukmos600"
+    display_name: "UK Met Office System 600"
+  NCEP-CFSv2:
+    short_name: "nceps2"
+    display_name: "NCEP System 2"
+Reference:
+  ERA5:
+    short_name: "era5"
+    display_name: "ERA5"
+  ERA5-Land:
+    short_name: "era5land"
+    display_name: "ERA5-Land"
+  UERRA:
+    short_name: "uerra_mescan"
+    display_name: "UERRA MESCAN"
diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml
index 5125215418cc5294fed85349b09b2392ee16ffae..18e694bac67ac23832535ce5f4379a5ef6c6a61e 100644
--- a/conf/variable-dictionary.yml
+++ b/conf/variable-dictionary.yml
@@ -1,10 +1,24 @@
-
 vars:
 ## NOTE: The units field in this file corresponds to CMOR standards.
 ## Some variables in esarchive may have different units than stated here.
 ## Use with caution.

-# ECVs
+# ECVs
+  ta300:
+    units: "K"
+    long_name: "Air Temperature at 300 hPa"
+    standard_name: "air_temperature"
+    accum: no
+  ta500:
+    units: "K"
+    long_name: "Air Temperature at 500 hPa"
+    standard_name: "air_temperature"
+    accum: no
+  ta850:
+    units: "K"
+    long_name: "Air Temperature at 850 hPa"
+    standard_name: "air_temperature"
+    accum: no
   tas:
     units: "K"
     long_name: "Near-Surface Air Temperature"
@@ -31,6 +45,11 @@ vars:
     long_name: "Surface Temperature"
     standard_name: "surface_temperature"
     accum: no
+  tdps:
+    units: "K"
+    long_name: "2 metre dewpoint temperature"
+    standard_name: "dew_point_temperature"
+    accum: no
   sfcWind:
     units: "m s-1"
     long_name: "Near-Surface Wind Speed"
@@ -55,11 +74,21 @@ vars:
     standard_name: "total_precipitation_flux" #? Not in CF
     accum: yes
 #    outname: "acprec"
+  g300:
+    units: "m2 s-2"
+    long_name: "Geopotential"
+    standard_name: "geopotential"
+    accum: no
   g500:
     units: "m2 s-2"
     long_name: "Geopotential"
     standard_name: "geopotential"
     accum: no
+  g850:
+    units: "m2 s-2"
+    long_name: "Geopotential"
+    standard_name: "geopotential"
+    accum: no
   pr:
     units: "kg m-2 s-1"
     long_name: "Precipitation"
@@ -239,3 +268,5 @@ metrics:
     long_name: "Mean Bias Skill Score Statistical Significance"
   enssprerr:
     long_name: "Ensemble Spread-To-Error Ratio"
+  rmsss:
+    long_name: "Root Mean Square Skill Score"
diff --git a/conf/vars-dict.yml-OLD b/conf/vars-dict.yml-OLD
new file mode 100644
index 0000000000000000000000000000000000000000..04549d36001c848521f53fd704b752878b2eb862
--- /dev/null
+++ b/conf/vars-dict.yml-OLD
@@ -0,0 +1,114 @@
+
+vars:
+# ECVs
+  tas:
+    units: "°C"
+    longname: "Daily mean temperature at surface"
+    outname: ~
+  tasmin:
+    units: "°C"
+    longname: "Minimum daily temperature at surface"
+    outname: ~
+  tasmax:
+    units: "°C"
+    longname: "Maximum daily temperature at surface"
+    outname: ~
+  sfcwind:
+    units: "m/s"
+    longname: "Surface wind speed module"
+    outname: ~
+  rsds:
+    units: "W/m2"
+    longname: "Surface solar radiation downwards"
+    outname: ~
+  psl:
+    units: "hPa"
+    longname: "Mean sea level pressure"
+    outname: ~
+  prlr:
+    units: "mm"
+    longname: "Total precipitation"
+    outname: ~
+# CFs
+  cfwnd1:
+    units: "%"
+    longname: "Wind Capacity factor IEC1"
+    outname: ~
+  cfwnd2:
+    units: "%"
+    longname: "Wind Capacity factor IEC2"
+    outname: ~
+  cfwnd3:
+    units: "%"
+    longname: "Wind Capacity factor IEC3"
+    outname: ~
+  cfslr:
+    units: "%"
+    longname: "Solar Capacity factor"
+    outname: ~
+# Energy
+  edmnd:
+    units: "GW"
+    longname: "Electricity Demand"
+    outname: ~
+  wndpwo:
+    units: "GW"
+    longname: "Wind Power"
+    outname: ~
+  dmndnetwnd:
+    units: "GW"
+    longname: "Demand-net-Wind"
+    outname: ~
+# Indices
+  Spr32:
+    units: "days"
+    longname: >
+      Total count of days when daily maximum temp exceeded 32°C
+      from April 21st to June 21st
+    outname: ~
+  SU35:
+    units: "days"
+    longname: >
+      Total count of days when daily maximum temp exceeded 35°C
+      from June 21st to September 21st
+    outname: ~
+  SU36:
+    units: "days"
+    longname: >
+      Total count of days when daily maximum temp exceeded 36°C
+      from June 21st to September 21st
+    outname: ~
+  SU40:
+    units: "days"
+    longname: >
+      Total count of days when daily maximum temp exceeded 40°C
+      from June 21st to September 21st
+    outname: ~
+  GDD:
+    units: "days"
+    longname: >
+      The sum of the daily differences between daily mean
+      temperature and 10°C from April 1st to October 31st
+    outname: ~
+  GST:
+    units: "°C"
+    longname: "The average temperature from April 1st to October 31st"
+    outname: ~
+  SprTX:
+    units: "°C"
+    longname: "The average daily maximum temperature from April 1st to October 31st"
+    outname: ~
+  WSDI:
+    units: ""
+    longname: >
+      The total count of days with at least 6 consecutive days
+      when the daily temperature maximum exceeds its 90th percentile
+    outname: ~
+  SprR:
+    units: "mm"
+    longname: 'Total precipitation from April 21st to June 21st'
+    outname: ~
+  HarR:
+    units: "mm"
+    longname: 'Total precipitation from August 21st to September 21st'
+    outname: ~
diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R
new file mode 100644
index 0000000000000000000000000000000000000000..859e97bbd042be63af43c08358700b663ebe9139
--- /dev/null
+++ b/modules/Anomalies/Anomalies.R
@@ -0,0 +1,100 @@
+source("modules/Anomalies/tmp/CST_Anomaly.R")
+
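(The new Anomalies module below ultimately delegates to s2dv's anomaly machinery. As a reading aid only — toy array shapes rather than the module's s2dv_cube inputs, with the dimension names 'dat'/'ensemble'/'syear'/'time' copied from the calls that follow — a minimal sketch of the cross-validated path:

library(s2dv)

set.seed(1)
hcst <- array(rnorm(120), dim = c(dat = 1, ensemble = 3, syear = 10, time = 4))
obs  <- array(rnorm(40),  dim = c(dat = 1, ensemble = 1, syear = 10, time = 4))

# cross_validation: yes maps to Ano_CrossValid(): the climatology subtracted
# from each start year is computed with that year left out.
anom <- Ano_CrossValid(exp = hcst, obs = obs, time_dim = "syear",
                       dat_dim = c("dat", "ensemble"),
                       memb_dim = "ensemble", memb = TRUE)
str(anom$exp)  # same dimensions as hcst, centered on leave-one-out climatologies
)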
+# Compute the hcst, obs and fcst anomalies with or without cross-validation
+# and return them, along with the hcst and obs climatologies.
+
+compute_anomalies <- function(recipe, data) {
+
+  if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) {
+    error(recipe$Run$logger,
+          paste("The anomaly module has been called, but the element",
+                "'Workflow:Anomalies:compute' is missing from the recipe."))
+    stop()
+  }
+
+  if (recipe$Analysis$Workflow$Anomalies$compute) {
+    if (recipe$Analysis$Workflow$Anomalies$cross_validation) {
+      cross <- TRUE
+      cross_msg <- "with"
+    } else {
+      cross <- FALSE
+      cross_msg <- "without"
+    }
+    original_dims <- dim(data$hcst$data)
+
+    # Compute anomalies
+    anom <- CST_Anomaly(data$hcst, data$obs,
+                        cross = cross,
+                        memb = TRUE,
+                        memb_dim = 'ensemble',
+                        dim_anom = 'syear',
+                        dat_dim = c('dat', 'ensemble'),
+                        ftime_dim = 'time',
+                        ncores = recipe$Analysis$ncores)
+    # Reorder dims
+    anom$exp$data <- Reorder(anom$exp$data, names(original_dims))
+    anom$obs$data <- Reorder(anom$obs$data, names(original_dims))
+
+    # Save full fields
+    hcst_fullvalue <- data$hcst
+    obs_fullvalue <- data$obs
+
+    # Hindcast climatology
+
+    data$hcst <- anom$exp
+    data$obs <- anom$obs
+    remove(anom)
+    # Change variable metadata
+    # data$hcst$Variable$varName <- paste0(data$hcst$Variable$varName, "anomaly")
+    attr(data$hcst$Variable, "variable")$long_name <-
+      paste(attr(data$hcst$Variable, "variable")$long_name, "anomaly")
+    # data$obs$Variable$varName <- paste0(data$obs$Variable$varName, "anomaly")
+    attr(data$obs$Variable, "variable")$long_name <-
+      paste(attr(data$obs$Variable, "variable")$long_name, "anomaly")
+
+    # Compute forecast anomaly field
+    if (!is.null(data$fcst)) {
+      # Compute hindcast climatology ensemble mean
+      clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data,
+                         time_dim = "syear",
+                         dat_dim = c("dat", "ensemble"),
+                         memb = FALSE,
+                         memb_dim = "ensemble",
+                         ftime_dim = "time",
+                         ncores = recipe$Analysis$ncores)
+      clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1,
+                             name = "syear")
+      dims <- dim(clim_hcst)
+      clim_hcst <- rep(clim_hcst, dim(data$fcst$data)[['ensemble']])
+      dim(clim_hcst) <- c(dims, ensemble = dim(data$fcst$data)[['ensemble']])
+      clim_hcst <- Reorder(clim_hcst, order = names(dim(data$fcst$data)))
+      # Get fcst anomalies
+      data$fcst$data <- data$fcst$data - clim_hcst
+      # Change metadata
+      # data$fcst$Variable$varName <- paste0(data$fcst$Variable$varName, "anomaly")
+      attr(data$fcst$Variable, "variable")$long_name <-
+        paste(attr(data$fcst$Variable, "variable")$long_name, "anomaly")
+    }
+
+    info(recipe$Run$logger,
+         paste("The anomalies have been computed,", cross_msg,
+               "cross-validation. The original full fields are returned as",
+               "$hcst.full_val and $obs.full_val."))
+
+    info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####")
+
+  } else {
+    warn(recipe$Run$logger, paste("The Anomalies module has been called, but",
+         "the recipe parameter 'Workflow:Anomalies:compute' is set to FALSE.",
+         "The full fields will be returned."))
+    hcst_fullvalue <- NULL
+    obs_fullvalue <- NULL
+    info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####")
+  }
+
+  ## TODO: Return fcst full value?
+
+  return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst,
+              hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue))
+
+}
diff --git a/modules/Anomalies/tmp/CST_Anomaly.R b/modules/Anomalies/tmp/CST_Anomaly.R
new file mode 100644
index 0000000000000000000000000000000000000000..f38e39b050f7c46be452ac6e6571542c465264b9
--- /dev/null
+++ b/modules/Anomalies/tmp/CST_Anomaly.R
@@ -0,0 +1,246 @@
+#'Anomalies relative to a climatology along selected dimension with or without cross-validation
+#'
+#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es}
+#'@author Pena Jesus, \email{jesus.pena@bsc.es}
+#'@description This function computes the anomalies relative to a climatology
+#'computed along the selected dimension (usually starting dates or forecast
+#'time) allowing the application or not of cross-validated climatologies. The
+#'computation is carried out independently for experimental and observational
+#'data products.
+#'
+#'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load}
+#'  function, containing the seasonal forecast experiment data in the element
+#'  named \code{$data}.
+#'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load}
+#'  function, containing the observed data in the element named \code{$data}.
+#'@param dim_anom A character string indicating the name of the dimension
+#'  along which the climatology will be computed. The default value is 'sdate'.
+#'@param cross A logical value indicating whether cross-validation should be
+#'  applied or not. Default = FALSE.
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension. It must be one dimension in 'exp' and 'obs'. If there is no
+#'  member dimension, set NULL. The default value is 'member'.
+#'@param memb A logical value indicating whether to subtract the climatology
+#'  based on the individual members (TRUE) or the ensemble mean over all
+#'  members (FALSE) when calculating the anomalies. The default value is TRUE.
+#'@param dat_dim A character vector indicating the name of the dataset and
+#'  member dimensions. If there is no dataset dimension, it can be NULL.
+#'  The default value is "c('dataset', 'member')".
+#'@param filter_span A numeric value indicating the degree of smoothing. This
+#'  option is only available if parameter \code{cross} is set to FALSE.
+#'@param ftime_dim A character string indicating the name of the temporal
+#'  dimension where the smoothing with 'filter_span' will be applied. It cannot
+#'  be NULL if 'filter_span' is provided. The default value is 'ftime'.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL. It will be used only when
+#'  'filter_span' is not NULL.
+#'
+#'@return A list with two S3 objects, 'exp' and 'obs', of the class
+#''s2dv_cube', containing experimental and date-corresponding observational
+#'anomalies, respectively. These 's2dv_cube's can be ingested by other functions
+#'in CSTools.
+#'
+#'@examples
+#'# Example 1:
+#'mod <- 1 : (2 * 3 * 4 * 5 * 6 * 7)
+#'dim(mod) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
+#'obs <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
+#'dim(obs) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
+#'lon <- seq(0, 30, 5)
+#'lat <- seq(0, 25, 5)
+#'exp <- list(data = mod, lat = lat, lon = lon)
+#'obs <- list(data = obs, lat = lat, lon = lon)
+#'attr(exp, 'class') <- 's2dv_cube'
+#'attr(obs, 'class') <- 's2dv_cube'
+#'
+#'anom <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = TRUE)
+#'
+#'@seealso \code{\link[s2dv]{Ano_CrossValid}}, \code{\link[s2dv]{Clim}} and \code{\link{CST_Load}}
+#'
+#'@import multiApply
+#'@importFrom s2dv InsertDim Clim Ano_CrossValid Reorder
+#'@export
+CST_Anomaly <- function(exp = NULL, obs = NULL, dim_anom = 'sdate', cross = FALSE,
+                        memb_dim = 'member', memb = TRUE, dat_dim = c('dataset', 'member'),
+                        filter_span = NULL, ftime_dim = 'ftime', ncores = NULL) {
+  # s2dv_cube
+  if (!inherits(exp, 's2dv_cube') & !is.null(exp) ||
+      !inherits(obs, 's2dv_cube') & !is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ",
+         "as output by CSTools::CST_Load.")
+  }
+  # exp and obs
+  if (is.null(exp$data) & is.null(obs$data)) {
+    stop("Parameters 'exp' and 'obs' cannot both be NULL.")
+  }
+  case_exp = case_obs = 0
+  if (is.null(exp)) {
+    exp <- obs
+    case_obs = 1
+    warning("Parameter 'exp' is not provided and 'obs' will be used instead.")
+  }
+  if (is.null(obs)) {
+    obs <- exp
+    case_exp = 1
+    warning("Parameter 'obs' is not provided and 'exp' will be used instead.")
+  }
+  if (any(is.null(names(dim(exp$data)))) | any(nchar(names(dim(exp$data))) == 0) |
+      any(is.null(names(dim(obs$data)))) | any(nchar(names(dim(obs$data))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names in element 'data'.")
+  }
+  if (!all(names(dim(exp$data)) %in% names(dim(obs$data))) |
+      !all(names(dim(obs$data)) %in% names(dim(exp$data)))) {
+    stop("Parameter 'exp' and 'obs' must have same dimension names in element 'data'.")
+  }
+  dim_exp <- dim(exp$data)
+  dim_obs <- dim(obs$data)
+  dimnames_data <- names(dim_exp)
+  # dim_anom
+  if (is.numeric(dim_anom) & length(dim_anom) == 1) {
+    warning("Parameter 'dim_anom' must be a character string and a numeric value will not be ",
+            "accepted in the next release. The corresponding dimension name is assigned.")
+    dim_anom <- dimnames_data[dim_anom]
+  }
+  if (!is.character(dim_anom)) {
+    stop("Parameter 'dim_anom' must be a character string.")
+  }
+  if (!dim_anom %in% names(dim_exp) | !dim_anom %in% names(dim_obs)) {
+    stop("Parameter 'dim_anom' is not found in 'exp' or in 'obs' dimension in element 'data'.")
+  }
+  if (dim_exp[dim_anom] <= 1 | dim_obs[dim_anom] <= 1) {
+    stop("The length of dimension 'dim_anom' in element 'data' of the parameter ",
+         "'exp' and 'obs' must be greater than 1.")
+  }
+  # cross
+  if (!is.logical(cross) | !is.logical(memb)) {
+    stop("Parameters 'cross' and 'memb' must be logical.")
+  }
+  if (length(cross) > 1 | length(memb) > 1) {
+    cross <- cross[1]
+    warning("Parameter 'cross' has length greater than 1 and only the first element ",
+            "will be used.")
+  }
+  # memb
+  if (length(memb) > 1) {
+    memb <- memb[1]
+    warning("Parameter 'memb' has length greater than 1 and only the first element ",
+            "will be used.")
+  }
+  # memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim_exp) | !memb_dim %in% names(dim_obs)) {
+      stop("Parameter 'memb_dim' is not found in 'exp' or in 'obs' dimension.")
+    }
+  }
+  # dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim)) {
+      stop("Parameter 'dat_dim' must be a character vector.")
+    }
+    if (!all(dat_dim %in% names(dim_exp)) | !all(dat_dim %in% names(dim_obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension in element 'data'.",
+           " Set it as NULL if there is no dataset dimension.")
+    }
+  }
+  # filter_span
+  if (!is.null(filter_span)) {
+    if (!is.numeric(filter_span)) {
+      warning("Parameter 'filter_span' is not numeric and no filter",
+              " is being applied.")
+      filter_span <- NULL
+    }
+    # ncores
+    if (!is.null(ncores)) {
+      if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+          length(ncores) > 1) {
+        stop("Parameter 'ncores' must be a positive integer.")
+      }
+    }
+    # ftime_dim
+    if (!is.character(ftime_dim)) {
+      stop("Parameter 'ftime_dim' must be a character string.")
+    }
+    if (!ftime_dim %in% names(dim_exp) | !ftime_dim %in% names(dim_obs)) {
+      stop("Parameter 'ftime_dim' is not found in 'exp' or in 'obs' dimension in element 'data'.")
+    }
+  }
+
+  # Computing anomalies
+  #----------------------
+
+  # With cross-validation
+  if (cross) {
+    ano <- Ano_CrossValid(exp = exp$data, obs = obs$data,
+                          time_dim = dim_anom,
+                          memb_dim = memb_dim,
+                          memb = memb,
+                          dat_dim = dat_dim,
+                          ncores = ncores)
+
+  # Without cross-validation
+  } else {
+    tmp <- Clim(exp = exp$data, obs = obs$data,
+                time_dim = dim_anom,
+                memb_dim = memb_dim,
+                memb = memb,
+                dat_dim = dat_dim,
+                ncores = ncores)
+    if (!is.null(filter_span)) {
+      tmp$clim_exp <- Apply(tmp$clim_exp,
+                            target_dims = c(ftime_dim),
+                            output_dims = c(ftime_dim),
+                            fun = .Loess,
+                            loess_span = filter_span,
+                            ncores = ncores)$output1
+      tmp$clim_obs <- Apply(tmp$clim_obs,
+                            target_dims = c(ftime_dim),
+                            output_dims = c(ftime_dim),
+                            fun = .Loess,
+                            loess_span = filter_span,
+                            ncores = ncores)$output1
+    }
+    if (memb) {
+      clim_exp <- tmp$clim_exp
+      clim_obs <- tmp$clim_obs
+    } else {
+      clim_exp <- InsertDim(tmp$clim_exp, 1, dim_exp[memb_dim])
+      clim_obs <- InsertDim(tmp$clim_obs, 1, dim_obs[memb_dim])
+    }
+    clim_exp <- InsertDim(clim_exp, 1, dim_exp[dim_anom])
+    clim_obs <- InsertDim(clim_obs, 1, dim_obs[dim_anom])
+    ano <- NULL
+
+    # Permuting back dimensions to original order
+    clim_exp <- Reorder(clim_exp, dimnames_data)
+    clim_obs <- Reorder(clim_obs, dimnames_data)
+
+    ano$exp <- exp$data - clim_exp
+    ano$obs <- obs$data - clim_obs
+  }
+
+  exp$data <- ano$exp
+  obs$data <- ano$obs
+
+  # Outputs
+  # ~~~~~~~~~
+  if (case_obs == 1) {
+    return(obs)
+  } else if (case_exp == 1) {
+    return(exp)
+  } else {
+    return(list(exp = exp, obs = obs))
+  }
+}
+
+.Loess <- function(clim, loess_span) {
+  data <- data.frame(ensmean = clim, day = 1 : length(clim))
+  loess_filt <- loess(ensmean ~ day, data, span = loess_span)
+  output <- predict(loess_filt)
+  return(output)
+}
+
diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 59e5451a437d2ed414c6c7fd9060f300ed9bd029..899b12913bfade3e1b3955a8236b22fe387e33f1 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -4,19 +4,27 @@ calibrate_datasets <- function(recipe, data) {
   # recipe. If the forecast is not null, it calibrates it as well.
   #
   # data: list of s2dv_cube objects containing the hcst, obs and fcst.
+  # Optionally, it may also have hcst.full_val and obs.full_val.
   # recipe: object obtained when passing the .yml recipe file to read_yaml()

   method <- tolower(recipe$Analysis$Workflow$Calibration$method)

   if (method == "raw") {
-    warn(recipe$Run$logger, "The Calibration module has been called,
-         but the calibration method in the recipe is 'raw'.
-         The hcst and fcst will not be calibrated.")
+    warn(recipe$Run$logger,
+         paste("The Calibration module has been called, but the calibration",
+               "method in the recipe is 'raw'. The hcst and fcst will not be",
+               "calibrated."))
     fcst_calibrated <- data$fcst
     hcst_calibrated <- data$hcst
+    if (!is.null(data$hcst.full_val)) {
+      hcst_full_calibrated <- data$hcst.full_val
+    } else {
+      hcst_full_calibrated <- NULL
+    }
     CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####"
   } else {
+    ## TODO: Calibrate full fields when present
     # Calibration function params
     mm <- recipe$Analysis$Datasets$Multimodel
     if (is.null(recipe$Analysis$ncores)) {
@@ -32,6 +40,7 @@ calibrate_datasets <- function(recipe, data) {
     CALIB_MSG <- "##### CALIBRATION COMPLETE #####"

     # Replicate observation array for the multi-model case
+    ## TODO: Implement for obs.full_val
     if (mm) {
       obs.mm <- obs$data
       for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) {
@@ -47,13 +56,12 @@ calibrate_datasets <- function(recipe, data) {
       CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based")
       ## TODO: implement other calibration methods
-      ## TODO: Restructure the code?
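(As an aside, the whitelist guard that follows can be read in isolation. This standalone sketch — a hypothetical check_calib_method() helper, not part of the module, which instead logs through recipe$Run$logger before calling stop() — shows the same match-then-abort pattern:

CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based")

check_calib_method <- function(method) {
  method <- tolower(method)
  if (!(method %in% CST_CALIB_METHODS)) {
    stop("Calibration method '", method, "' is not available for monthly data.")
  }
  method
}

check_calib_method("mse_min")  # returns "mse_min"
# check_calib_method("qmap")   # aborts: 'qmap' is only handled for daily data
)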
       if (!(method %in% CST_CALIB_METHODS)) {
-        error(recipe$Run$logger, "Calibration method in the recipe is not
-              available for monthly data.")
+        error(recipe$Run$logger,
+              paste("Calibration method in the recipe is not available for",
+                    "monthly data."))
         stop()
       } else {
-        ## Alba's version of CST_Calibration (pending merge) is being used
         # Calibrate the hindcast
         hcst_calibrated <- CST_Calibration(data$hcst, data$obs,
                                            cal.method = method,
@@ -66,8 +74,25 @@ calibrate_datasets <- function(recipe, data) {
                                            memb_dim = "ensemble",
                                            sdate_dim = "syear",
                                            ncores = ncores)
+        # In the case where anomalies have been computed, calibrate full values
+        if (!is.null(data$hcst.full_val)) {
+          hcst_full_calibrated <- CST_Calibration(data$hcst.full_val,
+                                                  data$obs.full_val,
+                                                  cal.method = method,
+                                                  eval.method = "leave-one-out",
+                                                  multi.model = mm,
+                                                  na.fill = TRUE,
+                                                  na.rm = na.rm,
+                                                  apply_to = NULL,
+                                                  memb_dim = "ensemble",
+                                                  sdate_dim = "syear",
+                                                  ncores = ncores)
+        } else {
+          hcst_full_calibrated <- NULL
+        }
+
+        # Calibrate the forecast
         if (!is.null(data$fcst)) {
-          # Calibrate the forecast
           fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst,
                                              cal.method = method,
                                              eval.method = "leave-one-out",
@@ -86,9 +111,9 @@ calibrate_datasets <- function(recipe, data) {
     } else if (recipe$Analysis$Variables$freq == "daily_mean") {
       # Daily data calibration using Quantile Mapping
       if (!(method %in% c("qmap"))) {
-        error(recipe$Run$logger, "Calibration method in the recipe is not
-              available for daily data. Only quantile mapping 'qmap is
-              implemented.")
+        error(recipe$Run$logger,
+              paste("Calibration method in the recipe is not available for",
+                    "daily data. Only quantile mapping 'qmap' is implemented."))
         stop()
       }
       # Calibrate the hindcast
@@ -104,7 +129,21 @@ calibrate_datasets <- function(recipe, data) {
                                              wet.day = F)
       # Restore dimension order
       hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
-
+      # In the case where anomalies have been computed, calibrate full values
+      if (!is.null(data$hcst.full_val)) {
+        hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
+                                                    data$obs.full_val,
+                                                    exp_cor = NULL,
+                                                    sdate_dim = "syear",
+                                                    memb_dim = "ensemble",
+                                                    method = "QUANT",
+                                                    ncores = ncores,
+                                                    na.rm = na.rm,
+                                                    wet.day = F)
+      } else {
+        hcst_full_calibrated <- NULL
+      }
+
       if (!is.null(data$fcst)) {
         # Calibrate the forecast
         fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs,
@@ -124,5 +163,14 @@ calibrate_datasets <- function(recipe, data) {
     }
   }
   info(recipe$Run$logger, CALIB_MSG)
-  return(list(hcst = hcst_calibrated, fcst = fcst_calibrated))
+  ## TODO: Sort out returns
+  return_list <- list(hcst = hcst_calibrated,
+                      obs = data$obs,
+                      fcst = fcst_calibrated)
+  if (!is.null(hcst_full_calibrated)) {
+    return_list <- append(return_list,
+                          list(hcst.full_val = hcst_full_calibrated,
+                               obs.full_val = data$obs.full_val))
+  }
+  return(return_list)
 }
diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 8d54d63def1acb94639e0bf7732a040a7f98b206..598ddd0e4050105138e6579c290d1ef416924ee2 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -3,9 +3,9 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R")
 # Load required libraries/funs
 source("modules/Loading/dates2load.R")
 source("modules/Loading/check_latlon.R")
+## TODO: Move to prepare_outputs.R
 source("tools/libs.R")

-
 load_datasets <- function(recipe) {

   # -------------------------------------------
@@ -19,7 +19,6 @@ load_datasets <- function(recipe) {
   lons.max <- recipe$Analysis$Region$lonmax
   ref.name <- recipe$Analysis$Datasets$Reference$name
   exp.name <- recipe$Analysis$Datasets$System$name
-
   variable <- recipe$Analysis$Variables$name
   store.freq <- recipe$Analysis$Variables$freq

@@ -55,7 +54,7 @@ load_datasets <- function(recipe) {
   reference_descrip <- archive$Reference[[ref.name]]
   freq.obs <- unlist(reference_descrip[[store.freq]][variable])
   obs.dir <- reference_descrip$src
-  fcst.dir <- exp_descrip$src
+  fcst.dir <- exp_descrip$src
   hcst.dir <- exp_descrip$src
   fcst.nmember <- exp_descrip$nmember$fcst
   hcst.nmember <- exp_descrip$nmember$hcst
@@ -143,7 +142,11 @@ load_datasets <- function(recipe) {
   ## TODO: Give correct dimensions to $Dates$start
   ## (sday, sweek, syear instead of file_date)
   hcst <- as.s2dv_cube(hcst)
-
+  # Adjust dates for models where the time stamp goes into the next month
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
+    hcst$Dates$start[] <- hcst$Dates$start - seconds(exp_descrip$time_stamp_lag)
+  }
+
   # Load forecast
   #-------------------------------------------------------------------
   if (!is.null(recipe$Analysis$Time$fcst_year)) {
@@ -151,7 +154,7 @@ load_datasets <- function(recipe) {
     # with the daily case and the current version of startR not allowing
     # multiple dims split

-    fcst <- Start(dat = fcst.path,
+    fcst <- Start(dat = fcst.path,
                   var = variable,
                   file_date = sdates$fcst,
                   time = idxs$fcst,
@@ -193,6 +196,11 @@ load_datasets <- function(recipe) {

     # Convert fcst to s2dv_cube
     fcst <- as.s2dv_cube(fcst)
+    # Adjust dates for models where the time stamp goes into the next month
+    if (recipe$Analysis$Variables$freq == "monthly_mean") {
+      fcst$Dates$start[] <-
+        fcst$Dates$start - seconds(exp_descrip$time_stamp_lag)
+    }

   } else {
     fcst <- NULL
@@ -282,7 +290,7 @@ load_datasets <- function(recipe) {

   # Check for consistency between hcst and obs grid
   if (!(recipe$Analysis$Regrid$type == 'none')) {
-    if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) {
+    if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
       lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
                              "Please check the original grids and the",
                              "regrid parameters in your recipe.")
@@ -295,7 +303,7 @@ load_datasets <- function(recipe) {
       info(recipe$Run$logger, obs_lat_msg)
       stop("hcst and obs don't share the same latitudes.")
     }
-    if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) {
+    if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
       lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
                              "Please check the original grids and the",
                              "regrid parameters in your recipe.")
@@ -327,22 +335,24 @@ load_datasets <- function(recipe) {
   ## TODO: Make a unit conversion function?
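(The TODO above suggests factoring the repeated block below into a helper. A hypothetical convert_prlr_units() — name and structure invented here, not part of the codebase — would look roughly like this, using the same 86400 s/day × 1000 mm/m factor as the corrected code:

convert_prlr_units <- function(cube) {
  units <- attr(cube$Variable, "variable")$units
  if (units %in% c("m s-1", "m s**-1")) {
    # 1 m/s of accumulated water depth = 1000 mm/m * 86400 s/day
    cube$data <- cube$data * 86400 * 1000
    attr(cube$Variable, "variable")$units <- "mm/day"
  }
  cube
}
# hcst <- convert_prlr_units(hcst); obs <- convert_prlr_units(obs)
)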
if (variable == "prlr") { # Verify that the units are m/s and the same in obs and hcst - if ((attr(obs$Variable, "variable")$units != - attr(hcst$Variable, "variable")$units) && - (attr(obs$Variable, "variable")$units == "m s-1")) { + if (((attr(obs$Variable, "variable")$units == "m s-1") || + (attr(obs$Variable, "variable")$units == "m s**-1")) && + ((attr(hcst$Variable, "variable")$units == "m s-1") || + (attr(hcst$Variable, "variable")$units == "m s**-1"))) { info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") - obs$data <- obs$data*84000*1000 + obs$data <- obs$data*86400*1000 attr(obs$Variable, "variable")$units <- "mm/day" - hcst$data <- hcst$data*84000*1000 + hcst$data <- hcst$data*86400*1000 attr(hcst$Variable, "variable")$units <- "mm/day" if (!is.null(fcst)) { - fcst$data <- fcst$data*84000*1000 + fcst$data <- fcst$data*86400*1000 attr(fcst$Variable, "variable")$units <- "mm/day" } } } + # Compute anomalies if requested # Print a summary of the loaded data for the user, for each object if (recipe$Run$logger$threshold <= 2) { data_summary(hcst, recipe) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 8046344b85bd15d75d684accf78efc2a95abb5ba..e3677e1db3bceae19bfa8e3c27493575a43a00f6 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -387,7 +387,7 @@ load_datasets <- function(recipe) { # lat and lon attributes if (!(recipe$Analysis$Regrid$type == 'none')) { - if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") @@ -401,7 +401,7 @@ load_datasets <- function(recipe) { stop("hcst and obs don't share the same latitudes.") } - if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") @@ -485,18 +485,19 @@ load_datasets <- function(recipe) { ## TODO: Make a function? 
if (variable == "prlr") { # Verify that the units are m/s and the same in obs and hcst - if ((attr(obs$Variable, "variable")$units != - attr(hcst$Variable, "variable")$units) && - (attr(obs$Variable, "variable")$units == "m s-1")) { + if (((attr(obs$Variable, "variable")$units == "m s-1") || + (attr(obs$Variable, "variable")$units == "m s**-1")) && + ((attr(hcst$Variable, "variable")$units == "m s-1") || + (attr(hcst$Variable, "variable")$units == "m s**-1"))) { info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") - obs$data <- obs$data*84000*1000 + obs$data <- obs$data*86400*1000 attr(obs$Variable, "variable")$units <- "mm/day" - hcst$data <- hcst$data*84000*1000 + hcst$data <- hcst$data*86400*1000 attr(hcst$Variable, "variable")$units <- "mm/day" if (!is.null(fcst)) { - fcst$data <- fcst$data*84000*1000 + fcst$data <- fcst$data*86400*1000 attr(fcst$Variable, "variable")$units <- "mm/day" } } diff --git a/modules/Loading/dates2load.R b/modules/Loading/dates2load.R index e1e8f89ea227b3aa9f6a0631ffcbaf04a612b30c..0e3613f3a3a7e6b09d8317cdadb8bcb850b2bccc 100644 --- a/modules/Loading/dates2load.R +++ b/modules/Loading/dates2load.R @@ -15,46 +15,39 @@ library(lubridate) -dates2load <- function(recipe, logger){ +dates2load <- function(recipe, logger) { temp_freq <- recipe$Analysis$Variables$freq recipe <- recipe$Analysis$Time - # hcst dates - file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) - - if (temp_freq == "monthly_mean"){ - file_dates <- .add_dims(file_dates, "hcst") - } - # fcst dates (if fcst_year empty it creates an empty object) - if (! is.null(recipe$fcst_year)){ + if (temp_freq == "monthly_mean") { + file_dates <- .add_dims(file_dates) + } + # fcst dates (if fcst_year empty it creates an empty object) + if (! 
is.null(recipe$fcst_year)) { file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) - if (temp_freq == "monthly_mean"){ - file_dates.fcst <- .add_dims(file_dates.fcst, "fcst") + if (temp_freq == "monthly_mean") { + file_dates.fcst <- .add_dims(file_dates.fcst) } } else { file_dates.fcst <- NULL info(logger, paste("fcst_year empty in the recipe, creating empty fcst object...")) } - return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun - } # adds the correspondent dims to each sdate array -.add_dims <- function(data, type){ +.add_dims <- function(data) { default_dims <- c(sday = 1, sweek = 1, syear = length(data)) - default_dims[names(dim(data))] <- dim(data) dim(data) <- default_dims return(data) - } # Gets the corresponding dates or indices according @@ -65,30 +58,35 @@ dates2load <- function(recipe, logger){ # the forecasting times covering December to March get_timeidx <- function(sdates, ltmin, ltmax, - time_freq="monthly_mean"){ + time_freq="monthly_mean") { - if (time_freq == "daily_mean"){ + if (time_freq == "daily_mean") { sdates <- ymd(sdates) idx_min <- sdates + months(ltmin - 1) idx_max <- sdates + months(ltmax) - days(1) - indxs <- array(numeric(), c(file_date=length(sdates), - time = (as.integer(idx_max[1]-idx_min[1]+1)) + day_seq <- seq(idx_min[1], idx_max[1], by = 'days') + if (any("0229" %in% (format(day_seq, "%m%d")))) { + time_length <- as.integer(idx_max[1]-idx_min[1]) + } else { + time_length <- as.integer(idx_max[1]-idx_min[1]+1) + } + indxs <- array(numeric(), c(file_date = length(sdates), + time = time_length)) #syear = length(sdates), #sday = 1, sweek = 1, - )) for (sdate in 1:length(sdates)) { - days <- seq(idx_min[sdate], idx_max[sdate], by='days') - indxs[sdate,] <- days[!(format(days, "%m%d") == "0229")] + day_seq <- seq(idx_min[sdate], idx_max[sdate], by='days') + indxs[sdate,] <- day_seq[!(format(day_seq, "%m%d") == "0229")] } indxs <- as.POSIXct(indxs*86400, tz = 'UTC', origin = '1970-01-01') lubridate::hour(indxs) <- 12 lubridate::minute(indxs) <- 00 - dim(indxs) <- c(file_date=length(sdates), - time=(as.integer(idx_max[1]-idx_min[1])+1)) + dim(indxs) <- c(file_date = length(sdates), + time = time_length) } else if (time_freq == "monthly_mean") { diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index de4f959138c0c448107032efb9211242e5b1370a..986578f7cc7a74e44604c83fb6080ada63637406 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: bias Skill: diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index f362329e387a7e0db259cac2d220d8f62d3b9b1a..9d404bfa45da3d70620b5f22920067d16f12afe5 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: qmap Skill: diff --git a/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml b/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml index f9130e16d3dc97f6509c6e7668754f678cddbdd7..38b25d42295e198d714956e11cdf8ffdf006ed12 100644 --- 
a/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: bias Skill: diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml new file mode 100644 index 0000000000000000000000000000000000000000..cda98c913123a5ec1f9f194c21e26f2d3dcddea4 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml @@ -0,0 +1,49 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5.1 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '0101' + fcst_year: + hcst_start: '2000' + hcst_end: '2015' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: 30 + latmax: 50 + lonmin: -10 + lonmax: 30 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: raw + Anomalies: + compute: yes + cross_validation: yes + Skill: + metric: mean_bias EnsCorr RPSS CRPSS EnsSprErr # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 14 + remove_NAs: yes + Output_format: Scorecards +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml index ca52e8cc161ece1eb0a23e1be391df1b2318bf5f..0cb2d29f1880041fc90e732a9b7e9cd3960cdfc6 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' @@ -28,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: mse_min Skill: diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml index 27cdccdc89c56ea8ef0e3322063573c2017be744..31ae079d2dfe76c600ecef3083db5943e5f2ae20 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: no Reference: - name: era5 + name: ERA5 Time: sdate: '0601' fcst_year: '2020' @@ -28,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: raw Skill: diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml index 197c109c0e5dbb7f2419131cc2f4a328995dc34f..58030bf3b0697a177d901bbb3b2cbbca0411c779 100644 --- a/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml +++ b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: Meteo-France-System7 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: 
'1101' fcst_year: '2020' @@ -28,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: mse_min Skill: diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml index f83f91bb44f645ddbc84f6541f57fb82a09caa7b..c8d3b5e891de09b5cc1236e1b9c85297fc27f1e3 100644 --- a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml @@ -8,17 +8,17 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: Meteo-France-System7 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' hcst_start: '1993' - hcst_end: '2016' + hcst_end: '2010' ftime_min: 1 - ftime_max: 6 + ftime_max: 2 Region: latmin: -10 latmax: 10 @@ -28,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: yes # yes/no, default yes + cross_validation: yes # yes/no, default yes Calibration: method: mse_min Skill: diff --git a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml index 71e386fae2420e0bb4cdc01c241b40169d787f68..b14c90e13ffd42534689ec0f1c4a179321e23cca 100644 --- a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml +++ b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml @@ -9,10 +9,10 @@ Analysis: freq: daily_mean # Mandatory, str: either monthly_mean or daily_mean Datasets: System: - name: system5c3s # Mandatory, str: System codename. See docu. + name: ECMWF-SEAS5 # Mandatory, str: System codename. See docu. Multimodel: no # Mandatory, bool: Either yes/true or no/false Reference: - name: era5 # Mandatory, str: Reference codename. See docu. + name: ERA5 # Mandatory, str: Reference codename. See docu. Time: sdate: '1101' fcst_year: '2020' # Optional, int: Forecast year 'YYYY' @@ -29,6 +29,9 @@ Analysis: method: bilinear # Mandatory, str: Interpolation method. See docu. type: to_reference # Mandatory, str: to_system, to_reference, or CDO-accepted grid. Workflow: + Anomalies: + compute: no # Whether to compute the anomalies and use them for skill metrics + cross_validation: # whether they should be computed in cross-validation Calibration: method: qmap # Mandatory, str: Calibration method. See docu. 
Skill: diff --git a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml index 233c14eb4a74c7dad982ddbdf3e335c16cad601e..5c899f97273cf183518ee7ebb560ced275e2dc0a 100644 --- a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml +++ b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml @@ -8,10 +8,10 @@ Analysis: freq: daily_mean Datasets: System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: no Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' @@ -28,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: qmap Skill: diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-tas-specs.yml b/modules/Loading/testing_recipes/recipe_test-new-metrics.yml similarity index 81% rename from modules/Loading/testing_recipes/recipe_system7c3s-tas-specs.yml rename to modules/Loading/testing_recipes/recipe_test-new-metrics.yml index b1829d22e76b8e1e94a3218af3b91d46715600e1..b5745292ace8c45258c4e975bc375385005f1fc1 100644 --- a/modules/Loading/testing_recipes/recipe_system7c3s-tas-specs.yml +++ b/modules/Loading/testing_recipes/recipe_test-new-metrics.yml @@ -8,17 +8,17 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: Meteo-France-System7 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' - hcst_start: '1993' - hcst_end: '2016' + hcst_start: '1998' + hcst_end: '2010' ftime_min: 1 - ftime_max: 3 + ftime_max: 2 Region: latmin: -10 latmax: 10 @@ -31,11 +31,13 @@ Analysis: Calibration: method: mse_min Skill: - metric: CRPS CRPSS FCRPS FCRPSS FRPS_Specs + metric: RMSSS Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: no + ncores: 7 + remove_NAs: yes Output_format: S2S4E Run: Loglevel: INFO diff --git a/modules/Loading/testing_recipes/recipe_era5land.yml b/modules/Loading/testing_recipes/recipe_test_anomalies.yml similarity index 67% rename from modules/Loading/testing_recipes/recipe_era5land.yml rename to modules/Loading/testing_recipes/recipe_test_anomalies.yml index c99906b68c37e65c19c98ab062947d6f2d078e37..287f9a98f38786bcc4db37a21eff196d23f7837c 100644 --- a/modules/Loading/testing_recipes/recipe_era5land.yml +++ b/modules/Loading/testing_recipes/recipe_test_anomalies.yml @@ -8,17 +8,17 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: ECMWF-SEAS5 Multimodel: False Reference: - name: era5land + name: ERA5 Time: sdate: '1101' fcst_year: '2020' - hcst_start: '1993' - hcst_end: '2016' + hcst_start: '1999' + hcst_end: '2010' ftime_min: 1 - ftime_max: 3 + ftime_max: 2 Region: latmin: -10 latmax: 10 @@ -29,14 +29,17 @@ Analysis: type: to_system Workflow: Calibration: - method: mse_min + method: raw + Anomalies: + compute: yes + cross_validation: yes Skill: - metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + metric: RPS RPSS CRPS CRPSS BSS10 BSS90 EnsCorr mean_bias mean_bias_SS Probabilities: - percentiles: [[1/4, 2/4, 3/4]] + percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: no - ncores: 1 + ncores: 7 remove_NAs: yes Output_format: S2S4E Run: diff --git a/modules/Loading/testing_recipes/recipe_circular-sort-test.yml b/modules/Loading/testing_recipes/recipe_testing_nadia.yml similarity index 53% rename from modules/Loading/testing_recipes/recipe_circular-sort-test.yml rename to modules/Loading/testing_recipes/recipe_testing_nadia.yml index 
700fd3b2a7fa6a41158c4fe51ea89f2ff32f639e..e6b2bc02e6a511b114977c7238397d89a7ce5558 100644 --- a/modules/Loading/testing_recipes/recipe_circular-sort-test.yml +++ b/modules/Loading/testing_recipes/recipe_testing_nadia.yml @@ -1,7 +1,6 @@ Description: Author: V. Agudetse - Info: For testing the behavior of the loading module when loading data - that crosses the date line or the Greenwich meridian. + Analysis: Horizon: Seasonal Variables: @@ -9,33 +8,40 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: ECMWF-SEAS5 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: - hcst_start: '1993' - hcst_end: '2003' - leadtimemin: 2 - leadtimemax: 2 + hcst_start: '2010' + hcst_end: '2015' + ftime_min: 1 + ftime_max: 6 Region: - latmin: -10 - latmax: 10 - lonmin: 320 - lonmax: 350 + latmin: 30 + latmax: 50 + lonmin: -10 + lonmax: 30 Regrid: method: bilinear type: to_system Workflow: Calibration: - method: mse_min + method: raw + Anomalies: + compute: yes + cross_validation: yes Skill: - metric: BSS90 + metric: mean_bias EnsCorr RPSS CRPSS EnsSprErr + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] Indicators: index: no - Output_format: S2S4E + ncores: 7 + remove_NAs: yes + Output_format: scorecards Run: Loglevel: INFO Terminal: yes diff --git a/modules/Loading/testing_recipes/recipe_system2c3s-prlr-nofcst.yml b/modules/Loading/testing_recipes/wrong_recipe_example.yml similarity index 58% rename from modules/Loading/testing_recipes/recipe_system2c3s-prlr-nofcst.yml rename to modules/Loading/testing_recipes/wrong_recipe_example.yml index 3bfad08ea575ad8545371cd1a8384221ec369265..12e2fc06ea95065fecdbbb8543057b5f3bd18f46 100644 --- a/modules/Loading/testing_recipes/recipe_system2c3s-prlr-nofcst.yml +++ b/modules/Loading/testing_recipes/wrong_recipe_example.yml @@ -1,26 +1,25 @@ Description: Author: V. Agudetse + Info: Incomplete recipe with incorrect fields to test the recipe checker. Analysis: - Horizon: Seasonal + Horizon: Seasoning Variables: - name: prlr + name: tas freq: monthly_mean - Datasets: + Petaflops: System: - name: system2c3s + name: system7c3s Multimodel: False Reference: name: era5 Time: - sdate: '0301' - fcst_year: # + sdate: '1101' + fcst_syear: '2020' hcst_start: '1993' hcst_end: '2016' - ftime_min: 1 - ftime_max: 1 + ftime_max: 6 Region: - latmin: -10 latmax: 10 lonmin: 0 lonmax: 20 @@ -29,14 +28,17 @@ Analysis: type: to_system Workflow: Calibration: - method: evmos + method: Skill: - metric: FRPS + metric: RPS RPSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] Indicators: index: no + ncores: 7 + remove_NAs: yes Output_format: S2S4E Run: Loglevel: INFO Terminal: yes output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index ed0933f2a6c053b0e2a51880a072213789310292..28b5e5529bad1ae5e4402b154b674500a175dd0d 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -3,27 +3,28 @@ source("modules/Saving/paths2save.R") save_data <- function(recipe, data, - calibrated_data = NULL, skill_metrics = NULL, probabilities = NULL, archive = NULL) { - # Wrapper for the saving functions. 
# recipe: The auto-s2s recipe # archive: The auto-s2s archive # data: output of load_datasets() - # calibrated_data: output of calibrate_datasets() + # data: output of calibrate_datasets() # skill_metrics: output of compute_skill_metrics() # probabilities: output of compute_probabilities() # mean_bias: output of compute_mean_bias() if (is.null(recipe)) { - stop("The 'recipe' parameter is mandatory.") + error(recipe$Run$logger, "The 'recipe' parameter is mandatory.") + stop() } if (is.null(data)) { - stop("The 'data' parameter is mandatory. It should be the output of", - "load_datasets().") + error(recupe$Run$logger, + paste("The 'data' parameter is mandatory. It should be a list", + "of at least two s2dv_cubes containing the hcst and obs.")) + stop() } if (is.null(archive)) { if (tolower(recipe$Analysis$Horizon) == "seasonal") { @@ -41,15 +42,13 @@ save_data <- function(recipe, data, dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Export hindcast, forecast and observations onto outfile - if (!is.null(calibrated_data)) { - save_forecast(calibrated_data$hcst, recipe, dict, outdir, - archive = archive, type = 'hcst') - if (!is.null(calibrated_data$fcst)) { - save_forecast(calibrated_data$fcst, recipe, dict, outdir, - archive = archive, type = 'fcst') - } - save_observations(data$obs, recipe, dict, outdir, archive = archive) + save_forecast(data$hcst, recipe, dict, outdir, archive = archive, + type = 'hcst') + if (!is.null(data$fcst)) { + save_forecast(data$fcst, recipe, dict, outdir, + archive = archive, type = 'fcst') } + save_observations(data$obs, recipe, dict, outdir, archive = archive) # Separate ensemble correlation from the rest of the metrics, as it has one # extra dimension "ensemble" and must be saved to a different file @@ -79,7 +78,11 @@ save_data <- function(recipe, data, save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir, archive = archive) save_probabilities(probabilities$probs, recipe, data$hcst, outdir, - archive = archive) + archive = archive, type = "hcst") + if (!is.null(probabilities$probs_fcst)) { + save_probabilities(probabilities$probs_fcst, recipe, data$fcst, outdir, + archive = archive, type = "fcst") + } } } @@ -106,7 +109,6 @@ get_global_attributes <- function(recipe, archive) { get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { # Generates time dimensions and the corresponding metadata. - ## TODO: Add calendar ## TODO: Subseasonal switch(fcst.horizon, @@ -176,6 +178,7 @@ save_forecast <- function(data_cube, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$System[[global_attributes$system]]$calendar + # if (fcst.horizon == "seasonal") { # calendar <- attr(data_cube$Variable, "variable")$dim$time$calendar # } else { @@ -186,12 +189,13 @@ save_forecast <- function(data_cube, dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { - # Method 1: Use the first date as init_date. But it may be better to use the real initialized date (ask users) + ## Method 1: Use the first date as init_date. But it may be better to use + ## the real initialized date (ask users) # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - # Method 2: use initial month + ## Method 2: use initial month init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month if (type == 'hcst') { -#PROBLEM for fcst!!!!!!!!!!!! + ## PROBLEM for fcst!!!!!!!!!!!! 
init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), cal = calendar) @@ -273,9 +277,8 @@ save_forecast <- function(data_cube, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "exp") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "exp") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -291,7 +294,8 @@ save_forecast <- function(data_cube, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, "##### FCST SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, paste("#####", toupper(type), + "SAVED TO NETCDF FILE #####")) } @@ -322,9 +326,10 @@ save_observations <- function(data_cube, dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { - # Method 1: Use the first date as init_date. But it may be better to use the real initialized date (ask users) + ## Method 1: Use the first date as init_date. But it may be better to use + ## the real initialized date (ask users) # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - # Method 2: use initial month + ## Method 2: use initial month init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), @@ -409,9 +414,8 @@ save_observations <- function(data_cube, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "obs") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "obs") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -459,6 +463,15 @@ save_metrics <- function(skill, # Add global and variable attributes global_attributes <- get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } attr(skill[[1]], 'global_attrs') <- global_attributes for (i in 1:length(skill)) { @@ -470,7 +483,7 @@ save_metrics <- function(skill, sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'time') } else { - sdname <- paste0(metric, " grid point metric") + sdname <- paste0(metric) #, " grid point metric") dims <- c(lalo, 'time') } metadata <- list(metric = list(name = metric, @@ -527,9 +540,8 @@ save_metrics <- function(skill, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "skill") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "skill") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -567,6 +579,15 @@ save_corr <- function(skill, # Add global and variable attributes global_attributes <- get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- 
c(global_attributes, + list(from_anomalies = "Yes")) + } else { + global_attributes <- c(global_attributes, + list(from_anomalies = "No")) + } attr(skill[[1]], 'global_attrs') <- global_attributes for (i in 1:length(skill)) { @@ -578,7 +599,7 @@ save_corr <- function(skill, sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'ensemble', 'time') } else { - sdname <- paste0(metric, " grid point metric") # formerly names(metric) + sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) dims <- c(lalo, 'ensemble', 'time') } metadata <- list(metric = list(name = metric, @@ -634,9 +655,8 @@ save_corr <- function(skill, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "corr") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "corr") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -674,6 +694,15 @@ save_percentiles <- function(percentiles, # Add global and variable attributes global_attributes <- get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } attr(percentiles[[1]], 'global_attrs') <- global_attributes for (i in 1:length(percentiles)) { @@ -734,9 +763,8 @@ save_percentiles <- function(percentiles, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "percentiles") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "percentiles") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -759,7 +787,8 @@ save_probabilities <- function(probs, data_cube, outdir, agg = "global", - archive = NULL) { + archive = NULL, + type = "hcst") { # Loops over the years in the s2dv_cube containing a hindcast or forecast # and exports the corresponding category probabilities to a netCDF file. 
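## For reference: with the new 'type' argument, save_data() (above) is
## expected to call this function twice, once per dataset. A sketch of the
## assumed call pattern, using the objects available at that call site:
# save_probabilities(probabilities$probs, recipe, data$hcst, outdir,
#                    archive = archive, type = "hcst")
# save_probabilities(probabilities$probs_fcst, recipe, data$fcst, outdir,
#                    archive = archive, type = "fcst")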
# probs: array containing the probability data @@ -768,12 +797,23 @@ save_probabilities <- function(probs, # outdir: directory where the files should be saved - # type: 'exp' (hcst and fcst) or 'obs' # agg: aggregation, "global" or "country" + # type: 'hcst' or 'fcst' lalo <- c('longitude', 'latitude') variable <- data_cube$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name global_attributes <- get_global_attributes(recipe, archive) + # Add anomaly computation to global attributes + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$System[[global_attributes$system]]$calendar @@ -842,9 +882,8 @@ save_probabilities <- function(probs, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "probs") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "probs") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -860,5 +899,8 @@ save_probabilities <- function(probs, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, "##### PROBABILITIES SAVED TO NETCDF FILE #####") + + info(recipe$Run$logger, + paste("#####", toupper(type), + "PROBABILITIES SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index f48ebe7b058969568bee2c14d47394de18664c0e..93196b86d194fa2dbed8913d6c675f84d038ec9b 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -1,34 +1,58 @@ ## TODO: Separate by time aggregation +## TODO: Build a default path that accounts for: +## variable, system, reference, start date and region name -get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { +get_filename <- function(dir, recipe, var, date, agg, file.type) { # This function builds the path of the output file based on the directory, # recipe, variable, date, aggregation and # type of metric/forecast/probability.
- - if (horizon == "subseasonal") { - shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%V") + + if (recipe$Analysis$Horizon == "subseasonal") { + shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%V") dd <- "week" } else { - shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%m") + shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%m") dd <- "month" } - + switch(tolower(agg), "country" = {gg <- "-country"}, "global" = {gg <- ""}) - switch(file.type, - "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, - "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, - "exp" = {file <- paste0(var, gg, "_", date)}, - "obs" = {file <- paste0(var, gg, "-obs_", date)}, - "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, - shortdate)}, - "probs" = {file <- paste0(var, gg, "-probs_", date)}, - "bias" = {file <- paste0(var, gg, "-bias_", date)}) + system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) + reference <- gsub('.','', recipe$Analysis$Datasets$Reference$name, fixed = T) - return(paste0(dir, file, ".nc")) + if (tolower(recipe$Analysis$Output_format) == 'scorecards') { + # Define output file name according to Scorecards format + dict <- read_yaml("conf/output_dictionaries/scorecards.yml") + # Get necessary names + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + + switch(file.type, + "skill" = {type_info <- "-skill_"}, + "corr" = {type_info <- "-corr_"}, + "exp" = {type_info <- paste0("_", date, "_")}, + "obs" = {type_info <- paste0("-obs_", date, "_")}, + "percentiles" = {type_info <- "-percentiles_"}, + "probs" = {type_info <- paste0("-probs_", date, "_")}, + "bias" = {type_info <- paste0("-bias_", date, "_")}) + # Build file name + file <- paste0("scorecards_", system, "_", reference, "_", + var, type_info, hcst_start, "-", hcst_end, "_s", shortdate) + } else { + switch(file.type, + "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, + "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, + "exp" = {file <- paste0(var, gg, "_", date)}, + "obs" = {file <- paste0(var, gg, "-obs_", date)}, + "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, + shortdate)}, + "probs" = {file <- paste0(var, gg, "-probs_", date)}, + "bias" = {file <- paste0(var, gg, "-bias_", date)}) + } + return(paste0(dir, file, ".nc")) } get_dir <- function(recipe, agg = "global") { @@ -37,38 +61,48 @@ get_dir <- function(recipe, agg = "global") { # startdate, and aggregation.
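## Worked example of the Scorecards naming built in get_filename() above,
## with assumed recipe values (system "ECMWF-SEAS5", reference "ERA5",
## var "tas", file.type "skill", hcst 1993-2016, November start date, so
## shortdate is "11"):
# paste0("scorecards_", "ECMWF-SEAS5", "_", "ERA5", "_", "tas", "-skill_",
#        "1993", "-", "2016", "_s", "11")
# [1] "scorecards_ECMWF-SEAS5_ERA5_tas-skill_1993-2016_s11"   # ".nc" appended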
## TODO: Get aggregation from recipe - ## TODO: Add time frequency - outdir <- paste0(recipe$Run$output_dir, "/outputs/") + ## TODO: multivar case variable <- recipe$Analysis$Variables$name - if (!is.null(recipe$Analysis$Time$fcst_year)) { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - #PROBLEM: decadal doesn't have sdate - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') - } else { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } + system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) + + if (tolower(recipe$Analysis$Output_format) == 'scorecards') { + # Define output dir name according to Scorecards format + dict <- read_yaml("conf/output_dictionaries/scorecards.yml") + # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name + dir <- paste0(outdir, "/", system, "/", variable, "/") } else { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - #PROBLEM: decadal doesn't have sdate - fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$hcst_end, sep = '_')) + # Default generic output format based on FOCUS + # Get startdate or hindcast period + if (!is.null(recipe$Analysis$Time$fcst_year)) { + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + # decadal doesn't have sdate + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') + } else { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } } else { - fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + # decadal doesn't have sdate + fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$hcst_end, + sep = '_')) + } else { + fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) + } } - } - - calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - store.freq <- recipe$Analysis$Variables$freq - switch(tolower(agg), - "country" = {dir <- paste0(outdir, "/", calib.method, "-", - store.freq, "/", variable, - "_country/", fcst.sdate, "/")}, - "global" = {dir <- paste0(outdir, "/", calib.method, "-", - store.freq, "/", variable, "/", + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + store.freq <- recipe$Analysis$Variables$freq + ## TODO: Change "_country" + switch(tolower(agg), + "country" = {dir <- paste0(outdir, "/", system, "/", calib.method, + "-", store.freq, "/", variable, + "_country/", fcst.sdate, "/")}, + "global" = {dir <- paste0(outdir, "/", system, "/", calib.method, + "-", store.freq, "/", variable, "/", fcst.sdate, "/")}) - + } return(dir) - } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 99f12346c21e98e87c4e78efcaf688c0bb37ee41..9f97e688167ab992515641234b9d05efe86ee354 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -14,6 +14,8 @@ source("modules/Skill/s2s.metrics.R") source("modules/Skill/tmp/RandomWalkTest.R") source("modules/Skill/tmp/Bias.R") source("modules/Skill/tmp/AbsBiasSS.R") +source("modules/Skill/tmp/RMSSS.R") +source("modules/Skill/tmp/Corr.R") ## TODO: Implement this in the future ## Which parameters are required?
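## Assumed usage after this refactor (sketch; names as in the main script):
## both Skill functions now receive the full data list instead of separate
## exp/obs cubes.
# data <- load_datasets(recipe)          # hypothetical upstream step
# skill_metrics <- compute_skill_metrics(recipe, data)
# probabilities <- compute_probabilities(recipe, data)
## The recipe's percentile entries (e.g. "1/3", "2/3") are parsed inside
## compute_probabilities() with eval(parse(text = x)), yielding 0.333, 0.667.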
@@ -51,12 +53,32 @@ source("modules/Skill/tmp/AbsBiasSS.R") # " running Skill module ", "\n", # " it can call ", metric_fun )) -compute_skill_metrics <- function(recipe, exp, obs) { - # exp: s2dv_cube containing the hindcast +# compute_skill_metrics <- function(recipe, hcst, obs, +# clim_hcst = NULL, +# clim_obs = NULL) { +compute_skill_metrics <- function(recipe, data) { + + # data$hcst: s2dv_cube containing the hindcast # data$obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml ## TODO: Adapt time_dims to subseasonal case + ## TODO: Add dat_dim + ## TODO: Refine error messages + ## TODO: Add check to see if anomalies are provided (info inside s2dv_cube) + +# if (recipe$Analysis$Workflow$Anomalies$compute) { +# if (is.null(clim_hcst) || is.null(clim_obs)) { +# warn(recipe$Run$logger, "Anomalies have been requested in the recipe, +# but the climatologies have not been provided in the +# compute_skill_metrics call. Be aware that some metrics like the +# Mean Bias may not be correct.") +# } +# } else { +# warn(recipe$Run$logger, "Anomaly computation was not requested in the +# recipe. Be aware that some metrics, such as the CRPSS, may not be +# correct.") +# } time_dim <- 'syear' memb_dim <- 'ensemble' metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) @@ -87,22 +109,31 @@ compute_skill_metrics <- function(recipe, exp, obs) { } # Ranked Probability Score and Fair version if (metric %in% c('rps', 'frps')) { - skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - Fair = Fair, ncores = ncores) + skill <- RPS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - Fair = Fair, ncores = ncores) + skill <- RPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 10th percentile } else if (metric == 'bss10') { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, prob_thresholds = 0.1, Fair = Fair, + skill <- RPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.1, + Fair = Fair, ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) @@ -110,8 +141,11 @@ compute_skill_metrics <- function(recipe, exp, obs) { skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 90th percentile } else if (metric == 'bss90') { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, prob_thresholds = 0.9, Fair = Fair, + skill <- RPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.9, + Fair = Fair, ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) @@ -119,58 +153,104 @@ compute_skill_metrics <- function(recipe, exp, obs) { skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # CRPS and FCRPS } else if (metric %in% c('crps', 'fcrps')) { - skill <- CRPS(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, Fair = Fair, ncores = ncores) + skill <- CRPS(data$hcst$data, data$obs$data, +
time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # CRPSS and FCRPSS } else if (metric %in% c('crpss', 'fcrpss')) { - skill <- CRPSS(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, Fair = Fair, ncores = ncores) + skill <- CRPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$crpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign - # Mean bias (climatology) - } else if (metric == 'mean_bias') { - skill <- Bias(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, ncores = ncores) + # Mean bias (climatology) + } else if (metric == 'mean_bias') { + ## TODO: Eliminate option to compute from anomalies + # Compute from full field + if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } else { + skill <- Bias(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # Mean bias skill score } else if (metric == 'mean_bias_ss') { - skill <- AbsBiasSS(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, ncores = ncores) + if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } else { + skill <- AbsBiasSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } skill <- lapply(skill, function(x) { .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$biasSS skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Ensemble mean correlation } else if (metric %in% c('enscorr', 'corr')) { + ## TODO: Return significance ## TODO: Implement option for Kendall and Spearman methods? 
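## Note on the bias branches above (condensed sketch of the shared guard):
## the stored full-field cubes are preferred when anomalies were computed,
## since anomalies largely remove the mean bias by construction (assumption).
# use_full_field <- !is.null(data$hcst.full_val) &&
#                   !is.null(data$obs.full_val) &&
#                   isTRUE(recipe$Analysis$Workflow$Anomalies$compute)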
- skill <- s2dv::Corr(exp$data, obs$data, - dat_dim = 'dat', - time_dim = time_dim, - method = 'pearson', - memb_dim = memb_dim, - memb = memb, - ncores = ncores) + skill <- Corr(data$hcst$data, data$obs$data, + dat_dim = 'dat', + time_dim = time_dim, + method = 'pearson', + memb_dim = memb_dim, + memb = memb, + conf = F, + pval = F, + sign = T, + alpha = 0.05, + ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$corr - skill_metrics[[ paste0(metric, "_p.value") ]] <- skill$p.val - skill_metrics[[ paste0(metric, "_conf.low") ]] <- skill$conf.lower - skill_metrics[[ paste0(metric, "_conf.up") ]] <- skill$conf.upper + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + } else if (metric == 'rmsss') { + # Compute RMSSS + skill <- RMSSS(data$hcst$data, data$obs$data, + dat_dim = 'dat', + time_dim = time_dim, + memb_dim = memb_dim, + pval = FALSE, + sign = TRUE, + sig_method = 'Random Walk', + ncores = ncores) + # Drop extra dimensions + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rmsss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'enssprerr') { # Remove ensemble dim from obs to avoid veriApply warning - obs_noensdim <- ClimProjDiags::Subset(obs$data, "ensemble", 1, + obs_noensdim <- ClimProjDiags::Subset(data$obs$data, "ensemble", 1, drop = "selected") capture.output( skill <- easyVerification::veriApply(verifun = 'EnsSprErr', - fcst = exp$data, + fcst = data$hcst$data, obs = obs_noensdim, - tdim = which(names(dim(exp$data))==time_dim), - ensdim = which(names(dim(exp$data))==memb_dim), + tdim = which(names(dim(data$hcst$data))==time_dim), + ensdim = which(names(dim(data$hcst$data))==memb_dim), na.rm = na.rm, ncpus = ncores) ) @@ -184,12 +264,11 @@ compute_skill_metrics <- function(recipe, exp, obs) { metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', 'rpss'))) { - ## TODO: Test this scenario warn(recipe$Run$logger, - "Some of the requested metrics are not available.") + "Some of the requested SpecsVerification metrics are not available.") capture.output( - skill <- Compute_verif_metrics(exp$data, obs$data, + skill <- Compute_verif_metrics(data$hcst$data, data$obs$data, skill_metrics = metric_name, verif.dims=c("syear", "sday", "sweek"), na.rm = na.rm, @@ -208,6 +287,7 @@ compute_skill_metrics <- function(recipe, exp, obs) { } compute_probabilities <- function(recipe, data) { + ## TODO: Do hcst and fcst at the same time if (is.null(recipe$Analysis$ncores)) { ncores <- 1 @@ -222,7 +302,9 @@ compute_probabilities <- function(recipe, data) { } named_probs <- list() + named_probs_fcst <- list() named_quantiles <- list() + if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, "Quantiles and probability bins have been requested, but no thresholds are provided in the recipe.") @@ -231,12 +313,13 @@ compute_probabilities <- function(recipe, data) { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { # Parse thresholds in recipe thresholds <- sapply(element, function (x) eval(parse(text = x))) - quants <- compute_quants(data$data, thresholds, + quants <- compute_quants(data$hcst$data, thresholds, ncores = ncores, na.rm = na.rm) - probs <- compute_probs(data$data, quants, + probs <- compute_probs(data$hcst$data, quants, ncores = ncores, na.rm = na.rm) + for (i in
seq(1:dim(quants)['bin'][[1]])) { named_quantiles <- append(named_quantiles, list(ClimProjDiags::Subset(quants, @@ -258,13 +341,52 @@ compute_probabilities <- function(recipe, data) { 'bin', i))) names(named_probs)[length(named_probs)] <- name_i } + + # Compute fcst probability bins + if (!is.null(data$fcst)) { + probs_fcst <- compute_probs(data$fcst$data, quants, + ncores = ncores, + na.rm = na.rm) + + for (i in seq(1:dim(probs_fcst)['bin'][[1]])) { + if (i == 1) { + name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == dim(probs_fcst)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs_fcst <- append(named_probs_fcst, + list(ClimProjDiags::Subset(probs_fcst, + 'bin', i))) + names(named_probs_fcst)[length(named_probs_fcst)] <- name_i + } + } } + + # Rearrange dimensions and return probabilities named_probs <- lapply(named_probs, function(x) {.drop_dims(x)}) named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)}) + if (!is.null(data$fcst)) { + fcst_years <- dim(data$fcst$data)[['syear']] + named_probs_fcst <- lapply(named_probs_fcst, + function(x) {Subset(x, + along = 'syear', + indices = 1:fcst_years, + drop = 'non-selected')}) + results <- list(probs = named_probs, + probs_fcst = named_probs_fcst, + percentiles = named_quantiles) + } else { + results <- list(probs = named_probs, + percentiles = named_quantiles) + } + + info(recipe$Run$logger, + "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") + return(results) } - info(recipe$Run$logger, - "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") - return(list(probs=named_probs, percentiles=named_quantiles)) } ## TODO: Replace with ClimProjDiags::Subset @@ -275,7 +397,7 @@ compute_probabilities <- function(recipe, data) { if (!("time" %in% names(dim(metric_array)))) { dim(metric_array) <- c("time" = 1, dim(metric_array)) } - # If array has memb_exp (Corr case), change name to 'ensemble' + # If array has memb dim (Corr case), change name to 'ensemble' if ("exp_memb" %in% names(dim(metric_array))) { names(dim(metric_array))[which(names(dim(metric_array)) == "exp_memb")] <- "ensemble" diff --git a/modules/Skill/tmp/Corr.R b/modules/Skill/tmp/Corr.R new file mode 100644 index 0000000000000000000000000000000000000000..c95b103492a9ea67f302c83a9bbe9e8e0bdc61fc --- /dev/null +++ b/modules/Skill/tmp/Corr.R @@ -0,0 +1,463 @@ +#'Compute the correlation coefficient between an array of forecasts and their corresponding observations +#' +#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for +#'an array of forecasts and an array of observations. The correlations are +#'computed along 'time_dim', which usually refers to the start date dimension. If +#''comp_dim' is given, the correlations are computed only if obs along comp_dim +#'dimension are complete between limits[1] and limits[2], i.e., there is no NA +#'between limits[1] and limits[2]. This option can be activated if the user +#'wants to account only for the forecasts for which the corresponding observations +#'are available at all leadtimes.\cr +#'The confidence interval is computed by the Fisher transformation and the +#'significance level relies on a one-sided Student's t-distribution.\cr +#'The function can calculate the ensemble mean before correlation when +#''memb_dim' is specified and 'memb = F'.
If ensemble mean is not calculated, correlation will +#'be calculated for each member. +#'If there is only one dataset for exp and obs, you can simply use cor() to +#'compute the correlation. +#' +#'@param exp A named numeric array of experimental data, with at least dimension +#' 'time_dim'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. +#'@param time_dim A character string indicating the name of dimension along +#' which the correlations are computed. The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is 'dataset'. If there is no dataset +#' dimension, set it to NULL. +#'@param comp_dim A character string indicating the name of dimension along which +#' obs is taken into account only if it is complete. The default value +#' is NULL. +#'@param limits A vector of two integers indicating the range along comp_dim to +#' be completed. The default is c(1, length(comp_dim dimension)). +#'@param method A character string indicating the type of correlation: +#' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no +#' member dimension, set it to NULL. The default value is NULL. +#'@param memb A logical value indicating whether to retain the 'memb_dim' dimension +#' (TRUE) or compute the ensemble mean over 'memb_dim' (FALSE). Only functional when +#' 'memb_dim' is not NULL. The default value is TRUE. +#'@param pval A logical value indicating whether or not to return the p-value +#' of the test Ho: Corr = 0. The default value is TRUE. +#'@param conf A logical value indicating whether or not to return the confidence +#' intervals. The default value is TRUE. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: Corr = 0 based on 'alpha'. The default value is +#' FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except +#' time_dim and memb_dim).\cr +#'nexp is the number of experiments (i.e., 'dat_dim' in exp), and nobs is the +#'number of observations (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and +#'nobs are omitted. exp_memb is the number of members in the experiment (i.e., +#''memb_dim' in exp) and obs_memb is the number of members in the observation (i.e., +#''memb_dim' in obs). If memb = F, exp_memb and obs_memb are omitted.\cr\cr +#'\item{$corr}{ +#' The correlation coefficient. +#'} +#'\item{$p.val}{ +#' The p-value. Only present if \code{pval = TRUE}. +#'} +#'\item{$conf.lower}{ +#' The lower confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' The upper confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$sign}{ +#' The statistical significance. Only present if \code{sign = TRUE}.
+#'} +#' +#'@examples +#'# Case 1: Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'runmean_months <- 12 +#' +#'# Smooth along lead-times +#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months) +#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months) +#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times +#'leadtimes_per_startdate <- 60 +#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'), +#' MeanDims(smooth_ano_obs, 'member'), +#' comp_dim = 'ftime', +#' limits = c(ceiling((runmean_months + 1) / 2), +#' leadtimes_per_startdate - floor(runmean_months / 2))) +#' +#'# Case 2: Keep member dimension +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member') +#'# ensemble mean +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE) +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom stats cor pt qnorm +#'@export +Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', + comp_dim = NULL, limits = NULL, method = 'pearson', + memb_dim = NULL, memb = TRUE, + pval = TRUE, conf = TRUE, sign = FALSE, + alpha = 0.05, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must have at least two dimensions ", + "containing time_dim and dat_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## comp_dim + if (!is.null(comp_dim)) { + if (!is.character(comp_dim) | length(comp_dim) > 1) { + stop("Parameter 'comp_dim' must be a character string.") + } + if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) { + stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## limits + if (!is.null(limits)) { + if (is.null(comp_dim)) { + stop("Parameter 'comp_dim' cannot be NULL if 'limits' is assigned.") + } + if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | + length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { + stop(paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of parameter 'comp_dim'.")) + } + } + ## method + if (!(method %in% c("kendall", "spearman", "pearson"))) {
+ stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'dat_dim' and 'memb_dim'.")) + } + if (dim(exp)[time_dim] < 3) { + stop("The length of time_dim must be at least 3 to compute correlation.") + } + + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- Reorder(obs, order_obs) + + + ############################### + # Calculate Corr + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + pos <- which(names(dim(obs)) == comp_dim) + if (is.null(limits)) { + obs_sub <- obs + } else { + obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) + } + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA + rm(obs_sub, outrows) + } + + if (!is.null(memb_dim)) { + if (!memb) { #ensemble mean + exp <- MeanDims(exp, memb_dim, na.rm = TRUE) + obs <- MeanDims(obs, memb_dim, na.rm = TRUE) +# name_exp <- names(dim(exp)) +# margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)] +# exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here +# obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE) + memb_dim <- NULL + } + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + fun = .Corr, + dat_dim = dat_dim, memb_dim = memb_dim, + time_dim = time_dim, method = method, + pval = pval, conf = conf, sign = sign, alpha = alpha, + ncores = ncores) + + return(res) +} + +.Corr <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', + time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { + if (is.null(memb_dim)) { + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] 
+ nexp <- 1 + nobs <- 1 + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + if (any(!is.na(exp)) && sum(!is.na(obs)) > 2) { + CORR <- cor(exp, obs, use = "pairwise.complete.obs", method = method) + } + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + for (j in 1:nobs) { + for (y in 1:nexp) { + if (any(!is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { + CORR[y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + } + } +#---------------------------------------- +# Same as above calculation. +#TODO: Compare which is faster. +# CORR <- sapply(1:nobs, function(i) { +# sapply(1:nexp, function (x) { +# if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { +# cor(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method) +# } else { +# NA +# } +# }) +# }) +#----------------------------------------- + } + + } else { # memb_dim != NULL + exp_memb <- as.numeric(dim(exp)[memb_dim]) # memb_dim + obs_memb <- as.numeric(dim(obs)[memb_dim]) + + if (is.null(dat_dim)) { + # exp: [sdate, memb_exp] + # obs: [sdate, memb_obs] + nexp <- 1 + nobs <- 1 + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + + if (any(!is.na(exp[,y])) && sum(!is.na(obs[, j])) > 2) { + CORR[, , y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + + } + } + } else { + # exp: [sdate, dat_exp, memb_exp] + # obs: [sdate, dat_obs, memb_obs] + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + CORR[, , y, j] <- sapply(1:nobs, function(i) { + sapply(1:nexp, function (x) { + if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { + cor(exp[, x, y], obs[, i, j], + use = "pairwise.complete.obs", + method = method) + } else { + NA + } + }) + }) + + } + } + } + + } + + +# if (pval) { +# for (i in 1:nobs) { +# p.val[, i] <- try(sapply(1:nexp, +# function(x) {(cor.test(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method)$p.value)/2}), silent = TRUE) +# if (class(p.val[, i]) == 'character') { +# p.val[, i] <- NA +# } +# } +# } + + if (pval || conf || sign) { + if (method == "kendall" | method == "spearman") { + if (!is.null(dat_dim) | !is.null(memb_dim)) { + tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) + names(dim(tmp))[1] <- time_dim + eno <- Eno(tmp, time_dim) + } else { + tmp <- rank(obs) + tmp <- array(tmp) + names(dim(tmp)) <- time_dim + eno <- Eno(tmp, time_dim) + } + } else if (method == "pearson") { + eno <- Eno(obs, time_dim) + } + + if (is.null(memb_dim)) { + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + eno_expand[i, ] <- eno + } + } else { #member + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + for (i in 1:nexp) { + for (j in 1:exp_memb) { + eno_expand[i, , j, ] <- eno + } + } + } + + } + +#############old################# +#This doesn't return error but it's diff from cor.test() when method is spearman and kendall + if (pval || sign) { + t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) + p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) + if (sign) signif 
<- !is.na(p.val) & p.val <= alpha + } +################################### + if (conf) { + conf.lower <- alpha / 2 + conf.upper <- 1 - conf.lower + suppressWarnings({ + conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) + confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) + }) + } + +################################### + # Remove nexp and nobs if dat_dim = NULL + if (is.null(dat_dim) & !is.null(memb_dim)) { + dim(CORR) <- dim(CORR)[3:length(dim(CORR))] + if (pval) { + dim(p.val) <- dim(p.val)[3:length(dim(p.val))] + } + if (conf) { + dim(conflow) <- dim(conflow)[3:length(dim(conflow))] + dim(confhigh) <- dim(confhigh)[3:length(dim(confhigh))] + } + } + +################################### + + res <- list(corr = CORR) + if (pval) { + res <- c(res, list(p.val = p.val)) + } + if (conf) { + res <- c(res, list(conf.lower = conflow, conf.upper = confhigh)) + } + if (sign) { + res <- c(res, list(sign = signif)) + } + + return(res) + +} diff --git a/modules/Skill/tmp/RMSSS.R b/modules/Skill/tmp/RMSSS.R new file mode 100644 index 0000000000000000000000000000000000000000..d2ff4861e3547a32d84bdbb268df0e7eebcd8e9f --- /dev/null +++ b/modules/Skill/tmp/RMSSS.R @@ -0,0 +1,448 @@ +#'Compute root mean square error skill score +#' +#'Compute the root mean square error skill score (RMSSS) between an array of +#'forecast 'exp' and an array of observation 'obs'. The two arrays should +#'have the same dimensions except along dat_dim, where the length can be +#'different, with the number of experiments/models (nexp) and the number of +#'observational datasets (nobs).\cr +#'RMSSS computes the root mean square error skill score of each jexp in 1:nexp +#'against each job in 1:nobs which gives nexp * nobs RMSSS for each grid point +#'of the array.\cr +#'The RMSSS are computed along the time_dim dimension which should correspond +#'to the start date dimension.\cr +#'The p-value and significance test are optionally provided by an one-sided +#'Fisher test or Random Walk test.\cr +#' +#'@param exp A named numeric array of experimental data which contains at least +#' two dimensions for dat_dim and time_dim. It can also be a vector with the +#' same length as 'obs', then the vector will automatically be 'time_dim' and +#' 'dat_dim' will be 1. +#'@param obs A named numeric array of observational data which contains at least +#' two dimensions for dat_dim and time_dim. The dimensions should be the same +#' as paramter 'exp' except the length of 'dat_dim' dimension. The order of +#' dimension can be different. It can also be a vector with the same length as +#' 'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will +#' be 1. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension, or 0 (typical climatological forecast) or 1 +#' (normalized climatological forecast). If it is an array, the dimensions must +#' be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one +#' reference dataset, it should not have dataset dimension. If there is +#' corresponding reference for each experiment, the dataset dimension must +#' have the same length as in 'exp'. If 'ref' is NULL, the typical +#' climatological forecast is used as reference forecast (equivelant to 0.) +#' The default value is NULL. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is 'dataset'. 
+#'@param time_dim A character string indicating the name of dimension along +#' which the RMSSS are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if parameters 'exp' +#' and 'ref' are already ensemble means. The default value is NULL. +#'@param pval A logical value indicating whether or not to compute the p-value +#' of the test Ho: RMSSS = 0. The default value is TRUE. +#'@param sign A logical value indicating whether or not to compute the +#' statistical significance of the test Ho: RMSSS = 0. The default value is +#' FALSE. +#'@param alpha A numeric of the significance level to be used in the +#' statistical significance test. The default value is 0.05. +#'@param sig_method A character string indicating the significance method. The +#' options are "one-sided Fisher" (default) and "Random Walk". +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiments (i.e., dat_dim in exp), and nobs is the +#'number of observations (i.e., dat_dim in obs). If dat_dim is NULL, nexp and +#'nobs are omitted.\cr +#'\item{$rmsss}{ +#' A numerical array of the root mean square error skill score. +#'} +#'\item{$p.val}{ +#' A numerical array of the p-value with the same dimensions as $rmsss. +#' Only present if \code{pval = TRUE}. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance of the RMSSS with the same +#' dimensions as $rmsss. Only present if \code{sign = TRUE}. +#'} +#' +#'@examples +#' set.seed(1) +#' exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +#' set.seed(2) +#' obs <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) +#' res <- RMSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset') +#' +#'@rdname RMSSS +#'@import multiApply +#'@importFrom stats pf +#'@export +RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + sig_method = 'one-sided Fisher', ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector + if (length(exp) == length(obs)) { + exp <- array(exp, dim = c(length(exp), 1)) + names(dim(exp)) <- c(time_dim, dat_dim) + obs <- array(obs, dim = c(length(obs), 1)) + names(dim(obs)) <- c(time_dim, dat_dim) + } else { + stop(paste0("Parameter 'exp' and 'obs' must be arrays with at least two ", + "dimensions time_dim and dat_dim, or vectors of the same length.")) + } + } else if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be arrays with at least two ", + "dimensions time_dim and dat_dim, or vectors of the same length.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") + }
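## Illustrative call mirroring how compute_skill_metrics() invokes RMSSS()
## (p-values off, Random Walk significance). Assumes s2dv and multiApply are
## loaded and RandomWalkTest.R is sourced, as done in Skill.R; the dimension
## names follow the module's 'syear'/'ensemble' convention.
# set.seed(1)
# exp <- array(rnorm(60), dim = c(dat = 1, syear = 20, ensemble = 3))
# obs <- array(rnorm(20), dim = c(dat = 1, syear = 20, ensemble = 1))
# res <- RMSSS(exp, obs, time_dim = 'syear', dat_dim = 'dat',
#              memb_dim = 'ensemble', pval = FALSE, sign = TRUE,
#              sig_method = 'Random Walk')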
+ if (!is.null(ref)) { + if (!is.numeric(ref)) { + stop("Parameter 'ref' must be numeric.") + } + if (is.array(ref)) { + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { + stop("Parameter 'ref' must be a numeric array or number 0 or 1.") + } + } + + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | length(alpha) > 1) { + stop("Parameter 'alpha' must be one numeric value.") + } + ## sig_method + if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { + stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") + } + if (sig_method == "Random Walk" & pval == T) { + warning("p-value cannot be calculated by significance method 'Random Walk'.") + pval <- FALSE + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have the same length in ", + "all dimensions except 'memb_dim' and 'dat_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has a dataset dimension, it must be ", + "equal to the dataset dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) |
!identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) + } + } + + if (dim(exp)[time_dim] <= 2) { + stop("The length of time_dim must be more than 2 to compute RMSSS.") + } + + + ############################### +# # Sort dimension +# name_exp <- names(dim(exp)) +# name_obs <- names(dim(obs)) +# order_obs <- match(name_exp, name_obs) +# obs <- Reorder(obs, order_obs) + + + ############################### + # Create ref array if needed + if (is.null(ref)) ref <- 0 + if (!is.array(ref)) { + ref <- array(data = ref, dim = dim(exp)) + } + + ############################### + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + if (!is.null(ref) & memb_dim %in% names(dim(ref))) { + ref <- MeanDims(ref, memb_dim, na.rm = T) + } + } + + ############################### + # Calculate RMSSS + +# if (!is.null(ref)) { # use "ref" as reference forecast +# if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { +# target_dims_ref <- c(time_dim, dat_dim) +# } else { +# target_dims_ref <- c(time_dim) +# } +# data <- list(exp = exp, obs = obs, ref = ref) +# target_dims = list(exp = c(time_dim, dat_dim), +# obs = c(time_dim, dat_dim), +# ref = target_dims_ref) +# } else { +# data <- list(exp = exp, obs = obs) +# target_dims = list(exp = c(time_dim, dat_dim), +# obs = c(time_dim, dat_dim)) +# } + data <- list(exp = exp, obs = obs, ref = ref) + if (!is.null(dat_dim)) { + if (dat_dim %in% names(dim(ref))) { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim, dat_dim)) + } else { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim)) + } + } else { + target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) + } + + res <- Apply(data, + target_dims = target_dims, + fun = .RMSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, + ncores = ncores) + + return(res) +} + +.RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, + sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher') { + # exp: [sdate, (dat)] + # obs: [sdate, (dat)] + # ref: [sdate, (dat)] or NULL + + if (is.null(ref)) { + ref <- array(data = 0, dim = dim(obs)) + } else if (identical(ref, 0) | identical(ref, 1)) { + ref <- array(ref, dim = dim(exp)) + } + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + nexp <- 1 + nobs <- 1 + nref <- 1 + # Add dat dim back temporarily + dim(exp) <- c(dim(exp), dat = 1) + dim(obs) <- c(dim(obs), dat = 1) + dim(ref) <- c(dim(ref), dat = 1) + + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + nexp <- as.numeric(dim(exp)[2]) + nobs <- as.numeric(dim(obs)[2]) + if (dat_dim %in% names(dim(ref))) { + nref <- as.numeric(dim(ref)[2]) + } else { + dim(ref) <- c(dim(ref), dat = 1) + nref <- 1 + } + } + + nsdate <- as.numeric(dim(exp)[1]) + + # RMS of forecast + dif1 <- array(dim = c(nsdate, nexp, nobs)) + names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') + + for (i in 1:nobs) { + dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + } + + rms_exp <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) + + # RMS of reference +# if (!is.null(ref)) { + dif2 <- array(dim = c(nsdate, nref, nobs)) + names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') + for (i in 
1:nobs) { + dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) + } + rms_ref <- apply(dif2^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nref, nobs)) + if (nexp != nref) { + # expand rms_ref to nexp (nref is 1) + rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) + rms_ref <- Reorder(rms_ref, c(2, 1)) + } +# } else { +# rms_ref <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs, nexp = nexp)) +## rms_ref[which(abs(rms_ref) <= (max(abs(rms_ref), na.rm = TRUE) / 1000))] <- max(abs( +## rms_ref), na.rm = TRUE) / 1000 +# rms_ref <- Reorder(rms_ref, c(2, 1)) +# #rms_ref above: [nexp, nobs] +# } + + rmsss <- 1 - rms_exp / rms_ref + +################################################# + +# if (conf) { +# conflow <- (1 - conf.lev) / 2 +# confhigh <- 1 - conflow +# conf_low <- array(dim = c(nexp = nexp, nobs = nobs)) +# conf_high <- array(dim = c(nexp = nexp, nobs = nobs)) +# } + + if (sig_method == 'one-sided Fisher') { + p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + ## pval and sign + if (pval || sign) { + eno1 <- Eno(dif1, time_dim) + if (is.null(ref)) { + eno2 <- Eno(obs, time_dim) + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } else { + eno2 <- Eno(dif2, time_dim) + if (nref != nexp) { + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } + } + + F.stat <- (eno2 * rms_ref^2 / (eno2 - 1)) / ((eno1 * rms_exp^2 / (eno1- 1))) + tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 + p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) + if (sign) signif <- p_val <= alpha + # If there isn't enough valid data, return NA + p_val[which(!tmp)] <- NA + if (sign) signif[which(!tmp)] <- NA + + # change not enough valid data rmsss to NA + rmsss[which(!tmp)] <- NA + } + + } else if (sig_method == "Random Walk") { + signif <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + + # Error + error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) + if (nref == nexp) { + error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) + } else { + # nref = 1 + error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) + } + signif[i, j] <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref)$signif + } + } + } + + ################################### + # Remove extra dimensions if dat_dim = NULL + if (is.null(dat_dim)) { + dim(rmsss) <- NULL + dim(p_val) <- NULL + if (sign) dim(signif) <- NULL + } + ################################### + + # output + res <- list(rmsss = rmsss) + if (pval) { + p.val <- list(p.val = p_val) + res <- c(res, p.val) + } + if (sign) { + signif <- list(sign = signif) + res <- c(res, signif) + } + + return(res) +} diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index ff0e9fd44d8e71b831819271d51c7f1374e20068..a8664569047bb60185733e27a1250ec985cd481d 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,6 +1,7 @@ #G# TODO: Remove once released in s2dv/CSTools source("modules/Visualization/tmp/PlotMostLikelyQuantileMap.R") source("modules/Visualization/tmp/PlotCombinedMap.R") +source("modules/Visualization/tmp/clim.palette.R") ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Adapt to multi-model case @@ -8,12 +9,10 @@ source("modules/Visualization/tmp/PlotCombinedMap.R") plot_data <- function(recipe, data, - calibrated_data = NULL, skill_metrics = NULL, probabilities = 
NULL, archive = NULL, significance = F) { - # Try to produce and save several basic plots. # recipe: the auto-s2s recipe as read by read_yaml() # archive: the auto-s2s archive as read by read_yaml()
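+ # data: list of s2dv_cube objects: hindcast, observations and, optionally, forecast + # skill_metrics: list of named skill metrics arrays + # probabilities: list of probability bins and percentiles, as returned by compute_probabilities() + # significance: T/F, whether to display the significance dots in the plots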
@@ -26,10 +25,11 @@ plot_data <- function(recipe, outdir <- paste0(get_dir(recipe), "/plots/") dir.create(outdir, showWarnings = FALSE, recursive = TRUE) - if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && - (is.null(data$fcst))) { - stop("The Visualization module has been called, but there is no data ", - "that can be plotted.") + if ((is.null(skill_metrics)) && (is.null(data$fcst))) { + error(recipe$Run$logger, + paste0("The Visualization module has been called, but there is no ", + "fcst in 'data', and 'skill_metrics' is NULL, so there is no ", + "data that can be plotted.")) + stop() } if (is.null(archive)) { @@ -38,7 +38,7 @@ plot_data <- function(recipe, "conf/archive.yml"))$archive } else if (tolower(recipe$Analysis$Horizon) == "decadal") { archive <- read_yaml(paste0(recipe$Run$code_dir, - "conf/archive_decadal.yml"))$archive + "conf/archive_decadal.yml"))$archive } } @@ -49,32 +49,32 @@ plot_data <- function(recipe, } # Plot forecast ensemble mean - if (!is.null(calibrated_data$fcst)) { - plot_ensemble_mean(recipe, archive, calibrated_data$fcst, outdir) - } else if (!is.null(data$fcst)) { - warn(recipe$Run$logger, "Only the uncalibrated forecast was provided. - Using this data to plot the forecast ensemble mean.") + if (!is.null(data$fcst)) { plot_ensemble_mean(recipe, archive, data$fcst, outdir) } # Plot Most Likely Terciles - if ((!is.null(probabilities)) && (!is.null(calibrated_data$fcst))) { - plot_most_likely_terciles(recipe, archive, calibrated_data$fcst, - probabilities$percentiles, outdir) - } else if ((!is.null(probabilities)) && (!is.null(data$fcst))) { - warn(recipe$Run$logger, "Only the uncalibrated forecast was provided. - Using this data to plot the most likely terciles.") + if ((!is.null(probabilities)) && (!is.null(data$fcst))) { + plot_most_likely_terciles(recipe, archive, data$fcst, - probabilities$percentiles, outdir) + probabilities, outdir) } } plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, outdir, significance = F) { - + # recipe: Auto-S2S recipe + # archive: Auto-S2S archive + # data_cube: s2dv_cube object with the corresponding hindcast data + # skill_metrics: list of named skill metrics arrays + # outdir: output directory + # significance: T/F, whether to display the significance dots in the plots + + ## TODO: OPTION for CERISE: Using PuOr # Abort if frequency is daily if (recipe$Analysis$Variables$freq == "daily_mean") { - stop("Visualization functions not yet implemented for daily data.") + error(recipe$Run$logger, + paste0("Visualization functions not yet implemented for daily ", + "data.")) + stop() } # Abort if skill_metrics is not list if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { @@ -89,112 +89,136 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, init_month <- lubridate::month(as.numeric(substr(recipe$Analysis$Time$sdate, start = 1, stop = 2)), label = T, abb = T) + # Define color palette and number of breaks according to output format + ## TODO: Make separate function + if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { + diverging_palette <- "purpleorange" + sequential_palette <- "Oranges" + } else { + diverging_palette <- "bluered" + sequential_palette <- "Reds" + } # Group different metrics by type skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", - "enscorr_specs") + "enscorr_specs", "rmsss") scores <- c("rps", "frps", "crps", "frps_specs") - + # Assign colorbar to each metric type + ## TODO: Triangle ends for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { - if (name %in% names(skill_metrics)) { # Define plot characteristics and metric name to display in plot if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90_specs", "bss10_specs")) { + "rpss_specs", "bss90_specs", "bss10_specs", + "rmsss")) { display_name <- toupper(strsplit(name, "_")[[1]][1]) skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL } else if (name == "mean_bias_ss") { display_name <- "Mean Bias Skill Score" skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL } else if (name %in% c("enscorr", "enscorr_specs")) { display_name <- "Ensemble Mean Correlation" skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - + brks <- seq(-1, 1, by = 0.2) + cols <- clim.colors(length(brks) - 1, diverging_palette) + col_inf <- NULL + col_sup <- NULL } else if (name %in% scores) { skill <- skill_metrics[[name]] display_name <- toupper(strsplit(name, "_")[[1]][1]) brks <- seq(0, 1, by = 0.1) - col2 <-
grDevices::hcl.colors(length(brks) - 1, "Reds") - + colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) + cols <- colorbar[1:(length(colorbar) - 1)] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] } else if (name == "enssprerr") { - ## TODO: Adjust colorbar parameters skill <- skill_metrics[[name]] display_name <- "Spread-to-Error Ratio" - brks <- pretty(0:max(skill, na.rm = T), n = 20, min.n = 10) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - + brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) + colorbar <- clim.colors(length(brks), diverging_palette) + cols <- colorbar[1:(length(colorbar) - 1)] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] } else if (name == "mean_bias") { skill <- skill_metrics[[name]] display_name <- "Mean Bias" - max_value <- max(abs(skill)) - ugly_intervals <- seq(-max_value, max_value, (max_value*2)/10) - brks <- pretty(ugly_intervals, n = 20, min.n = 10) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] } - - options(bitmapType = "cairo") - - # Reorder dimensions - skill <- Reorder(skill, c("time", "longitude", "latitude")) - # If the significance has been requested and the variable has it, - # retrieve it and reorder its dimensions. - significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(skill_metrics))) { + options(bitmapType = "cairo") + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. significance_name <- paste0(name, "_significance") - skill_significance <- skill_metrics[[significance_name]] - skill_significance <- Reorder(skill_significance, c("time", - "longitude", - "latitude")) - # Split skill significance into list of lists, along the time dimension - # This allows for plotting the significance dots correctly. - skill_significance <- ClimProjDiags::ArrayToList(skill_significance, - dim = 'time', - level = "sublist", - names = "dots") - } else { - skill_significance <- NULL - } - # Define output file name and titles - outfile <- paste0(outdir, name, ".png") - toptitle <- paste(display_name, "-", data_cube$Variable$varName, - "-", system_name, "-", init_month, hcst_period) - months <- unique(lubridate::month(data_cube$Dates$start, - label = T, abb = F)) - titles <- as.vector(months) - # Plot - suppressWarnings( - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - asplit(skill, MARGIN=1), # Splitting array into a list - longitude, latitude, - special_args = skill_significance, - dot_symbol = 20, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - filled.continents=F, - brks = brks, - cols = col2, - fileout = outfile, - bar_label_digits = 3) - ) - } + if ((significance) && (significance_name %in% names(skill_metrics))) { + significance_name <- paste0(name, "_significance") + skill_significance <- skill_metrics[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) + # Split skill significance into list of lists, along the time dimension + # to avoid overlapping of significance dots.
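+ # (ArrayToList() produces one sublist per time step, each holding its + # significance mask under the name 'dots', matching the 'special_args' + # argument of PlotLayout below.)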
+ skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + dim = 'time', + level = "sublist", + names = "dots") + } else { + skill_significance <- NULL + } + # Define output file name and titles + outfile <- paste0(outdir, name, ".png") + toptitle <- paste(display_name, "-", data_cube$Variable$varName, + "-", system_name, "-", init_month, hcst_period) + months <- unique(lubridate::month(data_cube$Dates$start, + label = T, abb = F)) + titles <- as.vector(months) + # Plot + suppressWarnings( + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + asplit(skill, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = skill_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents=F, + brks = brks, + cols = cols, + col_inf = col_inf, + col_sup = col_sup, + fileout = outfile, + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + ) + } } - info(recipe$Run$logger, "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") } plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { + ## TODO: Add 'anomaly' to plot title # Abort if frequency is daily if (recipe$Analysis$Variables$freq == "daily_mean") { stop("Visualization functions not yet implemented for daily data.") @@ -207,7 +231,6 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { units <- attr(fcst$Variable, "variable")$units start_date <- paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate) - # Compute ensemble mean ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') # Drop extra dims, add time dim if missing: @@ -217,9 +240,14 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) } if (!'syear' %in% names(dim(ensemble_mean))) { - ensemble_mean <- Reorder(ensemble_mean, c("time", "longitude", "latitude")) + ensemble_mean <- Reorder(ensemble_mean, c("time", + "longitude", + "latitude")) } else { - ensemble_mean <- Reorder(ensemble_mean, c("syear", "time", "longitude", "latitude")) + ensemble_mean <- Reorder(ensemble_mean, c("syear", + "time", + "longitude", + "latitude")) } ## TODO: Redefine column colors, possibly depending on variable if (variable == 'prlr') { @@ -229,10 +257,18 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { palette = "RdBu" rev = T } - - brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) - col2 <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) - # color <- colorRampPalette(col2)(length(brks) - 1) + # Define brks, centered on zero in the case of anomalies + if (grepl("anomaly", + attr(fcst$Variable, "variable")$long_name)) { + variable <- paste(variable, "anomaly") + max_value <- max(abs(ensemble_mean), na.rm = T) + ugly_intervals <- seq(-max_value, max_value, max_value/20) + brks <- pretty(ugly_intervals, n = 12, min.n = 8) + } else { + brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) + } + cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) options(bitmapType = "cairo") for (i_syear in start_date) { @@ -257,21 +293,24 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { title_scale = 0.6, titles = titles, units = units, - cols = col2, + cols = cols, brks = brks, fileout = outfile, - bar_label_digits = 4) + bar_label_digits = 4, + bar_extra_margin = rep(0.7, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) } - info(recipe$Run$logger, "##### FCST ENSEMBLE MEAN PLOT SAVED TO
OUTPUT DIRECTORY #####") } plot_most_likely_terciles <- function(recipe, archive, fcst, - percentiles, + probabilities, outdir) { + ## TODO: Add 'anomaly' to plot title # Abort if frequency is daily if (recipe$Analysis$Variables$freq == "daily_mean") { stop("Visualization functions not yet implemented for daily data.") @@ -283,26 +322,23 @@ plot_most_likely_terciles <- function(recipe, archive, variable <- recipe$Analysis$Variables$name start_date <- paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate) - if (is.null(recipe$Analysis$remove_NAs)) { - recipe$Analysis$remove_NAs <- FALSE - } - if (is.null(recipe$Analysis$ncores)) { - recipe$Analysis$ncores <- 1 - } - # Compute probability bins for the forecast - if (is.null(percentiles$percentile_33) | is.null(percentiles$percentile_33)) { - stop("The quantile array does not contain the 33rd and 66th percentiles,", - " the most likely tercile map cannot be plotted.") + # Retrieve and rearrange probability bins for the forecast + if (is.null(probabilities$probs_fcst$prob_b33) || + is.null(probabilities$probs_fcst$prob_33_to_66) || + is.null(probabilities$probs_fcst$prob_a66)) { + stop("The forecast tercile probability bins are not present inside ", + "'probabilities', the most likely tercile map cannot be plotted.") } - terciles <- abind(percentiles$percentile_33, percentiles$percentile_66, - along = 0) - names(dim(terciles)) <- c("bin", names(dim(percentiles$percentile_33))) - probs_fcst <- compute_probs(fcst$data, terciles, - ncores = recipe$Analysis$ncores, - na.rm = recipe$Analysis$remove_NAs) + probs_fcst <- abind(probabilities$probs_fcst$prob_b33, + probabilities$probs_fcst$prob_33_to_66, + probabilities$probs_fcst$prob_a66, + along = 0) + names(dim(probs_fcst)) <- c("bin", + names(dim(probabilities$probs_fcst$prob_b33))) + ## TODO: Improve this section # Drop extra dims, add time dim if missing: probs_fcst <- drop(probs_fcst) if (!("time" %in% names(dim(probs_fcst)))) { @@ -311,7 +347,8 @@ plot_most_likely_terciles <- function(recipe, archive, if (!'syear' %in% names(dim(probs_fcst))) { probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) } else { - probs_fcst <- Reorder(probs_fcst, c("syear", "time", "bin", "longitude", "latitude")) + probs_fcst <- Reorder(probs_fcst, + c("syear", "time", "bin", "longitude", "latitude")) } for (i_syear in start_date) { @@ -344,6 +381,9 @@ plot_most_likely_terciles <- function(recipe, archive, titles = titles, fileout = outfile, bar_label_digits = 2, + bar_scale = rep(0.7, 4), + bar_label_scale = 1.2, + axes_label_scale = 1.3, triangle_ends = c(F, F), width = 11, height = 8) ) } diff --git a/modules/Visualization/tmp/clim.palette.R b/modules/Visualization/tmp/clim.palette.R new file mode 100644 index 0000000000000000000000000000000000000000..b23ff8428f201dbe4cb129e5f202ab2f1924532f --- /dev/null +++ b/modules/Visualization/tmp/clim.palette.R @@ -0,0 +1,69 @@ +#'Generate Climate Color Palettes +#' +#'Generates a colorblind friendly color palette with color ranges useful in +#'climate temperature variable plotting. +#' +#'@param palette Which type of palette to generate: from blue through white +#' to red ('bluered'), from red through white to blue ('redblue'), from +#' yellow through orange to red ('yellowred'), from red through orange to +#' red ('redyellow'), from purple through white to orange ('purpleorange'), +#' and from orange through white to purple ('orangepurple'). +#'@param n Number of colors to generate. 
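+#' +#'@return For 'clim.palette', a color-interpolating function as created by +#' 'colorRampPalette()', carrying a 'na_color' attribute; for 'clim.colors', +#' a character vector of 'n' colors.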
+#' +#'@examples +#'lims <- seq(-1, 1, length.out = 21) +#' +#'ColorBar(lims, color_fun = clim.palette('redyellow')) +#' +#'cols <- clim.colors(20) +#'ColorBar(lims, cols) +#' +#'@rdname clim.palette +#'@importFrom grDevices colorRampPalette +#'@export +clim.palette <- function(palette = "bluered") { + if (palette == "bluered") { + colorbar <- colorRampPalette(rev(c("#67001f", "#b2182b", "#d6604d", + "#f4a582", "#fddbc7", "#f7f7f7", + "#d1e5f0", "#92c5de", "#4393c3", + "#2166ac", "#053061"))) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "redblue") { + colorbar <- colorRampPalette(c("#67001f", "#b2182b", "#d6604d", + "#f4a582", "#fddbc7", "#f7f7f7", + "#d1e5f0", "#92c5de", "#4393c3", + "#2166ac", "#053061")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "yellowred") { + colorbar <- colorRampPalette(c("#ffffcc", "#ffeda0", "#fed976", + "#feb24c", "#fd8d3c", "#fc4e2a", + "#e31a1c", "#bd0026", "#800026")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "redyellow") { + colorbar <- colorRampPalette(rev(c("#ffffcc", "#ffeda0", "#fed976", + "#feb24c", "#fd8d3c", "#fc4e2a", + "#e31a1c", "#bd0026", "#800026"))) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "purpleorange") { + colorbar <- colorRampPalette(c("#2d004b", "#542789", "#8073ac", + "#b2abd2", "#d8daeb", "#f7f7f7", + "#fee0b6", "#fdb863", "#e08214", + "#b35806", "#7f3b08")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "orangepurple") { + colorbar <- colorRampPalette(rev(c("#2d004b", "#542789", "#8073ac", + "#b2abd2", "#d8daeb", "#f7f7f7", + "#fee0b6", "#fdb863", "#e08214", + "#b35806", "#7f3b08"))) + attr(colorbar, 'na_color') <- 'pink' + } else { + stop("Parameter 'palette' must be one of 'bluered', 'redblue', 'yellowred', ", + "'redyellow', 'purpleorange' or 'orangepurple'.") + } + colorbar +} +#'@rdname clim.palette +#'@export +clim.colors <- function(n, palette = "bluered") { + clim.palette(palette)(n) +} diff --git a/modules/test_decadal.R b/modules/test_decadal.R index 80304f978052889c19eccdf85ba9f1e99488dfe8..8998cfbe202e2d0a52cb69fe4e0ab59b9730d95f 100644 --- a/modules/test_decadal.R +++ b/modules/test_decadal.R @@ -16,15 +16,15 @@ data <- load_datasets(recipe) calibrated_data <- calibrate_datasets(recipe, data) # Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) # Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, calibrated_data$hcst) +probabilities <- compute_probabilities(recipe, calibrated_data) # Export all data to netCDF -save_data(recipe, data, calibrated_data, skill_metrics, probabilities) +save_data(recipe, calibrated_data, skill_metrics, probabilities) # Plot data -plot_data(recipe, data, calibrated_data, skill_metrics, probabilities, +plot_data(recipe, calibrated_data, skill_metrics, probabilities, significance = T) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index d8eb5c4eba48b064ad463dc52b6e863d02b0589e..b8541488c8540e61c694c42a0f36be60595699a9 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -1,23 +1,25 @@ source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" +recipe_file <-
"modules/Loading/testing_recipes/recipe_seasonal-tests.yml" recipe <- prepare_outputs(recipe_file) -# archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets data <- load_datasets(recipe) # Calibrate datasets calibrated_data <- calibrate_datasets(recipe, data) -# Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +# Compute anomalies +calibrated_data <- compute_anomalies(recipe, calibrated_data) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) # Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, calibrated_data$hcst) +probabilities <- compute_probabilities(recipe, calibrated_data) # Export all data to netCDF -save_data(recipe, data, calibrated_data, skill_metrics, probabilities) +save_data(recipe, calibrated_data, skill_metrics, probabilities) # Plot data -plot_data(recipe, data, calibrated_data, skill_metrics, probabilities, +plot_data(recipe, calibrated_data, skill_metrics, probabilities, significance = T) diff --git a/recipes/recipe_splitting_example.yml b/recipes/recipe_splitting_example.yml new file mode 100644 index 0000000000000000000000000000000000000000..78a4d18c566bd492ba66f483ae1e80f96b0db1ec --- /dev/null +++ b/recipes/recipe_splitting_example.yml @@ -0,0 +1,60 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. Agudetse + Info: Test for recipe splitting + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? + - {name: tas, freq: monthly_mean} + - {name: prlr, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: Meteo-France-System7} + - {name: ECMWF-SEAS5} + Multimodel: False # single option + Reference: + - {name: ERA5} # multiple references for single model? + Time: + sdate: # list, split + - '1101' + - '1201' + fcst_year: '2020' # list, don't split, handled internally + hcst_start: '1993' # single option + hcst_end: '2016' # single option + ftime_min: 1 # single option + ftime_max: 6 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "global", latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + - {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: bilinear ## TODO: allow multiple methods? + type: to_system + Workflow: + Calibration: + method: mse_min ## TODO: list, split? + Skill: + metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + Indicators: + index: no # ? 
+ ncores: 7 + remove_NAs: yes # bool, don't split + Output_format: S2S4E # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/recipes/seasonal_oper.yml b/recipes/seasonal_oper.yml index 5e5f61fcdd2249bf0f97c4932439994547bda7cd..a1351e37306778ffaf9ae8867de84b9daf6ec38b 100644 --- a/recipes/seasonal_oper.yml +++ b/recipes/seasonal_oper.yml @@ -26,10 +26,10 @@ Analysis: - None Datasets: System: - - name: system5c3s # list of strs + - name: ECMWF-SEAS5 # list of strs Multimodel: False # boolean, if true system above are aggregated into single multi-model Reference: # single dict? in the future multiple ref can be an asset - - {name: era5} # str + - {name: ERA5} # str Time: sdate: fcst_syear: ["2017"] # list of ints or None (case where only hcst is verfied) diff --git a/recipes/tests/execute_tests.R b/recipes/tests/old_tests/execute_tests.R similarity index 100% rename from recipes/tests/execute_tests.R rename to recipes/tests/old_tests/execute_tests.R diff --git a/recipes/tests/seasonal_testWorkflow1.yml b/recipes/tests/old_tests/seasonal_testWorkflow1.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow1.yml rename to recipes/tests/old_tests/seasonal_testWorkflow1.yml diff --git a/recipes/tests/seasonal_testWorkflow2.yml b/recipes/tests/old_tests/seasonal_testWorkflow2.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow2.yml rename to recipes/tests/old_tests/seasonal_testWorkflow2.yml diff --git a/recipes/tests/seasonal_testWorkflow3.yml b/recipes/tests/old_tests/seasonal_testWorkflow3.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow3.yml rename to recipes/tests/old_tests/seasonal_testWorkflow3.yml diff --git a/recipes/tests/seasonal_testWorkflow4.yml b/recipes/tests/old_tests/seasonal_testWorkflow4.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow4.yml rename to recipes/tests/old_tests/seasonal_testWorkflow4.yml diff --git a/recipes/tests/seasonal_testWorkflow5.yml b/recipes/tests/old_tests/seasonal_testWorkflow5.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow5.yml rename to recipes/tests/old_tests/seasonal_testWorkflow5.yml diff --git a/recipes/tests/seasonal_testWorkflow6.yml b/recipes/tests/old_tests/seasonal_testWorkflow6.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow6.yml rename to recipes/tests/old_tests/seasonal_testWorkflow6.yml diff --git a/recipes/tests/seasonal_testWorkflow7.yml b/recipes/tests/old_tests/seasonal_testWorkflow7.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow7.yml rename to recipes/tests/old_tests/seasonal_testWorkflow7.yml diff --git a/recipes/tests/seasonal_testWorkflow8.yml b/recipes/tests/old_tests/seasonal_testWorkflow8.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow8.yml rename to recipes/tests/old_tests/seasonal_testWorkflow8.yml diff --git a/recipes/tests/recipe_seasonal_two-variables.yml b/recipes/tests/recipe_seasonal_two-variables.yml new file mode 100644 index 0000000000000000000000000000000000000000..0cef06b312dde288b04512b00e93f4a09724a8c9 --- /dev/null +++ b/recipes/tests/recipe_seasonal_two-variables.yml @@ -0,0 +1,60 @@ 
+################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. Agudetse + Info: Test Independent verification of two variables + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? + - {name: tas, freq: monthly_mean} + - {name: prlr, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: ECMWF-SEAS5} + Multimodel: False # single option + Reference: + - {name: ERA5} # multiple references for single model? + Time: + sdate: # list, split + - '0101' + fcst_year: '2020' # list, don't split, handled internally + hcst_start: '2000' # single option + hcst_end: '2016' # single option + ftime_min: 1 # single option + ftime_max: 3 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: bilinear ## TODO: allow multiple methods? + type: to_system + Workflow: + Anomalies: + compute: yes + cross_validation: yes + Calibration: + method: mse_min ## TODO: list, split? + Skill: + metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + Indicators: + index: no # ? + ncores: 7 + remove_NAs: yes # bool, don't split + Output_format: S2S4E # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index 90048009bf356ba79f87306efbd75c4914d78289..7a2a575baefd1881ecf2616a654b944e32f11b9c 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: qmap Skill: diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index cedb07f002cfb46ed50e980ead642ba7e1884414..35b55b1a1985142d0bb6833ccd925da5142cfd3e 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: bias Skill: diff --git a/tests/recipes/recipe-decadal_monthly_1b.yml b/tests/recipes/recipe-decadal_monthly_1b.yml new file mode 100644 index 0000000000000000000000000000000000000000..5551d9c7c9611a79b73646bc846bbda629e9cbd8 --- /dev/null +++ b/tests/recipes/recipe-decadal_monthly_1b.yml @@ -0,0 +1,51 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 + member: r1i4p1f1,r2i4p1f1 + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: +
fcst_year: [2020,2021] + hcst_start: 1991 + hcst_end: 1994 +# season: 'Annual' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: bias + Skill: + metric: RPSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 6c33d30270e41536dfa6a82afc31c412a23ca807..45eb01dd44e20486eec242362f39df05f459603f 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -29,10 +29,13 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: raw Skill: - metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + metric: RPSS_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS Probabilities: percentiles: [[1/3, 2/3]] Indicators: diff --git a/tests/recipes/recipe-decadal_monthly_3.yml b/tests/recipes/recipe-decadal_monthly_3.yml index 87197af62ba40df07bed0da3243bed8d440678cf..94bdfebc7fb5d1b56552f0ce983fcd091466f95f 100644 --- a/tests/recipes/recipe-decadal_monthly_3.yml +++ b/tests/recipes/recipe-decadal_monthly_3.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: 'evmos' Skill: diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml index fc1bc58c49b2747999100a098aaf000e9282b926..afa0f4966bf6b242e40f1179f79882fbc60c1022 100644 --- a/tests/recipes/recipe-seasonal_daily_1.yml +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -8,10 +8,10 @@ Analysis: freq: daily_mean Datasets: System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1201' fcst_year: @@ -28,6 +28,9 @@ Analysis: method: conservative type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: qmap Skill: diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index 0e1b6f4b023c39e01841ffa98353e16a0bca690d..68c58f83603baf157662d5c44f885a814cb914ec 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: Meteo-France-System7 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' @@ -28,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: mse_min Skill: diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 5cf1922e73e1ee4bc1a8c46f9998cc64ecbc145b..b76a216c18d605436a730c92504be69379e991d8 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -24,23 +24,22 @@ suppressWarnings({invisible(capture.output( # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- 
compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data$hcst) +probs <- compute_probabilities(recipe, calibrated_data) ))}) # Saving suppressWarnings({invisible(capture.output( -save_data(recipe = recipe, data = data, calibrated_data = calibrated_data, +save_data(recipe = recipe, data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs, archive = archive) ))}) # Plotting suppressWarnings({invisible(capture.output( -plot_data(recipe = recipe, archive = archive, data = data, - calibrated_data = calibrated_data, skill_metrics = skill_metrics, - probabilities = probs, significance = T) +plot_data(recipe = recipe, archive = archive, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, significance = T) ))}) @@ -137,7 +136,7 @@ TRUE ) expect_equal( names(calibrated_data), -c("hcst", "fcst") +c("hcst", "obs", "fcst") ) expect_equal( class(calibrated_data$hcst), @@ -215,7 +214,7 @@ rep(FALSE, 3) # Probs expect_equal( names(probs), -c('probs', 'percentiles') +c('probs', 'probs_fcst', 'percentiles') ) expect_equal( names(probs$probs), @@ -263,7 +262,7 @@ list.files(outdir), c("plots", "tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-percentiles_month11.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", - "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-skill_month11.nc") + "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc") ) # open the files and check values/attributes? #expect_equal( @@ -284,3 +283,56 @@ c("forecast_ensemble_mean.png", "forecast_most_likely_tercile.png", # Delete files unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) + + +#============================================================== + +# Compare with 2 forecast + +recipe_file <- "tests/recipes/recipe-decadal_monthly_1b.yml" +recipe <- prepare_outputs(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data_b <- load_datasets(recipe) +))}) + +# Calibrate datasets +suppressWarnings({invisible(capture.output( + calibrated_data_b <- calibrate_datasets(recipe, data_b) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics_b <- compute_skill_metrics(recipe, calibrated_data_b) +))}) +suppressWarnings({invisible(capture.output( +probs_b <- compute_probabilities(recipe, calibrated_data_b) +))}) + + +test_that("6. 
Compare with two sdates in forecast", { + + +expect_equal( +c(ClimProjDiags::Subset(data_b$fcst$data, 'syear', 2, drop = F)), +c(data$fcst$data) +) + +expect_equal( +c(ClimProjDiags::Subset(calibrated_data_b$fcst$data, 'syear', 2, drop = F)), +c(calibrated_data$fcst$data) +) + +expect_equal( +skill_metrics_b, +skill_metrics +) + +expect_equal( +lapply(probs_b$probs_fcst, ClimProjDiags::Subset, 'syear', 2), +probs$probs_fcst +) + +}) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 4dd72ebf13c632479bbd34772a54509805b7c057..cdab57f3174d710e31ad68eb17eab14304b81d84 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -6,6 +6,7 @@ source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml" recipe <- prepare_outputs(recipe_file) @@ -22,12 +23,28 @@ suppressWarnings({invisible(capture.output( # Compute skill metrics suppressMessages({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data$hcst) +probs <- compute_probabilities(recipe, calibrated_data) ))}) +# Saving +suppressWarnings({invisible(capture.output( +save_data(recipe, calibrated_data, skill_metrics, probs) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +plot_data(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, + probabilities = probs, significance = T) +))}) + + +outdir <- get_dir(recipe) + + #====================================== test_that("1. Loading", { @@ -132,12 +149,13 @@ test_that("2. Calibration", { expect_equal( names(calibrated_data), -c("hcst", "fcst") -) -expect_equal( -calibrated_data, -data[1:2] +c("hcst", "obs", "fcst") ) +## TODO: Ask An-Chi about this test +# expect_equal( +# calibrated_data, +# data[1:2] +# ) }) @@ -151,7 +169,7 @@ TRUE ) expect_equal( names(skill_metrics), -c("rpss_specs", "bss90_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps") +c("rpss_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps") ) expect_equal( class(skill_metrics$rpss_specs), @@ -166,10 +184,10 @@ as.vector(skill_metrics$rpss_specs[6:8, 1, 2]), c(-0.3333333, 0.1666667, -0.3333333), tolerance = 0.0001 ) -expect_equal( -all(is.na(skill_metrics$bss90_specs)), -TRUE -) +#expect_equal( +#all(is.na(skill_metrics$bss90_specs)), +#TRUE +#) expect_equal( as.vector(skill_metrics$enscorr_specs[6:8, 1, 2]), c(0.4474382, 0.1026333, 0.4042823), @@ -198,7 +216,7 @@ tolerance = 0.0001 # Probs expect_equal( names(probs), -c('probs', 'percentiles') +c('probs', 'probs_fcst', 'percentiles') ) expect_equal( names(probs$probs), @@ -229,4 +247,29 @@ tolerance = 0.0001 }) +#====================================== + +test_that("4. 
Saving", { + +expect_equal( +list.files(outdir), +c("plots", "tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_20201101.nc", "tas_20211101.nc", + "tas-obs_19901101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", + "tas-percentiles_month11.nc", "tas-probs_19901101.nc", "tas-probs_19911101.nc", + "tas-probs_19921101.nc", "tas-probs_20201101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc") +) +}) + +#====================================== + +test_that("5. Visualization", { +expect_equal( +list.files(paste0(outdir, "/plots/")), +c("bss10_specs.png", "enscorr_specs.png", "forecast_ensemble_mean_2020.png", "forecast_ensemble_mean_2021.png", "forecast_most_likely_tercile_2020.png", "forecast_most_likely_tercile_2021.png", "frps_specs.png", "frps.png", "rpss_specs.png") +) + +}) + +# Delete files +unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 7535e8dc6fcdedeb2eb7a69fa1f13e917e6178bc..22fd435391ab4ae1d5cc0f9e4a9ed0898d961e20 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -23,10 +23,10 @@ suppressWarnings({invisible(capture.output( # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data$hcst) +probs <- compute_probabilities(recipe, calibrated_data) ))}) #====================================== @@ -110,7 +110,7 @@ test_that("2. Calibration", { expect_equal( names(calibrated_data), -c("hcst", "fcst") +c("hcst", "obs", "fcst") ) expect_equal( as.vector(aperm(drop(calibrated_data$hcst$data), c(5, 1:4))[3, , 2, 2, 2]), @@ -133,7 +133,7 @@ TRUE ) expect_equal( names(skill_metrics), -c("bss10", "bss10_significance", "corr", "corr_p.value", "corr_conf.low", "corr_conf.up") +c("bss10", "bss10_significance", "corr", "corr_significance") ) expect_equal( class(skill_metrics[[1]]), @@ -144,7 +144,7 @@ all(unlist(lapply(lapply(skill_metrics, dim)[1:2], all.equal, c(time = 3, latitu TRUE ) expect_equal( -all(unlist(lapply(lapply(skill_metrics, dim)[3:6], all.equal, c(ensemble = 3, time = 3, latitude = 25, longitude = 16)))), +all(unlist(lapply(lapply(skill_metrics, dim)[3:4], all.equal, c(ensemble = 3, time = 3, latitude = 25, longitude = 16)))), TRUE ) expect_equal( diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 5b771d77fd4bf22eb74626ec7b2170602234dd0b..ddcca22fd93750647b02ecfc5290591edfc5167d 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -19,7 +19,7 @@ calibrated_data <- calibrate_datasets(recipe, data) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) test_that("1. 
Loading", { @@ -106,7 +106,7 @@ TRUE ) expect_equal( names(calibrated_data), -c("hcst", "fcst") +c("hcst", "obs", "fcst") ) expect_equal( class(calibrated_data$hcst), @@ -163,3 +163,5 @@ c(0.7509920, 0.6514916, 0.5118371), tolerance=0.0001 ) }) + +unlink(recipe$Run$output_dir) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 86feedfbb14eb550cdf8cb26ee00283964d3df4d..de03bf7340906650004fbdf5bcdf445992144c09 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -22,22 +22,22 @@ calibrated_data <- calibrate_datasets(recipe, data) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data$hcst) +probs <- compute_probabilities(recipe, calibrated_data) ))}) # Saving suppressWarnings({invisible(capture.output( -save_data(recipe = recipe, data = data, calibrated_data = calibrated_data, +save_data(recipe = recipe, data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs) ))}) # Plotting suppressWarnings({invisible(capture.output( -plot_data(recipe = recipe, data = data, calibrated_data = calibrated_data, +plot_data(recipe = recipe, data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs, significance = T) ))}) @@ -108,19 +108,19 @@ tolerance = 0.0001 ) expect_equal( (data$hcst$Dates$start)[1], -as.POSIXct("1993-12-01", tz = 'UTC') +as.POSIXct("1993-11-30 23:59:59", tz = 'UTC') ) expect_equal( (data$hcst$Dates$start)[2], -as.POSIXct("1994-12-01", tz = 'UTC') +as.POSIXct("1994-11-30 23:59:59", tz = 'UTC') ) expect_equal( (data$hcst$Dates$start)[5], -as.POSIXct("1994-01-01", tz = 'UTC') +as.POSIXct("1993-12-31 23:59:59", tz = 'UTC') ) expect_equal( (data$obs$Dates$start)[10], -as.POSIXct("1995-02-14", tz = 'UTC') +as.POSIXct("1995-01-15 12:00:00", tz = 'UTC') ) }) @@ -133,7 +133,7 @@ TRUE ) expect_equal( names(calibrated_data), -c("hcst", "fcst") +c("hcst", "obs", "fcst") ) expect_equal( class(calibrated_data$hcst), @@ -153,22 +153,22 @@ c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, long ) expect_equal( mean(calibrated_data$fcst$data), -291.1218, +291.6433, tolerance = 0.0001 ) expect_equal( mean(calibrated_data$hcst$data), -289.8596, +290.9006, tolerance = 0.0001 ) expect_equal( as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), -c(287.7982, 287.0422, 290.4297), +c(291.8887, 287.0233, 289.8808), tolerance = 0.0001 ) expect_equal( range(calibrated_data$fcst$data), -c(283.5374, 306.2353), +c(283.8926, 299.0644), tolerance = 0.0001 ) @@ -185,8 +185,7 @@ TRUE expect_equal( names(skill_metrics), c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", - "enscorr_p.value", "enscorr_conf.low", "enscorr_conf.up", "corr", - "corr_p.value", "corr_conf.low", "corr_conf.up", "enscorr_specs") + "enscorr_significance", "corr", "corr_significance", "enscorr_specs") ) expect_equal( class(skill_metrics$rpss), @@ -202,7 +201,7 @@ dim(skill_metrics$rpss) ) expect_equal( as.vector(skill_metrics$rpss[, 2, 3]), -c(-1.153829, -1.114743, -1.392457), +c(-0.2918857, -1.4809143, -1.3842286), tolerance = 0.0001 ) expect_equal( @@ -216,10 +215,12 @@ test_that("4. 
Saving", { expect_equal( list.files(outdir), -c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "tas_20201101.nc", - "tas-corr_month11.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", - "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", - "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", "tas-skill_month11.nc") +c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", + "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", + "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", + "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", + "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", + "tas-probs_20201101.nc", "tas-skill_month11.nc") ) }) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 25536335eb6d65f4bcf3bb41717a6973660e7684..559d049226fbd0902376fe85538fa032e01796e1 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,93 +1,181 @@ -check_recipe <- function(recipe, logger) { +check_recipe <- function(recipe) { # recipe: yaml recipe already read it - # output: errors or the total number of workflow (vars x regions) to compute + ## TODO: Adapt to decadal case - info(logger, paste("Checking recipe", recipe$filename)) + info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) # --------------------------------------------------------------------- # ANALYSIS CHECKS # --------------------------------------------------------------------- - TIME_SETTINGS = c('sdate','leadtimemin','leadtimemax','hcst_start','hcst_end') - PARAMS = c('Horizon','Time','Variables','Region','Regrid','Workflow','Datasets') - HORIZONS <- c('Subseasonal','Seasonal','Decadal') + TIME_SETTINGS_SEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", + "hcst_end") + TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") + PARAMS <- c("Horizon", "Time", "Variables", "Region", "Regrid", "Workflow", + "Datasets") + HORIZONS <- c("subseasonal", "seasonal", "decadal") + ARCHIVE_SEASONAL <- "conf/archive.yml" + ARCHIVE_DECADAL <- "conf/archive_decadal.yml" - # create output dirs: - if (!any(names(recipe) %in% "Analysis")) { - error(logger, "The recipe should contain an element called 'Analysis'.") + # Define error status variable + error_status <- F + + # Check basic elements in recipe:Analysis: + if (!("Analysis" %in% names(recipe))) { + error(recipe$Run$logger, + "The recipe must contain an element called 'Analysis'.") + error_status <- T } if (!all(PARAMS %in% names(recipe$Analysis))) { - error(logger, - paste("The element 'Analysis' in the recipe should contain these", - "elements:", paste(PARAMS, collapse = " "))) + error(recipe$Run$logger, + paste0("The element 'Analysis' in the recipe must contain all of ", + "the following: ", paste(PARAMS, collapse = ", "), ".")) + error_status <- T } - if (!any(HORIZONS %in% recipe$Analysis$Horizon)) { - error(logger, - "The element 'Horizon' in the recipe should be one of the followings:", - paste(HORIZONS, collapse = " ")) - } - # Check temporal settings and - # count the number of verifications - if (!all(TIME_SETTINGS %in% names(recipe$Analysis$Time))) { - error(logger, - paste("The element 'Time' in the recipe should contain these elements:", - paste(TIME_SETTINGS, collapse = " "))) - } - if (is.null(recipe$Analysis$Time$sdate$fcst_year) || - recipe$Analysis$Time$sdate$fcst_year == 'None') { + if (!any(HORIZONS %in% 
tolower(recipe$Analysis$Horizon))) { + error(recipe$Run$logger, + paste0("The element 'Horizon' in the recipe must be one of the ", + "following: ", paste(HORIZONS, collapse = ", "), ".")) + error_status <- T + } + # Check time settings + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + ## TODO: Specify filesystem + archive <- read_yaml(ARCHIVE_SEASONAL)$archive + if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { + error(recipe$Run$logger, + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_SEASONAL, + collapse = ", "), ".")) + error_status <- T + } + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + archive <- read_yaml(ARCHIVE_DECADAL)$archive + if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { + error(recipe$Run$logger, + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_DECADAL, + collapse = ", "), ".")) + error_status <- T + } + } + # Check system names + if (!all(recipe$Analysis$Datasets$System$name %in% names(archive$System))) { + error(recipe$Run$logger, + "The specified System name was not found in the archive.") + error_status <- T + } + # Check reference names + if (!all(recipe$Analysis$Datasets$Reference$name %in% + names(archive$Reference))) { + error(recipe$Run$logger, + "The specified Reference name was not found in the archive.") + error_status <- T + } + # Check ftime_min and ftime_max + if ((!(recipe$Analysis$Time$ftime_min > 0)) || + (!is.integer(recipe$Analysis$Time$ftime_min))) { + error(recipe$Run$logger, + "The element 'ftime_min' must be an integer larger than 0.") + error_status <- T + } + if ((!(recipe$Analysis$Time$ftime_max > 0)) || + (!is.integer(recipe$Analysis$Time$ftime_max))) { + error(recipe$Run$logger, + "The element 'ftime_max' must be an integer larger than 0.") + error_status <- T + } + if ((is.numeric(recipe$Analysis$Time$ftime_max)) && + (is.numeric(recipe$Analysis$Time$ftime_min))) { + if (recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { + error(recipe$Run$logger, + "'ftime_max' cannot be smaller than 'ftime_min'.") + error_status <- T + } + } + # Check consistency of hindcast years + if (!(as.numeric(recipe$Analysis$Time$hcst_start) %% 1 == 0) || + (!(recipe$Analysis$Time$hcst_start > 0))) { + error(recipe$Run$logger, + "The element 'hcst_start' must be a valid year.") + error_status <- T + } + if (!(as.numeric(recipe$Analysis$Time$hcst_end) %% 1 == 0) || + (!(recipe$Analysis$Time$hcst_end > 0))) { + error(recipe$Run$logger, + "The element 'hcst_end' must be a valid year.") + error_status <- T + } + if (recipe$Analysis$Time$hcst_end < recipe$Analysis$Time$hcst_start) { + error(recipe$Run$logger, + "'hcst_end' cannot be smaller than 'hcst_start'.") + error_status <- T + } + ## TODO: Is this needed? 
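+ # 'stream' records whether only the hindcast ("hindcast") or also a + # forecast ("fcst") will be verified, depending on whether 'fcst_year' + # is defined in the recipe.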
+ if (is.null(recipe$Analysis$Time$fcst_year) || + tolower(recipe$Analysis$Time$fcst_year) == 'none') { stream <- "hindcast" - recipe$Analysis$Time$sdate$fcst_year <- 'YYYY' + # recipe$Analysis$Time$fcst_year <- 'YYYY' } else { stream <- "fcst" } - if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && - tolower(recipe$Analysis$Horizon) != "subseasonal") { - warn(logger, - paste("Only subseasonal verification allows multiple forecast days."), - "Element fcst_day in recipe set as 1.") - recipe$Analysis$Time$sdate$fcst_day <- '01' - } - if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) { - error(logger, - paste("The element 'fcst_sday' in the recipe should be defined.")) - } - if (is.null(recipe$Analysis$Time$sdate$fcst_syear)) { - error(logger, - paste("The element 'fcst_syear' in the recipe should be defined.")) + + ## TODO: To be implemented in the future + # if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && + # tolower(recipe$Analysis$Horizon) != "subseasonal") { + # warn(recipe$Run$logger, + # paste("Only subseasonal verification allows multiple forecast days."), + # "Element fcst_day in recipe set as 1.") + # recipe$Analysis$Time$sdate$fcst_day <- '01' + # } + ## TODO: Delete, this parameter was deprecated + # if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) { + # error(recipe$Run$logger, + # paste("The element 'fcst_sday' in the recipe should be defined.")) + # } + + if (is.null(recipe$Analysis$Time$fcst_year)) { + warn(recipe$Run$logger, + paste("The element 'fcst_year' is not defined in the recipe.", + "No forecast year will be used.")) } + ## TODO: Adapt and move this inside 'if'? + # fcst.sdate <- NULL + # for (syear in recipe$Analysis$Time$fcst_year) { + # for (sday in recipe$Analysis$Time$sdate) { + # fcst.sdate <- c(fcst.sdate, + # paste0(syear, + # sprintf("%04d", as.numeric(sday)))) + # } + # } + # fcst.sdate <- list(stream = stream, fcst.sdate = fcst.sdate) - - fcst.sdate <- NULL - for (syear in recipe$Analysis$Time$sdate$fcst_syear) { - for (sday in recipe$Analysis$Time$sdate$fcst_sday) { - fcst.sdate <- c(fcst.sdate, - paste0(syear, - sprintf("%04d", as.numeric(sday)))) - } - } - fcst.sdate <- list(stream = stream, fcst.sdate = fcst.sdate) # Regrid checks: if (length(recipe$Analysis$Regrid) != 2) { - error(logger, - "The 'Regrid' element should specified the 'method' and 'type'.") - stop("EXECUTION FAILED") + error(recipe$Run$logger, + "The 'Regrid' element must specify the 'method' and 'type'.") + error_status <- T } -# more checks + # TODO: Add Workflow checks? # ... 
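+ # For reference, a 'Regrid' element that passes the check above looks + # like (see recipes/recipe_splitting_example.yml): + # Regrid: + # method: bilinear + # type: to_system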
- # calculate number of workflows to create for each variable and + # calculate number of workflows to create for each variable and if (length(recipe$Analysis$Horizon) > 1) { - error(logger, "Only 1 Horizon can be specified in the recipe") - stop("EXECUTION FAILED") - } - nvar <- length(recipe$Analysis$Variables) - if (nvar > 2) { - error(logger, - "Only two type of Variables can be listed: ECVs and Indicators.") - stop("EXECUTION FAILED") + error(recipe$Run$logger, + "Only one Horizon can be specified in the recipe") + error_status <- T } + + ## TODO: Refine this + # nvar <- length(recipe$Analysis$Variables) + # if (nvar > 2) { + # error(recipe$Run$logger, + # "Only two types of Variables can be listed: ECVs and Indicators.") + # stop("EXECUTION FAILED") + # } + # remove NULL or None Indicators or ECVs from the recipe: if (!is.null(recipe$Analysis$Variables$Indicators) && !is.list(recipe$Analysis$Variables$Indicators)) { recipe$Analysis$Variables <- recipe$Analysis$Variables[ -which(names(recipe$Analysis$Variables) == 'Indicators')] } @@ -99,82 +187,138 @@ check_recipe <- function(recipe, logger) { recipe$Analysis$Variables <- recipe$Analysis$Variables[ -which(names(recipe$Analysis$Variables) == 'ECVs')] } + + # Region checks: + LIMITS <- c('latmin', 'latmax', 'lonmin', 'lonmax') + if (!all(LIMITS %in% names(recipe$Analysis$Region))) { + error(recipe$Run$logger, + paste0("There must be 4 elements in 'Region': ", + paste(LIMITS, collapse = ", "), ".")) + error_status <- T + } + ## TODO: Implement multiple regions + # nregions <- length(recipe$Analysis$Region) + # for (i in 1:length(recipe$Analysis$Region)) { + # if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { + # limits <- paste(limits, collapse = " ") + # error(recipe$Run$logger, + # paste0("Each region defined in element 'Region' ", + # "should have 4 elements: ", + # paste(limits, collapse = ", "), ".")) + # error_status <- T + # } + # if (length(recipe$Analysis$Region) > 1) { + # if (!("name" %in% names(recipe$Analysis$Region[[i]]))) { + # error(recipe$Run$logger, + # paste("If multiple regions are requested, each region must", + # "have a 'name'.")) + # # are numeric?
class list mode list + # } + # --------------------------------------------------------------------- + # WORKFLOW CHECKS + # --------------------------------------------------------------------- + # Only one Calibration method allowed: - if ((is.logical(recipe$Analysis$Workflow$Calibration[[1]]) && - recipe$Analysis$Workflow$Calibration[[1]] == FALSE) || - recipe$Analysis$Workflow$Calibration[[1]] == 'None' || - is.null(recipe$Analysis$Workflow$Calibration[[1]])) { - warn(logger, - "There is no Calibration method selected, raw data verification.") - recipe$Analysis$Workflow$Calibration[[1]] <- FALSE + if ((is.logical(recipe$Analysis$Workflow$Calibration$method) && + recipe$Analysis$Workflow$Calibration$method == FALSE) || + tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || + is.null(recipe$Analysis$Workflow$Calibration$method)) { + warn(recipe$Run$logger, + "No Calibration method was specified, raw data verification.") + recipe$Analysis$Workflow$Calibration$method <- 'raw' } else { - # remove multiple calibration methods - if (is.null(names(recipe$Analysis$Workflow$Calibration))) { - error(logger, - "The 'Calibration' element should specified at least the 'method'.") - stop("EXECUTION FAILED") + if (is.null(recipe$Analysis$Workflow$Calibration$method)) { + error(recipe$Run$logger, + "The 'Calibration' element 'method' must be specified.") + error_status <- T } - } - - if ("Region" %in% names(recipe$Analysis)) { - nregions <- length(recipe$Analysis$Region$Regional) - limits <- c('latmin', 'latmax', 'lonmin', 'lonmax') - for (i in 1:length(recipe$Analysis$Region)) { - if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { - limits <- paste(limits, collapse = " ") - error(logger, - paste("Each region defined in element 'Regional'", - "should have 4 elements:", - limits)) - stop("EXECUTION FAILED") - } - # are numeric? 
class list mode list + } + # Anomalies + if ("Anomalies" %in% names(recipe$Analysis$Workflow)) { + if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { + error(recipe$Run$logger, + "Parameter 'compute' must be defined under 'Anomalies'.") + error_status <- T + } else if (!(is.logical(recipe$Analysis$Workflow$Anomalies$compute))) { + error(recipe$Run$logger, + paste("Parameter 'Anomalies:compute' must be a logical value", + "(True/False or yes/no).")) + error_status <- T + } else if ((recipe$Analysis$Workflow$Anomalies$compute) && + (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation))) { + error(recipe$Run$logger, + paste("If anomaly computation is requested, parameter", + "'cross_validation' must be defined under 'Anomalies', + and it must be a logical value (True/False or yes/no).")) + error_status <- T + } + } + # Skill + if (("Skill" %in% names(recipe$Analysis$Workflow)) && + (is.null(recipe$Analysis$Workflow$Skill$metric))) { + error(recipe$Run$logger, + "Parameter 'metric' must be defined under 'Skill'.") + error_status <- T + } + # Probabilities + if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { + if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { + error(recipe$Run$logger, + "Parameter 'percentiles' must be defined under 'Probabilities'.") + error_status <- T + } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { + error(recipe$Run$logger, + paste("Parameter 'Probabilities:percentiles' expects a list.", + "See documentation in the wiki for examples.")) + error_status <- T } - } else { - error(logger, - paste("'Region'", - "should be defined", - limits)) - stop("EXECUTION FAILED") } - + # --------------------------------------------------------------------- # RUN CHECKS # --------------------------------------------------------------------- - RUN_FIELDS = c("Loglevel","Terminal","output_dir","code_dir") - LOG_LEVELS = c("INFO","DEBUG","WARNING","ERROR") + RUN_FIELDS = c("Loglevel", "Terminal", "output_dir", "code_dir") + LOG_LEVELS = c("INFO", "DEBUG", "WARN", "ERROR", "FATAL") - if (!any(names(recipe) %in% "Run")) { - error(logger, "The recipe should contain an element called 'Run'.") + if (!("Run" %in% names(recipe))) { + stop("The recipe must contain an element named 'Run'.") } if (!all(RUN_FIELDS %in% names(recipe$Run))) { - error(logger, paste0("Run should contain the fields: ", - paste(RUN_FIELDS,collapse=", "), ".")) + error(recipe$Run$logger, paste("Recipe element 'Run' must contain", + "all of the following fields:", + paste(RUN_FIELDS, collapse=", "), ".")) + error_status <- T } if (!is.character(recipe$Run$output_dir)) { - error(logger, - paste("The Run element 'output_dir' in", recipe$filename,"file ", - "should be a character string indicating the path ", - "where to save the outputs.")) + error(recipe$Run$logger, + paste("The Run element 'output_dir' in", recipe$name, "file", + "should be a character string indicating the path where", + "the outputs should be saved.")) + error_status <- T } if (!is.character(recipe$Run$code_dir)) { - error(logger, - paste("The Run element 'code_dir' in", recipe$filename,"file ", - "should be a character string indicating the path ", + error(recipe$Run$logger, + paste("The Run element 'code_dir' in", recipe$name, "file ", + "should be a character string indicating the path", "where the code is.")) + error_status <- T } if (!is.logical(recipe$Run$Terminal)) { - error(logger, - paste("The Run element 'Terminal' in", recipe$filename,"file ", - "should be a boolean value 
indicating wether to print or not the log", - "in the terminal.")) + error(recipe$Run$logger, + paste("The Run element 'Terminal' in", recipe$name, "file ", + "should be a boolean value indicating whether or not to", + "print the logs in the terminal.")) + error_status <- T } - if (!is.character(recipe$Run$Loglevel) || !any(recipe$Run$Loglevel %in% LOG_LEVELS)) { + ## TODO: Review this case, since default value is allowed + if (!is.character(recipe$Run$Loglevel) || + !any(recipe$Run$Loglevel %in% LOG_LEVELS)) { error(logger, - paste("The Run element 'Loglevel' in", recipe$filename,"file ", - "should be a character string indicating one of the levels available: ", - paste0(LOG_LEVELS,collapse='/'))) + paste("The Run element 'Loglevel' in", recipe$name, "file", + "should be a character string specifying one of the levels available:", + paste0(LOG_LEVELS, collapse='/'))) + error_status <- T } # --------------------------------------------------------------------- @@ -182,10 +326,19 @@ check_recipe <- function(recipe, logger) { # --------------------------------------------------------------------- # Check workflow: need to define restrictions? # e.g. only one calibration method - nverifications <- check_number_of_dependent_verifications(recipe) - info(logger, paste("Start Dates", paste(fcst.sdate, collapse = " "))) - info(logger, "Recipe checked succsessfully.") - return(append(nverifications, fcst.sdate)) + ## TODO: Implement number of dependent verifications + #nverifications <- check_number_of_dependent_verifications(recipe) + # info(recipe$Run$logger, paste("Start Dates:", + # paste(fcst.sdate, collapse = " "))) + + # Return error if any check has failed + if (error_status) { + error(recipe$Run$logger, "RECIPE CHECK FAILED.") + stop("The recipe contains some errors. The full list is in the logs.") + } else { + info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFULL #####") + # return(append(nverifications, fcst.sdate)) + } } check_number_of_dependent_verifications <- function(recipe) { diff --git a/tools/data_summary.R b/tools/data_summary.R index 34b6bd6e47d54477e1afc8cd846c4f3f7b54b0bd..d8f2b1b63f1a732ef08e72f712a1d24bbd49fa73 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -1,6 +1,5 @@ # Print a summary of the loaded data for the user, for each object. # object: hindcast, forecast or reference data in s2dv_cube format. 
-## TODO: Incorporate into logger
 ## TODO: Adapt to daily/subseasonal cases
 ## TODO: Add check for missing files/NAs by dimension
 
@@ -8,25 +7,32 @@ data_summary <- function(data_cube, recipe) {
   # Get name, leadtime months and date range
   object_name <- deparse(substitute(data_cube))
   if (recipe$Analysis$Variables$freq == "monthly_mean") {
-    date_format <- '%b %Y'
+    date_format <- "%b %Y"
   } else if (recipe$Analysis$Variables$freq == "daily_mean") {
-    date_format <- '%b %d %Y'
+    date_format <- "%b %d %Y"
   }
-  months <- unique(format(as.Date(data_cube$Dates[[1]]), format = '%B'))
-  months <- paste(as.character(months), collapse=", ")
+  months <- unique(format(as.Date(data_cube$Dates[[1]]), format = "%B"))
+  months <- paste(as.character(months), collapse = ", ")
   sdate_min <- format(min(as.Date(data_cube$Dates[[1]])), format = date_format)
   sdate_max <- format(max(as.Date(data_cube$Dates[[1]])), format = date_format)
-
+  # Send the summary to the logger, which writes to the logfile and terminal
   info(recipe$Run$logger, "DATA SUMMARY:")
-  sink(recipe$Run$logfile, append = TRUE, split = TRUE)
-  print(paste0(object_name, " months: ", months))
-  print(paste0(object_name, " range: ", sdate_min, " to ", sdate_max))
-  print(paste0(object_name, " dimensions: "))
-  print(dim(data_cube$data))
-  print(paste0("Statistical summary of the data in ", object_name, ":"))
-  print(summary(data_cube$data))
-  print("---------------------------------------------")
-  sink()
+  info(recipe$Run$logger, paste(object_name, "months:", months))
+  info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to",
+                                sdate_max))
+  info(recipe$Run$logger, paste(object_name, "dimensions:"))
+  # Use capture.output() and a for loop to log multi-line output neatly
+  output_string <- capture.output(dim(data_cube$data))
+  for (i in output_string) {
+    info(recipe$Run$logger, i)
+  }
+  info(recipe$Run$logger, paste0("Statistical summary of the data in ",
+                                 object_name, ":"))
+  output_string <- capture.output(summary(data_cube$data))
+  for (i in output_string) {
+    info(recipe$Run$logger, i)
+  }
+  info(recipe$Run$logger, "---------------------------------------------")
+  invisible(gc())
 }
-
diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R
index dafc8704aa8a686fa5885b684acbaa23e49c8f7e..18962f93010bae7fe6693beb48a5f485075954f5 100644
--- a/tools/divide_recipe.R
+++ b/tools/divide_recipe.R
@@ -1,11 +1,12 @@
-# recipe: the content of the recipe
-# verifications: the output from check_recipe
-# folder: the name of the output folder for this run
-# logger: the log file obtain from prepare_outputs
-divide_recipe <- function(recipe, verifications, folder, logger) {
-  info(logger, "Spliting recipe in single verifications.")
+# recipe: the recipe as returned by prepare_outputs()
+divide_recipe <- function(recipe) {
+
+  ## TODO: Implement dependent vs independent verifications?
+ info(recipe$Run$logger, "Spliting recipe in single verifications.") beta_recipe <- list(Description = append(recipe$Description, - "split version"), + list(Origin = paste("Atomic recipe,", + "split from:", + recipe$name))), Analysis = list(Horizon = recipe$Analysis$Horizon, Variables = NULL, Datasets = NULL, @@ -14,38 +15,38 @@ divide_recipe <- function(recipe, verifications, folder, logger) { Regrid = recipe$Analysis$Regrid, Workflow = recipe$Analysis$Workflow, Output_format = - recipe$Analysis$Output_format), - Run = recipe$Run) - # duplicate recipe by Variables considering dep and indep: - all_recipes <- list(beta_recipe) - i <- 1 # to get track of the recipe number - for (indep in verifications$independent) { - all_recipes[[i]]$Analysis$Variables <- indep - i = i + 1 - all_recipes <- append(all_recipes, list(beta_recipe)) + recipe$Analysis$Output_format), + Run = recipe$Run[c("Loglevel", "output_dir", "Terminal", + "code_dir", "logfile")]) + + # duplicate recipe by independent variables: + all_recipes <- rep(list(beta_recipe), length(recipe$Analysis$Variables)) + for (var in 1:length(recipe$Analysis$Variables)) { + all_recipes[[var]]$Analysis$Variables <- recipe$Analysis$Variables[[var]] } - for (dep in verifications$dependent) { - all_recipes[[i]]$Analysis$Variables <- dep - i = i + 1 - all_recipes <- append(all_recipes, list(beta_recipe)) - } - all_recipes <- all_recipes[-length(all_recipes)] + # for (dep in verifications$dependent) { + # all_recipes[[i]]$Analysis$Variables <- dep + # i = i + 1 + # all_recipes <- append(all_recipes, list(beta_recipe)) + # } + # all_recipes <- all_recipes[-length(all_recipes)] # wth does this do + # duplicate recipe by Datasets: # check Systems if (recipe$Analysis$Datasets$Multimodel) { for (reci in 1:length(all_recipes)) { - all_recipes[[reci]]$Analysis$Datasets <- list( - System = recipe$Analysis$Datasets$System, - Multimodel = recipe$Analysis$Datasets$Multimodel, - Reference = NULL) + all_recipes[[reci]]$Analysis$Datasets <- + list(System = recipe$Analysis$Datasets$System, + Multimodel = recipe$Analysis$Datasets$Multimodel, + Reference = NULL) } } else { for (sys in 1:length(recipe$Analysis$Datasets$System)) { for (reci in 1:length(all_recipes)) { - all_recipes[[reci]]$Analysis$Datasets <- list( - System = recipe$Analysis$Datasets$System[[sys]], - Multimodel = recipe$Analysis$Datasets$Multimodel, - Reference = NULL) + all_recipes[[reci]]$Analysis$Datasets <- + list(System = recipe$Analysis$Datasets$System[[sys]], + Multimodel = recipe$Analysis$Datasets$Multimodel, + Reference = NULL) } if (sys == 1) { recipes <- all_recipes @@ -72,28 +73,28 @@ divide_recipe <- function(recipe, verifications, folder, logger) { # Duplicate recipe by Region recipes <- list() for (reg in 1:length(recipe$Analysis$Region)) { - if (length(recipe$Analysis$Region[[reg]]) == 4) { ##TODO: THIS SHOULD BE ONLY CHECK IN THE RECIPE CHECKER? + # if (length(recipe$Analysis$Region[[reg]]) == 4) { ##TODO: THIS SHOULD BE ONLY CHECK IN THE RECIPE CHECKER? 
       for (reci in 1:length(all_recipes)) {
-        all_recipes[[reci]]$Analysis$Region <-
-          recipe$Analysis$Region[[reg]]
+        all_recipes[[reci]]$Analysis$Region <- recipe$Analysis$Region[[reg]]
       }
       recipes <- append(recipes, all_recipes)
-    }
+    # }
   }
   all_recipes <- recipes
   rm(list = 'recipes')
+
   # Duplicate recipe by start date
   if (tolower(recipe$Analysis$Horizon) == 'seasonal') {
-    for (sday in 1:length(recipe$Analysis$Time$sdate$fcst_sday)) {
+    for (sdate in 1:length(recipe$Analysis$Time$sdate)) {
       for (reci in 1:length(all_recipes)) {
-        all_recipes[[reci]]$Analysis$Time <- list(sdate = list(
-                             fcst_syear = recipe$Analysis$Time$sdate$fcst_syear,
-                             fcst_sday = recipe$Analysis$Time$sdate$fcst_sday[[sday]]),
-                             hcst_start = recipe$Analysis$Time$hcst_start,
-                             hcst_end = recipe$Analysis$Time$hcst_end,
-                             leadtimemin = recipe$Analysis$Time$leadtimemin,
-                             leadtimemax = recipe$Analysis$Time$leadtimemax)
+        all_recipes[[reci]]$Analysis$Time <-
+          list(sdate = recipe$Analysis$Time$sdate[[sdate]],
+               fcst_year = recipe$Analysis$Time$fcst_year,
+               hcst_start = recipe$Analysis$Time$hcst_start,
+               hcst_end = recipe$Analysis$Time$hcst_end,
+               ftime_min = recipe$Analysis$Time$ftime_min,
+               ftime_max = recipe$Analysis$Time$ftime_max)
       }
-      if (sday == 1) {
+      if (sdate == 1) {
         recipes <- all_recipes
       } else {
         recipes <- append(recipes, all_recipes)
@@ -102,12 +103,24 @@ divide_recipe <- function(recipe, verifications, folder, logger) {
     all_recipes <- recipes
     rm(list = 'recipes')
   } # Rest of horizons
-  # Finally, save all recipes in saparated yaml files
+  # Save all recipes in separate YAML files
   for (reci in 1:length(all_recipes)) {
+    # Zero-pad the recipe number so that the files sort correctly
+    recipe_number <- sprintf("%02d", reci)
     write_yaml(all_recipes[[reci]],
-               paste0(folder, "/logs/recipes/recipe_", reci, ".yml"))
+               paste0(recipe$Run$output_dir, "/logs/recipes/recipe_",
+                      recipe_number, ".yml"))
+    # Re-attach the logger, which is not written to the YAML file
+    all_recipes[[reci]]$Run$logger <- recipe$Run$logger
   }
-  text <- paste0("See folder ",folder,"/logs/recipes/ to see the individual recipes.")
-  info(logger, text)
+  info(recipe$Run$logger,
+       paste("The main recipe has been divided into", length(all_recipes),
+             "atomic recipes."))
+  text <- paste0("Check the output directory ", recipe$Run$output_dir,
+                 "/logs/recipes/ for the individual atomic recipes.")
+  info(recipe$Run$logger, text)
   return(all_recipes)
 }
diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R
index 8a6831788769db69196c0c4be100c2d00e7f2f68..1972aef0cb38f7e845ebd0fdfc1ca19a28950939 100644
--- a/tools/prepare_outputs.R
+++ b/tools/prepare_outputs.R
@@ -1,12 +1,12 @@
 #'Read recipe YAML file and create and store logfile info
 #'
 #'The purpose of this function is to read the recipe configuration for Auto-S2S
-#'workflows and create logfiles stores in an the output directory specified in
+#'workflows and create logfiles stored in the output directory specified in
 #'the recipe. It returns an object of class logger that stores information on
 #'the recipe configuration and errors.
 #'
 #'@param recipe_file path to a YAML file with Auto-S2S configuration recipe
-#'
-#'@return list contaning recipe with logger, log file name and log dir name
+#'@param disable_checks logical; if TRUE, the recipe checks are skipped
+#'@return list containing recipe with logger, log file name and log dir name
 #'
 #'@import log4r
@@ -20,10 +20,11 @@
 #'
 #'@export
 
-prepare_outputs <- function(recipe_file) {
+prepare_outputs <- function(recipe_file,
+                            disable_checks = FALSE) {
 
-# recipe: the content of the readed recipe
-# file: the recipe file name
+# recipe_file: path to recipe YAML file
+# disable_checks: if TRUE, does not perform checks on the recipe
 
  recipe <- read_yaml(recipe_file)
  recipe$recipe_path <- recipe_file
@@ -33,46 +34,49 @@ prepare_outputs <- function(recipe_file) {
  # Create output folders:
  folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)), "_",
                        gsub(" ", "", gsub(":", "", gsub("-", "", Sys.time()))))
-
  print("Saving all outputs to:")
  print(output_dir)
  print(folder_name)
-
  dir.create(file.path(output_dir, folder_name, 'outputs'), recursive = TRUE)
  dir.create(file.path(output_dir, folder_name, 'logs'))
  dir.create(file.path(output_dir, folder_name, 'logs', 'recipes'))
-
+  # Copy recipe to output folder
  file.copy(recipe$recipe_path, file.path(output_dir, folder_name, 'logs',
                                          'recipes'))
-
+  # Create log output file
  logfile <- file.path(output_dir, folder_name, 'logs', 'log.txt')
  file.create(logfile)
 
-  # Set default behaviour of log output file:
+  # Set default behaviour of logger
  if (is.null(recipe$Run)) {
    recipe$Run <- list(Loglevel = 'INFO', Terminal = TRUE)
  }
  if (is.null(recipe$Run$Loglevel)) {
    recipe$Run$Loglevel <- 'INFO'
  }
-
  if (!is.logical(recipe$Run$Terminal)) {
    recipe$Run$Terminal <- TRUE
  }
+  # logger set-up
  if (recipe$Run$Terminal) {
-    logger <- logger(threshold = recipe$Run$Loglevel,
-                     appenders = list(console_appender(layout = default_log_layout()),
-                                      file_appender(logfile, append = TRUE,
-                                                    layout = default_log_layout())))
+    logger <- log4r::logger(threshold = recipe$Run$Loglevel,
+                            appenders = list(console_appender(layout = default_log_layout()),
+                                             file_appender(logfile, append = TRUE,
+                                                           layout = default_log_layout())))
  } else {
-    logger <- logger(threshold = recipe$Run$Loglevel,
-                     appenders = list(file_appende(logfile, append = TRUE,
-                                                   layout = default_log_layout())))
+    logger <- log4r::logger(threshold = recipe$Run$Loglevel,
+                            appenders = list(file_appender(logfile, append = TRUE,
+                                                           layout = default_log_layout())))
  }
-
  recipe$Run$output_dir <- file.path(output_dir, folder_name)
  recipe$Run$logger <- logger
  recipe$Run$logfile <- logfile
-
+  # Run the recipe checker unless the user has disabled it
+  if (disable_checks) {
+    warn(recipe$Run$logger,
+         "Recipe checks disabled. The recipe will not be checked for errors.")
+  } else {
+    check_recipe(recipe)
+  }
  return(recipe)
}
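Usage note: the sketch below is not part of the diff; it is a minimal illustration of how the refactored helpers are expected to chain together after this change. The recipe path is hypothetical, and the commented data_summary() call assumes an s2dv_cube object named hcst; everything else follows the function signatures shown above.

    # Read the recipe, create the output/log folders, set up the log4r
    # logger, and run check_recipe() (unless disable_checks = TRUE):
    source("tools/prepare_outputs.R")
    source("tools/divide_recipe.R")
    recipe <- prepare_outputs("recipes/recipe_example.yml")  # hypothetical path

    # Split the main recipe into one atomic recipe per variable, system,
    # region and start date; each atomic recipe gets the shared logger back:
    atomic_recipes <- divide_recipe(recipe)

    # Hypothetical follow-up: log a summary of a loaded s2dv_cube object
    # data_summary(hcst, atomic_recipes[[1]])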