diff --git a/MODULES b/MODULES index abb6e01d423b27c52147ec33c6b2e6e7e51ec16f..0a01a9796c2753a74a5e946f00024e33777105ac 100644 --- a/MODULES +++ b/MODULES @@ -21,17 +21,6 @@ elif [ $BSC_MACHINE == "nord3v2" ]; then module load R/4.1.2-foss-2019b module load OpenMPI/4.0.5-GCC-8.3.0-nord3-v2 -elif [ $BSC_MACHINE == "nord3" ]; then - - module use /gpfs/projects/bsc32/software/suselinux/11/modules/all - module unuse /apps/modules/modulefiles/applications /apps/modules/modulefiles/applications_bis - module unuse /apps/modules/modulefiles/compilers /apps/modules/modulefiles/tools - module unuse /apps/modules/modulefiles/libraries /apps/modules/modulefiles/environment - module unuse /apps/modules/PRACE - - module load CDO/1.9.8-foss-2019b - module load R/3.6.2-foss-2019b - else module load CDO/1.9.8-foss-2015a diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml new file mode 100644 index 0000000000000000000000000000000000000000..5127ae14e566b4eef51ca5a4c61df06750e53f09 --- /dev/null +++ b/conf/variable-dictionary.yml @@ -0,0 +1,103 @@ + +vars: + +# ECVs + tas: + units: "K" + long_name: "Near-Surface Air Temperature" + standard_name: "air_temperature" +# outname: "t2" + tasmax: + units: "K" + long_name: "Maximum Near-Surface Air Temperature" + standard_name: "air_temperature" + tasmin: + units: "K" + long_name: "Minimum Near-Surface Air Temperature" + standard_name: "air_temperature" + sfcWind: + units: "m s-1" + long_name: "Near-Surface Wind Speed" + standard_name: "wind_speed" +# outname: "wind" + rsds: + units: "W m-2" + long_name: "Surface Downwelling Shortwave Radiation" + standard_name: "surface_downwelling_shortwave_flux_in_air" + positive: "down" +# outname: "rswin" + prlr: + units: "mm" + long_name: "Total precipitation" + standard_name: "total_precipitation_flux" #? 
Not in CF +# outname: "acprec" + +# Coordinates +coords: + longitude: + units: "degrees_east" + standard_name: "longitude" + long_name: "Longitude" + axis: "X" + latitude: + units: "degrees_north" + standard_name: "latitude" + long_name: "Latitude" + axis: "Y" + +# Skill metrics +metrics: + enscorr: + long_name: "Ensemble Mean Correlation Coefficient" + enscorr_specs: + long_name: "Ensemble Mean Correlation Coefficient" + enscorr_p.value: + long_name: "Ensemble Mean Correlation p-value" + enscorr_conf.low: + long_name: "Ensemble Mean Correlation Lower Confidence Interval" + enscorr_conf.up: + long_name: "Ensemble Mean Correlation Upper Confidence Interval" + enscorr_significance: + long_name: "Ensemble Mean Correlation Statistical Significance" + corr: + long_name: "Ensemble Correlation Coefficient" + corr_specs: + long_name: "Ensemble Correlation Coefficient" + corr_p.value: + long_name: "Ensemble Correlation p-value" + corr_conf.low: + long_name: "Ensemble Correlation Lower Confidence Interval" + corr_conf.up: + long_name: "Ensemble Correlation Upper Confidence Interval" + corr_significance: + long_name: "Ensemble Correlation Statistical Significance" + rps: + long_name: "Ranked Probability Score" + frps: + long_name: "Fair Ranked Probability Score" + rpss: + long_name: "Ranked Probability Skill Score" + rpss_significance: + long_name: "Ranked Probability Skill Score Statistical Significance" + rpss_specs: + long_name: "Ranked Probability Skill Score" + frpss: + long_name: "Fair Ranked Probability Skill Score" + frpss_significance: + long_name: "Fair Ranked Probability Skill Score Statistical Significance" + frpss_specs: + long_name: "Fair Ranked Probability Skill Score" + bss10: + long_name: "Brier Skill Score Lower Extreme" + bss10_specs: + long_name: "Brier Skill Score Lower Extreme" + bss10_significance: + long_name: "Brier Score Lower Extreme Statistical Significance" + bss90: + long_name: "Brier Skill Score Upper Extreme" + bss90_significance: + long_name: "Brier Skill Score Upper Extreme Statistical Significance" + crps: + long_name: "Continuous Ranked Probability Score" + crpss: + long_name: "Continuous Ranked Probability Skill Score" diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 1478ecd5f6a4e606b771fa4b326e7d545e60d0e6..85b1b0070612a4e96f9cf75652c2f56954491a00 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -9,53 +9,56 @@ calibrate_datasets <- function(data, recipe) { # # data: list of s2dv_cube objects containing the hcst, obs and fcst. # recipe: object obtained when passing the .yml recipe file to read_yaml() - hcst <- data$hcst - obs <- data$obs - fcst <- data$fcst - # Calibration function params - method <- recipe$Analysis$Workflow$Calibration$method - mm <- recipe$Analysis$Datasets$Multimodel - ncores <- 4 - na.rm <- T + method <- tolower(recipe$Analysis$Workflow$Calibration$method) - # Replicate observation array for the multi-model case - if (mm) { - obs.mm <- obs$data - for(dat in 1:(dim(hcst$data)['dat'][[1]]-1)) { - obs.mm <- abind(obs.mm, obs$data, - along=which(names(dim(obs$data)) == 'dat')) - } - names(dim(obs.mm)) <- names(dim(obs$data)) - obs$data <- obs.mm - remove(obs.mm) - } + if (method == "raw") { + warning("The Calibration module has been called, but the calibration ", + "method in the recipe is 'raw'. 
The hcst and fcst will not be ", + "calibrated.") + fcst_calibrated <- data$fcst + hcst_calibrated <- data$hcst + CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####" - if (recipe$Analysis$Variables$freq == "monthly_mean") { - - CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") - ## TODO: implement other calibration methods - ## TODO: Restructure the code? - if (!(method %in% CST_CALIB_METHODS)) { - stop("Calibration method in the recipe is not available for monthly", - " data.") + } else { + # Calibration function params + mm <- recipe$Analysis$Datasets$Multimodel + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 } else { - ## Alba's version of CST_Calibration (pending merge) is being used - # Calibrate the hindcast - hcst_calibrated <- CST_Calibration(hcst, obs, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - alpha = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - if (!is.null(fcst)) { - # Calibrate the forecast - fcst_calibrated <- CST_Calibration(hcst, obs, fcst, + ncores <- recipe$Analysis$ncores + } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } + + CALIB_MSG <- "##### CALIBRATION COMPLETE #####" + # Replicate observation array for the multi-model case + if (mm) { + obs.mm <- obs$data + for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { + obs.mm <- abind(obs.mm, data$obs$data, + along=which(names(dim(data$obs$data)) == 'dat')) + } + names(dim(obs.mm)) <- names(dim(obs$data)) + data$obs$data <- obs.mm + remove(obs.mm) + } + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + + CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") + ## TODO: implement other calibration methods + ## TODO: Restructure the code? + if (!(method %in% CST_CALIB_METHODS)) { + stop("Calibration method in the recipe is not available for monthly", + " data.") + } else { + ## Alba's version of CST_Calibration (pending merge) is being used + # Calibrate the hindcast + hcst_calibrated <- CST_Calibration(data$hcst, data$obs, cal.method = method, eval.method = "leave-one-out", multi.model = mm, @@ -66,47 +69,59 @@ calibrate_datasets <- function(data, recipe) { memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) + if (!is.null(data$fcst)) { + # Calibrate the forecast + fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } else { + fcst_calibrated <- NULL + } + } + } else if (recipe$Analysis$Variables$freq == "daily_mean") { + # Daily data calibration using Quantile Mapping + if (!(method %in% c("qmap"))) { + stop("Calibration method in the recipe is not available at daily ", + "frequency. 
Only quantile mapping 'qmap' is implemented.") + } + # Calibrate the hindcast + hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, + exp_cor = NULL, + sample_dims = c("syear", + "time", + "ensemble"), + sample_length = NULL, + method = "QUANT", + wet.day = FALSE, + ncores = ncores, + na.rm = na.rm) + + if (!is.null(data$fcst)) { + # Calibrate the forecast + fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, + exp_cor = data$fcst, + sample_dims = c("syear", + "time", + "ensemble"), + sample_length = NULL, + method = "QUANT", + wet.day = FALSE, + ncores = ncores, + na.rm = na.rm) } else { fcst_calibrated <- NULL } - - } - - } else if (recipe$Analysis$Variables$freq == "daily_mean") { - # Daily data calibration using Quantile Mapping - if (!(method %in% c("qmap"))) { - stop("Calibration method in the recipe is not available at daily ", - "frequency. Only quantile mapping 'qmap' is implemented.") - } - # Calibrate the hindcast - hcst_calibrated <- CST_QuantileMapping(hcst, obs, - exp_cor = NULL, - sample_dims = c("syear", - "time", - "ensemble"), - sample_length = NULL, - method = "QUANT", - ncores = ncores, - na.rm = na.rm) - - if (!is.null(fcst)) { - # Calibrate the forecast - fcst_calibrated <- CST_QuantileMapping(hcst, obs, - exp_cor = fcst, - sample_dims = c("syear", - "time", - "ensemble"), - sample_length = NULL, - method = "QUANT", - ncores = ncores, - na.rm = na.rm) - } else { - fcst_calibrated <- NULL } } - - print("##### CALIBRATION COMPLETE #####") +print(CALIB_MSG) ## TODO: Return observations too? - ## TODO: Change naming convention? return(list(hcst = hcst_calibrated, fcst = fcst_calibrated)) } diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index c9040b067b3f9573cb9f86f30ca508033e49fe3f..8f0c4626db0cb59456b9591d271547758875e8d5 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -43,14 +43,16 @@ load_datasets <- function(recipe_file) { idxs <- NULL idxs$hcst <- get_timeidx(sdates$hcst, - recipe$Analysis$Time$leadtimemin, - recipe$Analysis$Time$leadtimemax, - time_freq=store.freq) - - idxs$fcst <- get_timeidx(sdates$fcst, - recipe$Analysis$Time$leadtimemin, - recipe$Analysis$Time$leadtimemax, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, time_freq=store.freq) + + if (!(is.null(sdates$fcst))) { + idxs$fcst <- get_timeidx(sdates$fcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + } ## TODO: Examine this verifications part, verify if it's necessary # stream <- verifications$stream @@ -130,7 +132,7 @@ load_datasets <- function(recipe_file) { split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - if (recipe$Analysis$Variables$freq == "daily_mean"){ + if (recipe$Analysis$Variables$freq == "daily_mean") { # Adjusts dims for daily case, could be removed if startR allows # multidim split names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" @@ -139,14 +141,23 @@ load_datasets <- function(recipe_file) { latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(hcst))] <- dim(hcst) dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims } # 
Convert hcst to s2dv_cube object + ## TODO: Give correct dimensions to $Dates$start + ## (sday, sweek, syear instead of file_date) hcst <- as.s2dv_cube(hcst) # Load forecast #------------------------------------------------------------------- - if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)){ + if (!is.null(recipe$Analysis$Time$fcst_year)) { # the call uses file_date instead of fcst_syear so that it can work # with the daily case and the current version of startR not allowing # multiple dims split @@ -182,6 +193,13 @@ load_datasets <- function(recipe_file) { latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(fcst))] <- dim(fcst) dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims } # Convert fcst to s2dv_cube @@ -271,9 +289,10 @@ load_datasets <- function(recipe_file) { dim(obs) <- default_dims # Convert obs to s2dv_cube + + # Check for consistency between hcst and obs grid obs <- as.s2dv_cube(obs) if (!(recipe$Analysis$Regrid$type == 'none')) { - # Check for consistency between hcst and obs if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { stop("hcst and obs don't share the same latitude.") } diff --git a/modules/Loading/dates2load.R b/modules/Loading/dates2load.R index fe4228c00fb2af866140c427d2bc8faecb721c55..e1e8f89ea227b3aa9f6a0631ffcbaf04a612b30c 100644 --- a/modules/Loading/dates2load.R +++ b/modules/Loading/dates2load.R @@ -23,15 +23,15 @@ dates2load <- function(recipe, logger){ # hcst dates file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), - recipe$sdate$fcst_sday) + recipe$sdate) if (temp_freq == "monthly_mean"){ file_dates <- .add_dims(file_dates, "hcst") } # fcst dates (if fcst_year empty it creates an empty object) - if (! is.null(recipe$sdate$fcst_syear)){ - file_dates.fcst <- paste0(recipe$sdate$fcst_syear, recipe$sdate$fcst_sday) + if (! 
is.null(recipe$fcst_year)){ + file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) if (temp_freq == "monthly_mean"){ file_dates.fcst <- .add_dims(file_dates.fcst, "fcst") } @@ -70,17 +70,17 @@ get_timeidx <- function(sdates, ltmin, ltmax, if (time_freq == "daily_mean"){ sdates <- ymd(sdates) - idx_min <- sdates + months(ltmin) - idx_max <- sdates + months(ltmax+1) - days(1) + idx_min <- sdates + months(ltmin - 1) + idx_max <- sdates + months(ltmax) - days(1) indxs <- array(numeric(), c(file_date=length(sdates), - time=(as.integer(idx_max[1]-idx_min[1])+1) - #syear=length(sdates) - #sday=1,sweek=1)) + time = (as.integer(idx_max[1]-idx_min[1]+1)) + #syear = length(sdates), + #sday = 1, sweek = 1, )) for (sdate in 1:length(sdates)) { - days <- seq(idx_min[sdate],idx_max[sdate], by='days') + days <- seq(idx_min[sdate], idx_max[sdate], by='days') indxs[sdate,] <- days[!(format(days, "%m%d") == "0229")] } indxs <- as.POSIXct(indxs*86400, @@ -88,12 +88,12 @@ get_timeidx <- function(sdates, ltmin, ltmax, lubridate::hour(indxs) <- 12 lubridate::minute(indxs) <- 00 dim(indxs) <- c(file_date=length(sdates), - time=(as.integer(idx_max[1] - idx_min[1]) + 1)) + time=(as.integer(idx_max[1]-idx_min[1])+1)) } else if (time_freq == "monthly_mean") { - idx_min <- ltmin + 1 - idx_max <- ltmax + 1 + idx_min <- ltmin + idx_max <- ltmax indxs <- indices(idx_min:idx_max) } diff --git a/modules/Loading/testing_recipes/recipe_1.yml b/modules/Loading/testing_recipes/recipe_1.yml index 041c44af204cf0d5f101c93950808c96393b59d6..71e386fae2420e0bb4cdc01c241b40169d787f68 100644 --- a/modules/Loading/testing_recipes/recipe_1.yml +++ b/modules/Loading/testing_recipes/recipe_1.yml @@ -14,13 +14,12 @@ Analysis: Reference: name: era5 # Mandatory, str: Reference codename. See docu. Time: - sdate: - fcst_syear: '2020' # Optional, int: Forecast year 'YYYY' - fcst_sday: '1101' # Mandatory, int: Start date, 'MMDD' + sdate: '1101' + fcst_year: '2020' # Optional, int: Forecast year 'YYYY' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' - leadtimemin: 0 # Mandatory, int: First leadtime time step in months - leadtimemax: 1 # Mandatory, int: Last leadtime time step in months + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months Region: latmin: -10 # Mandatory, int: minimum latitude latmax: 10 # Mandatory, int: maximum latitude @@ -33,9 +32,13 @@ Analysis: Calibration: method: qmap # Mandatory, str: Calibration method. See docu. Skill: - metric: RPSS #Mandatory, str: Skill metric or list of skill metrics. See docu. + metric: RPSS FRPSS # str: Skill metric or list of skill metrics. See docu. + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
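+      # Each inner list is one set of category thresholds: [1/3, 2/3] defines
+      # terciles, [1/10, 9/10] the 10th and 90th percentiles.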
Indicators: index: no + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: Loglevel: INFO diff --git a/modules/Loading/testing_recipes/recipe_2.yml b/modules/Loading/testing_recipes/recipe_2.yml index b8f2d13acadda77d5b985807534eaa37b59dc4eb..27cdccdc89c56ea8ef0e3322063573c2017be744 100644 --- a/modules/Loading/testing_recipes/recipe_2.yml +++ b/modules/Loading/testing_recipes/recipe_2.yml @@ -13,13 +13,12 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: '2020' - fcst_sday: '1101' + sdate: '0601' + fcst_year: '2020' hcst_start: '1993' - hcst_end: '2016' - leadtimemin: 2 - leadtimemax: 4 + hcst_end: '2006' + ftime_min: 1 + ftime_max: 3 Region: latmin: -10 latmax: 10 @@ -30,14 +29,16 @@ Analysis: type: to_system Workflow: Calibration: - method: SBC + method: raw Skill: - metric: RPSS + metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + Probabilities: + percentiles: [[1/3, 2/3]] Indicators: index: no Output_format: S2S4E Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_3.yml b/modules/Loading/testing_recipes/recipe_3.yml index 4488d43d9282c7bc3190e48524c06e6f7f8e3933..233c14eb4a74c7dad982ddbdf3e335c16cad601e 100644 --- a/modules/Loading/testing_recipes/recipe_3.yml +++ b/modules/Loading/testing_recipes/recipe_3.yml @@ -13,13 +13,12 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: '2020' - fcst_sday: '1101' + sdate: '1101' + fcst_year: '2020' hcst_start: '1993' - hcst_end: '2016' - leadtimemin: 0 - leadtimemax: 0 + hcst_end: '2003' + ftime_min: 1 + ftime_max: 2 Region: latmin: -10 latmax: 10 @@ -32,12 +31,14 @@ Analysis: Calibration: method: qmap Skill: - metric: RPSS + metric: FRPS RPSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: no Output_format: S2S4E Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index e787a9fd6770fd450fc2f4b0cb5eead47ded05db..717ef6922f3015943e871790a37ad26ad2d85696 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -13,13 +13,12 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: '2020' - fcst_sday: '1101' + sdate: '1101' + fcst_year: '2020' hcst_start: '1993' hcst_end: '2016' - leadtimemin: 2 - leadtimemax: 3 + ftime_min: 1 + ftime_max: 3 Region: latmin: -10 latmax: 10 @@ -32,9 +31,13 @@ Analysis: Calibration: method: mse_min Skill: - metric: RPS FRPS RPSS FRPSS BSS10 BSS90 + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] Indicators: index: no + ncores: 1 + remove_NAs: yes Output_format: S2S4E Run: Loglevel: INFO diff --git a/modules/Loading/testing_recipes/recipe_5.yml b/modules/Loading/testing_recipes/recipe_5.yml index a3da66a54e056e40326b3ad016b4deeb65312198..3bfad08ea575ad8545371cd1a8384221ec369265 100644 --- 
a/modules/Loading/testing_recipes/recipe_5.yml +++ b/modules/Loading/testing_recipes/recipe_5.yml @@ -13,13 +13,12 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: # - fcst_sday: '0301' + sdate: '0301' + fcst_year: # hcst_start: '1993' hcst_end: '2016' - leadtimemin: 0 - leadtimemax: 0 + ftime_min: 1 + ftime_max: 1 Region: latmin: -10 latmax: 10 diff --git a/modules/Loading/testing_recipes/recipe_6.yml b/modules/Loading/testing_recipes/recipe_6.yml new file mode 100644 index 0000000000000000000000000000000000000000..b1829d22e76b8e1e94a3218af3b91d46715600e1 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_6.yml @@ -0,0 +1,44 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: CRPS CRPSS FCRPS FCRPSS FRPS_Specs + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_circular-sort-test.yml b/modules/Loading/testing_recipes/recipe_circular-sort-test.yml index 3c49f8074ffa050b94e986b454059da439a97006..700fd3b2a7fa6a41158c4fe51ea89f2ff32f639e 100644 --- a/modules/Loading/testing_recipes/recipe_circular-sort-test.yml +++ b/modules/Loading/testing_recipes/recipe_circular-sort-test.yml @@ -14,13 +14,12 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: - fcst_sday: '1101' + sdate: '1101' + fcst_year: hcst_start: '1993' hcst_end: '2003' - leadtimemin: 1 - leadtimemax: 1 + leadtimemin: 2 + leadtimemax: 2 Region: latmin: -10 latmax: 10 diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index d4e568d17237b518d226127bda7553c5bcb59296..bcbcc7366c9e871ed47d67ba6559a8acd670bf11 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -32,7 +32,9 @@ Analysis: Calibration: method: bias Skill: - metric: RPSS + metric: RPSS Corr + Probabilities: + percentiles: [[1/3, 2/3]] Indicators: index: FALSE Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index 91a957dfb57d9308e8dcfddd17ab61acda9b45cd..457dccf6f46a9bcbe8a2c6da15525b6c0ac1aa96 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -32,7 +32,9 @@ Analysis: Calibration: method: qmap Skill: - metric: RPSS + metric: RPSS FRPSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: FALSE Output_format: S2S4E diff --git a/modules/Saving/Save_data.R b/modules/Saving/Save_data.R deleted file mode 100644 index 66f25baa3c07ab409ea0cbc94b4ab6bf0bfbab9e..0000000000000000000000000000000000000000 --- a/modules/Saving/Save_data.R +++ /dev/null @@ -1,141 +0,0 @@ -## TODO: Implement wrapper to get grid and time info? 
- -get_times <- function(fcst.horizon, leadtimes, sdate) { - # Generates time dimensions and the corresponding metadata. - - switch(fcst.horizon, - ## TODO: Remove "sub_obs"? - ## TODO: implement daily case - "seasonal" = {len <- length(leadtimes); ref <- 'months since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "sub_obs" = {len <- 52; ref <- 'week of the year '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- 4; ref <- 'weeks since '; - stdname <- ''} - ) - - ## TODO: Get correct date for each time step - time <- 1:len ## Is this correct? I think it needs to be changed - dim(time) <- length(time) - # metadata <- list(time = list(standard_name = stdname, - metadata <- list(time = list(units = paste0(ref, sdate, ' 00:00:00'))) - attr(time, 'variables') <- metadata - names(dim(time)) <- 'time' - - time_step <- 1 - dim(time_step) <- length(time_step) - metadata <- list(time_step = list(units = paste0(ref, sdate, ' 00:00:00'))) - attr(time_step, 'variables') <- metadata - names(dim(time_step)) <- 'time_step' - - sdate <- 1:length(sdate) - dim(sdate) <- length(sdate) - metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), - collapse=", "), - units = paste0('Init date'))) - attr(sdate, 'variables') <- metadata - names(dim(sdate)) <- 'sdate' - - return(list(time_step=time_step, time=time, sdate=sdate)) - -} - -get_latlon <- function(latitude, longitude) { - # Adds dimensions and metadata to lat and lon - # latitude: array containing the latitude values - # longitude: array containing the longitude values - - dim(longitude) <- length(longitude) - ## TODO: Extract metadata from s2dv_cube - metadata <- list(longitude = list(units = 'degrees_east')) - attr(longitude, 'variables') <- metadata - names(dim(longitude)) <- 'longitude' - - dim(latitude) <- length(latitude) - ## TODO: Extract metadata from s2dv_cube - metadata <- list(latitude = list(units = 'degrees_north')) - attr(latitude, 'variables') <- metadata - names(dim(latitude)) <- 'latitude' - - return(list(lat=latitude, lon=longitude)) - -} - -## TODO: Place inside a function somewhere -# if (tolower(agg) == "country"){ -# load(mask.path) -# grid <- europe.countries.iso -# } else { -# grid <- list(lon=attr(var.obs, 'Variables')$dat1$longitude, -# lat=attr(var.obs, 'Variables')$dat1$latitude) -# } - -save_metrics <- function(skill, - recipe, - data_cube, - outfile, - leadtimes, - agg="global") { - - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - - # Remove singleton dimensions from each metric array and rearrange the - # longitude, latitude and time dimensions to the correct order. - if (tolower(agg) == "global") { - ## TODO: Implement metrics with additional non-singleton dimensions - ## e.g. 'ensemble' in ensemble correlation. 
- skill <- lapply(skill, function(x) { - Reorder(drop(x[[1]]), c(lalo, 'time'))}) - } - - for (i in 1:length(skill)) { - ## TODO: create dictionary with proper metadata - metric <- names(skill[i]) - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'time') - } else { - sdname <- paste0(metric, " grid point metric") # formerly names(metric) - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = metric, standard_name = sdname)) - - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } - - # Time data and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - leadtimes <- seq(from = recipe$Analysis$Time$leadtimemin, - to = recipe$Analysis$Time$leadtimemax) - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. - if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { - fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, - recipe$Analysis$Time$sdate$fcst_sday) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate$fcst_sday) - } - - times <- get_times(fcst.horizon, leadtimes, fcst.sdate) - time <- times$time - time_step <- times$time_step - - # Grid data and metadata -# if (tolower(agg) == "country") { -# country <- get_countries(grid) -# ArrayToNc(append(country, time, skill, time_step), outfile) -# } else { - - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] - latlon <- get_latlon(latitude, longitude) - - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill, list(time_step)) - ArrayToNc(vars, outfile) - print("##### SKILL METRICS SAVED TO NETCDF FILE #####") - -} - diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R new file mode 100644 index 0000000000000000000000000000000000000000..1c56e459807dec531abca693c05bf1bcc4b38a22 --- /dev/null +++ b/modules/Saving/Saving.R @@ -0,0 +1,681 @@ +## TODO: Save obs percentiles + +source("modules/Saving/paths2save.R") + +save_data <- function(recipe, data, calibrated_data, + skill_metrics = NULL, + probabilities = NULL) { + # Wrapper for the saving functions. + # recipe: The auto-s2s recipe + # data: output of load_datasets() + # calibrated_data: output of calibrate_datasets() + # skill_metrics: output of compute_skill_metrics() + # probabilities: output of compute_probabilities() + + dict <- read_yaml("conf/variable-dictionary.yml") + + # Create output directory + outdir <- get_dir(recipe) + dir.create(outdir, showWarnings = FALSE, recursive = TRUE) + + # Export hindcast, forecast and observations onto outfile + save_forecast(calibrated_data$hcst, recipe, dict, outdir) + if (!is.null(calibrated_data$fcst)) { + save_forecast(calibrated_data$fcst, recipe, dict, outdir) + } + save_observations(data$obs, recipe, dict, outdir) + + # Export skill metrics onto outfile + if (!is.null(skill_metrics)) { + save_metrics(skill_metrics, recipe, dict, data$hcst, outdir) + if ("corr" %in% names(skill_metrics)) { + save_corr(skill_metrics, recipe, dict, data$hcst, outdir) + } + } + + # Export probabilities onto outfile + if (!is.null(probabilities)) { + save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir) + save_probabilities(probabilities$probs, recipe, data$hcst, outdir) + } +} + +get_global_attributes <- function(recipe) { + # Generates metadata of interest to add to the global attributes of the + # netCDF files. 
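+  # The returned list is attached to each exported array in the save_*()
+  # functions below, via attr(x, 'global_attrs') <- get_global_attributes(recipe).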
+ parameters <- recipe$Analysis + hcst_period <- paste0(parameters$Time$hcst_start, " to ", + parameters$Time$hcst_end) + current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) + + attrs <- list(reference_period = hcst_period, + institution = "BSC-CNS", + system = parameters$Datasets$System$name, + reference = parameters$Datasets$Reference$name, + calibration_method = parameters$Workflow$Calibration$method, + computed_on = current_time) + + return(attrs) +} + +get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { + # Generates time dimensions and the corresponding metadata. + ## TODO: Add calendar + ## TODO: Subseasonal and decadal + + switch(fcst.horizon, + "seasonal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}, + "subseasonal" = {len <- 4; ref <- 'hours since '; + stdname <- ''}) + + dim(time) <- length(time) + sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting + metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'))) + attr(time, 'variables') <- metadata + names(dim(time)) <- 'time' + + sdate <- 1:length(sdate) + dim(sdate) <- length(sdate) + metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), + collapse=", "), + units = paste0('Init date'))) + attr(sdate, 'variables') <- metadata + names(dim(sdate)) <- 'sdate' + + return(list(time=time)) +} + +get_latlon <- function(latitude, longitude) { + # Adds dimensions and metadata to lat and lon + # latitude: array containing the latitude values + # longitude: array containing the longitude values + + dim(longitude) <- length(longitude) + metadata <- list(longitude = list(units = 'degrees_east')) + attr(longitude, 'variables') <- metadata + names(dim(longitude)) <- 'longitude' + + dim(latitude) <- length(latitude) + metadata <- list(latitude = list(units = 'degrees_north')) + attr(latitude, 'variables') <- metadata + names(dim(latitude)) <- 'latitude' + + return(list(lat=latitude, lon=longitude)) + +} + +save_forecast <- function(data_cube, + recipe, + dictionary, + outdir, + agg="global") { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + + variable <- data_cube$Variable$varName + var.longname <- attr(data_cube$Variable, 'variable')$long_name + global_attributes <- get_global_attributes(recipe) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? 
+ dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% hours(1) + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- data_cube$data[ , , , , i, , , , ] # Select year + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- attr(data_cube$Variable, 'variable')$units + } else { + dims <- c(lalo, 'ensemble', 'time') + var.expname <- variable + var.sdname <- var.longname + var.units <- attr(data_cube$Variable, 'variable')$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + global_attributes <- get_global_attributes(recipe) + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date + fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "exp") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + print("##### FCST SAVED TO NETCDF FILE #####") +} + + +save_observations <- function(data_cube, + recipe, + dictionary, + outdir, + agg="global") { + # Loops over the years in the s2dv_cube containing the observations and + # exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + + variable <- data_cube$Variable$varName + var.longname <- attr(data_cube$Variable, 'variable')$long_name + global_attributes <- get_global_attributes(recipe) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? 
+ dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% hours(1) + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- data_cube$data[ , , , , i, , , , ] # Select year + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- attr(data_cube$Variable, 'variable')$units + } else { + dims <- c(lalo, 'time') + var.expname <- variable + var.units <- attr(data_cube$Variable, 'variable')$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + global_attributes <- get_global_attributes(recipe) + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date. The date is computed for each year, and adapted for + # consistency with the hcst/fcst dates, so that both sets of files have + # the same name pattern. + ## Because observations are loaded differently in the daily vs. monthly + ## cases, different approaches are necessary. 
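+    ## Monthly file_date entries are assumed to follow the 'YYYYMM' pattern,
+    ## hence the "01" appended below to build a complete date.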
+ if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$Dates$start[i]) + } + # Ensure the year is correct if the first leadtime goes to the next year + if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { + lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 + } + # Ensure that the initialization month is consistent with the hindcast + lubridate::month(fcst.sdate) <- lubridate::month(init_date) + fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "obs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + print("##### OBS SAVED TO NETCDF FILE #####") +} + +## TODO: Place inside a function somewhere +# if (tolower(agg) == "country"){ +# load(mask.path) +# grid <- europe.countries.iso +# } else { +# grid <- list(lon=attr(var.obs, 'Variables')$dat1$longitude, +# lat=attr(var.obs, 'Variables')$dat1$latitude) +# } + +save_metrics <- function(skill, + recipe, + dictionary, + data_cube, + outdir, + agg="global") { + # This function adds metadata to the skill metrics in 'skill' + # and exports them to a netCDF file inside 'outdir'. + + # Remove ensemble correlation from the list since it should be saved in + # a separate file, as it has 'ensemble' dim. 
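+  # Any metric whose name starts with "corr" (e.g. corr_p.value,
+  # corr_significance) is dropped here and exported by save_corr() instead.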
+ if ("corr" %in% names(skill)) { + corr_metrics <- grep("^corr", names(skill)) + skill <- skill[-corr_metrics] + } + + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + skill <- lapply(skill, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + + # Add global and variable attributes + global_attributes <- get_global_attributes(recipe) + attr(skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(skill)) { + metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'time') + } else { + sdname <- paste0(metric, " grid point metric") + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + # Generate vector containing leadtimes + dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% hours(1) + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "skill") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, skill), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill) + ArrayToNc(vars, outfile) + } + print("##### SKILL METRICS SAVED TO NETCDF FILE #####") +} + +save_corr <- function(skill, + recipe, + dictionary, + data_cube, + outdir, + agg="global") { + # This function adds metadata to the ensemble correlation in 'skill' + # and exports it to a netCDF file inside 'outdir'. 
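+  # Unlike the arrays handled by save_metrics(), these keep the 'ensemble'
+  # dimension, so the exported fields are (longitude, latitude, ensemble, time).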
+ + # Select ensemble correlation from the list of metrics + if ("corr" %in% names(skill)) { + corr_metrics <- grep("^corr", names(skill)) + skill <- skill[corr_metrics] + } + + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + skill <- lapply(skill, function(x) { + Reorder(x, c(lalo, 'ensemble', 'time'))}) + } + + # Add global and variable attributes + global_attributes <- get_global_attributes(recipe) + attr(skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(skill)) { + metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'ensemble', 'time') + } else { + sdname <- paste0(metric, " grid point metric") # formerly names(metric) + dims <- c(lalo, 'ensemble', 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + # Generate vector containing leadtimes + dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% hours(1) + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "corr") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, skill), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill) + ArrayToNc(vars, outfile) + } + print("##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") +} + +save_percentiles <- function(percentiles, + recipe, + data_cube, + outdir, + agg="global") { + # This function adds metadata to the percentiles + # and exports them to a netCDF file inside 'outdir'. 
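+  # The element names of 'percentiles' are expected to end in the percentile
+  # number: everything up to the last underscore is stripped to build the
+  # long_name, e.g. a name ending in "_90" becomes "90th percentile".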
+ + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + percentiles <- lapply(percentiles, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + + # Add global and variable attributes + global_attributes <- get_global_attributes(recipe) + attr(percentiles[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(percentiles)) { + ## TODO: replace with proper standard names + percentile <- names(percentiles[i]) + long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = percentile, long_name = long_name)) + attr(percentiles[[i]], 'variables') <- metadata + names(dim(percentiles[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + # Generate vector containing leadtimes + dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + # Get time difference in hours + leadtimes <- interval(init_date, dates) %/% hours(1) + + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "percentiles") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, percentiles), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, percentiles) + ArrayToNc(vars, outfile) + } + print("##### PERCENTILES SAVED TO NETCDF FILE #####") +} + +save_probabilities <- function(probs, + recipe, + data_cube, + outdir, + agg="global") { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports the corresponding category probabilities to a netCDF file. + # probs: array containing the probability data + # recipe: the auto-s2s recipe + # data_cube: s2dv_cube containing the data and metadata + # outdir: directory where the files should be saved + # type: 'exp' (hcst and fcst) or 'obs' + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + + variable <- data_cube$Variable$varName + var.longname <- attr(data_cube$Variable, 'variable')$long_name + global_attributes <- get_global_attributes(recipe) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? 
+ dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + # Get time difference in hours + leadtimes <- interval(init_date, dates) %/% hours(1) + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + for (i in syears) { + # Select year from array and rearrange dimensions + probs_syear <- lapply(probs, function(x) { + x[i, , , ]}) + # Restore time dimension if the arrays are missing it + if (!("time" %in% names(dim(probs_syear[[1]])))) { + probs_syear <- lapply(probs_syear, function(x) { + dim(x) <- c("time" = 1, dim(x))}) + } + if (tolower(agg) == "global") { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c(lalo, 'time'))}) + } else { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c('country', 'time'))}) + } + + ## TODO: Replace for loop with something more efficient? + for (bin in 1:length(probs_syear)) { + prob_bin <- names(probs_syear[bin]) + long_name <- paste0(prob_bin, " probability category") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = prob_bin, long_name = long_name)) + attr(probs_syear[[bin]], 'variables') <- metadata + names(dim(probs_syear[[bin]])) <- dims # is this necessary? + } + + # Add global attributes + global_attributes <- get_global_attributes(recipe) + attr(probs_syear[[1]], 'global_attrs') <- global_attributes + + # Select start date + fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "probs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, probs_syear), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, probs_syear) + ArrayToNc(vars, outfile) + } + } + print("##### PROBABILITIES SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/export_2_nc-s2s4e.R b/modules/Saving/export_2_nc-s2s4e.R index 9adb6d80f667c0cbc283f8050144266e2cb91aee..abf526e5f105021b41ece65ff5b52be09b8e4f51 100644 --- a/modules/Saving/export_2_nc-s2s4e.R +++ b/modules/Saving/export_2_nc-s2s4e.R @@ -22,6 +22,7 @@ save_bias <- function(variable, obs <- data; units <- "ÂșC"; var.longname <- "Temperature bias" } else { + # Unit conversion data.conv <- convert_data(list(fcst=data,test=data),variable,leadtimes,fcst.type,"forecast") obs <- data.conv$data$fcst; units <- data.conv$units; var.longname <- data.conv$var.longname @@ -199,7 +200,8 @@ save_forecast <- function(variable, } else { fcst <- Reorder(fcst, c('country','member', 'time')) } - + + # Unit conversion fcst.conv <- convert_data(list(fcst=fcst,test=fcst),variable,leadtimes,fcst.type,"forecast") fcst <- fcst.conv$data$fcst; units <- fcst.conv$units; var.longname <- fcst.conv$var.longname diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R new file mode 100644 
index 0000000000000000000000000000000000000000..4974cbd830565743ef5ac6f065be4f39c33072ce --- /dev/null +++ b/modules/Saving/paths2save.R @@ -0,0 +1,63 @@ +## TODO: Separate by time aggregation + +get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { + # This function builds the path of the output file based on directory, + # variable, forecast date, startdate, aggregation, forecast horizon and + # type of metric/forecast/probability. + + if (horizon == "subseasonal") { + shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%V") + dd <- "week" + } else { + shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%m") + dd <- "month" + } + + switch(tolower(agg), + "country" = {gg <- "-country"}, + "global" = {gg <- ""}) + + switch(file.type, + "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, + "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, + "exp" = {file <- paste0(var, gg, "_", date)}, + "obs" = {file <- paste0(var, gg, "-obs_", date)}, + "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, + shortdate)}, + "probs" = {file <- paste0(var, gg, "-probs_", date)}) + + return(paste0(dir, file, ".nc")) + +} + +get_dir <- function(recipe, agg = "global") { + # This function builds the path for the output directory. The output + # directories will be subdirectories within outdir, organized by variable, + # startdate, and aggregation. + + ## TODO: Get aggregation from recipe + ## TODO: Add time frequency + + outdir <- recipe$Run$output_dir + variable <- recipe$Analysis$Variables$name + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) + } + + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + store.freq <- recipe$Analysis$Variables$freq + + switch(tolower(agg), + "country" = {dir <- paste0(outdir, "/", calib.method, "-", + store.freq, "/", variable, + "_country/", fcst.sdate, "/")}, + "global" = {dir <- paste0(outdir, "/", calib.method, "-", + store.freq, "/", variable, "/", + fcst.sdate, "/")}) + + return(dir) + +} diff --git a/modules/Skill/CRPS.R b/modules/Skill/CRPS.R new file mode 100644 index 0000000000000000000000000000000000000000..942ec9e4796860aac501001b322b4cf5e024c1ef --- /dev/null +++ b/modules/Skill/CRPS.R @@ -0,0 +1,119 @@ +#'Compute the Continuous Ranked Probability Score +#' +#'The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous +#'version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill metric +#'to evaluate the full distribution of probabilistic forecasts. It has a negative +#'orientation (i.e., the higher-quality forecast the smaller CRPS) and it rewards +#'the forecast that has probability concentration around the observed value. In case +#'of a deterministic forecast, the CRPS is reduced to the mean absolute error. It has +#'the same units as the data. The function is based on enscrps_cpp from SpecsVerification. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. 
+#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. +#'@param Fair A logical indicating whether to compute the FairCRPS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of CRPS with the same dimensions as "exp" except the +#''time_dim' and 'memb_dim' dimensions. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- CRPS(exp = exp, obs = obs) +#' +#'@import multiApply +#'@importFrom SpecsVerification enscrps_cpp +#'@importFrom ClimProjDiags Subset +#'@export +CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', + Fair = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + ## exp and obs (2) + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]),1)){ + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else {stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1).")} + } + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim'.")) + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + crps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim), + output_dims = time_dim, + fun = .CRPS, Fair = Fair, + ncores = ncores)$output1 + + # Return only the mean RPS + crps <- MeanDims(crps, time_dim, na.rm = FALSE) + + return(crps) +} + +.CRPS <- function(exp, obs, Fair = FALSE) { + # exp: [sdate, memb] + # obs: [sdate] + + if (Fair) { # FairCRPS + R_new <- Inf + } else {R_new <- NA} + 
+  crps <- SpecsVerification::enscrps_cpp(ens = exp, obs = obs, R_new = R_new)
+
+  return(crps)
+}
diff --git a/modules/Skill/CRPSS.R b/modules/Skill/CRPSS.R
new file mode 100644
index 0000000000000000000000000000000000000000..9f5edbd5fe1372fedc3c130cd49a16afa7b24fd6
--- /dev/null
+++ b/modules/Skill/CRPSS.R
@@ -0,0 +1,172 @@
+#'Compute the Continuous Ranked Probability Skill Score
+#'
+#'The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the skill score
+#'based on the Continuous Ranked Probability Score (CRPS; Wilks, 2011). It can be used to
+#'assess whether a forecast presents an improvement or worsening with respect to
+#'a reference forecast. The CRPSS ranges between minus infinity and 1. If the
+#'CRPSS is positive, it indicates that the forecast has higher skill than the
+#'reference forecast, while a negative value means that it has lower skill.
+#'Examples of reference forecasts are the climatological forecast (same
+#'probabilities for all categories for all time steps), persistence, a previous
+#'model version, and another model. It is computed as CRPSS = 1 - CRPS_exp / CRPS_ref.
+#'The statistical significance is obtained based on a Random Walk test at the
+#'95% confidence level (DelSole and Tippett, 2016).
+#'
+#'@param exp A named numerical array of the forecast with at least time
+#' dimension.
+#'@param obs A named numerical array of the observation with at least time
+#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'.
+#'@param ref A named numerical array of the reference forecast data with at
+#' least time dimension. The dimensions must be the same as 'exp' except
+#' 'memb_dim'. If it is NULL, the climatological forecast is used as reference
+#' forecast. The default value is NULL.
+#'@param time_dim A character string indicating the name of the time dimension.
+#' The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension
+#' to compute the probabilities of the forecast and the reference forecast. The
+#' default value is 'member'.
+#'@param Fair A logical indicating whether to compute the FairCRPSS (the
+#' potential CRPSS that the forecast would have with an infinite ensemble size).
+#' The default value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return
+#'\item{$crpss}{
+#'  A numerical array of the CRPSS with the same dimensions as "exp" except the
+#'  'time_dim' and 'memb_dim' dimensions.
+#'}
+#'\item{$sign}{
+#'  A logical array of the statistical significance of the CRPSS with the same
+#'  dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions.
+#'}
+#'
+#'@references
+#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
+#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1
+#'
+#'@examples
+#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50))
+#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+#'res <- CRPSS(exp = exp, obs = obs) ## climatology as reference forecast
+#'res <- CRPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@export
+CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
+                  Fair = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## exp, obs, and ref (1)
+  if (!is.array(exp) | !is.numeric(exp))
+    stop('Parameter "exp" must be a numeric array.')
+  if (!is.array(obs) | !is.numeric(obs))
+    stop('Parameter "obs" must be a numeric array.')
+  if (!is.null(ref)) {
+    if (!is.array(ref) | !is.numeric(ref))
+      stop('Parameter "ref" must be a numeric array.')
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1)
+    stop('Parameter "time_dim" must be a character string.')
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  if (!is.null(ref) & !time_dim %in% names(dim(ref))) {
+    stop("Parameter 'time_dim' is not found in 'ref' dimension.")
+  }
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
+  }
+  if (!memb_dim %in% names(dim(exp))) {
+    stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
+  }
+  if (!is.null(ref) & !memb_dim %in% names(dim(ref))) {
+    stop("Parameter 'memb_dim' is not found in 'ref' dimension.")
+  }
+  ## exp and obs (2)
+  if (memb_dim %in% names(dim(obs))) {
+    if (identical(as.numeric(dim(obs)[memb_dim]), 1)) {
+      obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected')
+    } else {
+      stop("Not implemented for observations with members ",
+           "('obs' can have 'memb_dim', but it should be of length=1).")
+    }
+  }
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  name_exp <- name_exp[-which(name_exp == memb_dim)]
+  if (!identical(length(name_exp), length(name_obs)) |
+      !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
+                "all dimensions except 'memb_dim'."))
+  }
+  if (!is.null(ref)) {
+    name_ref <- sort(names(dim(ref)))
+    name_ref <- name_ref[-which(name_ref == memb_dim)]
+    if (!identical(length(name_exp), length(name_ref)) |
+        !identical(dim(exp)[name_exp], dim(ref)[name_ref])) {
+      stop(paste0("Parameter 'exp' and 'ref' must have same length of ",
+                  "all dimensions except 'memb_dim'."))
+    }
+  }
+  ## Fair
+  if (!is.logical(Fair) | length(Fair) > 1) {
+    stop("Parameter 'Fair' must be either TRUE or FALSE.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be either NULL or a positive integer.")
+    }
+  }
+
+  ###############################
+
+  # Compute CRPSS
+  if (!is.null(ref)) { # use "ref" as reference forecast
+    data <- list(exp = exp, obs = obs, ref = ref)
+    target_dims <- list(exp = c(time_dim, memb_dim),
+                        obs = time_dim,
+                        ref = c(time_dim, memb_dim))
+  } else {
+    data <- list(exp = exp, obs = obs)
+    target_dims <- list(exp = c(time_dim, memb_dim),
+                        obs = time_dim)
+  }
+  output <- Apply(data,
+                  target_dims = target_dims,
+                  fun = .CRPSS,
+                  Fair = Fair,
+                  ncores = ncores)
+
+  return(output)
+}
+
+.CRPSS <- function(exp, obs, ref = NULL, Fair = FALSE) {
+  # exp: [sdate, memb]
+  # obs: [sdate]
+  # ref: [sdate, memb] or NULL
+
+  # CRPS of the forecast
+  crps_exp <- .CRPS(exp = exp, obs = obs, Fair = Fair)
+
+  # CRPS of the reference forecast
+  if (is.null(ref)) {
+    ## using climatology as reference forecast
+    ## all the time steps are used as if they were members
+    ## then, ref dimensions are [sdate, memb], both with length(sdate)
+    ref <- array(data = obs, dim = c(member = length(obs)))
+    ref <- InsertDim(data = ref, posdim = 1, lendim = length(obs), name = 'sdate')
+  }
+  crps_ref <- .CRPS(exp = ref, obs = obs, Fair = Fair)
+
+  # CRPSS
+  crpss <- 1 - mean(crps_exp) / mean(crps_ref)
+
+  # Significance
+  sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref)$signif
+
+  return(list(crpss = crpss, sign = sign))
+}
diff --git a/modules/Skill/RandomWalkTest.R b/modules/Skill/RandomWalkTest.R
new file mode 100644
index 0000000000000000000000000000000000000000..adeadc1ec94b62920c885640938f966c91e75ddc
--- /dev/null
+++ b/modules/Skill/RandomWalkTest.R
@@ -0,0 +1,82 @@
+#'Random walk test for skill differences
+#'
+#'Forecast comparison of the skill obtained with two forecasts (with respect to
+#'a common reference) based on Random Walks, with a significance estimate at the
+#'95% confidence level, as in DelSole and Tippett (2016).
+#'
+#'@param skill_A A numerical array of the time series of the skill of
+#' forecaster A.
+#'@param skill_B A numerical array of the time series of the skill of
+#' forecaster B. The dimensions should be identical to those of parameter
+#' 'skill_A'.
+#'@param time_dim A character string indicating the name of the dimension along
+#' which the tests are computed. The default value is 'sdate'.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return A list of 2:
+#'\item{$score}{
+#'  A numerical array with the same dimensions as the input arrays except
+#'  'time_dim'. The number of times that forecaster A has been better than
+#'  forecaster B minus the number of times that forecaster B has been better
+#'  than forecaster A (for a positively oriented skill measure). If $score is
+#'  positive, forecaster A is better than forecaster B; if $score is negative,
+#'  forecaster B is better than forecaster A.
+#'}
+#'\item{$signif}{
+#'  A logical array with the same dimensions as the input arrays except
+#'  'time_dim'. Whether the difference is significant or not at the 5%
+#'  significance level.
+#'}
+#'
+#'@examples
+#' fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2))
+#' fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2))
+#' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2))
+#' skill_A <- abs(fcst_A - reference)
+#' skill_B <- abs(fcst_B - reference)
+#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1)
+#'
+#'@import multiApply
+#'@export
+RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL) {
+
+  ## Check inputs
+  if (is.null(skill_A) | is.null(skill_B)) {
+    stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.")
+  }
+  if (!is.numeric(skill_A) | !is.numeric(skill_B)) {
+    stop("Parameters 'skill_A' and 'skill_B' must be numerical arrays.")
+  }
+  if (!identical(dim(skill_A), dim(skill_B))) {
+    stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.")
+  }
+  if (!is.character(time_dim)) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))) {
+    stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.")
+  }
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) {
+      stop("Parameter 'ncores' must be either NULL or a positive integer.")
+    }
+  }
+
+  ## Compute the Random Walk Test
+  res <- multiApply::Apply(data = list(skill_A, skill_B),
+                           target_dims = time_dim,
+                           fun = .RandomWalkTest,
+                           ncores = ncores)
+  return(res)
+}
+
+.RandomWalkTest <- function(skill_A, skill_B) {
+
+  score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B)
+
+  ## TRUE if significant (if last value is above or below 2*sqrt(N))
+  signif <- ifelse(test = (score[length(skill_A)] < (-2) * sqrt(length(skill_A))) |
+                          (score[length(skill_A)] > 2 * sqrt(length(skill_A))),
+                   yes = TRUE, no = FALSE)
+
+  return(list("score" = score[length(skill_A)], "signif" = signif))
+}
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index b29e6697555d83bcda508a84016b1e3e3b1d3629..7c6820da2b647c1a875e497258346c6ec85f8cf0 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -8,6 +8,9 @@
 # - ask Carlos which decadal metrics he is currently using
 source("modules/Skill/s2s.metrics.R")
+source("modules/Skill/CRPS.R")
+source("modules/Skill/CRPSS.R")
+source("modules/Skill/RandomWalkTest.R")
 
 ## TODO: Implement this in the future
 ## Which parameter are required?
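# Note (illustrative): the newly sourced CRPS/CRPSS/RandomWalkTest functions can
# also be called on their own, outside compute_skill_metrics(). A minimal sketch,
# assuming toy arrays that use the 'syear' and 'ensemble' dimension names this
# module works with (the s2dv and multiApply packages are assumed to be loaded,
# as in the module environment):
#
#   exp <- array(rnorm(2400), dim = c(syear = 4, time = 3, latitude = 5,
#                                     longitude = 4, ensemble = 10))
#   obs <- array(rnorm(240), dim = c(syear = 4, time = 3, latitude = 5,
#                                    longitude = 4))
#   crps  <- CRPS(exp, obs, time_dim = 'syear', memb_dim = 'ensemble')
#   crpss <- CRPSS(exp, obs, time_dim = 'syear', memb_dim = 'ensemble')
#   # crpss$crpss holds the skill score; crpss$sign the Random Walk significance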
@@ -45,7 +48,7 @@ source("modules/Skill/s2s.metrics.R") # " running Skill module ", "\n", # " it can call ", metric_fun )) -compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { +compute_skill_metrics <- function(exp, obs, recipe) { # exp: s2dv_cube containing the hindcast # obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml @@ -54,73 +57,186 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { time_dim <- 'syear' memb_dim <- 'ensemble' metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } skill_metrics <- list() - for (metric in strsplit(metrics, " ")[[1]]) { + for (metric in strsplit(metrics, ", | |,")[[1]]) { # Whether the fair version of the metric is to be computed - if (metric %in% c('frps', 'frpss', 'bss10', 'bss90')) { + if (metric %in% c('frps', 'frpss', 'bss10', 'bss90', + 'fcrps', 'fcrpss')) { Fair <- T } else { Fair <- F } + # Whether to compute correlation for the ensemble mean or for each member + if (metric == 'corr') { + memb <- T + } else if (metric == 'enscorr') { + memb <- F + } # Ranked Probability Score and Fair version if (metric %in% c('rps', 'frps')) { skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- list(skill) + Fair = Fair, ncores = ncores) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- list(skill$rpss) - skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + Fair = Fair, ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 10th percentile } else if (metric == 'bss10') { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - prob_thresholds = 0.1, Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- list(skill$rpss) - skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, + memb_dim = memb_dim, prob_thresholds = 0.1, Fair = Fair, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 90th percentile } else if (metric == 'bss90') { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - prob_thresholds = 0.9, Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- list(skill$rpss) - skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, + memb_dim = memb_dim, prob_thresholds = 0.9, Fair = Fair, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + # CRPS and FCRPS + } else if (metric %in% c('crps', 'fcrps')) { + skill <- CRPS(exp$data, obs$data, time_dim = time_dim, + 
memb_dim = memb_dim, Fair = Fair, ncores = ncores) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # CRPSS and FCRPSS + } else if (metric %in% c('crpss', 'fcrpss')) { + skill <- CRPSS(exp$data, obs$data, time_dim = time_dim, + memb_dim = memb_dim, Fair = Fair, ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$crpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Ensemble mean correlation - } else if (metric == 'enscorr') { + } else if (metric %in% c('enscorr', 'corr')) { ## TODO: Implement option for Kendall and Spearman methods? skill <- s2dv::Corr(exp$data, obs$data, dat_dim = 'dat', time_dim = time_dim, method = 'pearson', memb_dim = memb_dim, - memb = FALSE, + memb = memb, ncores = ncores) - skill_metrics[[ metric ]] <- list(skill$corr) - skill_metrics[[ paste0(metric, "_p.value") ]] <- list(skill$p.val) - skill_metrics[[ paste0(metric, "_conf.low") ]] <- list(skill$conf.lower) - skill_metrics[[ paste0(metric, "_conf.up") ]] <- list(skill$conf.upper) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$corr + skill_metrics[[ paste0(metric, "_p.value") ]] <- skill$p.val + skill_metrics[[ paste0(metric, "_conf.low") ]] <- skill$conf.lower + skill_metrics[[ paste0(metric, "_conf.up") ]] <- skill$conf.upper # SpecsVerification metrics } else if (grepl("specs", metric, fixed = TRUE)) { # Compute SpecsVerification version of the metrics - ## Retain _specs in metric name for clarity? - metric <- (strsplit(metric, "_"))[[1]][1] # Get metric name + ## Retain _specs in metric name for clarity + metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name + if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', + 'rpss'))) { + stop("Some of the requested metrics are not available.") + } skill <- Compute_verif_metrics(exp$data, obs$data, - skill_metrics = metric, - verif.dims=c("syear", "sday", "sweek"), - na.rm = na.rm, - ncores = ncores) + skill_metrics = metric_name, + verif.dims=c("syear", "sday", "sweek"), + na.rm = na.rm, + ncores = ncores) + skill <- .drop_dims(skill) + if (metric_name == "frps") { + # Compute yearly mean for FRPS + skill <- colMeans(skill, dims = 1) + } skill_metrics[[ metric ]] <- skill - } - } - print("##### SKILL METRIC COMPUTATION COMPLETE #####") - return(skill_metrics) +} + +compute_probabilities <- function(data, recipe) { + + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } + + named_probs <- list() + named_quantiles <- list() + if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { + stop("Quantiles and probability bins have been requested, but no ", + "thresholds are provided in the recipe.") + } else { + for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { + thresholds <- sapply(element, function (x) eval(parse(text = x))) + probs <- Compute_probs(data$data, thresholds, + ncores = ncores, + na.rm = na.rm) + for (i in seq(1:dim(probs$quantiles)['bin'][[1]])) { + named_quantiles <- append(named_quantiles, + list(probs$quantiles[i, , , , , , ,])) + names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", + as.integer(thresholds[i]*100)) + } + for (i in seq(1:dim(probs$probs)['bin'][[1]])) { + if (i == 1) { + name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == 
dim(probs$probs)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs <- append(named_probs, list(probs$probs[i, , , , , , , ,])) + names(named_probs)[length(named_probs)] <- name_i + } + # remove(probs) + } + } + print("##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") + return(list(probs=named_probs, percentiles=named_quantiles)) +} + +.drop_dims <- function(metric_array) { + # Drop all singleton dimensions + metric_array <- drop(metric_array) + # If time happened to be a singleton dimension, add it back in the array + if (!("time" %in% names(dim(metric_array)))) { + dim(metric_array) <- c("time" = 1, dim(metric_array)) + } + # If array has memb_exp (EnsCorr case), change name to 'ensemble' + if ("exp_memb" %in% names(dim(metric_array))) { + names(dim(metric_array))[which(names(dim(metric_array)) == + "exp_memb")] <- "ensemble" + # } else { + # dim(metric_array) <- c(dim(metric_array), "ensemble" = 1) + } + return(metric_array) } diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R index 7246dd0929fecfcbb6c1303384cba631b40be0ab..7c0aa30e190ac7a00b10eeedbc74fa845047f446 100644 --- a/modules/Skill/s2s.metrics.R +++ b/modules/Skill/s2s.metrics.R @@ -101,7 +101,8 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, data <- Subset(data, c('ensemble'), list(1), drop='selected') data[!is.finite(data)] <- NaN metric <- paste0(metric, "_specs") - metrics_data[[ metric ]] <- data ## previously: list(data) + metrics_data <- data + # metrics_data[[ metric ]] <- data ## previously: list(data) } else if (metric == "corr_eno") { # computes ensemble mean diff --git a/modules/test_victoria.R b/modules/test_victoria.R index a8125b1680161742620200629661569b0349eb8c..920ca12fc7d3a112b7af555dcaee6dce66ffd005 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,11 +1,11 @@ - -recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" - source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") -source("modules/Saving/Save_data.R") +source("modules/Saving/Saving.R") + +recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" +recipe <- read_yaml(recipe_file) # Load datasets data <- load_datasets(recipe_file) @@ -14,9 +14,8 @@ recipe <- read_yaml(recipe_file) # Calibrate datasets calibrated_data <- calibrate_datasets(data, recipe) # Compute skill metrics -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, - recipe, na.rm = T, ncores = 4) - -# Export skill metrics onto outfile -outfile <- "/esarchive/scratch/vagudets/auto-s2s-tests/files/skill/test-metrics.nc" -save_metrics(skill_metrics, recipe, data$hcst, outfile, agg = "global") +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(calibrated_data$hcst, recipe) +# Export all data to netCDF +save_data(recipe, data, calibrated_data, skill_metrics, probabilities) diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index ed81146fc259d6e5b93677350083617bb32419d0..de2c92887c15e40b85329226a8f816680b8fbca2 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -33,6 +33,8 @@ Analysis: method: qmap Skill: metric: RPSS + Probabilities: + percentiles: [[1/10, 9/10]] Indicators: 
index: FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index 8312f81a26c07729c80e0cae78f2782d8b47cf0f..577fdb895dd1eb7ceed5b61610cb1c9fce74ee9e 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -14,7 +14,6 @@ Analysis: Reference: name: ERA5 #JRA-55 Time: - sdate: fcst: 2021 hcst_start: 1991 hcst_end: 1994 @@ -34,6 +33,8 @@ Analysis: method: bias Skill: metric: RPSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 040bbe3e4efc5b52ef599ccdb706d7a8542d04b4..92f1553d6eb5e70ddad93107f79ec5330cec7e16 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -27,12 +27,14 @@ Analysis: lonmax: 2 #359.9 Regrid: method: bilinear - type: to_system #to_reference + type: to_system Workflow: Calibration: - method: bias + method: raw Skill: - metric: RPSS + metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + Probabilities: + percentiles: [[1/3, 2/3]] Indicators: index: FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml new file mode 100644 index 0000000000000000000000000000000000000000..fc1bc58c49b2747999100a098aaf000e9282b926 --- /dev/null +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -0,0 +1,42 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: system5c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1201' + fcst_year: + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 1 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: conservative + type: to_system + Workflow: + Calibration: + method: qmap + Skill: + metric: EnsCorr_specs + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index 3631c783ab08f5b895e62e0f2e70e82dee34aa8d..766b7e04f620b3ddc5bac02e1d37538f608f11b6 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -13,13 +13,12 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: '2020' - fcst_sday: '1101' + sdate: '1101' + fcst_year: '2020' hcst_start: '1993' hcst_end: '1996' - leadtimemin: 0 - leadtimemax: 2 + ftime_min: 1 + ftime_max: 3 Region: latmin: 17 latmax: 20 @@ -32,7 +31,7 @@ Analysis: Calibration: method: mse_min Skill: - metric: RPSS + metric: RPSS CRPSS EnsCorr Corr Enscorr_specs Indicators: index: no Output_format: S2S4E diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index 150205915e2c57c1defb8669b5c549e064562b5b..84a528cdc961e19334a5d6a2b606a2683f3fd881 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -5,7 +5,7 @@ context("Decadal daily data - 1") source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") -source("modules/Saving/Save_data.R") +source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_daily_1.yml" 
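# Note (illustrative): each inner list under Probabilities/percentiles in these
# recipes is parsed by compute_probabilities() with eval(parse(text = x)), so the
# fraction strings become numeric thresholds and, from them, the output names.
# A minimal sketch of that mapping for the [[1/3, 2/3]] entry, assuming read_yaml()
# returns the entry as character strings:
#
#   element <- c("1/3", "2/3")
#   thresholds <- sapply(element, function(x) eval(parse(text = x)))
#   # thresholds: 0.3333333 0.6666667
#   paste0("percentile_", as.integer(thresholds * 100))
#   # "percentile_33" "percentile_66"
#   # the probability bins are then named prob_b33, prob_33_to_66 and prob_a66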
diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 19560921e95b6265e522a7f5351a63dfbb52bb77..51d10f1bd8c6e5232bc3ffa78d9065d4c6ffa4ba 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -5,7 +5,7 @@ context("Decadal monthly data - 1") source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") -source("modules/Saving/Save_data.R") +source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_1.yml" @@ -22,8 +22,10 @@ suppressWarnings({invisible(capture.output( # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, - recipe, na.rm = T, ncores = 4) +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +))}) +suppressWarnings({invisible(capture.output( +probs <- compute_probabilities(calibrated_data$hcst, recipe) ))}) #====================================== @@ -171,27 +173,67 @@ names(skill_metrics), c("rpss", "rpss_significance") ) expect_equal( -class(skill_metrics$rpss[[1]]), +class(skill_metrics$rpss), "array" ) expect_equal( -dim(skill_metrics$rpss[[1]]), -c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) +dim(skill_metrics$rpss), +c(time = 3, latitude = 5, longitude = 4) ) expect_equal( -dim(skill_metrics$rpss_significance[[1]]), -dim(skill_metrics$rpss[[1]]) +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) ) expect_equal( -as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), +as.vector(drop(skill_metrics$rpss)[, 2, 3]), c(-0.2857143, -1.2500000, -1.8928571), tolerance = 0.0001 ) expect_equal( -as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), +as.vector(drop(skill_metrics$rpss_significance)[, 2, 3]), rep(FALSE, 3) ) +# Probs +expect_equal( +names(probs), +c('probs', 'percentiles') +) +expect_equal( +names(probs$probs), +c('prob_b33', 'prob_33_to_66', 'prob_a66', 'prob_b10', 'prob_10_to_90', 'prob_a90') +) +expect_equal( +names(probs$percentiles), +c('percentile_33', 'percentile_66', 'percentile_10', 'percentile_90') +) +expect_equal( +dim(probs$probs$prob_b33), +c(syear = 4, time = 3, latitude = 5, longitude = 4) +) +expect_equal( +dim(probs$percentiles$percentile_33), +c(time = 3, latitude = 5, longitude = 4) +) +expect_equal( +as.vector(probs$probs$prob_b33[, 1, 2, 2]), +c(0.0, 0.5, 0.0, 1.0) +) +expect_equal( +as.vector(probs$probs$prob_10_to_90[, 1, 2, 2]), +c(1.0, 1.0, 0.5, 0.5) +) +expect_equal( +as.vector(probs$percentiles$percentile_33[, 1, 2]), +c(293.7496, 287.4263, 285.8295), +tolerance = 0.0001 +) +expect_equal( +as.vector(probs$percentiles$percentile_10[, 1, 2]), +c(293.1772, 286.9533, 284.7887), +tolerance = 0.0001 +) + }) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index ac80fe7758d3dc63c1f7e7ace11c0c53b92d66d8..02aabdf117c4a6a61d80e3904d43b5a6117d616f 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -5,7 +5,7 @@ context("Decadal monthly data - 2") source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") -source("modules/Saving/Save_data.R") +source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml" @@ -14,17 +14,20 @@ suppressWarnings({invisible(capture.output( data <- load_datasets(recipe_file) ))}) -#recipe <- 
read_yaml(recipe_file) -## Calibrate datasets -#suppressWarnings({invisible(capture.output( -# calibrated_data <- calibrate_datasets(data, recipe) -#))}) -# -## Compute skill metrics -#suppressWarnings({invisible(capture.output( -#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, -# recipe, na.rm = T, ncores = 4) -#))}) +recipe <- read_yaml(recipe_file) + +# Calibrate datasets +suppressWarnings({invisible(capture.output( + calibrated_data <- calibrate_datasets(data, recipe) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +))}) +suppressWarnings({invisible(capture.output( +probs <- compute_probabilities(calibrated_data$hcst, recipe) +))}) #====================================== @@ -126,89 +129,105 @@ as.POSIXct("1991-02-15", tz = 'UTC') }) #====================================== -#test_that("2. Calibration", { -# -#expect_equal( -#is.list(calibrated_data), -#TRUE -#) -#expect_equal( -#names(calibrated_data), -#c("hcst", "fcst") -#) -#expect_equal( -#class(calibrated_data$hcst), -#"s2dv_cube" -#) -#expect_equal( -#class(calibrated_data$fcst), -#"s2dv_cube" -#) -#expect_equal( -#dim(calibrated_data$hcst$data), -#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) -#) -#expect_equal( -#dim(calibrated_data$fcst$data), -#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) -#) -#expect_equal( -#mean(calibrated_data$fcst$data), -#291.8375, -#tolerance = 0.0001 -#) -#expect_equal( -#mean(calibrated_data$hcst$data), -#289.6679, -#tolerance = 0.0001 -#) -#expect_equal( -#as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), -#c(286.3895, 286.6408, 290.6652, 288.3759), -#tolerance = 0.0001 -#) -#expect_equal( -#range(calibrated_data$fcst$data), -#c(287.2173, 297.4578), -#tolerance = 0.0001 -#) -# -#}) -# -# +test_that("2. Calibration", { + +expect_equal( +names(calibrated_data), +c("hcst", "fcst") +) +expect_equal( +calibrated_data, +data[1:2] +) + +}) + + ##====================================== -#test_that("3. Metrics", { -# -#expect_equal( -#is.list(skill_metrics), -#TRUE -#) -#expect_equal( -#names(skill_metrics), -#c("rpss", "rpss_significance") -#) -#expect_equal( -#class(skill_metrics$rpss[[1]]), -#"array" -#) -#expect_equal( -#dim(skill_metrics$rpss[[1]]), -#c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) -#) -#expect_equal( -#dim(skill_metrics$rpss_significance[[1]]), -#dim(skill_metrics$rpss[[1]]) -#) -#expect_equal( -#as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), -#c(-0.2857143, -1.2500000, -1.8928571), -#tolerance = 0.0001 -#) -#expect_equal( -#as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), -#rep(FALSE, 3) -#) -# -#}) -# -# +test_that("3. 
Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("rpss_specs", "bss90_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps") +) +expect_equal( +class(skill_metrics$rpss_specs), +"array" +) +expect_equal( +all(unlist(lapply(lapply(skill_metrics, dim), all.equal, c(time = 14, latitude = 8, longitude = 5)))), +TRUE +) +expect_equal( +as.vector(skill_metrics$rpss_specs[6:8, 1, 2]), +c(-0.3333333, 0.1666667, -0.3333333), +tolerance = 0.0001 +) +expect_equal( +all(is.na(skill_metrics$bss90_specs)), +TRUE +) +expect_equal( +as.vector(skill_metrics$enscorr_specs[6:8, 1, 2]), +c(0.4474382, 0.1026333, 0.4042823), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$frps_specs[6:8, 1, 2]), +c(0.4444444, 0.2222222, 0.4444444), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$frpss_specs[4:7, 1, 5]), +c( 1.0, -0.5, -0.5, 0.5), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$bss10_specs[6:8, 1, 2]), +c(0.5, -0.5, -0.5), +) +expect_equal( +as.vector(skill_metrics$frps[6:8, 1, 2]), +c(0.4444444, 0.2222222, 0.4444444), +tolerance = 0.0001 +) + +# Probs +expect_equal( +names(probs), +c('probs', 'percentiles') +) +expect_equal( +names(probs$probs), +c('prob_b33', 'prob_33_to_66', 'prob_a66') +) +expect_equal( +names(probs$percentiles), +c('percentile_33', 'percentile_66') +) +expect_equal( +dim(probs$probs$prob_b33), +c(syear = 3, time = 14, latitude = 8, longitude = 5) +) +expect_equal( +dim(probs$percentiles$percentile_33), +c(time = 14, latitude = 8, longitude = 5) +) +expect_equal( +as.vector(probs$probs$prob_b33[, 1, 2, 2]), +c(0.0, 0.3333333, 0.6666667), +tolerance = 0.0001 +) +expect_equal( +as.vector(probs$percentiles$percentile_33[1:3, 1, 2]), +c(271.7508, 273.1682, 274.1937), +tolerance = 0.0001 +) + +}) + + diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R new file mode 100644 index 0000000000000000000000000000000000000000..c37b551486bb65a409079e762bb5145740ed4f0a --- /dev/null +++ b/tests/testthat/test-seasonal_daily.R @@ -0,0 +1,166 @@ +context("Seasonal daily data") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") + +recipe_file <- "tests/recipes/recipe-seasonal_daily_1.yml" + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe_file) +))}) + +recipe <- read_yaml(recipe_file) + +suppressWarnings({invisible(capture.output( +calibrated_data <- calibrate_datasets(data, recipe) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +))}) + +test_that("1. 
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +data$fcst, +NULL +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +dim(data$obs$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 1) +) +expect_equal( +dim(data$obs$Dates$start), +c(sday = 1, sweek = 1, syear = 4, time = 31) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1:2,1,2,3]), +c(295.5691, 291.7752, 294.0874, 290.1173), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +288.3723, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(280.1490, 298.2324), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$Dates$start)[1], +as.POSIXct("1993-12-01 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[2], +as.POSIXct("1994-12-01 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[5], +as.POSIXct("1993-12-02 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$obs$Dates$start)[10], +as.POSIXct("1994-12-03 11:30:00 UTC", tz = 'UTC') +) + +}) + +test_that("2. Calibration", { + +expect_equal( +is.list(calibrated_data), +TRUE +) +expect_equal( +names(calibrated_data), +c("hcst", "fcst") +) +expect_equal( +class(calibrated_data$hcst), +"s2dv_cube" +) +expect_equal( +calibrated_data$fcst, +NULL +) +expect_equal( +dim(calibrated_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +mean(calibrated_data$hcst$data), +289.6468, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(calibrated_data$hcst$data)[1, 1:4, 2, 3, 4]), +c(295.1077, 294.2161, 294.5801, 292.6326), +tolerance = 0.0001 +) +expect_equal( +range(calibrated_data$hcst$data), +c(283.9447, 297.7496), +tolerance = 0.0001 +) +}) + + +#====================================== +test_that("3. 
Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("enscorr_specs") +) +expect_equal( +class(skill_metrics$enscorr_specs), +"array" +) +expect_equal( +dim(skill_metrics$enscorr_specs), +c(time = 31, latitude = 4, longitude = 4) +) +expect_equal( +skill_metrics$enscorr_specs[1:3, 1, 1], +c(0.8159317, 0.8956195, 0.8355627), +tolerance=0.0001 +) +}) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 4bdf833d44ff33ba185508e23b2e49dad5447ea5..05c1c4118370a29d0cec8fed282d422f2e0b4e15 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -3,7 +3,7 @@ context("Seasonal monthly data") source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") -source("modules/Saving/Save_data.R") +source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-seasonal_monthly_1.yml" @@ -20,8 +20,7 @@ calibrated_data <- calibrate_datasets(data, recipe) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, - recipe, na.rm = T, ncores = 4) +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) ))}) test_that("1. Loading", { @@ -163,27 +162,29 @@ TRUE ) expect_equal( names(skill_metrics), -c("rpss", "rpss_significance") +c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", + "enscorr_p.value", "enscorr_conf.low", "enscorr_conf.up", "corr", + "corr_p.value", "corr_conf.low", "corr_conf.up", "enscorr_specs") ) expect_equal( -class(skill_metrics$rpss[[1]]), +class(skill_metrics$rpss), "array" ) expect_equal( -dim(skill_metrics$rpss[[1]]), -c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 3, longitude = 3) +dim(skill_metrics$rpss), +c(time = 3, latitude = 3, longitude = 3) ) expect_equal( -dim(skill_metrics$rpss_significance[[1]]), -dim(skill_metrics$rpss[[1]]) +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) ) expect_equal( -as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), +as.vector(skill_metrics$rpss[, 2, 3]), c(-1.153829, -1.114743, -1.392457), tolerance = 0.0001 ) expect_equal( -as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), +as.vector(skill_metrics$rpss_significance[, 2, 3]), rep(FALSE, 3) )