From f8afc84dc1366209de771e7f7026c4d5c42c1bd0 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 19 Jul 2022 12:09:14 +0200 Subject: [PATCH 01/81] Add Visualization module --- modules/Visualization/Visualization.R | 103 ++++++++++++++++++++++++++ modules/Visualization/s2s.plots.R | 103 ++++++++++++++++++++++++++ modules/test_victoria.R | 2 +- 3 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 modules/Visualization/Visualization.R create mode 100644 modules/Visualization/s2s.plots.R diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R new file mode 100644 index 00000000..f3c1a6eb --- /dev/null +++ b/modules/Visualization/Visualization.R @@ -0,0 +1,103 @@ +## TODO: Remove unnecessary packages + +library(easyNCDF) +source("../data-analysis/R_Reorder.R") +library(s2dverification) +library(RColorBrewer) +library(multiApply) + +plot_corr <- function(var, sdate, outdir, type, s2s, project=NULL) { + + # filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", + # var,"/",var,"-corr_month03.nc") + ## TODO: Generate path to outfile + # outfile <- paste0("/esarchive/scratch/lpalma/", + # var,"-corr_month03.png") + + corr <- NcToArray(filename, vars_to_read='corr') + lon <- NcToArray(filename, vars_to_read='longitude') + lat <- NcToArray(filename, vars_to_read='latitude') + time <- NcToArray(filename, vars_to_read='time') + + corr <- Reorder(corr, c("var","time","longitude","latitude")) + + + col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", "#64AAFF", + "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", + "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") + + brks <- seq(-1,1,by=0.1) + color <- colorRampPalette(col2)(length(brks)-1) + options(bitmapType="cairo") + + PlotLayout(PlotEquiMap, c('longitude','latitude'), + corr, lon, lat, filled.continents=F, + brks=brks, + #rxow_titles=row_titles, + cols=col2, + fileout=outfile , 
bar_label_digits=1) + +} + +plot_skill <- function(file.name, var, sdate, outdir, type, s2s,project=NULL) { + + var <- "WSDI" + filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", + var,"/",var,"-skill_month02.nc") + outfile <- paste0("/esarchive/scratch/lpalma/", + var,"-skill_month02.png") + + corr <- NcToArray(filename, vars_to_read='corr') + lon <- NcToArray(filename, vars_to_read='longitude') + lat <- NcToArray(filename, vars_to_read='latitude') + time <- NcToArray(filename, vars_to_read='time') + + corr <- Reorder(corr, c("var","time","longitude","latitude")) + + + col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", "#64AAFF", + "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", + "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") + + brks <- seq(-100,100,by=10) + color <- colorRampPalette(col2)(length(brks)-1) + options(bitmapType="cairo") + + PlotLayout(PlotEquiMap, c('longitude','latitude'), + corr, lon, lat, filled.continents=F, + brks=brks, + #rxow_titles=row_titles, + cols=col2, + fileout=outfile , bar_label_digits=1) + +} +plot_ensemblemean <- function(files.path, var, date, outdir, type, s2s){ + + + dimnames <- c('var','time', 'longitude', 'latitude') + var <- "WSDI" + filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", + var,"/",var,"_20210301_03.nc") + outfile <- paste0("/esarchive/scratch/lpalma/", + var,"_20210301.png") + + data <- NcToArray(filename, vars_to_read=var) + data <- Apply(data, 'ensemble', mean, na.rm=T)[[1]] + #data <- aperm(data,c(1,4,2,3)) + data <- Reorder(data,dimnames) + names(dim(data)) <- dimnames + + lon <- NcToArray(filename, vars_to_read='longitude') + lat <- NcToArray(filename, vars_to_read='latitude') + + cols <- c("#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", + "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") + + PlotLayout(PlotEquiMap, c('longitude','latitude'), data, lon, + lat, filled.continents=F, 
fileout=outfile, + bar_label_digits=2) + + +} + + diff --git a/modules/Visualization/s2s.plots.R b/modules/Visualization/s2s.plots.R new file mode 100644 index 00000000..d387979c --- /dev/null +++ b/modules/Visualization/s2s.plots.R @@ -0,0 +1,103 @@ + + +library(easyNCDF) +source("../data-analysis/R_Reorder.R") +library(s2dverification) +library(CSTools) +library(RColorBrewer) library(multiApply) + +plot_corr <- function(file.name, var, sdate, outdir, type, s2s,project=NULL){ + + var <- "WSDI" + filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", + var,"/",var,"-corr_month03.nc") + outfile <- paste0("/esarchive/scratch/lpalma/", + var,"-corr_month03.png") + + corr <- NcToArray(filename, vars_to_read='corr') + lon <- NcToArray(filename, vars_to_read='longitude') + lat <- NcToArray(filename, vars_to_read='latitude') + time <- NcToArray(filename, vars_to_read='time') + + corr <- Reorder(corr, c("var","time","longitude","latitude")) + + + col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", "#64AAFF", + "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", + "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") + + brks <- seq(-1,1,by=0.1) + color <- colorRampPalette(col2)(length(brks)-1) + options(bitmapType="cairo") + + PlotLayout(PlotEquiMap, c('longitude','latitude'), + corr, lon, lat, filled.continents=F, + brks=brks, + #rxow_titles=row_titles, + cols=col2, + fileout=outfile , bar_label_digits=1) + +} + +plot_skill <- function(file.name, var, sdate, outdir, type, s2s,project=NULL){ + + var <- "WSDI" + filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", + var,"/",var,"-skill_month02.nc") + outfile <- paste0("/esarchive/scratch/lpalma/", + var,"-skill_month02.png") + + corr <- NcToArray(filename, vars_to_read='corr') + lon <- NcToArray(filename, vars_to_read='longitude') + lat <- NcToArray(filename, vars_to_read='latitude') + time <- NcToArray(filename, 
vars_to_read='time') + + corr <- Reorder(corr, c("var","time","longitude","latitude")) + + + col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", "#64AAFF", + "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", + "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") + + brks <- seq(-100,100,by=10) + color <- colorRampPalette(col2)(length(brks)-1) + options(bitmapType="cairo") + + PlotLayout(PlotEquiMap, c('longitude','latitude'), + corr, lon, lat, filled.continents=F, + brks=brks, + #rxow_titles=row_titles, + cols=col2, + fileout=outfile , bar_label_digits=1) + +} +plot_ensemblemean <- function(files.path, var, date, outdir, type, s2s){ + + + dimnames <- c('var','time', 'longitude', 'latitude') + var <- "WSDI" + filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", + var,"/",var,"_20210301_03.nc") + outfile <- paste0("/esarchive/scratch/lpalma/", + var,"_20210301.png") + + data <- NcToArray(filename, vars_to_read=var) + data <- Apply(data, 'ensemble', mean, na.rm=T)[[1]] + #data <- aperm(data,c(1,4,2,3)) + data <- Reorder(data,dimnames) + names(dim(data)) <- dimnames + + lon <- NcToArray(filename, vars_to_read='longitude') + lat <- NcToArray(filename, vars_to_read='latitude') + + cols <- c("#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", + "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") + + PlotLayout(PlotEquiMap, c('longitude','latitude'), data, lon, + lat, filled.continents=F, fileout=outfile, + bar_label_digits=2) + + +} + + diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 0a5834b8..d52fd5f4 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,5 +1,5 @@ -recipe_file <- "modules/Loading/testing_recipes/recipe_3.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") -- GitLab From 9bf487668fcc92af7a39bb64e65c9303054927d8 Mon Sep 17 00:00:00 2001 
From: Victoria Agudetse Roures Date: Tue, 19 Jul 2022 16:41:11 +0200 Subject: [PATCH 02/81] Add first function to plot skill scores --- modules/Visualization/Visualization.R | 62 +++++++++++++-------------- modules/test_victoria.R | 4 ++ 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index f3c1a6eb..d2d60511 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,10 +1,7 @@ ## TODO: Remove unnecessary packages -library(easyNCDF) -source("../data-analysis/R_Reorder.R") -library(s2dverification) +# library(s2dverification) library(RColorBrewer) -library(multiApply) plot_corr <- function(var, sdate, outdir, type, s2s, project=NULL) { @@ -39,38 +36,41 @@ plot_corr <- function(var, sdate, outdir, type, s2s, project=NULL) { } -plot_skill <- function(file.name, var, sdate, outdir, type, s2s,project=NULL) { - - var <- "WSDI" - filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", - var,"/",var,"-skill_month02.nc") - outfile <- paste0("/esarchive/scratch/lpalma/", - var,"-skill_month02.png") - - corr <- NcToArray(filename, vars_to_read='corr') - lon <- NcToArray(filename, vars_to_read='longitude') - lat <- NcToArray(filename, vars_to_read='latitude') - time <- NcToArray(filename, vars_to_read='time') - - corr <- Reorder(corr, c("var","time","longitude","latitude")) - - - col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", "#64AAFF", - "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", - "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") - - brks <- seq(-100,100,by=10) +plot_skill_scores <- function(skill_metrics, recipe, data_cube) { + ## TODO: Define output file name + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + + # Define plot characteristics + col2 <- c("#0C046E", "#1914BE", "#2341F7", 
"#2F55FB", "#3E64FF", "#528CFF", + "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", + "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", + "#C80F1E", "#A50519") + brks <- seq(-1, 1, by = 0.1) color <- colorRampPalette(col2)(length(brks)-1) options(bitmapType="cairo") - PlotLayout(PlotEquiMap, c('longitude','latitude'), - corr, lon, lat, filled.continents=F, - brks=brks, - #rxow_titles=row_titles, - cols=col2, - fileout=outfile , bar_label_digits=1) + skill_scores <- c("rpss", "bss90", "bss10", "frpss") + + for (skill in skill_scores) { + if (skill %in% names(skill_metrics)) { + name <- names(skill_metrics[skill]) + skill <- skill_metrics[[skill]] + skill <- Reorder(skill, c("time", "longitude", "latitude")) + outfile <- paste0("/esarchive/scratch/vagudets/auto-s2s-tests/testviz_", + name, ".png") + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + skill[1, , ], longitude, latitude, filled.continents=F, + brks = brks, + #rxow_titles=row_titles, + cols = col2, + fileout = outfile, + bar_label_digits = 1) + } + } } + plot_ensemblemean <- function(files.path, var, date, outdir, type, s2s){ diff --git a/modules/test_victoria.R b/modules/test_victoria.R index d52fd5f4..30eba721 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -5,6 +5,7 @@ source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") # Load datasets data <- load_datasets(recipe_file) @@ -32,3 +33,6 @@ if (!is.null(calibrated_data$fcst)) { save_forecast(calibrated_data$fcst, recipe, outdir) } save_observations(data$obs, recipe, outdir) + +# Plots +plot_skill_scores(skill_metrics, recipe, data$hcst) -- GitLab From 69f4b92f7d0aa1e8fdb537beac2048315ddfa14e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 25 Jul 2022 12:24:29 +0200 Subject: [PATCH 03/81] Add functions to plot ensemble mean correlation, 
various skill metrics, and forecast ensemble mean --- modules/Visualization/Visualization.R | 154 +++++++++++++++----------- modules/test_victoria.R | 4 +- 2 files changed, 90 insertions(+), 68 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index d2d60511..42fc529c 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -3,40 +3,50 @@ # library(s2dverification) library(RColorBrewer) -plot_corr <- function(var, sdate, outdir, type, s2s, project=NULL) { +plot_enscorr <- function(skill_metrics, data_cube, recipe, outdir) { - # filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", - # var,"/",var,"-corr_month03.nc") - ## TODO: Generate path to outfile - # outfile <- paste0("/esarchive/scratch/lpalma/", - # var,"-corr_month03.png") - - corr <- NcToArray(filename, vars_to_read='corr') - lon <- NcToArray(filename, vars_to_read='longitude') - lat <- NcToArray(filename, vars_to_read='latitude') - time <- NcToArray(filename, vars_to_read='time') - - corr <- Reorder(corr, c("var","time","longitude","latitude")) - - - col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", "#64AAFF", - "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", - "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") - - brks <- seq(-1,1,by=0.1) - color <- colorRampPalette(col2)(length(brks)-1) - options(bitmapType="cairo") - - PlotLayout(PlotEquiMap, c('longitude','latitude'), - corr, lon, lat, filled.continents=F, - brks=brks, - #rxow_titles=row_titles, - cols=col2, - fileout=outfile , bar_label_digits=1) + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + # Define plot characteristics + col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", + "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", + "#FFBF87", "#FFA173", "#FF7055", "#FE6346", 
"#F7403B", "#E92D36", + "#C80F1E", "#A50519") + brks <- seq(-1, 1, by = 0.1) + color <- colorRampPalette(col2)(length(brks) - 1) + options(bitmapType = "cairo") + + enscorr_names <- c("enscorr", "enscorr_specs") + + for (name in enscorr_names) { + if (name %in% names(skill_metrics)) { + enscorr <- skill_metrics[[name]] + enscorr <- Reorder(enscorr, c("time", "longitude", "latitude")) + outfile <- paste0(outdir, name, ".png") + ## TODO: Put plot info in the titles (startdate, month) + toptitle <- paste0("Ensemble Mean Correlation - ", + data_cube$Variable$varName) + months <- unique(lubridate::month(data$hcst$Dates$start, + label = T, abb = F)) + titles <- as.vector(months) + # Plot + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + enscorr, longitude, latitude, + toptitle = toptitle, + titles = titles, + filled.continents=F, + brks = brks, + #rxow_titles=row_titles, + cols = col2, + fileout = outfile, + bar_label_digits = 1) + } + } } -plot_skill_scores <- function(skill_metrics, recipe, data_cube) { +plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir) { + ## TODO: Define output file name latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] @@ -47,20 +57,29 @@ plot_skill_scores <- function(skill_metrics, recipe, data_cube) { "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") brks <- seq(-1, 1, by = 0.1) - color <- colorRampPalette(col2)(length(brks)-1) - options(bitmapType="cairo") + color <- colorRampPalette(col2)(length(brks) - 1) + options(bitmapType = "cairo") skill_scores <- c("rpss", "bss90", "bss10", "frpss") - for (skill in skill_scores) { - if (skill %in% names(skill_metrics)) { - name <- names(skill_metrics[skill]) - skill <- skill_metrics[[skill]] + for (name in skill_scores) { + if (name %in% names(skill_metrics)) { + # name <- names(skill_metrics[skill]) + skill <- skill_metrics[[name]] skill <- Reorder(skill, c("time", "longitude", 
"latitude")) - outfile <- paste0("/esarchive/scratch/vagudets/auto-s2s-tests/testviz_", - name, ".png") + ## TODO: Define name of output file + outfile <- paste0(outdir, name, ".png") + ## TODO: Put plot info in the titles (startdate, month) + toptitle <- paste0(toupper(name), " - ", data_cube$Variable$varName) + months <- unique(lubridate::month(data$hcst$Dates$start, + label = T, abb = F)) + titles <- as.vector(months) + # Plot PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - skill[1, , ], longitude, latitude, filled.continents=F, + skill, longitude, latitude, + toptitle = toptitle, + titles = titles, + filled.continents=F, brks = brks, #rxow_titles=row_titles, cols = col2, @@ -68,36 +87,37 @@ plot_skill_scores <- function(skill_metrics, recipe, data_cube) { bar_label_digits = 1) } } - } -plot_ensemblemean <- function(files.path, var, date, outdir, type, s2s){ - - - dimnames <- c('var','time', 'longitude', 'latitude') - var <- "WSDI" - filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", - var,"/",var,"_20210301_03.nc") - outfile <- paste0("/esarchive/scratch/lpalma/", - var,"_20210301.png") - - data <- NcToArray(filename, vars_to_read=var) - data <- Apply(data, 'ensemble', mean, na.rm=T)[[1]] - #data <- aperm(data,c(1,4,2,3)) - data <- Reorder(data,dimnames) - names(dim(data)) <- dimnames +plot_ensemble_mean <- function(data_cube, recipe, outdir) { - lon <- NcToArray(filename, vars_to_read='longitude') - lat <- NcToArray(filename, vars_to_read='latitude') - - cols <- c("#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", - "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") - - PlotLayout(PlotEquiMap, c('longitude','latitude'), data, lon, - lat, filled.continents=F, fileout=outfile, + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + # Compute ensemble mean + ensemble_mean <- Apply(data_cube$data, 'ensemble', mean, na.rm=T)[[1]] + # Drop extra dims, add time dim if missing + ## 
is readding 'time' necessary? + ensemble_mean <- drop(ensemble_mean) + if (!("time" %in% names(dim(ensemble_mean)))) { + dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) + } + ensemble_mean <- Reorder(ensemble_mean, c("time", "longitude", "latitude")) + ## TODO: Redefine column colors? + cols <- c("#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", "#FF7055", "#FE6346", + "#F7403B", "#E92D36", "#C80F1E", "#A50519") + ## TODO: Define name of output file + outfile <- paste0(outdir, "forecast_ensemble_mean.png") + toptitle <- paste0("Ensemble Mean - ", data_cube$Variable$varName) + months <- unique(lubridate::month(data$hcst$Dates$start, + label = T, abb = F)) + titles <- as.vector(months) + # Plots + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + ensemble_mean, longitude, latitude, + filled.continents = F, + toptitle = toptitle, + titles = titles, + cols = cols, + fileout = outfile, bar_label_digits=2) - - } - - diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 30eba721..f697a399 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -35,4 +35,6 @@ if (!is.null(calibrated_data$fcst)) { save_observations(data$obs, recipe, outdir) # Plots -plot_skill_scores(skill_metrics, recipe, data$hcst) +plot_skill_scores(skill_metrics, data$hcst, recipe, outdir) +plot_enscorr(skill_metrics, data$hcst, recipe, outdir) +plot_ensemble_mean(calibrated_data$fcst, recipe, outdir) -- GitLab From b40577216ee130073e31ad7689019436efe2df85 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 25 Jul 2022 14:43:44 +0200 Subject: [PATCH 04/81] Remove unnecessary packages --- modules/Visualization/Visualization.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 42fc529c..2fadffe7 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,6 +1,3 @@ -## TODO: Remove unnecessary packages - -# library(s2dverification) 
library(RColorBrewer) plot_enscorr <- function(skill_metrics, data_cube, recipe, outdir) { -- GitLab From a3e68a348113dba8f56b3559aa38ff8b52ff4b97 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 26 Jul 2022 10:50:56 +0200 Subject: [PATCH 05/81] Add wrapper function and flag to optionally plot significance --- modules/Visualization/Visualization.R | 50 +++++++++++++++++++++++++-- modules/test_victoria.R | 5 ++- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 2fadffe7..deb92ae2 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,5 +1,41 @@ library(RColorBrewer) +## TODO: Add the possibility to read the data directly from netCDF +plot_datasets <- function(data, + calibrated_data = NULL, + skill_metrics = NULL, + recipe, + outdir, + significance = F) { + + # Try to produce and save several basic plots. + # data: list containing the hcst, obs and fcst (optional) s2dv_cube objects + # calibrated_data: list containing the calibrated hcst and fcst (optional) + # s2dv_cube objects + # skill_metrics: list of arrays containing the computed skill metrics + # recipe: the auto-s2s recipe as read by read_yaml() + # outdir: output directory + # significance: Bool. Whether to include significance dots where applicable + + if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && (is.null(data$fcst))) { + stop("The Visualization module has been called, but there is no data ", + "that can be plotted.") + } + if (!is.null(skill_metrics)) { + plot_enscorr(skill_metrics, data$hcst, recipe, outdir) + plot_skill_scores(skill_metrics, data$hcst, recipe, outdir, significance) + } + if (!is.null(calibrated_data$fcst)) { + plot_ensemble_mean(calibrated_data$fcst, recipe, outdir) + } else if (!is.null(data$fcst)) { + warning("Only the uncalibrated forecast was provided. 
Using this data ", + "to plot the forecast ensemble mean.") + plot_ensemble_mean(data$fcst, recipe, outdir) + } + print("##### PLOTS SAVED TO OUTPUT DIRECTORY #####") +} + + plot_enscorr <- function(skill_metrics, data_cube, recipe, outdir) { latitude <- data_cube$lat[1:length(data_cube$lat)] @@ -42,7 +78,8 @@ plot_enscorr <- function(skill_metrics, data_cube, recipe, outdir) { } } -plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir) { +plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir, + significance = F) { ## TODO: Define output file name latitude <- data_cube$lat[1:length(data_cube$lat)] @@ -61,9 +98,15 @@ plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir) { for (name in skill_scores) { if (name %in% names(skill_metrics)) { - # name <- names(skill_metrics[skill]) skill <- skill_metrics[[name]] skill <- Reorder(skill, c("time", "longitude", "latitude")) + if (significance) { + significance_name <- paste0(name, "_significance") + skill_significance <- skill_metrics[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", "longitude", "latitude")) + } else { + skill_significance <- NULL + } ## TODO: Define name of output file outfile <- paste0(outdir, name, ".png") ## TODO: Put plot info in the titles (startdate, month) @@ -73,7 +116,8 @@ plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir) { titles <- as.vector(months) # Plot PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - skill, longitude, latitude, + skill, longitude, latitude, + dots = skill_significance, toptitle = toptitle, titles = titles, filled.continents=F, diff --git a/modules/test_victoria.R b/modules/test_victoria.R index f697a399..2030c549 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -35,6 +35,5 @@ if (!is.null(calibrated_data$fcst)) { save_observations(data$obs, recipe, outdir) # Plots -plot_skill_scores(skill_metrics, data$hcst, recipe, outdir) 
-plot_enscorr(skill_metrics, data$hcst, recipe, outdir) -plot_ensemble_mean(calibrated_data$fcst, recipe, outdir) +plot_datasets(data, calibrated_data, skill_metrics, + recipe, outdir, significance = T) -- GitLab From 193e9522f45920341e8f19f811b0735c56a8f46f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 24 Aug 2022 15:42:14 +0200 Subject: [PATCH 06/81] Create outdir inside wrapper function --- modules/Visualization/Visualization.R | 5 +++-- modules/test_victoria.R | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index deb92ae2..513e23ee 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -5,7 +5,6 @@ plot_datasets <- function(data, calibrated_data = NULL, skill_metrics = NULL, recipe, - outdir, significance = F) { # Try to produce and save several basic plots. @@ -14,8 +13,10 @@ plot_datasets <- function(data, # s2dv_cube objects # skill_metrics: list of arrays containing the computed skill metrics # recipe: the auto-s2s recipe as read by read_yaml() - # outdir: output directory # significance: Bool. 
Whether to include significance dots where applicable + + outdir <- paste0(get_dir(recipe), "/plots/") + dir.create(outdir, showWarnings = FALSE, recursive = TRUE) if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && (is.null(data$fcst))) { stop("The Visualization module has been called, but there is no data ", diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 3a246254..99eae634 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -20,4 +20,4 @@ probabilities <- compute_probabilities(calibrated_data$hcst, recipe) save_data(recipe, data, calibrated_data, skill_metrics, probabilities) plot_datasets(data, calibrated_data, skill_metrics, - recipe, outdir, significance = T) + recipe, significance = T) -- GitLab From d09cce1dd9fe276c2e9464a1340a1392636d0909 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 31 Aug 2022 13:03:35 +0200 Subject: [PATCH 07/81] Add function to save rps, crps and frps --- modules/Visualization/Visualization.R | 54 ++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 513e23ee..a1e7a43f 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,6 +1,7 @@ library(RColorBrewer) ## TODO: Add the possibility to read the data directly from netCDF +## TODO: Get variable and system/obs names from dictionary plot_datasets <- function(data, calibrated_data = NULL, skill_metrics = NULL, @@ -38,6 +39,7 @@ plot_datasets <- function(data, plot_enscorr <- function(skill_metrics, data_cube, recipe, outdir) { + ## TODO: Is this function even necessary...? 
latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] @@ -95,16 +97,19 @@ plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir, color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") - skill_scores <- c("rpss", "bss90", "bss10", "frpss") + skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss") for (name in skill_scores) { if (name %in% names(skill_metrics)) { skill <- skill_metrics[[name]] skill <- Reorder(skill, c("time", "longitude", "latitude")) + ## TODO: Different filename for with vs without significance? if (significance) { significance_name <- paste0(name, "_significance") skill_significance <- skill_metrics[[significance_name]] - skill_significance <- Reorder(skill_significance, c("time", "longitude", "latitude")) + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) } else { skill_significance <- NULL } @@ -124,6 +129,51 @@ plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir, filled.continents=F, brks = brks, #rxow_titles=row_titles, + cols = col2, + fileout = outfile, + bar_label_digits = 1) + } + } +} + +plot_scores <- function(skill_metrics, data_cube, recipe, outdir) { + + ## TODO: Define output file name + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + + # Define plot characteristics + ## TODO: Change color palette + col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", + "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", + "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", + "#C80F1E", "#A50519") + brks <- seq(0, 1, by = 0.1) + color <- colorRampPalette(col2)(length(brks) - 1) + options(bitmapType = "cairo") + + scores <- c("rps", "frps", "crps") + + for (name in scores) { + if (name %in% names(skill_metrics)) { + skill <- skill_metrics[[name]] + skill <- Reorder(skill, c("time", 
"longitude", "latitude")) + ## TODO: Different filename for with vs without significance? + ## TODO: Define name of output file + outfile <- paste0(outdir, name, ".png") + ## TODO: Put plot info in the titles (startdate, month) + toptitle <- paste0(toupper(name), " - ", data_cube$Variable$varName) + months <- unique(lubridate::month(data$hcst$Dates$start, + label = T, abb = F)) + titles <- as.vector(months) + # Plot + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + skill, longitude, latitude, + toptitle = toptitle, + titles = titles, + filled.continents=F, + brks = brks, + #rxow_titles=row_titles, cols = col2, fileout = outfile, bar_label_digits = 1) -- GitLab From 784262d17e4427bb8f8e6f2b4da9b3c80daea7f2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 2 Sep 2022 09:25:59 +0200 Subject: [PATCH 08/81] change plot_datasets() to plot_data() --- modules/Visualization/Visualization.R | 16 ++++++++++------ modules/test_victoria.R | 4 ++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index a1e7a43f..5d25b475 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -2,11 +2,11 @@ library(RColorBrewer) ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Get variable and system/obs names from dictionary -plot_datasets <- function(data, - calibrated_data = NULL, - skill_metrics = NULL, - recipe, - significance = F) { +plot_data <- function(data, + calibrated_data = NULL, + skill_metrics = NULL, + recipe, + significance = F) { # Try to produce and save several basic plots. 
# data: list containing the hcst, obs and fcst (optional) s2dv_cube objects @@ -18,6 +18,9 @@ plot_datasets <- function(data, outdir <- paste0(get_dir(recipe), "/plots/") dir.create(outdir, showWarnings = FALSE, recursive = TRUE) + + ## TODO: Add warning if parameter significance is included but skill:metrics + ## is null if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && (is.null(data$fcst))) { stop("The Visualization module has been called, but there is no data ", @@ -26,6 +29,7 @@ plot_datasets <- function(data, if (!is.null(skill_metrics)) { plot_enscorr(skill_metrics, data$hcst, recipe, outdir) plot_skill_scores(skill_metrics, data$hcst, recipe, outdir, significance) + plot_scores(skill_metrics, data$hcst, recipe, outdir) } if (!is.null(calibrated_data$fcst)) { plot_ensemble_mean(calibrated_data$fcst, recipe, outdir) @@ -174,7 +178,7 @@ plot_scores <- function(skill_metrics, data_cube, recipe, outdir) { filled.continents=F, brks = brks, #rxow_titles=row_titles, - cols = col2, + #cols = col2, fileout = outfile, bar_label_digits = 1) } diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 99eae634..425f1915 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -19,5 +19,5 @@ probabilities <- compute_probabilities(calibrated_data$hcst, recipe) # Export all data to netCDF save_data(recipe, data, calibrated_data, skill_metrics, probabilities) -plot_datasets(data, calibrated_data, skill_metrics, - recipe, significance = T) +plot_data(data, calibrated_data, skill_metrics, + recipe, significance = T) -- GitLab From 2e5b116ed232b2b7fee532d2744e3ee604310d58 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 2 Sep 2022 12:26:43 +0200 Subject: [PATCH 09/81] Generate plot colors with grDevices library --- modules/Visualization/Visualization.R | 32 +++++++++++++++------------ 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R 
index 5d25b475..57713fdc 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,4 +1,5 @@ library(RColorBrewer) +library(grDevices) ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Get variable and system/obs names from dictionary @@ -49,11 +50,12 @@ plot_enscorr <- function(skill_metrics, data_cube, recipe, outdir) { longitude <- data_cube$lon[1:length(data_cube$lon)] # Define plot characteristics - col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", - "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", - "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", - "#C80F1E", "#A50519") +# col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", +# "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", +# "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", +# "#C80F1E", "#A50519") brks <- seq(-1, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "BlueRed2") color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") @@ -93,11 +95,12 @@ plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir, longitude <- data_cube$lon[1:length(data_cube$lon)] # Define plot characteristics - col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", - "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", - "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", - "#C80F1E", "#A50519") +# col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", +# "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", +# "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", +# "#C80F1E", "#A50519") brks <- seq(-1, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "BlueRed2") color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") @@ -148,11 +151,12 @@ plot_scores <- 
function(skill_metrics, data_cube, recipe, outdir) { # Define plot characteristics ## TODO: Change color palette - col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", - "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", - "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", - "#C80F1E", "#A50519") +# col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", +# "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", +# "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", +# "#C80F1E", "#A50519") brks <- seq(0, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "BlueRed2", rev = T) color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") @@ -178,7 +182,7 @@ plot_scores <- function(skill_metrics, data_cube, recipe, outdir) { filled.continents=F, brks = brks, #rxow_titles=row_titles, - #cols = col2, + cols = col2, fileout = outfile, bar_label_digits = 1) } @@ -198,7 +202,7 @@ plot_ensemble_mean <- function(data_cube, recipe, outdir) { dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) } ensemble_mean <- Reorder(ensemble_mean, c("time", "longitude", "latitude")) - ## TODO: Redefine column colors? 
+ ## TODO: Redefine column colors, possibly depending on variable cols <- c("#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") ## TODO: Define name of output file -- GitLab From d641f1363663b96f748e4f2c5a6fa808e6b47268 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 6 Sep 2022 11:28:10 +0200 Subject: [PATCH 10/81] Improve color palettes (1) --- modules/Visualization/Visualization.R | 33 ++++++++++----------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 57713fdc..bca64533 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -50,12 +50,8 @@ plot_enscorr <- function(skill_metrics, data_cube, recipe, outdir) { longitude <- data_cube$lon[1:length(data_cube$lon)] # Define plot characteristics -# col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", -# "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", -# "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", -# "#C80F1E", "#A50519") brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "BlueRed2") + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") @@ -95,16 +91,12 @@ plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir, longitude <- data_cube$lon[1:length(data_cube$lon)] # Define plot characteristics -# col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", -# "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", -# "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", -# "#C80F1E", "#A50519") brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "BlueRed2") + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) color <- 
colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") - skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss") + skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss") for (name in skill_scores) { if (name %in% names(skill_metrics)) { @@ -150,13 +142,8 @@ plot_scores <- function(skill_metrics, data_cube, recipe, outdir) { longitude <- data_cube$lon[1:length(data_cube$lon)] # Define plot characteristics - ## TODO: Change color palette -# col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", -# "#64AAFF", "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", -# "#FFBF87", "#FFA173", "#FF7055", "#FE6346", "#F7403B", "#E92D36", -# "#C80F1E", "#A50519") brks <- seq(0, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "BlueRed2", rev = T) + col2 <- grDevices::hcl.colors(length(brks) - 1, "Reds") color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") @@ -203,8 +190,11 @@ plot_ensemble_mean <- function(data_cube, recipe, outdir) { } ensemble_mean <- Reorder(ensemble_mean, c("time", "longitude", "latitude")) ## TODO: Redefine column colors, possibly depending on variable - cols <- c("#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", "#FF7055", "#FE6346", - "#F7403B", "#E92D36", "#C80F1E", "#A50519") + brks <- pretty(range(ensemble_mean, na.rm = T), n = 10, min.n = 8) + col2 <- grDevices::hcl.colors(length(brks) - 1, "YlOrRd", rev = T) + color <- colorRampPalette(col2)(length(brks) - 1) + options(bitmapType = "cairo") + ## TODO: Define name of output file outfile <- paste0(outdir, "forecast_ensemble_mean.png") toptitle <- paste0("Ensemble Mean - ", data_cube$Variable$varName) @@ -217,7 +207,8 @@ plot_ensemble_mean <- function(data_cube, recipe, outdir) { filled.continents = F, toptitle = toptitle, titles = titles, - cols = cols, + cols = col2, + brks = brks, fileout = outfile, - bar_label_digits=2) + bar_label_digits=4) } -- GitLab From 
610ef97b99b1f101e5f1b4fd91194e4f6cc26b93 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 7 Sep 2022 14:22:57 +0200 Subject: [PATCH 11/81] Add TODO related to forecast tercile probabilities --- modules/Skill/s2s.probs.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R index a8cab57c..916faa01 100644 --- a/modules/Skill/s2s.probs.R +++ b/modules/Skill/s2s.probs.R @@ -4,6 +4,11 @@ Compute_probs <- function(data, thresholds, ncores=1, quantile_dims=c('syear', 'ensemble'), probs_dims=list('ensemble', 'bin'), split_factor=1, na.rm=FALSE) { + + ## TODO: Adapt to the case where the forecast probability bins need + ## to be computed. The quantiles should be the hcst quantiles, and then + ## c2p() can be used to compute fcst probabilities for most likely terciles + ## map. quantiles <- Apply(data, quantile_dims, -- GitLab From 3663764fd6b527a3273819b4c2f5c737dcc52e94 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 7 Sep 2022 15:10:54 +0200 Subject: [PATCH 12/81] Add proper system and reference names to archive --- conf/archive.yml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 63abfb0a..ef76fbad 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -3,7 +3,8 @@ archive: src: "/esarchive/" System: - system5c3s: + system5c3s: + name: "ECMWF SEAS5" src: "exp/ecmwf/system5c3s/" daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/", @@ -17,6 +18,7 @@ archive: calendar: "proleptic_gregorian" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" system7c3s: + name: "Méteo-France System 7" src: "exp/meteofrance/system7c3s/" monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", "prlr":"_f24h/", "sfcWind": "_f6h/", @@ -27,6 +29,7 @@ archive: calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" system21_m1: + 
name: "DWD GCFS 2.1" src: "exp/dwd/system21_m1/" monthly_mean: {"tas":"_f6h/", "prlr":"_f24h", "g500":"_f12h/", "sfcWind":"_f6h/", @@ -37,6 +40,7 @@ archive: calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system21_m1.txt" system35c3s: + name: "CMCC-SPS3.5" src: "exp/cmcc/system35c3s/" monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", "prlr":"_f24h/", "sfcWind": "_f6h/", @@ -47,6 +51,7 @@ archive: calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system35c3s.txt" system2c3s: + name: "JMA System 2" src: "exp/jma/system2c3s/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", "tasmax":"_f6h/", "tasmin":"_f6h/"} @@ -56,6 +61,7 @@ archive: calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system2c3s.txt" eccc1: + name: "ECCC CanCM4i" src: "exp/eccc/eccc1/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", "tasmax":"_f6h/", "tasmin":"_f6h/"} @@ -65,6 +71,7 @@ archive: calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_eccc1.txt" glosea6_system600-c3s: + name: "UKMO GloSea 6 6.0" src: "exp/ukmo/glosea6_system600-c3s/" monthly_mean: {"tas":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/", "prlr":"_f24h/"} @@ -73,8 +80,8 @@ archive: hcst: 28 calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_ukmo600.txt" - ncep-cfsv2: + name: "NCEP CFSv2" src: "exp/ncep/cfs-v2/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", "tasmax":"_f6h/", "tasmin":"_f6h/"} @@ -84,7 +91,8 @@ archive: calendar: "gregorian" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" Reference: - era5: + era5: + name: "ERA5" src: "recon/ecmwf/era5/" daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", @@ -103,6 +111,7 @@ archive: calendar: "standard" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: + name: "ERA5-Land" src: "recon/ecmwf/era5land/" daily_mean: {"tas":"_f1h/", "rsds":"_f1h/", "prlr":"_f1h/", 
"sfcWind":"_f1h/"} @@ -111,7 +120,8 @@ archive: "sfcWind":"_f1h/", "rsds":"_f1h/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" - uerra: + uerra: + name: "ECMWF UERRA" src: "recon/ecmwf/uerra_mescan/" daily_mean: {"tas":"_f6h/"} monthly_mean: {"tas":"_f6h/"} -- GitLab From 090d57bb106cd08574d6f3c8b0fd7496a6b17dcf Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 8 Sep 2022 16:40:36 +0200 Subject: [PATCH 13/81] Add institution names to archive --- conf/archive.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/conf/archive.yml b/conf/archive.yml index ef76fbad..9d994ee5 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -5,6 +5,7 @@ archive: System: system5c3s: name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ecmwf/system5c3s/" daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/", @@ -19,6 +20,7 @@ archive: reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" system7c3s: name: "Méteo-France System 7" + institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/meteofrance/system7c3s/" monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", "prlr":"_f24h/", "sfcWind": "_f6h/", @@ -30,6 +32,7 @@ archive: reference_grid: "conf/grid_description/griddes_system7c3s.txt" system21_m1: name: "DWD GCFS 2.1" + institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/dwd/system21_m1/" monthly_mean: {"tas":"_f6h/", "prlr":"_f24h", "g500":"_f12h/", "sfcWind":"_f6h/", @@ -41,6 +44,7 @@ archive: reference_grid: "conf/grid_description/griddes_system21_m1.txt" system35c3s: name: "CMCC-SPS3.5" + institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/cmcc/system35c3s/" monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", "prlr":"_f24h/", "sfcWind": "_f6h/", @@ -52,6 +56,7 @@ archive: reference_grid: 
"conf/grid_description/griddes_system35c3s.txt" system2c3s: name: "JMA System 2" + institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/jma/system2c3s/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", "tasmax":"_f6h/", "tasmin":"_f6h/"} @@ -62,6 +67,7 @@ archive: reference_grid: "conf/grid_description/griddes_system2c3s.txt" eccc1: name: "ECCC CanCM4i" + institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/eccc/eccc1/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", "tasmax":"_f6h/", "tasmin":"_f6h/"} @@ -72,6 +78,7 @@ archive: reference_grid: "conf/grid_description/griddes_eccc1.txt" glosea6_system600-c3s: name: "UKMO GloSea 6 6.0" + institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ukmo/glosea6_system600-c3s/" monthly_mean: {"tas":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/", "prlr":"_f24h/"} @@ -82,6 +89,7 @@ archive: reference_grid: "conf/grid_description/griddes_ukmo600.txt" ncep-cfsv2: name: "NCEP CFSv2" + institution: "NOAA NCEP" #? 
src: "exp/ncep/cfs-v2/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", "tasmax":"_f6h/", "tasmin":"_f6h/"} @@ -93,6 +101,7 @@ archive: Reference: era5: name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", @@ -112,6 +121,7 @@ archive: reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: name: "ERA5-Land" + institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5land/" daily_mean: {"tas":"_f1h/", "rsds":"_f1h/", "prlr":"_f1h/", "sfcWind":"_f1h/"} @@ -122,6 +132,7 @@ archive: reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" uerra: name: "ECMWF UERRA" + institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/uerra_mescan/" daily_mean: {"tas":"_f6h/"} monthly_mean: {"tas":"_f6h/"} -- GitLab From bf44d2885e0385bb54765c56f3cfcf49afe640a6 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 8 Sep 2022 17:12:38 +0200 Subject: [PATCH 14/81] Add archive as parameter to get system name --- modules/Visualization/Visualization.R | 61 ++++++++++++++------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index bca64533..d0da585d 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -3,10 +3,13 @@ library(grDevices) ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Get variable and system/obs names from dictionary +## TODO: Adapt to multi-model case + plot_data <- function(data, + recipe, + archive, calibrated_data = NULL, skill_metrics = NULL, - recipe, significance = F) { # Try to produce and save several basic plots. 
@@ -20,34 +23,32 @@ plot_data <- function(data, outdir <- paste0(get_dir(recipe), "/plots/") dir.create(outdir, showWarnings = FALSE, recursive = TRUE) - ## TODO: Add warning if parameter significance is included but skill:metrics - ## is null - if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && (is.null(data$fcst))) { stop("The Visualization module has been called, but there is no data ", "that can be plotted.") } if (!is.null(skill_metrics)) { - plot_enscorr(skill_metrics, data$hcst, recipe, outdir) - plot_skill_scores(skill_metrics, data$hcst, recipe, outdir, significance) - plot_scores(skill_metrics, data$hcst, recipe, outdir) + plot_enscorr(skill_metrics, data$hcst, recipe, archive, outdir) + plot_skill_scores(skill_metrics, data$hcst, recipe, archive, outdir, significance) + plot_scores(skill_metrics, data$hcst, recipe, archive, outdir) } if (!is.null(calibrated_data$fcst)) { - plot_ensemble_mean(calibrated_data$fcst, recipe, outdir) + plot_ensemble_mean(calibrated_data$fcst, recipe, archive, outdir) } else if (!is.null(data$fcst)) { warning("Only the uncalibrated forecast was provided. Using this data ", "to plot the forecast ensemble mean.") - plot_ensemble_mean(data$fcst, recipe, outdir) + plot_ensemble_mean(data$fcst, recipe, archive, outdir) } print("##### PLOTS SAVED TO OUTPUT DIRECTORY #####") } -plot_enscorr <- function(skill_metrics, data_cube, recipe, outdir) { +plot_enscorr <- function(skill_metrics, data_cube, recipe, archive, outdir) { ## TODO: Is this function even necessary...? 
latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name # Define plot characteristics brks <- seq(-1, 1, by = 0.1) @@ -64,7 +65,8 @@ plot_enscorr <- function(skill_metrics, data_cube, recipe, outdir) { outfile <- paste0(outdir, name, ".png") ## TODO: Put plot info in the titles (startdate, month) toptitle <- paste0("Ensemble Mean Correlation - ", - data_cube$Variable$varName) + data_cube$Variable$varName, " - ", + system_name) months <- unique(lubridate::month(data$hcst$Dates$start, label = T, abb = F)) titles <- as.vector(months) @@ -83,13 +85,13 @@ plot_enscorr <- function(skill_metrics, data_cube, recipe, outdir) { } } -plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir, +plot_skill_scores <- function(skill_metrics, data_cube, recipe, archive, outdir, significance = F) { - ## TODO: Define output file name latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] - + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + # Define plot characteristics brks <- seq(-1, 1, by = 0.1) col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) @@ -102,7 +104,6 @@ plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir, if (name %in% names(skill_metrics)) { skill <- skill_metrics[[name]] skill <- Reorder(skill, c("time", "longitude", "latitude")) - ## TODO: Different filename for with vs without significance? 
if (significance) { significance_name <- paste0(name, "_significance") skill_significance <- skill_metrics[[significance_name]] @@ -112,10 +113,11 @@ plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir, } else { skill_significance <- NULL } - ## TODO: Define name of output file + # Define output file name and titles outfile <- paste0(outdir, name, ".png") ## TODO: Put plot info in the titles (startdate, month) - toptitle <- paste0(toupper(name), " - ", data_cube$Variable$varName) + toptitle <- paste0(toupper(name), " - ", data_cube$Variable$varName, + " - ", system_name) months <- unique(lubridate::month(data$hcst$Dates$start, label = T, abb = F)) titles <- as.vector(months) @@ -135,11 +137,11 @@ plot_skill_scores <- function(skill_metrics, data_cube, recipe, outdir, } } -plot_scores <- function(skill_metrics, data_cube, recipe, outdir) { +plot_scores <- function(skill_metrics, data_cube, recipe, archive, outdir) { - ## TODO: Define output file name latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name # Define plot characteristics brks <- seq(0, 1, by = 0.1) @@ -153,11 +155,10 @@ plot_scores <- function(skill_metrics, data_cube, recipe, outdir) { if (name %in% names(skill_metrics)) { skill <- skill_metrics[[name]] skill <- Reorder(skill, c("time", "longitude", "latitude")) - ## TODO: Different filename for with vs without significance? 
- ## TODO: Define name of output file + # Define name of output file and titles outfile <- paste0(outdir, name, ".png") - ## TODO: Put plot info in the titles (startdate, month) - toptitle <- paste0(toupper(name), " - ", data_cube$Variable$varName) + toptitle <- paste0(toupper(name), " - ", data_cube$Variable$varName, + " - ", system_name) months <- unique(lubridate::month(data$hcst$Dates$start, label = T, abb = F)) titles <- as.vector(months) @@ -176,14 +177,15 @@ plot_scores <- function(skill_metrics, data_cube, recipe, outdir) { } } -plot_ensemble_mean <- function(data_cube, recipe, outdir) { +plot_ensemble_mean <- function(data_cube, recipe, archive, outdir) { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + # Compute ensemble mean - ensemble_mean <- Apply(data_cube$data, 'ensemble', mean, na.rm=T)[[1]] - # Drop extra dims, add time dim if missing - ## is readding 'time' necessary? 
+ ensemble_mean <- s2dv::MeanDims(data_cube$data, 'ensemble') + # Drop extra dims, add time dim if missing: ensemble_mean <- drop(ensemble_mean) if (!("time" %in% names(dim(ensemble_mean)))) { dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) @@ -195,9 +197,10 @@ plot_ensemble_mean <- function(data_cube, recipe, outdir) { color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") - ## TODO: Define name of output file + # Define name of output file and titles outfile <- paste0(outdir, "forecast_ensemble_mean.png") - toptitle <- paste0("Ensemble Mean - ", data_cube$Variable$varName) + toptitle <- paste0("Ensemble Mean - ", data_cube$Variable$varName, " - ", + system_name) months <- unique(lubridate::month(data$hcst$Dates$start, label = T, abb = F)) titles <- as.vector(months) -- GitLab From 05ad8dcaf9973145dcea2d5919ec30ae5951e291 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 9 Sep 2022 09:36:06 +0200 Subject: [PATCH 15/81] Rearrange order of parameters in plotting functions --- modules/Visualization/Visualization.R | 33 +++++++++++++++------------ 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index d0da585d..af914490 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -5,45 +5,48 @@ library(grDevices) ## TODO: Get variable and system/obs names from dictionary ## TODO: Adapt to multi-model case -plot_data <- function(data, - recipe, +plot_data <- function(recipe, archive, + data, calibrated_data = NULL, skill_metrics = NULL, significance = F) { # Try to produce and save several basic plots. 
- # data: list containing the hcst, obs and fcst (optional) s2dv_cube objects - # calibrated_data: list containing the calibrated hcst and fcst (optional) + # recipe: the auto-s2s recipe as read by read_yaml() + # archive: the auto-s2s archive as read by read_yaml() + # data: list containing the hcst, obs and (optional) fcst s2dv_cube objects + # calibrated_data: list containing the calibrated hcst and (optional) fcst # s2dv_cube objects # skill_metrics: list of arrays containing the computed skill metrics - # recipe: the auto-s2s recipe as read by read_yaml() # significance: Bool. Whether to include significance dots where applicable outdir <- paste0(get_dir(recipe), "/plots/") dir.create(outdir, showWarnings = FALSE, recursive = TRUE) - if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && (is.null(data$fcst))) { + if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && + (is.null(data$fcst))) { stop("The Visualization module has been called, but there is no data ", "that can be plotted.") } if (!is.null(skill_metrics)) { - plot_enscorr(skill_metrics, data$hcst, recipe, archive, outdir) - plot_skill_scores(skill_metrics, data$hcst, recipe, archive, outdir, significance) - plot_scores(skill_metrics, data$hcst, recipe, archive, outdir) + plot_enscorr(recipe, archive, data$hcst, skill_metrics, outdir) + plot_skill_scores(recipe, archive, data$hcst, skill_metrics, outdir, + significance) + plot_scores(recipe, archive, data$hcst, skill_metrics, outdir) } if (!is.null(calibrated_data$fcst)) { - plot_ensemble_mean(calibrated_data$fcst, recipe, archive, outdir) + plot_ensemble_mean(recipe, archive, calibrated_data$fcst, outdir) } else if (!is.null(data$fcst)) { warning("Only the uncalibrated forecast was provided. 
Using this data ", "to plot the forecast ensemble mean.") - plot_ensemble_mean(data$fcst, recipe, archive, outdir) + plot_ensemble_mean(recipe, archive, data$fcst, outdir) } print("##### PLOTS SAVED TO OUTPUT DIRECTORY #####") } -plot_enscorr <- function(skill_metrics, data_cube, recipe, archive, outdir) { +plot_enscorr <- function(recipe, archive, data_cube, skill_metrics, outdir) { ## TODO: Is this function even necessary...? latitude <- data_cube$lat[1:length(data_cube$lat)] @@ -85,7 +88,7 @@ plot_enscorr <- function(skill_metrics, data_cube, recipe, archive, outdir) { } } -plot_skill_scores <- function(skill_metrics, data_cube, recipe, archive, outdir, +plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, significance = F) { latitude <- data_cube$lat[1:length(data_cube$lat)] @@ -137,7 +140,7 @@ plot_skill_scores <- function(skill_metrics, data_cube, recipe, archive, outdir, } } -plot_scores <- function(skill_metrics, data_cube, recipe, archive, outdir) { +plot_scores <- function(recipe, archive, data_cube, skill_metrics, outdir) { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] @@ -177,7 +180,7 @@ plot_scores <- function(skill_metrics, data_cube, recipe, archive, outdir) { } } -plot_ensemble_mean <- function(data_cube, recipe, archive, outdir) { +plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] -- GitLab From e256f8c5479ef6c7f61cf53ba67eb3beb629a2f7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 9 Sep 2022 10:26:52 +0200 Subject: [PATCH 16/81] Changes to color palettes --- modules/Visualization/Visualization.R | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index af914490..3884b5a4 100644 --- 
a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -56,7 +56,7 @@ plot_enscorr <- function(recipe, archive, data_cube, skill_metrics, outdir) { # Define plot characteristics brks <- seq(-1, 1, by = 0.1) col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - color <- colorRampPalette(col2)(length(brks) - 1) + # color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") enscorr_names <- c("enscorr", "enscorr_specs") @@ -98,7 +98,7 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, # Define plot characteristics brks <- seq(-1, 1, by = 0.1) col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - color <- colorRampPalette(col2)(length(brks) - 1) + # color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss") @@ -149,7 +149,7 @@ plot_scores <- function(recipe, archive, data_cube, skill_metrics, outdir) { # Define plot characteristics brks <- seq(0, 1, by = 0.1) col2 <- grDevices::hcl.colors(length(brks) - 1, "Reds") - color <- colorRampPalette(col2)(length(brks) - 1) + # color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") scores <- c("rps", "frps", "crps") @@ -185,6 +185,7 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + variable <- recipe$Analysis$Variables$name # Compute ensemble mean ensemble_mean <- s2dv::MeanDims(data_cube$data, 'ensemble') @@ -195,15 +196,22 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { } ensemble_mean <- Reorder(ensemble_mean, c("time", "longitude", "latitude")) ## TODO: Redefine column colors, possibly depending on variable + if (variable == 'prlr') { + palette = "BrBG" + rev = F + 
} else { + palette = "RdBu" + rev = T + } + brks <- pretty(range(ensemble_mean, na.rm = T), n = 10, min.n = 8) - col2 <- grDevices::hcl.colors(length(brks) - 1, "YlOrRd", rev = T) - color <- colorRampPalette(col2)(length(brks) - 1) + col2 <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) + # color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") # Define name of output file and titles outfile <- paste0(outdir, "forecast_ensemble_mean.png") - toptitle <- paste0("Ensemble Mean - ", data_cube$Variable$varName, " - ", - system_name) + toptitle <- paste0("Ensemble Mean - ", variable, " - ", system_name) months <- unique(lubridate::month(data$hcst$Dates$start, label = T, abb = F)) titles <- as.vector(months) -- GitLab From 2feda5ce3307350e3c5c2883d0ef7039ecc6df46 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 9 Sep 2022 10:40:40 +0200 Subject: [PATCH 17/81] Fix order of plot_data() arguments --- modules/test_victoria.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/test_victoria.R b/modules/test_victoria.R index b9aa1cc7..6075874f 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -20,4 +20,4 @@ probabilities <- compute_probabilities(calibrated_data$hcst, recipe) # Export all data to netCDF save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) # Plot data -plot_data(data, calibrated_data, skill_metrics, recipe, significance = T) +plot_data(recipe, archive, data, calibrated_data, skill_metrics, significance = T) -- GitLab From fb810f23761977e98ffe7b4ec330c90fa7f7d291 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 13 Sep 2022 10:28:43 +0200 Subject: [PATCH 18/81] Separate compute_quants() and compute_probs(), add most likely tercile plot (attempt) --- modules/Skill/Skill.R | 19 +++++---- modules/Skill/s2s.probs.R | 27 ++++++++----- modules/Visualization/Visualization.R | 58 +++++++++++++++++++++++++-- 3 files changed, 81 
insertions(+), 23 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index d6b959c8..7dc52235 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -195,27 +195,30 @@ compute_probabilities <- function(data, recipe) { } else { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { thresholds <- sapply(element, function (x) eval(parse(text = x))) - probs <- Compute_probs(data$data, thresholds, - ncores = ncores, - na.rm = na.rm) - for (i in seq(1:dim(probs$quantiles)['bin'][[1]])) { + quants <- compute_quants(data$data, thresholds, + ncores = ncores, + na.rm = na.rm) + probs <- compute_probs(data$data, quants, + ncores = ncores, + na.rm = na.rm) + for (i in seq(1:dim(quants)['bin'][[1]])) { named_quantiles <- append(named_quantiles, - list(ClimProjDiags::Subset(probs$quantiles, + list(ClimProjDiags::Subset(quants, 'bin', i))) names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", as.integer(thresholds[i]*100)) } - for (i in seq(1:dim(probs$probs)['bin'][[1]])) { + for (i in seq(1:dim(probs)['bin'][[1]])) { if (i == 1) { name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) - } else if (i == dim(probs$probs)['bin'][[1]]) { + } else if (i == dim(probs)['bin'][[1]]) { name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) } else { name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", as.integer(thresholds[i]*100)) } named_probs <- append(named_probs, - list(ClimProjDiags::Subset(probs$probs, + list(ClimProjDiags::Subset(probs, 'bin', i))) names(named_probs)[length(named_probs)] <- name_i } diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R index 916faa01..672110cc 100644 --- a/modules/Skill/s2s.probs.R +++ b/modules/Skill/s2s.probs.R @@ -1,9 +1,9 @@ -Compute_probs <- function(data, thresholds, - ncores=1, quantile_dims=c('syear', 'ensemble'), - probs_dims=list('ensemble', 'bin'), - split_factor=1, na.rm=FALSE) { +compute_quants <- function(data, thresholds, + 
ncores=1, quantile_dims=c('syear', 'ensemble'), + probs_dims=list('ensemble', 'bin'), + split_factor=1, na.rm=FALSE) { ## TODO: Adapt to the case where the forecast probability bins need ## to be computed. The quantiles should be the hcst quantiles, and then @@ -19,20 +19,28 @@ Compute_probs <- function(data, thresholds, na.rm=na.rm, split_factor=split_factor)[[1]] + return(quantiles) +} + +compute_probs <- function(data, quantiles, + ncores=1, quantile_dims=c('syear', 'ensemble'), + probs_dims=list('ensemble', 'bin'), + split_factor=1, na.rm=FALSE) { + if (na.rm == FALSE) { c2p <- function(x, t) { colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) } } else { c2p <- function(x, t) { - if (any(!is.na(x))){ + if (any(!is.na(x))) { colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) } else { c(NA, NA, NA) } } } - + probs <- Apply(data = list(x = data, t = quantiles), target_dims = probs_dims, c2p, @@ -40,8 +48,5 @@ Compute_probs <- function(data, thresholds, split_factor = split_factor, ncores = ncores)[[1]] - return(list(probs=probs, quantiles=quantiles)) - -} - - + return(probs) +} diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 3884b5a4..df5b1b44 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -70,7 +70,7 @@ plot_enscorr <- function(recipe, archive, data_cube, skill_metrics, outdir) { toptitle <- paste0("Ensemble Mean Correlation - ", data_cube$Variable$varName, " - ", system_name) - months <- unique(lubridate::month(data$hcst$Dates$start, + months <- unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) titles <- as.vector(months) # Plot @@ -121,7 +121,7 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, ## TODO: Put plot info in the titles (startdate, month) toptitle <- paste0(toupper(name), " - ", data_cube$Variable$varName, " - ", system_name) - months <- 
unique(lubridate::month(data$hcst$Dates$start, + months <- unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) titles <- as.vector(months) # Plot @@ -162,7 +162,7 @@ plot_scores <- function(recipe, archive, data_cube, skill_metrics, outdir) { outfile <- paste0(outdir, name, ".png") toptitle <- paste0(toupper(name), " - ", data_cube$Variable$varName, " - ", system_name) - months <- unique(lubridate::month(data$hcst$Dates$start, + months <- unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) titles <- as.vector(months) # Plot @@ -212,7 +212,7 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { # Define name of output file and titles outfile <- paste0(outdir, "forecast_ensemble_mean.png") toptitle <- paste0("Ensemble Mean - ", variable, " - ", system_name) - months <- unique(lubridate::month(data$hcst$Dates$start, + months <- unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) titles <- as.vector(months) # Plots @@ -226,3 +226,53 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { fileout = outfile, bar_label_digits=4) } + +plot_most_likely_terciles <- function(recipe, archive, + data_cube, + percentiles, + outdir) { + + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + variable <- recipe$Analysis$Variables$name + if (is.null(recipe$Analysis$remove_NAs)) { + recipe$Analysis$remove_NAs <- FALSE + } + if (is.null(recipe$Analysis$ncores)) { + recipe$Analysis$ncores <- 1 + } + + # Compute probability bins for the forecast + quantiles <- abind(percentiles$percentile_33, percentiles$percentile_66, + along = 0) + names(dim(quantiles)) <- c("bin", names(dim(percentiles$percentile_33))) + probs_fcst <- compute_probs(data_cube$data, quantiles, + ncores = recipe$Analysis$ncores, + na.rm = recipe$Analysis$remove_NAs) + + # Drop extra dims, add time dim if 
missing: + probs_fcst <- drop(probs_fcst) + if (!("time" %in% names(dim(probs_fcst)))) { + dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) + } + + probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) + # Define name of output file and titles + outfile <- paste0(outdir, "forecast_most_likely_tercile.png") + toptitle <- paste0("Most likely tercile - ", variable, " - ", system_name) + months <- unique(lubridate::month(data_cube$Dates$start, + label = T, abb = F)) + titles <- as.vector(months) + print(dim(probs_fcst)) + + # Plots + PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), + probs_fcst, longitude, latitude, + coast_width = 1.5, + legend_scale = 0.8, + toptitle = toptitle, + titles = titles, + fileout = outfile, + bar_label_digits=2) +} -- GitLab From 4e21e546d63d2de9d3064a2e536617591723b3ee Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 13 Sep 2022 11:14:26 +0200 Subject: [PATCH 19/81] Remove print() line, add call to plot_most_likely_terciles() --- modules/Visualization/Visualization.R | 8 +++++++- modules/test_victoria.R | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index df5b1b44..67c68916 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -10,6 +10,7 @@ plot_data <- function(recipe, data, calibrated_data = NULL, skill_metrics = NULL, + probabilities = NULL, significance = F) { # Try to produce and save several basic plots. 
@@ -42,6 +43,12 @@ plot_data <- function(recipe, "to plot the forecast ensemble mean.") plot_ensemble_mean(recipe, archive, data$fcst, outdir) } + + if ((!is.null(probabilities)) && (!is.null(calibrated_data$fcst))) { + plot_most_likely_terciles(recipe, archive, calibrated_data$fcst, + probabilities$percentiles, outdir) + } + print("##### PLOTS SAVED TO OUTPUT DIRECTORY #####") } @@ -264,7 +271,6 @@ plot_most_likely_terciles <- function(recipe, archive, months <- unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) titles <- as.vector(months) - print(dim(probs_fcst)) # Plots PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 6075874f..0e5740d1 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -20,4 +20,5 @@ probabilities <- compute_probabilities(calibrated_data$hcst, recipe) # Export all data to netCDF save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) # Plot data -plot_data(recipe, archive, data, calibrated_data, skill_metrics, significance = T) +plot_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities, + significance = T) -- GitLab From 0a5c376025e09a40768714d552723cf42a155f9a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 13 Sep 2022 13:11:47 +0200 Subject: [PATCH 20/81] Add comments, remove unnecessary function to plot enscorr --- modules/Visualization/Visualization.R | 76 ++++++++++----------------- 1 file changed, 28 insertions(+), 48 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 67c68916..628c2a91 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -30,71 +30,37 @@ plot_data <- function(recipe, stop("The Visualization module has been called, but there is no data ", "that can be plotted.") } + + # Plot skill metrics if (!is.null(skill_metrics)) { - 
plot_enscorr(recipe, archive, data$hcst, skill_metrics, outdir) plot_skill_scores(recipe, archive, data$hcst, skill_metrics, outdir, significance) plot_scores(recipe, archive, data$hcst, skill_metrics, outdir) } + + # Plot forecast ensemble mean if (!is.null(calibrated_data$fcst)) { plot_ensemble_mean(recipe, archive, calibrated_data$fcst, outdir) } else if (!is.null(data$fcst)) { warning("Only the uncalibrated forecast was provided. Using this data ", "to plot the forecast ensemble mean.") - plot_ensemble_mean(recipe, archive, data$fcst, outdir) + plot_ensemble_mean(recipe, archive, data$fcst, outdir, raw = T) } + # Plot Most Likely Terciles if ((!is.null(probabilities)) && (!is.null(calibrated_data$fcst))) { plot_most_likely_terciles(recipe, archive, calibrated_data$fcst, probabilities$percentiles, outdir) + } else if ((!is.null(probabilities)) && (!is.null(data$fcst))) { + warning("Only the uncalibrated forecast was provided. Using this data ", + "to plot the most likely terciles.") + plot_most_likely_terciles(recipe, archive, data$fcst, + probabilities$percentiles, outdir) } print("##### PLOTS SAVED TO OUTPUT DIRECTORY #####") } - -plot_enscorr <- function(recipe, archive, data_cube, skill_metrics, outdir) { - ## TODO: Is this function even necessary...? 
- - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - - # Define plot characteristics - brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - # color <- colorRampPalette(col2)(length(brks) - 1) - options(bitmapType = "cairo") - - enscorr_names <- c("enscorr", "enscorr_specs") - - for (name in enscorr_names) { - if (name %in% names(skill_metrics)) { - enscorr <- skill_metrics[[name]] - enscorr <- Reorder(enscorr, c("time", "longitude", "latitude")) - outfile <- paste0(outdir, name, ".png") - ## TODO: Put plot info in the titles (startdate, month) - toptitle <- paste0("Ensemble Mean Correlation - ", - data_cube$Variable$varName, " - ", - system_name) - months <- unique(lubridate::month(data_cube$Dates$start, - label = T, abb = F)) - titles <- as.vector(months) - # Plot - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - enscorr, longitude, latitude, - toptitle = toptitle, - titles = titles, - filled.continents=F, - brks = brks, - #rxow_titles=row_titles, - cols = col2, - fileout = outfile, - bar_label_digits = 1) - } - } -} - plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, significance = F) { @@ -108,13 +74,15 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, # color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") - skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss") - + skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", + "enscorr") + ## TODO: Add "_specs" metrics for (name in skill_scores) { if (name %in% names(skill_metrics)) { skill <- skill_metrics[[name]] skill <- Reorder(skill, c("time", "longitude", "latitude")) - if (significance) { + significance_name <- paste0(name, "_significance") + if ((significance) && (significance_name %in% 
names(skill_metrics))) { significance_name <- paste0(name, "_significance") skill_significance <- skill_metrics[[significance_name]] skill_significance <- Reorder(skill_significance, c("time", @@ -273,6 +241,18 @@ plot_most_likely_terciles <- function(recipe, archive, titles <- as.vector(months) # Plots +# for (month in seq(1:dim(probs_fcst)[1])) { +# PlotMostLikelyQuantileMap(probs_fcst[month, , , ], longitude, latitude, +# coast_width = 1.5, +# legend_scale = 0.8, +# toptitle = paste0(toptitle, " - ", +# titles[month]), +# fileout = paste0(outfile, "-", titles[month], +# ".png"), +# bar_label_digits=2) +# } +# ERROR: Error in plot.new() : figure margins too large + PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), probs_fcst, longitude, latitude, coast_width = 1.5, -- GitLab From c863679930541c07558bb6f530a76db2b8fb889f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 15 Sep 2022 10:48:27 +0200 Subject: [PATCH 21/81] Separate most likely tercile plots --- modules/Visualization/Visualization.R | 50 +++++++++++++++------------ 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 628c2a91..b805cb79 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,5 +1,6 @@ library(RColorBrewer) library(grDevices) +library(ragg) ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Get variable and system/obs names from dictionary @@ -71,7 +72,6 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, # Define plot characteristics brks <- seq(-1, 1, by = 0.1) col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - # color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", @@ -81,6 +81,8 @@ plot_skill_scores <- function(recipe, archive, 
data_cube, skill_metrics, outdir, if (name %in% names(skill_metrics)) { skill <- skill_metrics[[name]] skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it to plot it as well. significance_name <- paste0(name, "_significance") if ((significance) && (significance_name %in% names(skill_metrics))) { significance_name <- paste0(name, "_significance") @@ -157,8 +159,8 @@ plot_scores <- function(recipe, archive, data_cube, skill_metrics, outdir) { plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] + latitude <- data_cube$lat + longitude <- data_cube$lon system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name variable <- recipe$Analysis$Variables$name @@ -234,31 +236,33 @@ plot_most_likely_terciles <- function(recipe, archive, probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) # Define name of output file and titles - outfile <- paste0(outdir, "forecast_most_likely_tercile.png") + outfile <- paste0(outdir, "forecast_most_likely_tercile") toptitle <- paste0("Most likely tercile - ", variable, " - ", system_name) months <- unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) titles <- as.vector(months) # Plots -# for (month in seq(1:dim(probs_fcst)[1])) { -# PlotMostLikelyQuantileMap(probs_fcst[month, , , ], longitude, latitude, -# coast_width = 1.5, -# legend_scale = 0.8, -# toptitle = paste0(toptitle, " - ", -# titles[month]), -# fileout = paste0(outfile, "-", titles[month], -# ".png"), -# bar_label_digits=2) -# } -# ERROR: Error in plot.new() : figure margins too large - PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), - probs_fcst, longitude, latitude, - coast_width = 1.5, - legend_scale = 0.8, - toptitle = toptitle, - titles = titles, - fileout = outfile, - 
bar_label_digits=2) + for (month in seq(1:dim(probs_fcst)[1])) { + outfile2 <- paste0(outfile, "-", tolower(titles[month]), ".png") + PlotMostLikelyQuantileMap(probs_fcst[month, , , ], longitude, latitude, + coast_width = 1.5, + legend_scale = 0.8, + toptitle = paste0(toptitle, " - ", + titles[month]), + width = 10, height = 8, + fileout = outfile2, + bar_label_digits=2) + } +## PlotLayout cannot work well with PlotMostLikelyQuantileMap currently. +## The issue might be fixed in the future. +# PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), +# probs_fcst, longitude, latitude, +# coast_width = 1.5, +# legend_scale = 0.8, +# toptitle = toptitle, +# titles = titles, +# fileout = outfile, +# bar_label_digits=2) } -- GitLab From 6421c624055d9e98f86000461c912e0fe7f26427 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 15 Sep 2022 11:40:06 +0200 Subject: [PATCH 22/81] Add units to forecast ensemble mean plto --- modules/Visualization/Visualization.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index b805cb79..cc2bdbc8 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -68,6 +68,9 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + init_month <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) # Define plot characteristics brks <- seq(-1, 1, by = 0.1) @@ -107,7 +110,7 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, dots = skill_significance, toptitle = toptitle, titles = titles, - filled.continents=F, + filled.continents=F, brks = brks, 
#rxow_titles=row_titles, cols = col2, @@ -163,6 +166,7 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { longitude <- data_cube$lon system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name variable <- recipe$Analysis$Variables$name + units <- attr(data_cube$Variable, "variable")$units # Compute ensemble mean ensemble_mean <- s2dv::MeanDims(data_cube$data, 'ensemble') @@ -198,6 +202,7 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { filled.continents = F, toptitle = toptitle, titles = titles, + units = units, cols = col2, brks = brks, fileout = outfile, @@ -221,6 +226,7 @@ plot_most_likely_terciles <- function(recipe, archive, } # Compute probability bins for the forecast + ## TODO: Add check in case percentile_33 and percentile_66 don't exist quantiles <- abind(percentiles$percentile_33, percentiles$percentile_66, along = 0) names(dim(quantiles)) <- c("bin", names(dim(percentiles$percentile_33))) -- GitLab From 27b5e781722ae6625980578365f7095a53a30c00 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 15 Sep 2022 15:14:54 +0200 Subject: [PATCH 23/81] Add fcst year to most likely tercile plot, add new prlr recipe --- .../recipe_system7c3s-prlr.yml | 46 +++++++++++++++++++ modules/Visualization/Visualization.R | 5 +- 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml new file mode 100644 index 00000000..197c109c --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml @@ -0,0 +1,46 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: prlr + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 1 + remove_NAs: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index cc2bdbc8..dac05c4c 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -246,6 +246,8 @@ plot_most_likely_terciles <- function(recipe, archive, toptitle <- paste0("Most likely tercile - ", variable, " - ", system_name) months <- unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) + ## TODO: Ensure this works for daily and sub-daily cases + years <- lubridate::year(data_cube$Dates$start) titles <- as.vector(months) # Plots @@ -256,7 +258,8 @@ plot_most_likely_terciles <- function(recipe, archive, coast_width = 1.5, legend_scale = 0.8, toptitle = paste0(toptitle, " - ", - titles[month]), + titles[month], " ", + years[month]), width = 10, height = 8, fileout = outfile2, bar_label_digits=2) -- GitLab From 57349655171cf3c519be26936986cdeafe462870 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Sep 2022 10:39:19 +0200 Subject: [PATCH 24/81] Add start date and other relevant info to plot titles, improve aesthetics --- modules/Visualization/Visualization.R | 49 
++++++++++++++++++++------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index dac05c4c..7482eead 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -70,7 +70,9 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - init_month <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + init_month <- lubridate::month(as.numeric(substr(recipe$Analysis$Time$sdate, + start = 1, stop = 2)), + label = T, abb = T) # Define plot characteristics brks <- seq(-1, 1, by = 0.1) @@ -82,6 +84,15 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, ## TODO: Add "_specs" metrics for (name in skill_scores) { if (name %in% names(skill_metrics)) { + # Define metric name to display in plot + if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss")) { + display_name <- toupper(name) + } else if (name == "mean_bias_ss") { + display_name <- "Mean Bias SS" + } else if (name == "enscorr") { + display_name <- "Ensemble Mean Correlation" + } + # Retrieve metric and reorder dimensions skill <- skill_metrics[[name]] skill <- Reorder(skill, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, @@ -98,9 +109,8 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, } # Define output file name and titles outfile <- paste0(outdir, name, ".png") - ## TODO: Put plot info in the titles (startdate, month) - toptitle <- paste0(toupper(name), " - ", data_cube$Variable$varName, - " - ", system_name) + toptitle <- paste(display_name, "-", data_cube$Variable$varName, + "-", system_name, "-", init_month, hcst_period) months <- 
unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) titles <- as.vector(months) @@ -108,7 +118,9 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, PlotLayout(PlotEquiMap, c('longitude', 'latitude'), skill, longitude, latitude, dots = skill_significance, + dot_symbol = 20, toptitle = toptitle, + title_scale = 0.6, titles = titles, filled.continents=F, brks = brks, @@ -125,7 +137,12 @@ plot_scores <- function(recipe, archive, data_cube, skill_metrics, outdir) { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - + hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + init_month <- lubridate::month(as.numeric(substr(recipe$Analysis$Time$sdate, + start = 1, stop = 2)), + label = T, abb = T) + # Define plot characteristics brks <- seq(0, 1, by = 0.1) col2 <- grDevices::hcl.colors(length(brks) - 1, "Reds") @@ -140,8 +157,8 @@ plot_scores <- function(recipe, archive, data_cube, skill_metrics, outdir) { skill <- Reorder(skill, c("time", "longitude", "latitude")) # Define name of output file and titles outfile <- paste0(outdir, name, ".png") - toptitle <- paste0(toupper(name), " - ", data_cube$Variable$varName, - " - ", system_name) + toptitle <- paste(toupper(name), "-", data_cube$Variable$varName, + "-", system_name, "-", init_month, hcst_period) months <- unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) titles <- as.vector(months) @@ -149,6 +166,7 @@ plot_scores <- function(recipe, archive, data_cube, skill_metrics, outdir) { PlotLayout(PlotEquiMap, c('longitude', 'latitude'), skill, longitude, latitude, toptitle = toptitle, + title_scale = 0.6, titles = titles, filled.continents=F, brks = brks, @@ -167,6 +185,8 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { system_name <- 
archive$System[[recipe$Analysis$Datasets$System$name]]$name variable <- recipe$Analysis$Variables$name units <- attr(data_cube$Variable, "variable")$units + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) # Compute ensemble mean ensemble_mean <- s2dv::MeanDims(data_cube$data, 'ensemble') @@ -192,7 +212,8 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { # Define name of output file and titles outfile <- paste0(outdir, "forecast_ensemble_mean.png") - toptitle <- paste0("Ensemble Mean - ", variable, " - ", system_name) + toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, + "- Initialization:", start_date) months <- unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) titles <- as.vector(months) @@ -201,6 +222,7 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { ensemble_mean, longitude, latitude, filled.continents = F, toptitle = toptitle, + title_scale = 0.6, titles = titles, units = units, cols = col2, @@ -218,6 +240,8 @@ plot_most_likely_terciles <- function(recipe, archive, longitude <- data_cube$lon[1:length(data_cube$lon)] system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name variable <- recipe$Analysis$Variables$name + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) if (is.null(recipe$Analysis$remove_NAs)) { recipe$Analysis$remove_NAs <- FALSE } @@ -243,7 +267,8 @@ plot_most_likely_terciles <- function(recipe, archive, probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) # Define name of output file and titles outfile <- paste0(outdir, "forecast_most_likely_tercile") - toptitle <- paste0("Most likely tercile - ", variable, " - ", system_name) + toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", + "Initialization:", start_date) months <- unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) ## TODO: Ensure this works for daily and 
sub-daily cases @@ -257,9 +282,9 @@ plot_most_likely_terciles <- function(recipe, archive, PlotMostLikelyQuantileMap(probs_fcst[month, , , ], longitude, latitude, coast_width = 1.5, legend_scale = 0.8, - toptitle = paste0(toptitle, " - ", - titles[month], " ", - years[month]), + toptitle = paste(titles[month], + years[month], + toptitle), width = 10, height = 8, fileout = outfile2, bar_label_digits=2) -- GitLab From 2d43ec711c4114155193bf93d102f07c6cb3c5f2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Sep 2022 12:35:16 +0200 Subject: [PATCH 25/81] Include specsVerification metrics --- modules/Visualization/Visualization.R | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 7482eead..8b5d4ded 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -80,16 +80,18 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, options(bitmapType = "cairo") skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", - "enscorr") + "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", + "enscorr_specs") ## TODO: Add "_specs" metrics for (name in skill_scores) { if (name %in% names(skill_metrics)) { # Define metric name to display in plot - if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss")) { - display_name <- toupper(name) + if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", + "rpss_specs", "bss90_specs", "bss10_specs")) { + display_name <- toupper(strsplit(name, "_")[[1]][1]) } else if (name == "mean_bias_ss") { display_name <- "Mean Bias SS" - } else if (name == "enscorr") { + } else if (name == "enscorr", "enscorr_specs") { display_name <- "Ensemble Mean Correlation" } # Retrieve metric and reorder dimensions @@ -149,7 +151,7 @@ plot_scores <- function(recipe, archive, data_cube, skill_metrics, outdir) { # color <- 
colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") - scores <- c("rps", "frps", "crps") + scores <- c("rps", "frps", "crps", "frps_specs") for (name in scores) { if (name %in% names(skill_metrics)) { @@ -157,7 +159,8 @@ plot_scores <- function(recipe, archive, data_cube, skill_metrics, outdir) { skill <- Reorder(skill, c("time", "longitude", "latitude")) # Define name of output file and titles outfile <- paste0(outdir, name, ".png") - toptitle <- paste(toupper(name), "-", data_cube$Variable$varName, + display_name <- toupper(strsplit(name, "_")[[1]][1]) + toptitle <- paste(display_name, "-", data_cube$Variable$varName, "-", system_name, "-", init_month, hcst_period) months <- unique(lubridate::month(data_cube$Dates$start, label = T, abb = F)) -- GitLab From 94520783143306c8e014fcbe78b2ac297d79181f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Sep 2022 12:46:46 +0200 Subject: [PATCH 26/81] Improve formatting --- modules/Loading/Loading.R | 59 ++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index aaf4d916..596bf2f2 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -83,16 +83,19 @@ load_datasets <- function(recipe_file) { # ----------- obs.path <- paste0(archive$src, - obs.dir, store.freq, "/$var$", - reference_descrip[[store.freq]][[variable]],"$var$_$file_date$.nc") + obs.dir, store.freq, "/$var$", + reference_descrip[[store.freq]][[variable]], + "$var$_$file_date$.nc") hcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], + "$var$_$file_date$.nc") fcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") + hcst.dir, store.freq, "/$var$", + 
exp_descrip[[store.freq]][[variable]], + "$var$_$file_date$.nc") # Define regrid parameters: #------------------------------------------------------------------- @@ -257,28 +260,28 @@ load_datasets <- function(recipe_file) { dim(dates) <- dim(dates_file) obs <- Start(dat = obs.path, - var = variable, - file_date = sort(unique(dates_file)), - time = dates, - time_var = 'time', - time_across = 'file_date', - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) } # Adds ensemble dim to obs (for consistency with hcst/fcst) -- GitLab From 0cb489f8479a404ac8c86dc99f92c9305f41d4c8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 19 Sep 
2022 11:25:24 +0200 Subject: [PATCH 27/81] Fix bug introduced in merge --- modules/Skill/Skill.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index ea524ae3..a927efdc 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -258,6 +258,8 @@ compute_probabilities <- function(data, recipe) { names(named_probs)[length(named_probs)] <- name_i } } + named_probs <- lapply(named_probs, function(x) {.drop_dims(x)}) + named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)}) } print("##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") return(list(probs=named_probs, percentiles=named_quantiles)) -- GitLab From 7c7b332812660f014962f0a6f535d957e671f401 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 19 Sep 2022 11:26:01 +0200 Subject: [PATCH 28/81] Fix bug: bad if statement --- modules/Visualization/Visualization.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 8b5d4ded..e46d7455 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -91,7 +91,7 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, display_name <- toupper(strsplit(name, "_")[[1]][1]) } else if (name == "mean_bias_ss") { display_name <- "Mean Bias SS" - } else if (name == "enscorr", "enscorr_specs") { + } else if (name %in% c("enscorr", "enscorr_specs")) { display_name <- "Ensemble Mean Correlation" } # Retrieve metric and reorder dimensions @@ -208,7 +208,7 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { rev = T } - brks <- pretty(range(ensemble_mean, na.rm = T), n = 10, min.n = 8) + brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) col2 <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) # color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") -- GitLab 
From 7db7e0b555810d954896286253b99097087dfb26 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 20 Sep 2022 10:35:20 +0200 Subject: [PATCH 29/81] Deal with accumulated variables, convert precipitation to mm/day --- conf/variable-dictionary.yml | 8 +++++++- modules/Loading/Loading.R | 34 +++++++++++++++++++++++----------- modules/test_victoria.R | 2 +- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index f535c4e1..33cd1a57 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -6,36 +6,42 @@ vars: units: "K" long_name: "Near-Surface Air Temperature" standard_name: "air_temperature" + accum: no # outname: "t2" tasmax: units: "K" long_name: "Maximum Near-Surface Air Temperature" standard_name: "air_temperature" + accum: no tasmin: units: "K" long_name: "Minimum Near-Surface Air Temperature" standard_name: "air_temperature" + accum: no sfcWind: units: "m s-1" long_name: "Near-Surface Wind Speed" standard_name: "wind_speed" + accum: no # outname: "wind" rsds: units: "W m-2" long_name: "Surface Downwelling Shortwave Radiation" standard_name: "surface_downwelling_shortwave_flux_in_air" positive: "down" + accum: yes # outname: "rswin" prlr: units: "mm" long_name: "Total precipitation" standard_name: "total_precipitation_flux" #? 
Not in CF + accum: yes # outname: "acprec" g500: units: "m2 s-2" long_name: "Geopotential" standard_name: "geopotential" - + accum: no # Coordinates coords: longitude: diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 4c4e2891..c3d522b3 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -305,7 +305,29 @@ load_datasets <- function(recipe_file) { } } - + # Remove negative values in accumulative variables + dictionary <- read_yaml("conf/variable-dictionary.yml") + if (dictionary$vars[[variable]]$accum) { + info(logger, " Accumulated variable: setting negative values to zero.") + obs$data[obs$data < 0] <- 0 + hcst$data[hcst$data < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[fcst$data < 0] <- 0 + } + } + # Convert precipitation to mm/day + ## TODO: Make a function? + if (variable == "prlr") { + info(logger, "Converting precipitation from mm/s to mm/day.") + obs$data <- obs$data*84000*1000 + attr(obs$Variable, "variable")$units <- "mm/day" + hcst$data <- hcst$data*84000*1000 + attr(hcst$Variable, "variable")$units <- "mm/day" + if (!is.null(fcst)) { + fcst$data <- fcst$data*84000*1000 + attr(hcst$Variable, "variable")$units <- "mm/day" + } + } # Print a summary of the loaded data for the user, for each object data_summary(hcst, store.freq) @@ -364,16 +386,6 @@ load_datasets <- function(recipe_file) { ############################################################################ ############################################################################ - ## TODO: we need to define accumulated vars - #filters negative values in accum vars - #if (accum){ - # obs$data[obs$data < 0 ] <- 0 - # hcst$data[hcst$data < 0 ] <- 0 - # if (!is.null(fcst)){ - # fcst$data[fcst$data < 0 ] <- 0 - # } - #} - return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 0e5740d1..b4543186 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -5,7 +5,7 @@ 
source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml" recipe <- read_yaml(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive -- GitLab From d0c3eba446edbb1abf8917fa26bd6d755b93f59a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 20 Sep 2022 10:37:00 +0200 Subject: [PATCH 30/81] Unify metric plotting functions (needs work) --- modules/Visualization/Visualization.R | 107 ++++++++++++++++---------- 1 file changed, 66 insertions(+), 41 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index e46d7455..53336afa 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -75,62 +75,86 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, label = T, abb = T) # Define plot characteristics - brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + # brks <- seq(-1, 1, by = 0.1) + # col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) options(bitmapType = "cairo") skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", "enscorr_specs") - ## TODO: Add "_specs" metrics - for (name in skill_scores) { + scores <- c("rps", "frps", "crps", "frps_specs") + + for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { + if (name %in% names(skill_metrics)) { - # Define metric name to display in plot + # Define plot characteristics and metric name to display in plot if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", "rpss_specs", "bss90_specs", "bss10_specs")) { display_name <- toupper(strsplit(name, "_")[[1]][1]) + skill <- skill_metrics[[name]] + brks <- seq(-1, 
1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) } else if (name == "mean_bias_ss") { - display_name <- "Mean Bias SS" + display_name <- "Mean Bias Skill Score" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) } else if (name %in% c("enscorr", "enscorr_specs")) { display_name <- "Ensemble Mean Correlation" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + } else if (name %in% scores) { + skill <- skill_metrics[[name]] + display_name <- toupper(strsplit(name, "_")[[1]][1]) + brks <- seq(0, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "Reds") + } else if (name == "enssprerr") { + skill <- skill_metrics[[name]] + display_name <- "Spread-to-Error Ratio" + brks <- pretty(0:max(skill, na.rm = T), n = 20, min.n = 10) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + } else if (name == "mean_bias") { + skill <- skill_metrics[[name]] + display_name <- "Mean Bias" + brks <- pretty(range(skill, na.rm = T), n = 20, min.n = 10) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) } - # Retrieve metric and reorder dimensions - skill <- skill_metrics[[name]] - skill <- Reorder(skill, c("time", "longitude", "latitude")) - # If the significance has been requested and the variable has it, - # retrieve it to plot it as well. + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. 
+ significance_name <- paste0(name, "_significance") + if ((significance) && (significance_name %in% names(skill_metrics))) { significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(skill_metrics))) { - significance_name <- paste0(name, "_significance") - skill_significance <- skill_metrics[[significance_name]] - skill_significance <- Reorder(skill_significance, c("time", - "longitude", - "latitude")) - } else { - skill_significance <- NULL - } - # Define output file name and titles - outfile <- paste0(outdir, name, ".png") - toptitle <- paste(display_name, "-", data_cube$Variable$varName, - "-", system_name, "-", init_month, hcst_period) - months <- unique(lubridate::month(data_cube$Dates$start, - label = T, abb = F)) - titles <- as.vector(months) - # Plot - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - skill, longitude, latitude, - dots = skill_significance, - dot_symbol = 20, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - filled.continents=F, - brks = brks, - #rxow_titles=row_titles, - cols = col2, - fileout = outfile, - bar_label_digits = 1) + skill_significance <- skill_metrics[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) + } else { + skill_significance <- NULL } + # Define output file name and titles + outfile <- paste0(outdir, name, ".png") + toptitle <- paste(display_name, "-", data_cube$Variable$varName, + "-", system_name, "-", init_month, hcst_period) + months <- unique(lubridate::month(data_cube$Dates$start, + label = T, abb = F)) + titles <- as.vector(months) + # Plot + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + skill, longitude, latitude, + dots = skill_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents=F, + brks = brks, + cols = col2, + fileout = outfile, + bar_label_digits = 1) + } } } @@ -148,6 +172,7 @@ plot_scores <- 
function(recipe, archive, data_cube, skill_metrics, outdir) { # Define plot characteristics brks <- seq(0, 1, by = 0.1) col2 <- grDevices::hcl.colors(length(brks) - 1, "Reds") + #brks <- pretty(range(skill, na.rm = T), n = 20, min.n = 10) # color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") -- GitLab From 88443bd2661b97b0f99a3c3035e4dabe4655f524 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 20 Sep 2022 11:08:31 +0200 Subject: [PATCH 31/81] Make compute_quants() always have na.rm = TRUE, provisionally --- modules/Skill/Skill.R | 3 +-- modules/Skill/s2s.probs.R | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index a927efdc..e8af103e 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -231,8 +231,7 @@ compute_probabilities <- function(data, recipe) { # Parse thresholds in recipe thresholds <- sapply(element, function (x) eval(parse(text = x))) quants <- compute_quants(data$data, thresholds, - ncores = ncores, - na.rm = na.rm) + ncores = ncores) probs <- compute_probs(data$data, quants, ncores = ncores, na.rm = na.rm) diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R index 43c5be04..c82e9697 100644 --- a/modules/Skill/s2s.probs.R +++ b/modules/Skill/s2s.probs.R @@ -3,7 +3,7 @@ compute_quants <- function(data, thresholds, ncores=1, quantile_dims=c('syear', 'ensemble'), probs_dims=list('ensemble', 'bin'), - split_factor=1, na.rm=FALSE) { + split_factor=1, na.rm=TRUE) { ## TODO: Adapt to the case where the forecast probability bins need ## to be computed. 
The quantiles should be the hcst quantiles, and then -- GitLab From 052e0824309fc5128b9818bc86485f544f2132bf Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 20 Sep 2022 15:01:49 +0200 Subject: [PATCH 32/81] Add new recipe, delete plot_scores(), add PlotLayout() + PlotMostLikelyQuantileMap() combo again --- modules/Loading/testing_recipes/recipe_4.yml | 2 +- .../recipe_system5c3s-rsds.yml | 46 +++++++ modules/Visualization/Visualization.R | 118 ++++++------------ modules/test_victoria.R | 6 +- 4 files changed, 86 insertions(+), 86 deletions(-) create mode 100644 modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index 4e7896f5..f83f91bb 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -18,7 +18,7 @@ Analysis: hcst_start: '1993' hcst_end: '2016' ftime_min: 1 - ftime_max: 3 + ftime_max: 6 Region: latmin: -10 latmax: 10 diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml new file mode 100644 index 00000000..ca52e8cc --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml @@ -0,0 +1,46 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: rsds + freq: monthly_mean + Datasets: + System: + name: system5c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 1 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 53336afa..43ad3552 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,10 +1,14 @@ library(RColorBrewer) library(grDevices) -library(ragg) +## TODO: Download functions locally +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/3c004cf52e9cfd0a75925466a4ae08005a848680/R/PlotMostLikelyQuantileMap.R") +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/3c004cf52e9cfd0a75925466a4ae08005a848680/R/PlotCombinedMap.R") +source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/450df59b98edd314c37dfda315357d3dbcfc55d0/R/PlotLayout.R") ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Get variable and system/obs names from dictionary ## TODO: Adapt to multi-model case +## TODO: Adapt to decadal case plot_data <- function(recipe, archive, @@ -36,7 +40,6 @@ plot_data <- function(recipe, if (!is.null(skill_metrics)) { plot_skill_scores(recipe, archive, data$hcst, skill_metrics, outdir, significance) - plot_scores(recipe, archive, data$hcst, skill_metrics, outdir) } # Plot forecast 
ensemble mean @@ -58,7 +61,7 @@ plot_data <- function(recipe, plot_most_likely_terciles(recipe, archive, data$fcst, probabilities$percentiles, outdir) } - + print("##### PLOTS SAVED TO OUTPUT DIRECTORY #####") } @@ -74,11 +77,7 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, start = 1, stop = 2)), label = T, abb = T) - # Define plot characteristics - # brks <- seq(-1, 1, by = 0.1) - # col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - options(bitmapType = "cairo") - + # Group different metrics by type skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", "enscorr_specs") @@ -94,32 +93,43 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.1) col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + } else if (name == "mean_bias_ss") { display_name <- "Mean Bias Skill Score" skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.1) col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + } else if (name %in% c("enscorr", "enscorr_specs")) { display_name <- "Ensemble Mean Correlation" skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.1) col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + } else if (name %in% scores) { skill <- skill_metrics[[name]] display_name <- toupper(strsplit(name, "_")[[1]][1]) brks <- seq(0, 1, by = 0.1) col2 <- grDevices::hcl.colors(length(brks) - 1, "Reds") + } else if (name == "enssprerr") { + ## TODO: Adjust colorbar parameters skill <- skill_metrics[[name]] display_name <- "Spread-to-Error Ratio" brks <- pretty(0:max(skill, na.rm = T), n = 20, min.n = 10) col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + } else if (name == "mean_bias") { skill <- skill_metrics[[name]] display_name <- "Mean Bias" - brks <- pretty(range(skill, na.rm = T), n = 20, 
min.n = 10) + max_value <- max(abs(skill)) + ugly_intervals <- seq(-max_value, max_value, (max_value*2)/10) + brks <- pretty(ugly_intervals, n = 20, min.n = 10) col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) } + + options(bitmapType = "cairo") + # Reorder dimensions skill <- Reorder(skill, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, @@ -153,59 +163,11 @@ plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, brks = brks, cols = col2, fileout = outfile, - bar_label_digits = 1) + bar_label_digits = 3) } } } -plot_scores <- function(recipe, archive, data_cube, skill_metrics, outdir) { - - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", - recipe$Analysis$Time$hcst_end) - init_month <- lubridate::month(as.numeric(substr(recipe$Analysis$Time$sdate, - start = 1, stop = 2)), - label = T, abb = T) - - # Define plot characteristics - brks <- seq(0, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "Reds") - #brks <- pretty(range(skill, na.rm = T), n = 20, min.n = 10) - # color <- colorRampPalette(col2)(length(brks) - 1) - options(bitmapType = "cairo") - - scores <- c("rps", "frps", "crps", "frps_specs") - - for (name in scores) { - if (name %in% names(skill_metrics)) { - skill <- skill_metrics[[name]] - skill <- Reorder(skill, c("time", "longitude", "latitude")) - # Define name of output file and titles - outfile <- paste0(outdir, name, ".png") - display_name <- toupper(strsplit(name, "_")[[1]][1]) - toptitle <- paste(display_name, "-", data_cube$Variable$varName, - "-", system_name, "-", init_month, hcst_period) - months <- unique(lubridate::month(data_cube$Dates$start, - label = T, abb = F)) - titles <- as.vector(months) - # Plot - PlotLayout(PlotEquiMap, 
c('longitude', 'latitude'), - skill, longitude, latitude, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - filled.continents=F, - brks = brks, - #rxow_titles=row_titles, - cols = col2, - fileout = outfile, - bar_label_digits = 1) - } - } -} - plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { latitude <- data_cube$lat @@ -294,7 +256,7 @@ plot_most_likely_terciles <- function(recipe, archive, probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) # Define name of output file and titles - outfile <- paste0(outdir, "forecast_most_likely_tercile") + outfile <- paste0(outdir, "forecast_most_likely_tercile.png") toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", "Initialization:", start_date) months <- unique(lubridate::month(data_cube$Dates$start, @@ -304,27 +266,19 @@ plot_most_likely_terciles <- function(recipe, archive, titles <- as.vector(months) # Plots - - for (month in seq(1:dim(probs_fcst)[1])) { - outfile2 <- paste0(outfile, "-", tolower(titles[month]), ".png") - PlotMostLikelyQuantileMap(probs_fcst[month, , , ], longitude, latitude, - coast_width = 1.5, - legend_scale = 0.8, - toptitle = paste(titles[month], - years[month], - toptitle), - width = 10, height = 8, - fileout = outfile2, - bar_label_digits=2) - } -## PlotLayout cannot work well with PlotMostLikelyQuantileMap currently. -## The issue might be fixed in the future. -# PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), -# probs_fcst, longitude, latitude, -# coast_width = 1.5, -# legend_scale = 0.8, -# toptitle = toptitle, -# titles = titles, -# fileout = outfile, -# bar_label_digits=2) + ## PlotLayout cannot work well with PlotMostLikelyQuantileMap currently. + ## The issue might be fixed in the future. 
+ suppressWarnings( + PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), + cat_dim = 'bin', + probs_fcst, longitude, latitude, + coast_width = 1.5, + title_scale = 0.6, + legend_scale = 0.8, #cex_bar_titles = 0.6, + toptitle = toptitle, + titles = titles, + fileout = outfile, + bar_label_digits=2, + triangle_ends = c(F, F), width = 11, height = 8) + ) } diff --git a/modules/test_victoria.R b/modules/test_victoria.R index b4543186..3e33dc60 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -5,7 +5,7 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" recipe <- read_yaml(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive @@ -20,5 +20,5 @@ probabilities <- compute_probabilities(calibrated_data$hcst, recipe) # Export all data to netCDF save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) # Plot data -plot_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities, - significance = T) +plot_data(recipe, archive, data, calibrated_data, skill_metrics, + probabilities, significance = T) -- GitLab From 1b08e411522f1d3cc630b2d224f8d474bd723738 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 20 Sep 2022 15:27:28 +0200 Subject: [PATCH 33/81] Change plot_skill_scores() to plot_skill_metrics() --- modules/Visualization/Visualization.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 43ad3552..5304d565 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -38,8 +38,8 @@ plot_data <- function(recipe, # Plot skill metrics if (!is.null(skill_metrics)) { - plot_skill_scores(recipe, archive, 
data$hcst, skill_metrics, outdir, - significance) + plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, + significance) } # Plot forecast ensemble mean @@ -65,8 +65,8 @@ plot_data <- function(recipe, print("##### PLOTS SAVED TO OUTPUT DIRECTORY #####") } -plot_skill_scores <- function(recipe, archive, data_cube, skill_metrics, outdir, - significance = F) { +plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, + outdir, significance = F) { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] -- GitLab From a7e7d47aac0bdfcf9b343f2a0b824cab44073ccf Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 20 Sep 2022 15:40:30 +0200 Subject: [PATCH 34/81] Bugfix: Fix line to change fcst units instead of hcst when precipitation is loaded --- modules/Loading/Loading.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index c3d522b3..5c64cf0a 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -325,7 +325,7 @@ load_datasets <- function(recipe_file) { attr(hcst$Variable, "variable")$units <- "mm/day" if (!is.null(fcst)) { fcst$data <- fcst$data*84000*1000 - attr(hcst$Variable, "variable")$units <- "mm/day" + attr(fcst$Variable, "variable")$units <- "mm/day" } } -- GitLab From aff25b4a204cbae381f72e63c0d506e7d58327d9 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 21 Sep 2022 11:05:38 +0200 Subject: [PATCH 35/81] Add check for most likely tercile function, change print messages, change 'quantiles' to 'terciles' --- modules/Visualization/Visualization.R | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 5304d565..ee63a431 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -61,8 +61,6 @@ plot_data <- 
function(recipe, plot_most_likely_terciles(recipe, archive, data$fcst, probabilities$percentiles, outdir) } - - print("##### PLOTS SAVED TO OUTPUT DIRECTORY #####") } plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, @@ -166,6 +164,8 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, bar_label_digits = 3) } } + + print("##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") } plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { @@ -219,6 +219,7 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { brks = brks, fileout = outfile, bar_label_digits=4) + print("##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") } plot_most_likely_terciles <- function(recipe, archive, @@ -240,11 +241,15 @@ plot_most_likely_terciles <- function(recipe, archive, } # Compute probability bins for the forecast - ## TODO: Add check in case percentile_33 and percentile_66 don't exist - quantiles <- abind(percentiles$percentile_33, percentiles$percentile_66, + if (is.null(percentiles$percentile_33) | is.null(percentiles$percentile_33)) { + stop("The quantile array does not contain the 33rd and 66th percentiles,", + " the most likely tercile map cannot be plotted.") + } + + terciles <- abind(percentiles$percentile_33, percentiles$percentile_66, along = 0) - names(dim(quantiles)) <- c("bin", names(dim(percentiles$percentile_33))) - probs_fcst <- compute_probs(data_cube$data, quantiles, + names(dim(terciles)) <- c("bin", names(dim(percentiles$percentile_33))) + probs_fcst <- compute_probs(data_cube$data, terciles, ncores = recipe$Analysis$ncores, na.rm = recipe$Analysis$remove_NAs) @@ -266,8 +271,8 @@ plot_most_likely_terciles <- function(recipe, archive, titles <- as.vector(months) # Plots - ## PlotLayout cannot work well with PlotMostLikelyQuantileMap currently. - ## The issue might be fixed in the future. + ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked + ## on. 
suppressWarnings( PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), cat_dim = 'bin', @@ -281,4 +286,6 @@ plot_most_likely_terciles <- function(recipe, archive, bar_label_digits=2, triangle_ends = c(F, F), width = 11, height = 8) ) + + print("##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") } -- GitLab From 70bfe5153ec03e0d932e602667f65a88cb158399 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 21 Sep 2022 16:41:44 +0200 Subject: [PATCH 36/81] Rename function parameters, remove unused line --- modules/Visualization/Visualization.R | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index ee63a431..c4dce46c 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -168,18 +168,18 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, print("##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") } -plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { +plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { - latitude <- data_cube$lat - longitude <- data_cube$lon + latitude <- fcst$lat + longitude <- fcst$lon system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name variable <- recipe$Analysis$Variables$name - units <- attr(data_cube$Variable, "variable")$units + units <- attr(fcst$Variable, "variable")$units start_date <- paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate) # Compute ensemble mean - ensemble_mean <- s2dv::MeanDims(data_cube$data, 'ensemble') + ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') # Drop extra dims, add time dim if missing: ensemble_mean <- drop(ensemble_mean) if (!("time" %in% names(dim(ensemble_mean)))) { @@ -204,8 +204,8 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { outfile <- paste0(outdir, 
"forecast_ensemble_mean.png") toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, "- Initialization:", start_date) - months <- unique(lubridate::month(data_cube$Dates$start, - label = T, abb = F)) + months <- unique(lubridate::month(fcst$Dates$start, + label = T, abb = F)) titles <- as.vector(months) # Plots PlotLayout(PlotEquiMap, c('longitude', 'latitude'), @@ -223,12 +223,12 @@ plot_ensemble_mean <- function(recipe, archive, data_cube, outdir) { } plot_most_likely_terciles <- function(recipe, archive, - data_cube, + fcst, percentiles, outdir) { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] + latitude <- fcst$lat + longitude <- fcst$lon system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name variable <- recipe$Analysis$Variables$name start_date <- paste0(recipe$Analysis$Time$fcst_year, @@ -249,7 +249,7 @@ plot_most_likely_terciles <- function(recipe, archive, terciles <- abind(percentiles$percentile_33, percentiles$percentile_66, along = 0) names(dim(terciles)) <- c("bin", names(dim(percentiles$percentile_33))) - probs_fcst <- compute_probs(data_cube$data, terciles, + probs_fcst <- compute_probs(fcst$data, terciles, ncores = recipe$Analysis$ncores, na.rm = recipe$Analysis$remove_NAs) @@ -264,10 +264,9 @@ plot_most_likely_terciles <- function(recipe, archive, outfile <- paste0(outdir, "forecast_most_likely_tercile.png") toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", "Initialization:", start_date) - months <- unique(lubridate::month(data_cube$Dates$start, + months <- unique(lubridate::month(fcst$Dates$start, label = T, abb = F)) ## TODO: Ensure this works for daily and sub-daily cases - years <- lubridate::year(data_cube$Dates$start) titles <- as.vector(months) # Plots -- GitLab From a8c56e0c7a08109a2a74bc5f2eade2c4fdd21a9d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 21 Sep 2022 16:45:57 +0200 Subject: [PATCH 
37/81] Remove unused parameter, add TODO --- modules/Visualization/Visualization.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index c4dce46c..3681ba77 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -9,6 +9,7 @@ source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/450df59b98edd314c37dfda315357d ## TODO: Get variable and system/obs names from dictionary ## TODO: Adapt to multi-model case ## TODO: Adapt to decadal case +## TODO: Add param 'raw'? plot_data <- function(recipe, archive, @@ -48,7 +49,7 @@ plot_data <- function(recipe, } else if (!is.null(data$fcst)) { warning("Only the uncalibrated forecast was provided. Using this data ", "to plot the forecast ensemble mean.") - plot_ensemble_mean(recipe, archive, data$fcst, outdir, raw = T) + plot_ensemble_mean(recipe, archive, data$fcst, outdir) } # Plot Most Likely Terciles @@ -66,8 +67,8 @@ plot_data <- function(recipe, plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, outdir, significance = F) { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] + latitude <- data_cube$lat + longitude <- data_cube$lon system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) @@ -147,7 +148,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, toptitle <- paste(display_name, "-", data_cube$Variable$varName, "-", system_name, "-", init_month, hcst_period) months <- unique(lubridate::month(data_cube$Dates$start, - label = T, abb = F)) + label = T, abb = F)) titles <- as.vector(months) # Plot PlotLayout(PlotEquiMap, c('longitude', 'latitude'), -- GitLab From 98beafda7d5b5b5b277ee626d3fe126a796df03f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 22 Sep 
2022 09:02:23 +0200 Subject: [PATCH 38/81] Add check to raise error if plotting functions are called with daily data --- modules/Visualization/Visualization.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 3681ba77..f8a11577 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -66,6 +66,11 @@ plot_data <- function(recipe, plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, outdir, significance = F) { + + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } latitude <- data_cube$lat longitude <- data_cube$lon @@ -171,6 +176,11 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } + latitude <- fcst$lat longitude <- fcst$lon system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name @@ -228,6 +238,11 @@ plot_most_likely_terciles <- function(recipe, archive, percentiles, outdir) { + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } + latitude <- fcst$lat longitude <- fcst$lon system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name -- GitLab From 6178c48b6a9feecb41905f6c8f20123d6f33d090 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 22 Sep 2022 14:33:18 +0200 Subject: [PATCH 39/81] Fix typo --- modules/Loading/Loading.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 5c64cf0a..e1a6a818 100644 --- 
a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -318,7 +318,7 @@ load_datasets <- function(recipe_file) { # Convert precipitation to mm/day ## TODO: Make a function? if (variable == "prlr") { - info(logger, "Converting precipitation from mm/s to mm/day.") + info(logger, "Converting precipitation from m/s to mm/day.") obs$data <- obs$data*84000*1000 attr(obs$Variable, "variable")$units <- "mm/day" hcst$data <- hcst$data*84000*1000 -- GitLab From 6f8475ebcb0cd880e10a679eee34457232573f2e Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 23 Sep 2022 12:30:09 +0200 Subject: [PATCH 40/81] Add name and institution in the list --- conf/archive_decadal.yml | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 61e3f2b8..61f0a12e 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -3,6 +3,8 @@ archive: System: # ---- EC-Earth3-i1: + name: + institution: src: hcst: "exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: @@ -23,6 +25,8 @@ archive: #NOTE: EC-Earth3-i2 the first file of each sdate has 2 time step only (Nov-Dec). # The rest files are Jan to Dec. 
EC-Earth3-i2: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: @@ -41,6 +45,8 @@ archive: # ---- EC-Earth3-i4: + name: + institution: src: hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" @@ -77,6 +83,8 @@ archive: # ---- HadGEM3-GC31-MM: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast/" @@ -96,6 +104,8 @@ archive: # ---- BCC-CSM2-MR: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/" fcst: @@ -115,6 +125,8 @@ archive: # ---- CanESM5: + name: + institution: src: hcst: "exp/canesm5/cmip6-dcppA-hindcast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/" fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/" @@ -134,6 +146,8 @@ archive: # ---- CESM1-1-CAM5-CMIP5: + name: + institution: src: hcst: "exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast" fcst: @@ -152,6 +166,8 @@ archive: # ---- CMCC-CM2-SR5: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppB-forecast/" @@ -170,6 +186,8 @@ archive: # ---- FGOALS-f3-L: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppB-forecast/" @@ -189,6 +207,8 @@ archive: # ---- IPSL-CM6A-LR: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/" fcst: @@ -206,6 +226,8 @@ 
archive: # ---- MIROC6: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" fcst: @@ -223,6 +245,8 @@ archive: # ---- MPI-ESM1.2-HR: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/" fcst: @@ -240,6 +264,8 @@ archive: # ---- MPI-ESM1.2-LR: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/dcppA-hindcast/" fcst: @@ -257,6 +283,8 @@ archive: # ---- MRI-ESM2-0: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/dcppA-hindcast/" fcst: @@ -275,6 +303,8 @@ archive: # ---- #NOTE: NorCPM1-i1 and i2 are under the same directory NorCPM1-i1: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: @@ -292,6 +322,8 @@ archive: # ---- NorCPM1-i2: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: @@ -312,6 +344,8 @@ archive: Reference: GHCNv4: + name: + institution: src: "obs/noaa/ghcn_v4/" monthly_mean: {"tas":"", "tasanomaly":""} daily_mean: @@ -319,6 +353,8 @@ archive: reference_grid: "/esarchive/obs/noaa/ghcn_v4/monthly_mean/tasanomaly/tasanomaly_201811.nc" # ---- ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" monthly_mean: {"tas":"_f1h-r1440x721cds", "prlr":"_f1h-r1440x721cds", "psl":"_f1h-r1440x721cds", "tos":"_f1h-r1440x721cds"} daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", @@ -333,6 +369,8 @@ archive: # ---- JRA-55: + name: + institution: src: "recon/jma/jra55/" monthly_mean: {"tas":"_f6h", "psl":"_f6h", "tos":"", "pr":"_s0-3h", "prlr":"_s0-3h"} daily_mean: {"tas":"_f6h", "psl":"_f6h", "prlr":"_s0-3h", "sfcWind":"_f6h"} @@ -341,6 +379,8 @@ archive: # ---- GISTEMPv4: + name: + institution: src: "obs/noaa-nasa/ghcnersstgiss/" monthly_mean: {"tasanomaly":""} daily_mean: @@ 
-349,6 +389,8 @@ archive: # ---- HadCRUT4: + name: + institution: src: "obs/ukmo/hadcrut_v4.6/" monthly_mean: {"tasanomaly":""} daily_mean: @@ -357,6 +399,8 @@ archive: # ---- HadSLP2: + name: + institution: src: "obs/ukmo/hadslp_v2/" monthly_mean: {"psl":""} daily_mean: -- GitLab From ee563aed22a8e26bfed53b323d0ff9d30572f080 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 27 Sep 2022 14:50:33 +0200 Subject: [PATCH 41/81] Reorder data dim, ensemble to the last --- modules/Loading/Loading_decadal.R | 105 ++++++++++++++++-------- tests/testthat/test-decadal_daily_1.R | 12 +-- tests/testthat/test-decadal_monthly_1.R | 12 +-- tests/testthat/test-decadal_monthly_2.R | 10 +-- tests/testthat/test-decadal_monthly_3.R | 6 +- 5 files changed, 90 insertions(+), 55 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 7f99ac68..7f9b89a8 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -23,12 +23,21 @@ source("tools/tmp/as.s2dv_cube.R") load_datasets <- function(recipe_file) { recipe <- read_yaml(recipe_file) - recipe$filename <- recipe_file + recipe$filepath <- recipe_file + recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) + archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Print Start() info or not DEBUG <- FALSE + ## TODO: this should come from the main script + # Create output folder and log: + logger <- prepare_outputs(recipe = recipe) + folder <- logger$foldername + log_file <- logger$logname + logger <- logger$logger + #------------------------- # Read from recipe: #------------------------- @@ -103,7 +112,6 @@ load_datasets <- function(recipe_file) { Start_default_arg_list <- list( dat = path_list, #file.path(hcst.path, hcst.files), var = variable, - ensemble = member, syear = paste0(sdates_hcst), chunk = 'all', chunk_depends = 'syear', @@ -115,6 +123,7 @@ load_datasets <- function(recipe_file) { latitude_reorder = Sort(decreasing = 
TRUE), longitude = values(list(lons.min, lons.max)), longitude_reorder = circularsort, + ensemble = member, transform = regrid_params$fcst.transform, transform_extra_cells = 2, transform_params = list(grid = regrid_params$fcst.gridtype, @@ -144,7 +153,7 @@ load_datasets <- function(recipe_file) { # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) - hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'ensemble', 'syear', 'time', 'latitude', 'longitude')) + hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only @@ -161,11 +170,12 @@ load_datasets <- function(recipe_file) { tmp_time_attr <- attr(hcst, 'Variables')$common$time # change syear to c(sday, sweek, syear) - dim(hcst) <- c(dim(hcst)[1:3], sday = 1, sweek = 1, dim(hcst)[4:7]) - if (!identical(dim(tmp_time_attr), dim(hcst)[6:7])) { + # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] + dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { stop("hcst has problem in matching data and time attr dimension.") } - dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(hcst)[6:7]) + dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { @@ -213,8 +223,9 @@ load_datasets <- function(recipe_file) { # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions + ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], 
dim(fcst)[2:6]) - fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'ensemble', 'syear', 'time', 'latitude', 'longitude')) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only @@ -231,11 +242,12 @@ load_datasets <- function(recipe_file) { tmp_time_attr <- attr(fcst, 'Variables')$common$time # change syear to c(sday, sweek, syear) - dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7]) - if (!identical(dim(tmp_time_attr), dim(fcst)[6:7])) { + # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] + dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { stop("fcst has problem in matching data and time attr dimension.") } - dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(fcst)[6:7]) + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { @@ -247,7 +259,8 @@ load_datasets <- function(recipe_file) { fcst <- as.s2dv_cube(fcst) ) - if (!identical(dim(hcst$data)[-6], dim(fcst$data)[-6])) { + # Only syear could be different + if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { stop("hcst and fcst do not share the same dimension structure.") } @@ -334,20 +347,12 @@ load_datasets <- function(recipe_file) { # sday sweek syear time # 1 1 2 14 - -# # TODO: Reorder obs dims to match hcst dims? 
-# # Adds ensemble dim to obs (for consistency with hcst/fcst) -# default_dims <- c(dat = 1, var = 1, sweek = 1, -# sday = 1, syear = 1, time = 1, -# latitude = 1, longitude = 1, ensemble = 1) -# default_dims[names(dim(obs))] <- dim(obs) -# dim(obs) <- default_dims - - if (!identical(dim(obs), dim(hcst$data)[-3])) { + # Only ensemble dim could be different + if (!identical(dim(obs), dim(hcst$data)[-9])) { stop("obs and hcst dimensions do not match.") } # Add ensemble dim to obs - dim(obs) <- c(dim(obs)[1:2], ensemble = 1, dim(obs)[3:8]) + dim(obs) <- c(dim(obs), ensemble = 1) # Change class from startR_array to s2dv_cube suppressWarnings( @@ -356,19 +361,7 @@ #------------------------------------------- -# Step 4. Print the summary of data -#------------------------------------------- - - # Print a summary of the loaded data for the user, for each object - data_summary(hcst, store.freq) - data_summary(obs, store.freq) - if (!is.null(fcst)) { - data_summary(fcst, store.freq) - } - - -#------------------------------------------- -# Step 5. Verify the consistance btwn hcst and obs +# Step 4. Verify the consistency between data +#------------------------------------------- # dimension if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { @@ -413,5 +406,47 @@ load_datasets <- function(recipe_file) { stop("hcst and fcst don't share the same longitude.") } + +#------------------------------------------- +# Step 5. Tune data +#------------------------------------------- + # Remove negative values in accumulated variables + dictionary <- read_yaml("conf/variable-dictionary.yml") + if (dictionary$vars[[variable]]$accum) { + info(logger, " Accumulated variable: setting negative values to zero.") + obs$data[obs$data < 0] <- 0 + hcst$data[hcst$data < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[fcst$data < 0] <- 0 + } + } + # Convert precipitation to mm/day + ## TODO: Make a function? 
+ if (variable == "prlr") { + info(logger, "Converting precipitation from m/s to mm/day.") + obs$data <- obs$data*86400*1000 + attr(obs$Variable, "variable")$units <- "mm/day" + hcst$data <- hcst$data*86400*1000 + attr(hcst$Variable, "variable")$units <- "mm/day" + if (!is.null(fcst)) { + fcst$data <- fcst$data*86400*1000 + attr(fcst$Variable, "variable")$units <- "mm/day" + } + } + +#------------------------------------------- +# Step 6. Print summary +#------------------------------------------- + + # Print a summary of the loaded data for the user, for each object + data_summary(hcst, store.freq) + data_summary(obs, store.freq) + if (!is.null(fcst)) { + data_summary(fcst, store.freq) + } + + print("##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index 720662d4..a78fd135 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -65,11 +65,11 @@ names(data$obs) ) expect_equal( dim(data$hcst$data), -c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 90, latitude = 7, longitude = 11) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 3, time = 90, latitude = 7, longitude = 11, ensemble = 3) ) expect_equal( dim(data$fcst$data), -c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 90, latitude = 7, longitude = 11) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 2, time = 90, latitude = 7, longitude = 11, ensemble = 3) ) expect_equal( dim(data$hcst$Dates$start), @@ -77,12 +77,12 @@ c(sday = 1, sweek = 1, syear = 3, time = 90) ) # hcst data expect_equal( -as.vector(drop(data$hcst$data)[1, 2:3, 1:3, 1, 1]), +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[1, 2:3, 1:3, 1, 1]), c(298.5787, 293.6479, 298.5042, 293.7802, 297.8072, 293.0764), tolerance = 0.0001 ) expect_equal( -as.vector(drop(data$hcst$data)[2, , 89:90, 1, 1]), 
+as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[2, , 89:90, 1, 1]), c(301.6978, 308.9792, 308.4501, 302.1620, 307.6034, 307.6388), tolerance = 0.0001 ) @@ -99,12 +99,12 @@ tolerance = 0.0001 # fcst data expect_equal( -as.vector(drop(data$fcst$data)[1, , 1:3, 1, 1]), +as.vector(aperm(drop(data$fcst$data), c(5, 1:4))[1, , 1:3, 1, 1]), c(295.0745, 291.1006, 296.2279, 291.6309, 295.3123, 290.8995), tolerance = 0.0001 ) expect_equal( -as.vector(drop(data$fcst$data)[2, , 89:90, 1, 1]), +as.vector(aperm(drop(data$fcst$data), c(5, 1:4))[2, , 89:90, 1, 1]), c(305.3428, 305.0657, 305.5445, 305.5681), tolerance = 0.0001 ) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 6f43aaa1..4267eb63 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -75,18 +75,18 @@ names(data$obs) ) expect_equal( dim(data$hcst$data), -c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4, ensemble = 2) ) expect_equal( dim(data$fcst$data), -c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4, ensemble = 2) ) expect_equal( dim(data$hcst$Dates$start), c(sday = 1, sweek = 1, syear = 4, time = 3) ) expect_equal( -as.vector(drop(data$hcst$data)[,1:2,1,2,3]), +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[, 1:2,1,2,3]), c(291.3831, 291.6227, 292.3012, 290.9779), tolerance = 0.0001 ) @@ -140,11 +140,11 @@ class(calibrated_data$fcst), ) expect_equal( dim(calibrated_data$hcst$data), -c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4, ensemble = 2) ) expect_equal( 
dim(calibrated_data$fcst$data), -c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4, ensemble = 2) ) expect_equal( mean(calibrated_data$fcst$data), @@ -157,7 +157,7 @@ mean(calibrated_data$hcst$data), tolerance = 0.0001 ) expect_equal( -as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +as.vector(aperm(drop(calibrated_data$hcst$data), c(5, 1:4))[1, , 2, 3, 4]), c(286.3895, 286.6408, 290.6652, 288.3759), tolerance = 0.0001 ) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 8fd3525d..ac4f2fff 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -22,7 +22,7 @@ suppressWarnings({invisible(capture.output( ))}) # Compute skill metrics -suppressWarnings({invisible(capture.output( +suppressMessages({invisible(capture.output( skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) ))}) suppressWarnings({invisible(capture.output( @@ -67,11 +67,11 @@ names(data$obs) ) expect_equal( dim(data$hcst$data), -c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 14, latitude = 8, longitude = 5) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 3, time = 14, latitude = 8, longitude = 5, ensemble = 3) ) expect_equal( dim(data$fcst$data), -c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 14, latitude = 8, longitude = 5) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 2, time = 14, latitude = 8, longitude = 5, ensemble = 3) ) expect_equal( dim(data$hcst$Dates$start), @@ -83,7 +83,7 @@ c(sday = 1, sweek = 1, syear = 3, time = 14) #) # hcst data expect_equal( -as.vector(drop(data$hcst$data)[1, , 1:2, 2, 2]), +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[1,, 1:2, 2, 2]), c(272.8613, 271.0689, 270.8007, 273.5594, 272.1561, 272.8729), tolerance = 0.0001 ) @@ -99,7 +99,7 
@@ tolerance = 0.0001 ) # fcst data expect_equal( -as.vector(drop(data$fcst$data)[1, , 1:2, 2, 2]), +as.vector(aperm(drop(data$fcst$data), c(5, 1:4))[1, , 1:2, 2, 2]), c(271.7708, 271.8424, 272.4980, 273.5842), tolerance = 0.0001 ) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index b6be7f2b..21665f6e 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -63,7 +63,7 @@ names(data$obs) ) expect_equal( dim(data$hcst$data), -c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 25, longitude = 16) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 25, longitude = 16, ensemble = 3) ) expect_equal( dim(data$hcst$Dates$start), @@ -71,7 +71,7 @@ c(sday = 1, sweek = 1, syear = 4, time = 3) ) # hcst data expect_equal( -as.vector(drop(data$hcst$data)[3, , 2, 2, 2]), +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[3, , 2, 2, 2]), c(278.4305, 279.5065, 280.4110, 278.7608), tolerance = 0.0001 ) @@ -113,7 +113,7 @@ names(calibrated_data), c("hcst", "fcst") ) expect_equal( -as.vector(drop(calibrated_data$hcst$data)[3, , 2, 2, 2]), +as.vector(aperm(drop(calibrated_data$hcst$data), c(5, 1:4))[3, , 2, 2, 2]), c(279.0648, 281.0578, 282.6535, 280.3137), tolerance = 0.0001 ) -- GitLab From 3b518579f293f6c06b1028bb139db3e4e13c54ac Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 27 Sep 2022 14:51:04 +0200 Subject: [PATCH 42/81] Consider fcst has >1 syear --- modules/Visualization/Visualization.R | 118 ++++++++++++++++---------- modules/test_decadal.R | 10 +++ 2 files changed, 82 insertions(+), 46 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index f8a11577..a191b663 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -193,10 +193,15 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { ensemble_mean <- 
s2dv::MeanDims(fcst$data, 'ensemble') # Drop extra dims, add time dim if missing: ensemble_mean <- drop(ensemble_mean) + if (!("time" %in% names(dim(ensemble_mean)))) { dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) } - ensemble_mean <- Reorder(ensemble_mean, c("time", "longitude", "latitude")) + if (!'syear' %in% names(dim(ensemble_mean))) { + ensemble_mean <- Reorder(ensemble_mean, c("time", "longitude", "latitude")) + } else { + ensemble_mean <- Reorder(ensemble_mean, c("syear", "time", "longitude", "latitude")) + } ## TODO: Redefine column colors, possibly depending on variable if (variable == 'prlr') { palette = "BrBG" @@ -211,25 +216,34 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { # color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") - # Define name of output file and titles - outfile <- paste0(outdir, "forecast_ensemble_mean.png") - toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, - "- Initialization:", start_date) - months <- unique(lubridate::month(fcst$Dates$start, - label = T, abb = F)) - titles <- as.vector(months) - # Plots - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - ensemble_mean, longitude, latitude, - filled.continents = F, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - units = units, - cols = col2, - brks = brks, - fileout = outfile, - bar_label_digits=4) + for (i_syear in start_date) { + # Define name of output file and titles + if (length(start_date) == 1) { + i_ensemble_mean <- ensemble_mean + outfile <- paste0(outdir, "forecast_ensemble_mean.png") + } else { + i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ] + outfile <- paste0(outdir, "forecast_ensemble_mean_", i_syear, ".png") + } + toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, + "- Initialization:", i_syear) + months <- lubridate::month(fcst$Dates$start[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + titles <- 
as.vector(months) + # Plots + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + i_ensemble_mean, longitude, latitude, + filled.continents = F, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + units = units, + cols = col2, + brks = brks, + fileout = outfile, + bar_label_digits = 4) + } + print("##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") } @@ -274,33 +288,45 @@ plot_most_likely_terciles <- function(recipe, archive, if (!("time" %in% names(dim(probs_fcst)))) { dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) } + if (!'syear' %in% names(dim(probs_fcst))) { + probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) + } else { + probs_fcst <- Reorder(probs_fcst, c("syear", "time", "bin", "longitude", "latitude")) + } - probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) - # Define name of output file and titles - outfile <- paste0(outdir, "forecast_most_likely_tercile.png") - toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", - "Initialization:", start_date) - months <- unique(lubridate::month(fcst$Dates$start, - label = T, abb = F)) - ## TODO: Ensure this works for daily and sub-daily cases - titles <- as.vector(months) - - # Plots - ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked - ## on. 
- suppressWarnings( - PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), - cat_dim = 'bin', - probs_fcst, longitude, latitude, - coast_width = 1.5, - title_scale = 0.6, - legend_scale = 0.8, #cex_bar_titles = 0.6, - toptitle = toptitle, - titles = titles, - fileout = outfile, - bar_label_digits=2, - triangle_ends = c(F, F), width = 11, height = 8) - ) + for (i_syear in start_date) { + # Define name of output file and titles + if (length(start_date) == 1) { + i_probs_fcst <- probs_fcst + outfile <- paste0(outdir, "forecast_most_likely_tercile.png") + } else { + i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] + outfile <- paste0(outdir, "forecast_most_likely_tercile_", i_syear, ".png") + } + toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", + "Initialization:", i_syear) + months <- lubridate::month(fcst$Dates$start[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + ## TODO: Ensure this works for daily and sub-daily cases + titles <- as.vector(months) + + # Plots + ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked + ## on. 
+ suppressWarnings( + PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), + cat_dim = 'bin', + i_probs_fcst, longitude, latitude, + coast_width = 1.5, + title_scale = 0.6, + legend_scale = 0.8, #cex_bar_titles = 0.6, + toptitle = toptitle, + titles = titles, + fileout = outfile, + bar_label_digits = 2, + triangle_ends = c(F, F), width = 11, height = 8) + ) + } print("##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/test_decadal.R b/modules/test_decadal.R index 8077b7e2..01cf2d92 100644 --- a/modules/test_decadal.R +++ b/modules/test_decadal.R @@ -3,6 +3,7 @@ source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml" recipe <- read_yaml(recipe_file) @@ -10,11 +11,20 @@ archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$ar # Load datasets data <- load_datasets(recipe_file) + # Calibrate datasets calibrated_data <- calibrate_datasets(data, recipe) + # Compute skill metrics skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) + # Compute percentiles and probability bins probabilities <- compute_probabilities(calibrated_data$hcst, recipe) + # Export all data to netCDF save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) + +# Plot data +plot_data(recipe, archive, data, calibrated_data, skill_metrics, + probabilities, significance = T) + -- GitLab From 0f3bce92af46e9b1de002d36ba69a5c2f8988c30 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 28 Sep 2022 11:58:23 +0200 Subject: [PATCH 43/81] Rename recipes for clarity --- ....yml => recipe_system2c3s-prlr-nofcst.yml} | 0 ...recipe_2.yml => recipe_system5c3s-tas.yml} | 0 ..._6.yml => recipe_system7c3s-tas-specs.yml} | 0 ...recipe_4.yml => 
recipe_system7c3s-tas.yml} | 0 ... recipe_tas-daily-regrid-to-reference.yml} | 0 ... => recipe_tas-daily-regrid-to-system.yml} | 0 modules/test_seasonal.R | 24 +++++++++++++++++++ modules/test_victoria.R | 2 +- 8 files changed, 25 insertions(+), 1 deletion(-) rename modules/Loading/testing_recipes/{recipe_5.yml => recipe_system2c3s-prlr-nofcst.yml} (100%) rename modules/Loading/testing_recipes/{recipe_2.yml => recipe_system5c3s-tas.yml} (100%) rename modules/Loading/testing_recipes/{recipe_6.yml => recipe_system7c3s-tas-specs.yml} (100%) rename modules/Loading/testing_recipes/{recipe_4.yml => recipe_system7c3s-tas.yml} (100%) rename modules/Loading/testing_recipes/{recipe_1.yml => recipe_tas-daily-regrid-to-reference.yml} (100%) rename modules/Loading/testing_recipes/{recipe_3.yml => recipe_tas-daily-regrid-to-system.yml} (100%) create mode 100644 modules/test_seasonal.R diff --git a/modules/Loading/testing_recipes/recipe_5.yml b/modules/Loading/testing_recipes/recipe_system2c3s-prlr-nofcst.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_5.yml rename to modules/Loading/testing_recipes/recipe_system2c3s-prlr-nofcst.yml diff --git a/modules/Loading/testing_recipes/recipe_2.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_2.yml rename to modules/Loading/testing_recipes/recipe_system5c3s-tas.yml diff --git a/modules/Loading/testing_recipes/recipe_6.yml b/modules/Loading/testing_recipes/recipe_system7c3s-tas-specs.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_6.yml rename to modules/Loading/testing_recipes/recipe_system7c3s-tas-specs.yml diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_4.yml rename to modules/Loading/testing_recipes/recipe_system7c3s-tas.yml diff --git 
a/modules/Loading/testing_recipes/recipe_1.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_1.yml rename to modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml diff --git a/modules/Loading/testing_recipes/recipe_3.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_3.yml rename to modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R new file mode 100644 index 00000000..5f59794f --- /dev/null +++ b/modules/test_seasonal.R @@ -0,0 +1,24 @@ + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" +recipe <- read_yaml(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + +# Load datasets +data <- load_datasets(recipe_file) +# Calibrate datasets +calibrated_data <- calibrate_datasets(data, recipe) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(calibrated_data$hcst, recipe) +# Export all data to netCDF +save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) +# Plot data +plot_data(recipe, archive, data, calibrated_data, skill_metrics, + probabilities, significance = T) diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 3e33dc60..5f59794f 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -5,7 +5,7 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") 
-recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" recipe <- read_yaml(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive -- GitLab From 7995f949e0070fe0ee30924ba45b314f92f04c6c Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 28 Sep 2022 14:33:51 +0200 Subject: [PATCH 44/81] Add name and institution if possible --- conf/archive_decadal.yml | 82 ++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 61f0a12e..2b74bff8 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -3,8 +3,8 @@ archive: System: # ---- EC-Earth3-i1: - name: - institution: + name: "EC-Earth3-i1" + institution: "EC-Earth-Consortium" src: hcst: "exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: @@ -25,8 +25,8 @@ archive: #NOTE: EC-Earth3-i2 the first file of each sdate has 2 time step only (Nov-Dec). # The rest files are Jan to Dec. 
EC-Earth3-i2: - name: - institution: + name: "EC-Earth3-i2" + institution: "EC-Earth-Consortium" src: hcst: "exp/CMIP6/dcppA-hindcast/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: @@ -45,8 +45,8 @@ archive: # ---- EC-Earth3-i4: - name: - institution: + name: "EC-Earth3-i4" + institution: "EC-Earth-Consortium" src: hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" @@ -83,7 +83,7 @@ archive: # ---- HadGEM3-GC31-MM: - name: + name: "HadGEM3-GC31-MM" institution: src: hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/" @@ -100,12 +100,12 @@ archive: calendar: "360-day" member: r1i1p1f2,r2i1p1f2,r3i1p1f2,r4i1p1f2,r5i1p1f2,r6i1p1f2,r7i1p1f2,r8i1p1f2,r9i1p1f2,r10i1p1f2 initial_month: 11 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gr/v20200316/tas_Amon_HadGEM3_dcppA-hindcast_s2018-r1i1p1f2_gr_201811-202903.nc" #'r432x324' + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gn/v20200417/tas_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s1960-r1i1p1f2_gn_196011-196012.nc" #'r432x324' # ---- BCC-CSM2-MR: - name: - institution: + name: "BCC-CSM2-MR" + institution: "Beijing Climate Center, Beijing 100081, China" src: hcst: "exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/" fcst: @@ -125,10 +125,10 @@ archive: # ---- CanESM5: - name: + name: "CanESM5" institution: src: - hcst: "exp/canesm5/cmip6-dcppA-hindcast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/" + hcst: "exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/" fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/" 
first_dcppB_syear: 2020 monthly_mean: @@ -142,11 +142,12 @@ archive: calendar: "365_day" member: r1i1p2f1,r2i1p2f1,r3i1p2f1,r4i1p2f1,r5i1p2f1,r6i1p2f1,r7i1p2f1,r8i1p2f1, r9i1p2f1, r10i1p2f1, r11i1p2f1,r12i1p2f1,r13i1p2f1,r14i1p2f1,r15i1p2f1,r16i1p2f1,r17i1p2f1,r18i1p2f1, r19i1p2f1, r20i1p2f1,r21i1p2f1,r22i1p2f1,r23i1p2f1,r24i1p2f1,r25i1p2f1,r26i1p2f1,r27i1p2f1,r28i1p2f1, r29i1p2f1, r30i1p2f1, r31i1p2f1,r32i1p2f1,r33i1p2f1,r34i1p2f1,r35i1p2f1,r36i1p2f1,r37i1p2f1,r38i1p2f1, r39i1p2f1, r40i1p2f1 initial_month: 1 #next year Jan - reference_grid: "/esarchive/exp/canesm5/cmip6-dcppA-hindcast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_dcppA-hindcast_s2008-r1i1p2f1_gn_200901-201812.nc" + reference_grid: "/esarchive/exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_dcppA-hindcast_s2008-r1i1p2f1_gn_200901-201812.nc" # ---- +#NOTE: no data there CESM1-1-CAM5-CMIP5: - name: + name: "CESM1-1-CAM5-CMIP5" institution: src: hcst: "exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast" @@ -165,8 +166,9 @@ archive: reference_grid: "/esarchive/exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- +#NOTE: in tapes CMCC-CM2-SR5: - name: + name: "CMCC-CM2-SR5" institution: src: hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/" @@ -186,8 +188,8 @@ archive: # ---- FGOALS-f3-L: - name: - institution: + name: "FGOALS-f3-L" + institution: "Chinese Academy of Sciences, Beijing 100029, China" src: hcst: "exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppB-forecast/" @@ -207,8 +209,8 @@ archive: # ---- IPSL-CM6A-LR: - name: - 
institution: + name: "IPSL-CM6A-LR" + institution: "IPSL" src: hcst: "exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/" fcst: @@ -245,8 +247,8 @@ archive: # ---- MPI-ESM1.2-HR: - name: - institution: + name: "MPI-ESM1.2-HR" + institution: "MIROC" src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/" fcst: @@ -264,8 +266,8 @@ archive: # ---- MPI-ESM1.2-LR: - name: - institution: + name: "MPI-ESM1.2-LR" + institution: "Max-Planck-Institute for Meteorology" src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/dcppA-hindcast/" fcst: @@ -283,8 +285,8 @@ archive: # ---- MRI-ESM2-0: - name: - institution: + name: "MRI-ESM2-0" + institution: "Meteorological Research Institute, Japan" src: hcst: "exp/CMIP6/dcppA-hindcast/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/dcppA-hindcast/" fcst: @@ -303,8 +305,8 @@ archive: # ---- #NOTE: NorCPM1-i1 and i2 are under the same directory NorCPM1-i1: - name: - institution: + name: "NorCPM1-i1" + institution: "NCC" src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: @@ -314,16 +316,16 @@ archive: version: {"tas":"v20200320", "pr":"v20200320", "psl":"v20200320"} daily_mean: grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} - version: {"pr":"v20191005", "tas":"v20200320", "tasmax":"v20191005", "tasmin":"v20191005"} + version: {"pr":"v20191005", "tas":"v20191029", "tasmax":"v20191005", "tasmin":"v20191005"} calendar: "noleap" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 10 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200320/tas_Amon_NorCPM1_dcppA-hindcast_s2008-r1i1p1f1_gn_200810-201812.nc" + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20191029/tas_Amon_NorCPM1_dcppA-hindcast_s2008-r1i1p1f1_gn_200810-201812.nc" # 
---- NorCPM1-i2: - name: - institution: + name: "NorCPM1-i2" + institution: "NCC" src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: @@ -369,8 +371,8 @@ archive: # ---- JRA-55: - name: - institution: + name: "JRA-55" + institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/jma/jra55/" monthly_mean: {"tas":"_f6h", "psl":"_f6h", "tos":"", "pr":"_s0-3h", "prlr":"_s0-3h"} daily_mean: {"tas":"_f6h", "psl":"_f6h", "prlr":"_s0-3h", "sfcWind":"_f6h"} @@ -379,18 +381,18 @@ archive: # ---- GISTEMPv4: - name: - institution: + name: "GISTEMPv4" + institution: "NASA Goddard Institute for Space Studies" src: "obs/noaa-nasa/ghcnersstgiss/" monthly_mean: {"tasanomaly":""} daily_mean: calendar: "standard" - reference_grid: "/esarchive/obs/noaa-nasa/ghcnersstgiss/monthly_mean/tasanomaly_200811.nc" + reference_grid: "/esarchive/obs/noaa-nasa/ghcnersstgiss/monthly_mean/tasanomaly/tasanomaly_200811.nc" # ---- HadCRUT4: - name: - institution: + name: "HadCRUT4" + institution: "Met Office Hadley Centre / Climatic Research Unit, University of East Anglia" src: "obs/ukmo/hadcrut_v4.6/" monthly_mean: {"tasanomaly":""} daily_mean: @@ -399,11 +401,11 @@ archive: # ---- HadSLP2: - name: + name: "HadSLP2" institution: src: "obs/ukmo/hadslp_v2/" monthly_mean: {"psl":""} daily_mean: calendar: "proleptic_gregorian" - reference_grid: "/esarchive/obs/ukmo/hadslp_v2/monthly_mean/psl_200811.nc" + reference_grid: "/esarchive/obs/ukmo/hadslp_v2/monthly_mean/psl/psl_200811.nc" -- GitLab From c08a475114e978a9839ac4634bef1bf442e13ed5 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 29 Sep 2022 15:27:08 +0200 Subject: [PATCH 45/81] Rename test file --- modules/test_victoria.R | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 modules/test_victoria.R diff --git a/modules/test_victoria.R b/modules/test_victoria.R deleted file mode 100644 index 5f59794f..00000000 --- a/modules/test_victoria.R +++ /dev/null 
@@ -1,24 +0,0 @@ - -source("modules/Loading/Loading.R") -source("modules/Calibration/Calibration.R") -source("modules/Skill/Skill.R") -source("modules/Saving/Saving.R") -source("modules/Visualization/Visualization.R") - -recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" -recipe <- read_yaml(recipe_file) -archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive - -# Load datasets -data <- load_datasets(recipe_file) -# Calibrate datasets -calibrated_data <- calibrate_datasets(data, recipe) -# Compute skill metrics -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) -# Compute percentiles and probability bins -probabilities <- compute_probabilities(calibrated_data$hcst, recipe) -# Export all data to netCDF -save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) -# Plot data -plot_data(recipe, archive, data, calibrated_data, skill_metrics, - probabilities, significance = T) -- GitLab From 699dbab99373f971e58dc368c0adfb5a6668250f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 30 Sep 2022 10:53:41 +0200 Subject: [PATCH 46/81] Add variables to variable dictionary --- conf/variable-dictionary.yml | 107 +++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 6 deletions(-) diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index 33cd1a57..bb3ffbda 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -1,28 +1,39 @@ vars: +## NOTE: The units field in this file corresponds to CMOR standards. +## Some variables in esarchive may have different units than stated here. +## Use with caution. 
# ECVs tas: units: "K" long_name: "Near-Surface Air Temperature" standard_name: "air_temperature" - accum: no + tos: + units: "degC" + long_name: "Sea Surface Temperature" + standard_name: "sea_surface_temperature" # outname: "t2" tasmax: units: "K" long_name: "Maximum Near-Surface Air Temperature" standard_name: "air_temperature" - accum: no tasmin: units: "K" long_name: "Minimum Near-Surface Air Temperature" standard_name: "air_temperature" - accum: no + ts: + units: "K" + long_name: "Surface Temperature" + standard_name: "surface_temperature" sfcWind: units: "m s-1" long_name: "Near-Surface Wind Speed" standard_name: "wind_speed" - accum: no + sfcWindmax: + units: "m s-1" + long_name: "Daily Maximum Near-Surface Wind Speed" + standard_name: "wind_speed" # outname: "wind" rsds: units: "W m-2" @@ -32,7 +43,7 @@ vars: accum: yes # outname: "rswin" prlr: - units: "mm" + units: "mm/day" long_name: "Total precipitation" standard_name: "total_precipitation_flux" #? Not in CF accum: yes @@ -41,7 +52,90 @@ vars: units: "m2 s-2" long_name: "Geopotential" standard_name: "geopotential" - accum: no + pr: + units: "kg m-2 s-1" + long_name: "Precipitation" + standard_name: "precipitation_flux" + accum: yes + prc: + units: "kg m-2 s-1" + long_name: "Convective Precipitation" + standard_name: "convective_precipitation_flux" + accum: yes + psl: + units: "Pa" + long_name: "Sea Level Pressure" + standard_name: "air_pressure_at_mean_sea_level" + clt: + units: "%" + long_name: "Total Cloud Cover Percentage" + standard_name: "cloud_area_fraction" + hurs: + units: "%" + long_name: "Near-Surface Relative Humidity" + standard_name: "relative_humidity" + hursmin: + units: "%" + long_name: "Daily Minimum Near-Surface Relative Humidity" + standard_name: "relative_humidity" + hursmax: + units: "%" + long_name: "Daily Maximum Near-Surface Relative Humidity" + standard_name: "relative_humidity" + hfls: + units: "W m-2" + long_name: "Surface Upward Latent Heat Flux" + standard_name: 
"surface_upward_latent_heat_flux" + huss: + units: "1" + long_name: "Near-Surface Specific Humidity" + standard_name: "specific_humidity" + rsut: + units: "W m-2" + long_name: "TOA Outgoing Shortwave Radiation" + standard_name: "toa_outgoing_shortwave_flux" + rlut: + units: "W m-2" + long_name: "TOA Outgoing Longwave Radiation" + standard_name: "toa_outgoing_longwave_flux" + rsdt: + units: "W m-2" + long_name: "TOA Incident Shortwave Radiation" + standard_name: "toa_incoming_shortwave_flux" + + ta: + units: "K" + long_name: "Air Temperature" + standard_name: "air_temperature" + ua: + units: "m s-1" + long_name: "Eastward Wind" + standard_name: "eastward_wind" + uas: + units: "m s-1" + long_name: "Eastward Near-Surface Wind" + standard_name: "eastward_wind" + va: + units: "m s-1" + long_name: "Northward Wind" + standard_name: "northward_wind" + vas: + units: "m s-1" + long_name: "Northward Near-Surface Wind" + standard_name: "northward wind" + zg: + units: "m" + long_name: "Geopotential Height" + standard_name: "geopotential_height" + evspsbl: + units: "kg m-2 s-1" + long_name: "Evaporation Including Sublimation and Transpiration" + standard_name: "water_evapotranspiration_flux" + hfss: + units: "W m-2" + long_name: "Surface Upward Sensible Heat Flux" + standard_name: "surface_upward_sensible_heat_flux" + # Coordinates coords: longitude: @@ -54,6 +148,7 @@ coords: standard_name: "latitude" long_name: "Latitude" axis: "Y" +## TODO: Add plevels # Skill metrics metrics: -- GitLab From 88f2c081378b49056e065a2700c009ce6243aade Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 30 Sep 2022 12:51:44 +0200 Subject: [PATCH 47/81] Add check to verify prlr units before changing them --- modules/Loading/Loading.R | 29 ++++++++++++++++++----------- modules/Loading/Loading_decadal.R | 22 ++++++++++++++-------- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index e1a6a818..a93be8ca 100644 --- 
a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -308,24 +308,31 @@ load_datasets <- function(recipe_file) { # Remove negative values in accumulative variables dictionary <- read_yaml("conf/variable-dictionary.yml") if (dictionary$vars[[variable]]$accum) { - info(logger, " Accumulated variable: setting negative values to zero.") + info(logger, "Accumulated variable: setting negative values to zero.") obs$data[obs$data < 0] <- 0 hcst$data[hcst$data < 0] <- 0 if (!is.null(fcst)) { fcst$data[fcst$data < 0] <- 0 } } - # Convert precipitation to mm/day - ## TODO: Make a function? + + # Convert prlr from m/s to mm/day + ## TODO: Make a unit conversion function? if (variable == "prlr") { - info(logger, "Converting precipitation from m/s to mm/day.") - obs$data <- obs$data*84000*1000 - attr(obs$Variable, "variable")$units <- "mm/day" - hcst$data <- hcst$data*84000*1000 - attr(hcst$Variable, "variable")$units <- "mm/day" - if (!is.null(fcst)) { - fcst$data <- fcst$data*84000*1000 - attr(fcst$Variable, "variable")$units <- "mm/day" + # Verify that the units are m/s and the same in obs and hcst + if ((attr(obs$Variable, "variable")$units != + attr(hcst$Variable, "variable")$units) && + (attr(obs$Variable, "variable")$units == "m s-1")) { + + info(logger, "Converting precipitation from m/s to mm/day.") + obs$data <- obs$data*84000*1000 + attr(obs$Variable, "variable")$units <- "mm/day" + hcst$data <- hcst$data*84000*1000 + attr(hcst$Variable, "variable")$units <- "mm/day" + if (!is.null(fcst)) { + fcst$data <- fcst$data*84000*1000 + attr(fcst$Variable, "variable")$units <- "mm/day" + } } } diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 7f9b89a8..9c4bb33d 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -423,14 +423,20 @@ load_datasets <- function(recipe_file) { # Convert precipitation to mm/day ## TODO: Make a function? 
if (variable == "prlr") { - info(logger, "Converting precipitation from m/s to mm/day.") - obs$data <- obs$data*84000*1000 - attr(obs$Variable, "variable")$units <- "mm/day" - hcst$data <- hcst$data*84000*1000 - attr(hcst$Variable, "variable")$units <- "mm/day" - if (!is.null(fcst)) { - fcst$data <- fcst$data*84000*1000 - attr(fcst$Variable, "variable")$units <- "mm/day" + # Verify that the units are m/s and the same in obs and hcst + if ((attr(obs$Variable, "variable")$units != + attr(hcst$Variable, "variable")$units) && + (attr(obs$Variable, "variable")$units == "m s-1")) { + + info(logger, "Converting precipitation from m/s to mm/day.") + obs$data <- obs$data*84000*1000 + attr(obs$Variable, "variable")$units <- "mm/day" + hcst$data <- hcst$data*84000*1000 + attr(hcst$Variable, "variable")$units <- "mm/day" + if (!is.null(fcst)) { + fcst$data <- fcst$data*84000*1000 + attr(fcst$Variable, "variable")$units <- "mm/day" + } } } -- GitLab From 26de202813884ded29707951f788295a022b409a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 30 Sep 2022 13:17:31 +0200 Subject: [PATCH 48/81] Bugfix: Fix issue with NAs in compute_probs() and compute_quants() --- modules/Skill/Skill.R | 3 ++- modules/Skill/s2s.probs.R | 51 ++++++++++++++++++++++++++------------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index e8af103e..a927efdc 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -231,7 +231,8 @@ compute_probabilities <- function(data, recipe) { # Parse thresholds in recipe thresholds <- sapply(element, function (x) eval(parse(text = x))) quants <- compute_quants(data$data, thresholds, - ncores = ncores) + ncores = ncores, + na.rm = na.rm) probs <- compute_probs(data$data, quants, ncores = ncores, na.rm = na.rm) diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R index c82e9697..00d7edb3 100644 --- a/modules/Skill/s2s.probs.R +++ b/modules/Skill/s2s.probs.R @@ -9,15 
+9,27 @@ compute_quants <- function(data, thresholds, ## to be computed. The quantiles should be the hcst quantiles, and then ## c2p() can be used to compute fcst probabilities for most likely terciles ## map. + + if (na.rm == FALSE) { + get_quantiles <- function(x, t) { + if (any(is.na(x))) { + rep(NA, length(t)) + } else { + quantile(as.vector(x), t, na.rm = FALSE) + } + } + } else { + get_quantiles <- function(x, t) { + quantile(as.vector(x), t, na.rm = TRUE) + } + } quantiles <- Apply(data, - quantile_dims, - function(x, na.rm) {quantile(as.vector(x), - thresholds,na.rm=na.rm)}, - output_dims="bin", - ncores=ncores, - na.rm=na.rm, - split_factor=split_factor)[[1]] + target_dims = quantile_dims, + function(x, t) {get_quantiles(as.vector(x), thresholds)}, + output_dims = "bin", + ncores = ncores, + split_factor = split_factor)[[1]] return(quantiles) } @@ -25,28 +37,33 @@ compute_quants <- function(data, thresholds, compute_probs <- function(data, quantiles, ncores=1, quantile_dims=c('syear', 'ensemble'), probs_dims=list('ensemble', 'bin'), - split_factor=1, na.rm=FALSE) { + split_factor=1, na.rm=TRUE) { if (na.rm == FALSE) { c2p <- function(x, t) { - colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) + # If the array contains any NA values, return NA + if (any(is.na(x))) { + rep(NA, dim(t)[['bin']] + 1) + } else { + colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) + } } } else { c2p <- function(x, t) { - if (any(!is.na(x))) { + if (any(!is.na(x))) { # If the array contains some non-NA values colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) - } else { - rep(NA, dim(t)[['bin']] + 1) # vector with as many NAs as probability bins. + } else { # If the array contains NAs only + rep(NA, dim(t)[['bin']] + 1) # vector with as many NAs as prob bins. 
} } } probs <- Apply(data = list(x = data, t = quantiles), - target_dims = probs_dims, - c2p, - output_dims = "bin", - split_factor = split_factor, - ncores = ncores)[[1]] + target_dims = probs_dims, + c2p, + output_dims = "bin", + split_factor = split_factor, + ncores = ncores)[[1]] return(probs) } -- GitLab From 67b46773a5279dcd3324a2045a4592a2297f20cf Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 30 Sep 2022 13:25:54 +0200 Subject: [PATCH 49/81] Add accum to all variables, fix pipeline --- conf/variable-dictionary.yml | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index bb3ffbda..51252154 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -9,31 +9,38 @@ vars: units: "K" long_name: "Near-Surface Air Temperature" standard_name: "air_temperature" + accum: no tos: units: "degC" long_name: "Sea Surface Temperature" standard_name: "sea_surface_temperature" + accum: no # outname: "t2" tasmax: units: "K" long_name: "Maximum Near-Surface Air Temperature" standard_name: "air_temperature" + accum: no tasmin: units: "K" long_name: "Minimum Near-Surface Air Temperature" standard_name: "air_temperature" + accum: no ts: units: "K" long_name: "Surface Temperature" standard_name: "surface_temperature" + accum: no sfcWind: units: "m s-1" long_name: "Near-Surface Wind Speed" standard_name: "wind_speed" + accum: no sfcWindmax: units: "m s-1" long_name: "Daily Maximum Near-Surface Wind Speed" standard_name: "wind_speed" + accum: no # outname: "wind" rsds: units: "W m-2" @@ -52,6 +59,7 @@ vars: units: "m2 s-2" long_name: "Geopotential" standard_name: "geopotential" + accum: no pr: units: "kg m-2 s-1" long_name: "Precipitation" @@ -66,75 +74,92 @@ vars: units: "Pa" long_name: "Sea Level Pressure" standard_name: "air_pressure_at_mean_sea_level" + accum: no clt: units: "%" long_name: "Total Cloud Cover Percentage" standard_name: 
"cloud_area_fraction" + accum: no hurs: units: "%" long_name: "Near-Surface Relative Humidity" standard_name: "relative_humidity" + accum: no hursmin: units: "%" long_name: "Daily Minimum Near-Surface Relative Humidity" standard_name: "relative_humidity" + accum: no hursmax: units: "%" long_name: "Daily Maximum Near-Surface Relative Humidity" standard_name: "relative_humidity" + accum: no hfls: units: "W m-2" long_name: "Surface Upward Latent Heat Flux" standard_name: "surface_upward_latent_heat_flux" + accum: no huss: units: "1" long_name: "Near-Surface Specific Humidity" standard_name: "specific_humidity" + accum: no rsut: units: "W m-2" long_name: "TOA Outgoing Shortwave Radiation" standard_name: "toa_outgoing_shortwave_flux" + accum: no rlut: units: "W m-2" long_name: "TOA Outgoing Longwave Radiation" standard_name: "toa_outgoing_longwave_flux" + accum: no rsdt: units: "W m-2" long_name: "TOA Incident Shortwave Radiation" standard_name: "toa_incoming_shortwave_flux" - + accum: no ta: units: "K" long_name: "Air Temperature" standard_name: "air_temperature" + accum: no ua: units: "m s-1" long_name: "Eastward Wind" standard_name: "eastward_wind" + accum: no uas: units: "m s-1" long_name: "Eastward Near-Surface Wind" standard_name: "eastward_wind" + accum: no va: units: "m s-1" long_name: "Northward Wind" standard_name: "northward_wind" + accum: no vas: units: "m s-1" long_name: "Northward Near-Surface Wind" standard_name: "northward wind" + accum: no zg: units: "m" long_name: "Geopotential Height" standard_name: "geopotential_height" + accum: no evspsbl: units: "kg m-2 s-1" long_name: "Evaporation Including Sublimation and Transpiration" standard_name: "water_evapotranspiration_flux" + accum: no hfss: units: "W m-2" long_name: "Surface Upward Sensible Heat Flux" standard_name: "surface_upward_sensible_heat_flux" + accum: no # Coordinates coords: -- GitLab From a9fde98a0ece6b9c6bdc6c32ca6f6f7754a92068 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: 
Fri, 30 Sep 2022 14:53:23 +0200 Subject: [PATCH 50/81] Set default na.rm to FALSE --- modules/Skill/Skill.R | 4 +--- modules/Skill/s2s.probs.R | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index a927efdc..76f7492f 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -213,10 +213,8 @@ compute_probabilities <- function(data, recipe) { ncores <- recipe$Analysis$ncores } - ## TODO: Remove commented lines and include warning if quantile() - ## can not accept na.rm = FALSE if (is.null(recipe$Analysis$remove_NAs)) { - na.rm = T + na.rm = F } else { na.rm = recipe$Analysis$remove_NAs } diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R index 00d7edb3..5921705c 100644 --- a/modules/Skill/s2s.probs.R +++ b/modules/Skill/s2s.probs.R @@ -3,7 +3,7 @@ compute_quants <- function(data, thresholds, ncores=1, quantile_dims=c('syear', 'ensemble'), probs_dims=list('ensemble', 'bin'), - split_factor=1, na.rm=TRUE) { + split_factor=1, na.rm=FALSE) { ## TODO: Adapt to the case where the forecast probability bins need ## to be computed. 
The quantiles should be the hcst quantiles, and then @@ -37,7 +37,7 @@ compute_quants <- function(data, thresholds, compute_probs <- function(data, quantiles, ncores=1, quantile_dims=c('syear', 'ensemble'), probs_dims=list('ensemble', 'bin'), - split_factor=1, na.rm=TRUE) { + split_factor=1, na.rm=FALSE) { if (na.rm == FALSE) { c2p <- function(x, t) { -- GitLab From 43b82c6b6bf68432645347770e06ea2b5dbd96b3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 4 Oct 2022 10:07:51 +0200 Subject: [PATCH 51/81] Add institution_system and institution_reference to output netCDF attributes --- modules/Saving/Saving.R | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index f50a06ff..713741fb 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -76,18 +76,21 @@ save_data <- function(recipe, archive, data, } } -get_global_attributes <- function(recipe) { +get_global_attributes <- function(recipe, archive) { # Generates metadata of interest to add to the global attributes of the # netCDF files. 
parameters <- recipe$Analysis hcst_period <- paste0(parameters$Time$hcst_start, " to ", parameters$Time$hcst_end) current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) + system_name <- parameters$Datasets$System$name + reference_name <- parameters$Datasets$Reference$name attrs <- list(reference_period = hcst_period, - institution = "BSC-CNS", - system = parameters$Datasets$System$name, - reference = parameters$Datasets$Reference$name, + institution_system = archive$System[[system_name]]$institution, + institution_reference = archive$Reference[[reference_name]]$institution, + system = system_name, + reference = reference_name, calibration_method = parameters$Workflow$Calibration$method, computed_on = current_time) @@ -162,7 +165,7 @@ save_forecast <- function(data_cube, variable <- data_cube$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name - global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$System[[global_attributes$system]]$calendar @@ -300,7 +303,7 @@ save_observations <- function(data_cube, variable <- data_cube$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name - global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$Reference[[global_attributes$reference]]$calendar @@ -445,7 +448,7 @@ save_metrics <- function(skill, } # Add global and variable attributes - global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) attr(skill[[1]], 'global_attrs') <- global_attributes for (i in 1:length(skill)) { @@ -553,7 +556,7 @@ save_corr <- function(skill, } # Add global and variable attributes - 
global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) attr(skill[[1]], 'global_attrs') <- global_attributes for (i in 1:length(skill)) { @@ -659,7 +662,7 @@ save_percentiles <- function(percentiles, } # Add global and variable attributes - global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) attr(percentiles[[1]], 'global_attrs') <- global_attributes for (i in 1:length(percentiles)) { @@ -759,7 +762,7 @@ save_probabilities <- function(probs, variable <- data_cube$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name - global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$System[[global_attributes$system]]$calendar -- GitLab From 99dac1e1611651a422e26c7b1f8094901092452f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 4 Oct 2022 10:14:56 +0200 Subject: [PATCH 52/81] Remove/rearrange visualization module files, download plotting functions locally) --- modules/Visualization/PlotCombinedMap.R | 608 +++++++++++++++ modules/Visualization/PlotLayout.R | 732 ++++++++++++++++++ .../Visualization/PlotMostLikelyQuantileMap.R | 196 +++++ modules/Visualization/Visualization.R | 12 +- modules/Visualization/s2s.plots.R | 103 --- 5 files changed, 1540 insertions(+), 111 deletions(-) create mode 100644 modules/Visualization/PlotCombinedMap.R create mode 100644 modules/Visualization/PlotLayout.R create mode 100644 modules/Visualization/PlotMostLikelyQuantileMap.R delete mode 100644 modules/Visualization/s2s.plots.R diff --git a/modules/Visualization/PlotCombinedMap.R b/modules/Visualization/PlotCombinedMap.R new file mode 100644 index 00000000..a7b5fc97 --- /dev/null +++ b/modules/Visualization/PlotCombinedMap.R @@ -0,0 +1,608 @@ +#'Plot Multiple 
Lon-Lat Variables In a Single Map According to a Decision Function +#'@description Plot a number a two dimensional matrices with (longitude, latitude) dimensions on a single map with the cylindrical equidistant latitude and longitude projection. +#'@author Nicolau Manubens, \email{nicolau.manubens@bsc.es} +#'@author Veronica Torralba, \email{veronica.torralba@bsc.es} +#' +#'@param maps List of matrices to plot, each with (longitude, latitude) dimensions, or 3-dimensional array with the dimensions (longitude, latitude, map). Dimension names are required. +#'@param lon Vector of longitudes. Must match the length of the corresponding dimension in 'maps'. +#'@param lat Vector of latitudes. Must match the length of the corresponding dimension in 'maps'. +#'@param map_select_fun Function that selects, for each grid point, which value to take among all the provided maps. This function receives as input a vector of values for a same grid point for all the provided maps, and must return a single selected value (not its index!) or NA. For example, the \code{min} and \code{max} functions are accepted. +#'@param display_range Range of values to be displayed for all the maps. This must be a numeric vector c(range min, range max). The values in the parameter 'maps' can go beyond the limits specified in this range. If the selected value for a given grid point (according to 'map_select_fun') falls outside the range, it will be coloured with 'col_unknown_map'. +#'@param map_dim Optional name for the dimension of 'maps' along which the multiple maps are arranged. Only applies when 'maps' is provided as a 3-dimensional array. Takes the value 'map' by default. +#'@param brks Colour levels to be sent to PlotEquiMap. This parameter is optional and adjusted automatically by the function. +#'@param cols List of vectors of colours to be sent to PlotEquiMap for the colour bar of each map. This parameter is optional and adjusted automatically by the function (up to 5 maps). 
The colours provided for each colour bar will be automatically interpolated to match the number of breaks. Each item in this list can be named, and the name will be used as title for the corresponding colour bar (equivalent to the parameter 'bar_titles'). +#'@param col_unknown_map Colour to use to paint the grid cells for which a map is not possible to be chosen according to 'map_select_fun' or for those values that go beyond 'display_range'. Takes the value 'white' by default. +#'@param mask Optional numeric array with dimensions (latitude, longitude), with values in the range [0, 1], indicating the opacity of the mask over each grid point. Cells with a 0 will result in no mask, whereas cells with a 1 will result in a totally opaque superimposed pixel coloured in 'col_mask'. +#'@param col_mask Colour to be used for the superimposed mask (if specified in 'mask'). Takes the value 'grey' by default. +#'@param dots Array of same dimensions as 'var' or with dimensions +#' c(n, dim(var)), where n is the number of dot/symbol layers to add to the +#' plot. A value of TRUE at a grid cell will draw a dot/symbol on the +#' corresponding square of the plot. By default all layers provided in 'dots' +#' are plotted with dots, but a symbol can be specified for each of the +#' layers via the parameter 'dot_symbol'. +#'@param bar_titles Optional vector of character strings providing the titles to be shown on top of each of the colour bars. +#'@param legend_scale Scale factor for the size of the colour bar labels. Takes 1 by default. +#'@param cex_bar_titles Scale factor for the sizes of the bar titles. Takes 1.5 by default. +#'@param fileout File where to save the plot. If not specified (default) a graphics device will pop up. Extensions allowed: eps/ps, jpeg, png, pdf, bmp and tiff +#'@param width File width, in the units specified in the parameter size_units (inches by default). Takes 8 by default. 
+#'@param height File height, in the units specified in the parameter size_units (inches by default). Takes 5 by default. +#'@param size_units Units of the size of the device (file or window) to plot in. Inches ('in') by default. See ?Devices and the creator function of the corresponding device. +#'@param res Resolution of the device (file or window) to plot in. See ?Devices and the creator function of the corresponding device. +#'@param drawleg Where to draw the common colour bar. Can take values TRUE, +#' FALSE or:\cr +#' 'up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N'\cr +#' 'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr +#' 'right', 'r', 'R', 'east', 'e', 'E'\cr +#' 'left', 'l', 'L', 'west', 'w', 'W' +#'@param ... Additional parameters to be passed on to \code{PlotEquiMap}. + +#'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} +#' +#'@importFrom s2dv PlotEquiMap ColorBar +#'@importFrom maps map +#'@importFrom graphics box image layout mtext par plot.new +#'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff +#'@examples +#'# Simple example +#'x <- array(1:(20 * 10), dim = c(lat = 10, lon = 20)) / 200 +#'a <- x * 0.6 +#'b <- (1 - x) * 0.6 +#'c <- 1 - (a + b) +#'lons <- seq(0, 359.5, length = 20) +#'lats <- seq(-89.5, 89.5, length = 10) +#'PlotCombinedMap(list(a, b, c), lons, lats, +#' toptitle = 'Maximum map', +#' map_select_fun = max, +#' display_range = c(0, 1), +#' bar_titles = paste('% of belonging to', c('a', 'b', 'c')), +#' brks = 20, width = 10, height = 8) +#' +#'Lon <- c(0:40, 350:359) +#'Lat <- 51:26 +#'data <- rnorm(51 * 26 * 3) +#'dim(data) <- c(map = 3, lon = 51, lat = 26) +#'mask <- sample(c(0,1), replace = TRUE, size = 51 * 26) +#'dim(mask) <- c(lat = 26, lon = 51) +#'PlotCombinedMap(data, lon = Lon, lat = Lat, map_select_fun = max, +#' display_range = range(data), mask = mask, +#' width = 12, height = 8) +#' +#'@export +PlotCombinedMap <- function(maps, 
lon, lat, + map_select_fun, display_range, + map_dim = 'map', + brks = NULL, cols = NULL, + col_unknown_map = 'white', + mask = NULL, col_mask = 'grey', + dots = NULL, + bar_titles = NULL, legend_scale = 1, + cex_bar_titles = 1.5, + plot_margin = NULL, bar_margin = rep(0, 4), + fileout = NULL, width = 8, height = 5, + size_units = 'in', res = 100, drawleg = T, + ...) { + args <- list(...) + + # If there is any filenames to store the graphics, process them + # to select the right device + if (!is.null(fileout)) { + deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, + units = size_units, res = res) + saveToFile <- deviceInfo$fun + fileout <- deviceInfo$files + } + + # Check probs + error <- FALSE + if (is.list(maps)) { + if (length(maps) < 1) { + stop("Parameter 'maps' must be of length >= 1 if provided as a list.") + } + check_fun <- function(x) { + is.numeric(x) && (length(dim(x)) == 2) + } + if (!all(sapply(maps, check_fun))) { + error <- TRUE + } + ref_dims <- dim(maps[[1]]) + equal_dims <- all(sapply(maps, function(x) identical(dim(x), ref_dims))) + if (!equal_dims) { + stop("All arrays in parameter 'maps' must have the same dimension ", + "sizes and names when 'maps' is provided as a list of arrays.") + } + num_maps <- length(maps) + maps <- unlist(maps) + dim(maps) <- c(ref_dims, map = num_maps) + map_dim <- 'map' + } + if (!is.numeric(maps)) { + error <- TRUE + } + if (is.null(dim(maps))) { + error <- TRUE + } + if (length(dim(maps)) != 3) { + error <- TRUE + } + if (error) { + stop("Parameter 'maps' must be either a numeric array with 3 dimensions ", + " or a list of numeric arrays of the same size with the 'lon' and ", + "'lat' dimensions.") + } + dimnames <- names(dim(maps)) + + # Check map_dim + if (is.character(map_dim)) { + if (is.null(dimnames)) { + stop("Specified a dimension name in 'map_dim' but no dimension names provided ", + "in 'maps'.") + } + map_dim <- which(dimnames == map_dim) + if (length(map_dim) < 1) { + 
stop("Dimension 'map_dim' not found in 'maps'.") + } else { + map_dim <- map_dim[1] + } + } else if (!is.numeric(map_dim)) { + stop("Parameter 'map_dim' must be either a numeric value or a ", + "dimension name.") + } + if (length(map_dim) != 1) { + stop("Parameter 'map_dim' must be of length 1.") + } + map_dim <- round(map_dim) + + # Work out lon_dim and lat_dim + lon_dim <- NULL + if (!is.null(dimnames)) { + lon_dim <- which(dimnames %in% c('lon', 'longitude'))[1] + } + if (length(lon_dim) < 1) { + lon_dim <- (1:3)[-map_dim][1] + } + lon_dim <- round(lon_dim) + + lat_dim <- NULL + if (!is.null(dimnames)) { + lat_dim <- which(dimnames %in% c('lat', 'latitude'))[1] + } + if (length(lat_dim) < 1) { + lat_dim <- (1:3)[-map_dim][2] + } + lat_dim <- round(lat_dim) + + # Check lon + if (!is.numeric(lon)) { + stop("Parameter 'lon' must be a numeric vector.") + } + if (length(lon) != dim(maps)[lon_dim]) { + stop("Parameter 'lon' does not match the longitude dimension in 'maps'.") + } + + # Check lat + if (!is.numeric(lat)) { + stop("Parameter 'lat' must be a numeric vector.") + } + if (length(lat) != dim(maps)[lat_dim]) { + stop("Parameter 'lat' does not match the longitude dimension in 'maps'.") + } + + # Check map_select_fun + if (is.numeric(map_select_fun)) { + if (length(dim(map_select_fun)) != 2) { + stop("Parameter 'map_select_fun' must be an array with dimensions ", + "'lon' and 'lat' if provided as an array.") + } + if (!identical(dim(map_select_fun), dim(maps)[-map_dim])) { + stop("The dimensions 'lon' and 'lat' in the 'map_select_fun' array must ", + "have the same size, name and order as in the 'maps' parameter.") + } + } + if (!is.function(map_select_fun)) { + stop("The parameter 'map_select_fun' must be a function or a numeric array.") + } + + # Check display_range + if (!is.numeric(display_range) || length(display_range) != 2) { + stop("Parameter 'display_range' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || 
(is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = display_range[1], to = display_range[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= dim(maps)[map_dim]) { + chosen_sets <- 1:(dim(maps)[map_dim]) + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(1:length(col_sets), dim(maps)[map_dim]) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != dim(maps)[map_dim]) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in 1:length(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (!is.null(names(cols))) { + bar_titles <- names(cols) + } else { + bar_titles <- paste0("Map ", 1:length(cols)) + } + } else { + if (!is.character(bar_titles)) { + stop("Parameter 'bar_titles' must be a character vector.") + } + if (length(bar_titles) != length(cols)) { + stop("Parameter 'bar_titles' must be of the same length as the number of ", + "maps in 'maps'.") + } + } + + # Check legend_scale + if (!is.numeric(legend_scale)) { + stop("Parameter 'legend_scale' must be numeric.") + } + + # Check col_unknown_map + if 
(!is.character(col_unknown_map)) { + stop("Parameter 'col_unknown_map' must be a character string.") + } + + # Check col_mask + if (!is.character(col_mask)) { + stop("Parameter 'col_mask' must be a character string.") + } + + # Check mask + if (!is.null(mask)) { + if (!is.numeric(mask)) { + stop("Parameter 'mask' must be numeric.") + } + if (length(dim(mask)) != 2) { + stop("Parameter 'mask' must have two dimensions.") + } + if ((dim(mask)[1] != dim(maps)[lat_dim]) || + (dim(mask)[2] != dim(maps)[lon_dim])) { + stop("Parameter 'mask' must have dimensions c(lat, lon).") + } + } + # Check dots + if (!is.null(dots)) { + if (length(dim(dots)) != 2) { + stop("Parameter 'mask' must have two dimensions.") + } + if ((dim(dots)[1] != dim(maps)[lat_dim]) || + (dim(dots)[2] != dim(maps)[lon_dim])) { + stop("Parameter 'mask' must have dimensions c(lat, lon).") + } + } + + #---------------------- + # Identify the most likely map + #---------------------- + brks_norm <- seq(0, 1, length.out = length(brks)) + if (is.function(map_select_fun)) { + range_width <- display_range[2] - display_range[1] + ml_map <- apply(maps, c(lat_dim, lon_dim), function(x) { + if (any(is.na(x))) { + res <- NA + } else { + res <- which(x == map_select_fun(x)) + if (length(res) > 0) { + res <- res[1] + if (map_select_fun(x) < display_range[1] || + map_select_fun(x) > display_range[2]) { + res <- -0.5 + } else { + res <- res + (map_select_fun(x) - display_range[1]) / range_width + if (map_select_fun(x) == display_range[1]) { + res <- res + brks_norm[2] / (num_brks * 2) + } + } + } else { + res <- -0.5 + } + } + res + }) + } else { + stop("Providing 'map_select_fun' as array not implemented yet.") + ml_map <- map_select_fun + } + nmap <- dim(maps)[map_dim] + nlat <- length(lat) + nlon <- length(lon) + + #---------------------- + # Set latitudes from minimum to maximum + #---------------------- + if (lat[1] > lat[nlat]){ + lat <- lat[nlat:1] + indices <- list(nlat:1, TRUE) + ml_map <- do.call("[", c(list(x 
= ml_map), indices)) + if (!is.null(mask)){ + mask <- mask[nlat:1, ] + } + if (!is.null(dots)){ + dots <- dots[nlat:1,] + } + } + + #---------------------- + # Set layout and parameters + #---------------------- + # Open connection to graphical device + if (!is.null(fileout)) { + saveToFile(fileout) + } else if (names(dev.cur()) == 'null device') { + dev.new(units = size_units, res = res, width = width, height = height) + } + #NOTE: I think plot.new() is not necessary in any case. +# plot.new() + par(font.main = 1) + # If colorbars need to be plotted, re-define layout. + if (drawleg) { + layout(matrix(c(rep(1, nmap),2:(nmap + 1)), 2, nmap, byrow = TRUE), heights = c(6, 1.5)) + } + + #---------------------- + # Set colors and breaks and then PlotEquiMap + #---------------------- + tcols <- c(col_unknown_map, cols[[1]]) + for (k in 2:nmap) { + tcols <- append(tcols, c(col_unknown_map, cols[[k]])) + } + + tbrks <- c(-1, brks_norm + rep(1:nmap, each = length(brks))) + + if (is.null(plot_margin)) { + plot_margin <- c(5, 4, 4, 2) + 0.1 # default of par()$mar + } + + PlotEquiMap(var = ml_map, lon = lon, lat = lat, + brks = tbrks, cols = tcols, drawleg = FALSE, + filled.continents = FALSE, dots = dots, mar = plot_margin, ...) 
+ + #---------------------- + # Add overplot on top + #---------------------- + if (!is.null(mask)) { + dims_mask <- dim(mask) + latb <- sort(lat, index.return = TRUE) + dlon <- lon[2:dims_mask[2]] - lon[1:(dims_mask[2] - 1)] + wher <- which(dlon > (mean(dlon) + 1)) + if (length(wher) > 0) { + lon[(wher + 1):dims_mask[2]] <- lon[(wher + 1):dims_mask[2]] - 360 + } + lonb <- sort(lon, index.return = TRUE) + + cols_mask <- sapply(seq(from = 0, to = 1, length.out = 10), + function(x) adjustcolor(col_mask, alpha.f = x)) + image(lonb$x, latb$x, t(mask)[lonb$ix, latb$ix], + axes = FALSE, col = cols_mask, + breaks = seq(from = 0, to = 1, by = 0.1), + xlab='', ylab='', add = TRUE, xpd = TRUE) + if (!exists('coast_color')) { + coast_color <- 'black' + } + if (min(lon) < 0) { + map('world', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon -180 to 180). + } else { + map('world2', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon 0 to 360). + } + box() + } + + #---------------------- + # Add colorbars + #---------------------- + if ('toptitle' %in% names(args)) { + size_title <- 1 + if ('title_scale' %in% names(args)) { + size_title <- args[['title_scale']] + } + old_mar <- par('mar') + old_mar[3] <- old_mar[3] - (2 * size_title + 1) + par(mar = old_mar) + } + + if (drawleg) { + for (k in 1:nmap) { + ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, + draw_separators = TRUE, extra_margin = c(2, 0, 2, 0), + label_scale = legend_scale * 1.5) + if (!is.null(bar_titles)) { + mtext(bar_titles[[k]], 3, line = -3, cex = cex_bar_titles) + } + #TODO: Change to below code. Plot title together. extra_margin needs to be adjusted. 
+# ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, +# draw_separators = TRUE, extra_margin = c(1, 0, 1, 0), +# label_scale = legend_scale * 1.5, title = bar_titles[[k]], title_scale = cex_bar_titles) + } + } + + # If the graphic was saved to file, close the connection with the device + if (!is.null(fileout)) dev.off() +} + +# Color bar for PlotMostLikelyQuantileMap +multi_ColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits = NULL, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, label_scale = 1, extra_margin = rep(0, 4), + ...) { + + minimum_value <- ceiling(1 / nmap * 10 * 1.1) * 10 + display_range = c(minimum_value, 100) + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = display_range[1], to = display_range[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(1:length(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != dim(maps)[map_dim]) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 
'maps'.") + } + } + for (i in 1:length(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } else { + bar_titles <- paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { + s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, + bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + #TODO: col_inf and col_sup + return(list(brks = brks, cols = cols)) + } + +} + +#TODO: use s2dv:::.SelectDevice and remove this function here? +.SelectDevice <- function(fileout, width, height, units, res) { + # This function is used in the plot functions to check the extension of the + # files where the graphics will be stored and select the right R device to + # save them. + # If the vector of filenames ('fileout') has files with different + # extensions, then it will only accept the first one, changing all the rest + # of the filenames to use that extension. + + # We extract the extension of the filenames: '.png', '.pdf', ... 
+ ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout)) + + if (length(ext) != 0) { + # If there is an extension specified, select the correct device + ## units of width and height set to accept inches + if (ext[1] == ".png") { + saveToFile <- function(fileout) { + png(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] == ".jpeg") { + saveToFile <- function(fileout) { + jpeg(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] %in% c(".eps", ".ps")) { + saveToFile <- function(fileout) { + postscript(file = fileout, width = width, height = height) + } + } else if (ext[1] == ".pdf") { + saveToFile <- function(fileout) { + pdf(file = fileout, width = width, height = height) + } + } else if (ext[1] == ".svg") { + saveToFile <- function(fileout) { + svg(filename = fileout, width = width, height = height) + } + } else if (ext[1] == ".bmp") { + saveToFile <- function(fileout) { + bmp(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] == ".tiff") { + saveToFile <- function(fileout) { + tiff(filename = fileout, width = width, height = height, res = res, units = units) + } + } else { + warning("file extension not supported, it will be used '.eps' by default.") + ## In case there is only one filename + fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1]) + ext[1] <- ".eps" + saveToFile <- function(fileout) { + postscript(file = fileout, width = width, height = height) + } + } + # Change filenames when necessary + if (any(ext != ext[1])) { + warning(paste0("some extensions of the filenames provided in 'fileout' are not ", ext[1],". 
The extensions are being converted to ", ext[1], ".")) + fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout) + } + } else { + # Default filenames when there is no specification + warning("there are no extensions specified in the filenames, default to '.eps'") + fileout <- paste0(fileout, ".eps") + saveToFile <- postscript + } + + # return the correct function with the graphical device, and the correct + # filenames + list(fun = saveToFile, files = fileout) +} + diff --git a/modules/Visualization/PlotLayout.R b/modules/Visualization/PlotLayout.R new file mode 100644 index 00000000..e5ae9800 --- /dev/null +++ b/modules/Visualization/PlotLayout.R @@ -0,0 +1,732 @@ +#'Arrange and Fill Multi-Pannel Layouts With Optional Colour Bar +#' +#'This function takes an array or list of arrays and loops over each of them +#'to plot all the sub-arrays they contain on an automatically generated +#'multi-pannel layout. A different plot function (not necessarily from +#'s2dv) can be applied over each of the provided arrays. The input +#'dimensions of each of the functions have to be specified, either with the +#'names or the indices of the corresponding input dimensions. It is possible +#'to draw a common colour bar at any of the sides of the multi-pannel for all +#'the s2dv plots that use a colour bar. Common plotting arguments +#'for all the arrays in 'var' can be specified via the '...' parameter, and +#'specific plotting arguments for each array can be fully adjusted via +#''special_args'. It is possible to draw titles for each of the figures, +#'layout rows, layout columns and for the whole figure. A number of parameters +#'is provided in order to adjust the position, size and colour of the +#'components. Blank cells can be forced to appear and later be filled in +#'manually with customized plots.\cr +#'This function pops up a blank new device and fills it in, so it cannot be +#'nested in complex layouts. 
+#' +#'@param fun Plot function (or name of the function) to be called on the +#' arrays provided in 'var'. If multiple arrays are provided in 'var', a +#' vector of as many function names (character strings!) can be provided in +#' 'fun', one for each array in 'var'. +#'@param plot_dims Numeric or character string vector with identifiers of the +#' input plot dimensions of the plot function specified in 'fun'. If +#' character labels are provided, names(dim(var)) or attr('dimensions', var) +#' will be checked to locate the dimensions. As many plots as +#' prod(dim(var)[-plot_dims]) will be generated. If multiple arrays are +#' provided in 'var', 'plot_dims' can be sent a list with a vector of plot +#' dimensions for each. If a single vector is provided, it will be used for +#' all the arrays in 'var'. +#'@param var Multi-dimensional array with at least the dimensions expected by +#' the specified plot function in 'fun'. The dimensions reqired by the +#' function must be specified in 'plot_dims'. The dimensions can be +#' disordered and will be reordered automatically. Dimensions can optionally +#' be labelled in order to refer to them with names in 'plot_dims'. All the +#' available plottable sub-arrays will be automatically plotted and arranged +#' in consecutive cells of an automatically arranged layout. A list of +#' multiple (super-)arrays can be specified. The process will be repeated for +#' each of them, by default applying the same plot function to all of them +#' or, if properly specified in 'fun', a different plot function will be +#' applied to each of them. NAs can be passed to the list: a NA will yield a +#' blank cell in the layout, which can be populated after +#' (see .SwitchToFigure). +#'@param \dots Parameters to be sent to the plotting function 'fun'. 
If +#' multiple arrays are provided in 'var' and multiple functions are provided +#' in 'fun', the parameters provided through \dots will be sent to all the +#' plot functions, as common parameters. To specify concrete arguments for +#' each of the plot functions see parameter 'special_args'. +#'@param special_args List of sub-lists, each sub-list having specific extra +#' arguments for each of the plot functions provided in 'fun'. If you want to +#' fix a different value for each plot in the layout you can do so by +#' a) splitting your array into a list of sub-arrays (each with the data for +#' one plot) and providing it as parameter 'var', +#' b) providing a list of named sub-lists in 'special_args', where the names +#' of each sub-list match the names of the parameters to be adjusted, and +#' each value in a sub-list contains the value of the corresponding parameter. +#' For example, if the plots are two maps with different arguments, the +#' structure would be like:\cr +#' var:\cr +#' List of 2\cr +#' $ : num [1:360, 1:181] 1 3.82 5.02 6.63 8.72 ...\cr +#' $ : num [1:360, 1:181] 2.27 2.82 4.82 7.7 10.32 ...\cr +#' special_args:\cr +#' List of 2\cr +#' $ :List of 2\cr +#' ..$ arg1: ...\cr +#' ..$ arg2: ...\cr +#' $ :List of 1\cr +#' ..$ arg1: ...\cr +#'@param nrow Numeric value to force the number of rows in the automatically +#' generated layout. If higher than the required, this will yield blank cells +#' in the layout (which can then be populated). If lower than the required +#' the function will stop. By default it is configured to arrange the layout +#' in a shape as square as possible. Blank cells can be manually populated +#' after with customized plots (see SwitchTofigure). +#'@param ncol Numeric value to force the number of columns in the +#' automatically generated layout. If higher than the required, this will +#' yield blank cells in the layout (which can then be populated). If lower +#' than the required the function will stop. 
By default it is configured to +#' arrange the layout in a shape as square as possible. Blank cells can be +#' manually populated after with customized plots (see SwitchTofigure). +#'@param toptitle Topt title for the multi-pannel. Blank by default. +#'@param row_titles Character string vector with titles for each of the rows +#' in the layout. Blank by default. +#'@param col_titles Character string vector with titles for each of the +#' columns in the layout. Blank by default. +#'@param bar_scale Scale factor for the common colour bar. Takes 1 by default. +#'@param title_scale Scale factor for the multi-pannel title. Takes 1 by +#' default. +#'@param title_margin_scale Scale factor for the margins surrounding the top +#' title. Takes 1 by default. +#'@param title_left_shift_scale When plotting row titles, a shift is added +#' to the horizontal positioning of the top title in order to center it to +#' the region of the figures (without taking row titles into account). This +#' shift can be reduced. A value of 0 will remove the shift completely, +#' centering the title to the total width of the device. This parameter will +#' be disregarded if no 'row_titles' are provided. +#'@param subtitle_scale Scale factor for the row titles and column titles +#' (specified in 'row_titles' and 'col_titles'). Takes 1 by default. +#'@param subtitle_margin_scale Scale factor for the margins surrounding the +#' subtitles. Takes 1 by default. +#'@param units Title at the top of the colour bar, most commonly the units of +#' the variable provided in parameter 'var'. +#'@param brks,cols,bar_limits,triangle_ends Usually only providing 'brks' is +#' enough to generate the desired colour bar. These parameters allow to +#' define n breaks that define n - 1 intervals to classify each of the values +#' in 'var'. The corresponding grid cell of a given value in 'var' will be +#' coloured in function of the interval it belongs to. 
These parameters are +#' sent to \code{ColorBar()} to generate the breaks and colours. Additional +#' colours for values beyond the limits of the colour bar are also generated +#' and applied to the plot if 'bar_limits' or 'brks' and 'triangle_ends' are +#' properly provided to do so. See ?ColorBar for a full explanation. +#'@param col_inf,col_sup Colour identifiers to colour the values in 'var' that +#' go beyond the extremes of the colour bar and to colour NA values, +#' respectively. 'colNA' takes 'white' by default. 'col_inf' and 'col_sup' +#' will take the value of 'colNA' if not specified. See ?ColorBar for a full +#' explanation on 'col_inf' and 'col_sup'. +#'@param color_fun,subsampleg,bar_extra_labels,draw_bar_ticks,draw_separators,triangle_ends_scale,bar_label_digits,bar_label_scale,units_scale,bar_tick_scale,bar_extra_margin Set of parameters to control the visual aspect of the drawn colour bar. See ?ColorBar for a full explanation. +#'@param drawleg Where to draw the common colour bar. Can take values TRUE, +#' FALSE or:\cr +#' 'up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N'\cr +#' 'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr +#' 'right', 'r', 'R', 'east', 'e', 'E'\cr +#' 'left', 'l', 'L', 'west', 'w', 'W' +#'@param titles Character string vector with titles for each of the figures in +#' the multi-pannel, from top-left to bottom-right. Blank by default. +#'@param bar_left_shift_scale When plotting row titles, a shift is added to +#' the horizontal positioning of the colour bar in order to center it to the +#' region of the figures (without taking row titles into account). This shift +#' can be reduced. A value of 0 will remove the shift completely, centering +#' the colour bar to the total width of the device. This parameter will be +#' disregarded if no 'row_titles' are provided. +#'@param extra_margin Extra margins to be added around the layout, in the +#' format c(y1, x1, y2, x2). The units are margin lines. 
Takes rep(0, 4) +#' by default. +#'@param layout_by_rows Logical indicating wether the panels should be filled +#' by columns (FALSE) or by raws (TRUE, default). +#'@param fileout File where to save the plot. If not specified (default) a +#' graphics device will pop up. Extensions allowed: eps/ps, jpeg, png, pdf, +#' bmp and tiff. +#'@param width Width in inches of the multi-pannel. 7 by default, or 11 if +#' 'fielout' has been specified. +#'@param height Height in inches of the multi-pannel. 7 by default, or 11 if +#' 'fileout' has been specified. +#'@param size_units Units of the size of the device (file or window) to plot +#' in. Inches ('in') by default. See ?Devices and the creator function of +#' the corresponding device. +#'@param res Resolution of the device (file or window) to plot in. See +#' ?Devices and the creator function of the corresponding device. +#'@param close_device Whether to close the graphics device after plotting +#' the layout and a 'fileout' has been specified. This is useful to avoid +#' closing the device when saving the layout into a file and willing to add +#' extra elements or figures. Takes TRUE by default. Disregarded if no +#' 'fileout' has been specified. +#' +#'@return +#'\item{brks}{ +#' Breaks used for colouring the map (and legend if drawleg = TRUE). +#'} +#'\item{cols}{ +#' Colours used for colouring the map (and legend if drawleg = TRUE). +#' Always of length length(brks) - 1. +#'} +#'\item{col_inf}{ +#' Colour used to draw the lower triangle end in the colour bar +#' (NULL if not drawn at all). +#'} +#'\item{col_sup}{ +#' Colour used to draw the upper triangle end in the colour bar +#' (NULL if not drawn at all). +#'} +#'\item{layout_matrix}{ +#' Underlying matrix of the layout. Useful to later set any of the layout +#' cells as current figure to add plot elements. See .SwitchToFigure. 
+#'} +#' +#'@examples +#'# See examples on Load() to understand the first lines in this example +#' \dontrun{ +#'data_path <- system.file('sample_data', package = 's2dv') +#'expA <- list(name = 'experiment', path = file.path(data_path, +#' 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', +#' '$VAR_NAME$_$START_DATE$.nc')) +#'obsX <- list(name = 'observation', path = file.path(data_path, +#' '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', +#' '$VAR_NAME$_$YEAR$$MONTH$.nc')) +#' +#'# Now we are ready to use Load(). +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- Load('tos', list(expA), list(obsX), startDates, +#' leadtimemin = 1, leadtimemax = 4, output = 'lonlat', +#' latmin = 27, latmax = 48, lonmin = -12, lonmax = 40) +#' } +#' \dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#' } +#'PlotLayout(PlotEquiMap, c('lat', 'lon'), sampleData$mod[1, , 1, 1, , ], +#' sampleData$lon, sampleData$lat, +#' toptitle = 'Predicted tos for Nov 1960 from 1st Nov', +#' titles = paste('Member', 1:15)) +#' +#'@importFrom grDevices dev.cur dev.new dev.off +#'@export +PlotLayout <- function(fun, plot_dims, var, ..., special_args = NULL, + nrow = NULL, ncol = NULL, toptitle = NULL, + row_titles = NULL, col_titles = NULL, bar_scale = 1, + title_scale = 1, title_margin_scale = 1, + title_left_shift_scale = 1, + subtitle_scale = 1, subtitle_margin_scale = 1, + brks = NULL, cols = NULL, drawleg = 'S', titles = NULL, + subsampleg = NULL, bar_limits = NULL, + triangle_ends = NULL, col_inf = NULL, col_sup = NULL, + color_fun = clim.colors, + draw_bar_ticks = TRUE, draw_separators = FALSE, + triangle_ends_scale = 1, bar_extra_labels = NULL, + units = NULL, units_scale = 1, 
bar_label_scale = 1, + bar_tick_scale = 1, bar_extra_margin = rep(0, 4), + bar_left_shift_scale = 1, bar_label_digits = 4, + extra_margin = rep(0, 4), layout_by_rows = TRUE, + fileout = NULL, width = NULL, height = NULL, + size_units = 'in', res = 100, close_device = TRUE) { + # If there is any filenames to store the graphics, process them + # to select the right device + if (!is.null(fileout)) { + deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, units = size_units, res = res) + saveToFile <- deviceInfo$fun + fileout <- deviceInfo$files + } + + is_single_na <- function(x) ifelse(length(x) > 1, FALSE, is.na(x)) + # Check var + if (!is.list(var) & (is.array(var) || (is_single_na(var)))) { + var <- list(var) + } else if (is.list(var)) { + if (!all(sapply(var, is.array) | sapply(var, is_single_na))) { + stop("Parameter 'var' must be an array or a list of arrays (or NA values).") + } + } else { + stop("Parameter 'var' must be an array or a list of arrays.") + } + + # Check fun + if (length(fun) == 1) { + if (is.function(fun)) { + fun <- as.character(substitute(fun)) + } + if (is.character(fun)) { + fun <- rep(fun, length(var)) + } + } + if (!is.character(fun) || (length(fun) != length(var))) { + stop("Parameter 'fun' must be a single function or a vector of function names, one for each array provided in parameter 'var'.") + } + + # Check special_args + if (!is.null(special_args)) { + if (!is.list(special_args) || any(!sapply(special_args, is.list))) { + stop("Parameter 'special_args' must be a list of lists.") + } else if (length(special_args) != length(var)) { + stop("Parameter 'special_args' must contain a list of special arguments for each array provided in 'var'.") + } + } + + # Check plot_dims + if (is.character(plot_dims) || is.numeric(plot_dims)) { + plot_dims <- replicate(length(var), plot_dims, simplify = FALSE) + } + if (!is.list(plot_dims) || !all(sapply(plot_dims, is.character) | sapply(plot_dims, is.numeric)) || + 
(length(plot_dims) != length(var))) { + stop("Parameter 'plot_dims' must contain a single numeric or character vector with dimension identifiers or a vector for each array provided in parameter 'var'.") + } + + # Check nrow + if (!is.null(nrow)) { + if (!is.numeric(nrow)) { + stop("Parameter 'nrow' must be numeric or NULL.") + } + nrow <- round(nrow) + } + + # Check ncol + if (!is.null(ncol)) { + if (!is.numeric(ncol)) { + stop("Parameter 'ncol' must be numeric or NULL.") + } + ncol <- round(ncol) + } + # Check layout_by_rows + if (!is.logical(layout_by_rows)) { + stop("Parameter 'layout_by_rows' must be logical.") + } + + # Check toptitle + if (is.null(toptitle) || is.na(toptitle)) { + toptitle <- '' + } + if (!is.character(toptitle)) { + stop("Parameter 'toptitle' must be a character string.") + } + + # Check row_titles + if (!is.null(row_titles)) { + if (!is.character(row_titles)) { + stop("Parameter 'row_titles' must be a vector of character strings.") + } + } + + # Check col_titles + if (!is.null(row_titles)) { + if (!is.character(row_titles)) { + stop("Parameter 'row_titles' must be a vector of character strings.") + } + } + + # Check drawleg + if (is.character(drawleg)) { + if (drawleg %in% c('up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N')) { + drawleg <- 'N' + } else if (drawleg %in% c('down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S')) { + drawleg <- 'S' + } else if (drawleg %in% c('right', 'r', 'R', 'east', 'e', 'E')) { + drawleg <- 'E' + } else if (drawleg %in% c('left', 'l', 'L', 'west', 'w', 'W')) { + drawleg <- 'W' + } else { + stop("Parameter 'drawleg' must be either TRUE, FALSE or a valid identifier of a position (see ?PlotMultiMap).") + } + } else if (!is.logical(drawleg)) { + stop("Parameter 'drawleg' must be either TRUE, FALSE or a valid identifier of a position (see ?PlotMultiMap).") + } + if (drawleg != FALSE && all(sapply(var, is_single_na)) && + (is.null(brks) || length(brks) < 2)) { + stop("Either data arrays in 'var' or breaks in 
'brks' must be provided if 'drawleg' is requested.") + } + + # Check the rest of parameters (unless the user simply wants to build an empty layout) + var_limits <- NULL + if (!all(sapply(var, is_single_na))) { + var_limits <- c(min(unlist(var), na.rm = TRUE), max(unlist(var), na.rm = TRUE)) + if ((any(is.infinite(var_limits)) || var_limits[1] == var_limits[2])) { + stop("Arrays in parameter 'var' must contain at least 2 different values.") + } + } + colorbar <- ColorBar(brks, cols, FALSE, subsampleg, bar_limits, + var_limits, triangle_ends, col_inf, col_sup, color_fun, + plot = FALSE, draw_bar_ticks, + draw_separators, triangle_ends_scale, bar_extra_labels, + units, units_scale, bar_label_scale, bar_tick_scale, + bar_extra_margin, bar_label_digits) + + # Check bar_scale + if (!is.numeric(bar_scale)) { + stop("Parameter 'bar_scale' must be numeric.") + } + + # Check bar_left_shift_scale + if (!is.numeric(bar_left_shift_scale)) { + stop("Parameter 'bar_left_shift_scale' must be numeric.") + } + + # Check title_scale + if (!is.numeric(title_scale)) { + stop("Parameter 'title_scale' must be numeric.") + } + + # Check title_margin_scale + if (!is.numeric(title_margin_scale)) { + stop("Parameter 'title_margin_scale' must be numeric.") + } + + # Check title_left_shift_scale + if (!is.numeric(title_left_shift_scale)) { + stop("Parameter 'title_left_shift_scale' must be numeric.") + } + + # Check subtitle_scale + if (!is.numeric(subtitle_scale)) { + stop("Parameter 'subtite_scale' must be numeric.") + } + + # Check subtitle_margin_scale + if (!is.numeric(subtitle_margin_scale)) { + stop("Parameter 'subtite_margin_scale' must be numeric.") + } + + # Check titles + if (!all(sapply(titles, is.character))) { + stop("Parameter 'titles' must be a vector of character strings.") + } + + # Check extra_margin + if (!is.numeric(extra_margin) || length(extra_margin) != 4) { + stop("Parameter 'extra_margin' must be a numeric vector with 4 elements.") + } + + # Check width + if 
(is.null(width)) { + if (is.null(fileout)) { + width <- 7 + } else { + width <- 11 + } + } + if (!is.numeric(width)) { + stop("Parameter 'width' must be numeric.") + } + + # Check height + if (is.null(height)) { + if (is.null(fileout)) { + height <- 7 + } else { + height <- 8 + } + } + if (!is.numeric(height)) { + stop("Parameter 'height' must be numeric.") + } + + # Check close_device + if (!is.logical(close_device)) { + stop("Parameter 'close_device' must be logical.") + } + + # Count the total number of maps and reorder each array of maps to have the lat and lon dimensions at the end. + n_plots <- 0 + plot_array_i <- 1 + for (plot_array in var) { + if (is_single_na(plot_array)) { + n_plots <- n_plots + 1 + } else { + dim_ids <- plot_dims[[plot_array_i]] + if (is.character(dim_ids)) { + dimnames <- NULL + if (!is.null(names(dim(plot_array)))) { + dimnames <- names(dim(plot_array)) + } else if (!is.null(attr(plot_array, 'dimensions'))) { + dimnames <- attr(plot_array, 'dimensions') + } + if (!is.null(dimnames)) { + if (any(!sapply(dim_ids, `%in%`, dimnames))) { + stop("All arrays provided in parameter 'var' must have all the dimensions in 'plot_dims'.") + } + dim_ids <- sapply(dim_ids, function(x) which(dimnames == x)[1]) + var[[plot_array_i]] <- Reorder(var[[plot_array_i]], c((1:length(dim(plot_array)))[-dim_ids], dim_ids)) + } else { + .warning(paste0("Assuming the ", plot_array_i, "th array provided in 'var' has 'plot_dims' as last dimensions (right-most).")) + dims <- tail(c(rep(1, length(dim_ids)), dim(plot_array)), length(dim_ids)) + dim_ids <- tail(1:length(dim(plot_array)), length(dim_ids)) + if (length(dim(var[[plot_array_i]])) < length(dims)) { + dim(var[[plot_array_i]]) <- dims + } + } + } else if (any(dim_ids > length(dim(plot_array)))) { + stop("Parameter 'plot_dims' contains dimension identifiers out of range.") + } + n_plots <- n_plots + prod(dim(plot_array)[-dim_ids]) + #n_plots <- n_plots + prod(head(c(rep(1, length(dim_ids)), dim(plot_array)), 
length(dim(plot_array)))) + if (length(dim(var[[plot_array_i]])) == length(dim_ids)) { + dim(var[[plot_array_i]]) <- c(1, dim(var[[plot_array_i]])) + dim_ids <- dim_ids + 1 + } + plot_dims[[plot_array_i]] <- dim_ids + } + plot_array_i <- plot_array_i + 1 + } + if (is.null(nrow) && is.null(ncol)) { + ncol <- ceiling(sqrt(n_plots)) + nrow <- ceiling(n_plots/ncol) + } else if (is.null(ncol)) { + ncol <- ceiling(n_plots/nrow) + } else if (is.null(nrow)) { + nrow <- ceiling(n_plots/ncol) + } else if (nrow * ncol < n_plots) { + stop("There are more arrays to plot in 'var' than cells defined by 'nrow' x 'ncol'.") + } + + if (is.logical(drawleg) && drawleg) { + if (nrow > ncol) { + drawleg <- 'S' + } else { + drawleg <- 'E' + } + } + vertical <- drawleg %in% c('E', 'W') + + # Open connection to graphical device + if (!is.null(fileout)) { + saveToFile(fileout) + } else if (names(dev.cur()) == 'null device') { + dev.new(units = size_units, res = res, width = width, height = height) + } else if (prod(par('mfrow')) > 1) { + dev.new(units = units, res = res, width = width, height = height) + } + + # Take size of device and set up layout: + # --------------------------------------------- + # |0000000000000000000000000000000000000000000| + # |0000000000000000 TOP TITLE 0000000000000000| + # |0000000000000000000000000000000000000000000| + # |-------------------------------------------| + # |00000|0000000000000000000000000000000000000| + # |00000|000000000000 ROW TITLES 0000000000000| + # |00000|0000000000000000000000000000000000000| + # |00000|-------------------------------------| + # |0 0|222222222222222222|333333333333333333| + # |0 C 0|222222222222222222|333333333333333333| + # |0 O 0|222222222222222222|333333333333333333| + # |0 L 0|2222 FIGURE 1 2222|3333 FIGURE 2 3333| + # |0 0|222222222222222222|333333333333333333| + # |0 T 0|222222222222222222|333333333333333333| + # |0 I 0|222222222222222222|333333333333333333| + # |0 T 0|-------------------------------------| + # |0 L 
0|444444444444444444|555555555555555555| + # |0 S 0|444444444444444444|555555555555555555| + # |0 0|444444444444444444|555555555555555555| + # |00000|4444 FIGURE 3 4444|5555 FIGURE 4 5555| + # |00000|444444444444444444|555555555555555555| + # |00000|444444444444444444|555555555555555555| + # |00000|444444444444444444|555555555555555555| + # |-------------------------------------------| + # |1111111111111111111111111111111111111111111| + # |1111111111111111 COLOR BAR 1111111111111111| + # |1111111111111111111111111111111111111111111| + # --------------------------------------------- + device_size <- par('din') + device_size[1] <- device_size[1] - sum(extra_margin[c(2, 4)]) + device_size[2] <- device_size[2] - sum(extra_margin[c(1, 3)]) + cs <- char_size <- par('csi') + title_cex <- 2.5 * title_scale + title_margin <- 0.5 * title_cex * title_margin_scale + subtitle_cex <- 1.5 * subtitle_scale + subtitle_margin <- 0.5 * sqrt(nrow * ncol) * subtitle_cex * subtitle_margin_scale + mat_layout <- 1:(nrow * ncol) + if (drawleg != FALSE) { + if (fun == 'PlotMostLikelyQuantileMap') { #multi_colorbar + multi_colorbar <- TRUE + cat_dim <- list(...)$cat_dim + nmap <- as.numeric(dim(var[[1]])[cat_dim]) + mat_layout <- mat_layout + nmap + } else { + multi_colorbar <- FALSE + mat_layout <- mat_layout + 1 + } + } + mat_layout <- matrix(mat_layout, nrow, ncol, byrow = layout_by_rows) + fsu <- figure_size_units <- 10 # unitless + widths <- rep(fsu, ncol) + heights <- rep(fsu, nrow) + # Useless +# n_figures <- nrow * ncol + + if (drawleg != FALSE) { + if (drawleg == 'N') { + mat_layout <- rbind(rep(1, dim(mat_layout)[2]), mat_layout) + heights <- c(round(bar_scale * 2 * nrow), heights) + } else if (drawleg == 'S') { + if (multi_colorbar) { + new_mat_layout <- c() + for (i_col in 1:ncol) { + new_mat_layout <- c(new_mat_layout, rep(mat_layout[, i_col], nmap)) + } + new_mat_layout <- matrix(new_mat_layout, nrow, nmap * ncol) + colorbar_row <- rep(1:nmap, each = ncol) + mat_layout <- 
rbind(new_mat_layout, as.numeric(colorbar_row)) + widths <- rep(widths, nmap) + } else { + mat_layout <- rbind(mat_layout, rep(1, dim(mat_layout)[2])) + } + heights <- c(heights, round(bar_scale * 2 * nrow)) + } else if (drawleg == 'W') { + mat_layout <- cbind(rep(1, dim(mat_layout)[1]), mat_layout) + widths <- c(round(bar_scale * 3 * ncol), widths) + } else if (drawleg == 'E') { + mat_layout <- cbind(mat_layout, rep(1, dim(mat_layout)[1])) + widths <- c(widths, round(bar_scale * 3 * ncol)) + } + # Useless +# n_figures <- n_figures + 1 + } + + # row and col titles + if (length(row_titles) > 0) { + mat_layout <- cbind(rep(0, dim(mat_layout)[1]), mat_layout) + widths <- c(((subtitle_cex + subtitle_margin / 2) * cs / device_size[1]) * ncol * fsu, widths) + } + if (length(col_titles) > 0) { + mat_layout <- rbind(rep(0, dim(mat_layout)[2]), mat_layout) + heights <- c(((subtitle_cex + subtitle_margin) * cs / device_size[2]) * nrow * fsu, heights) + } + # toptitle + if (toptitle != '') { + mat_layout <- rbind(rep(0, dim(mat_layout)[2]), mat_layout) + heights <- c(((title_cex + title_margin) * cs / device_size[2]) * nrow * fsu, heights) + } + par(oma = extra_margin) + layout(mat_layout, widths, heights) + # Draw the color bar + if (drawleg != FALSE) { + if (length(row_titles) > 0) { + bar_extra_margin[2] <- bar_extra_margin[2] + (subtitle_cex + subtitle_margin / 2) * + bar_left_shift_scale + } + + if (multi_colorbar) { # multiple colorbar + if (!is.null(list(...)$bar_titles)) { + bar_titles <- list(...)$bar_titles + } else { + bar_titles <- NULL + } + multi_ColorBar(nmap = nmap, + brks = brks, cols = cols, vertical = vertical, subsampleg = subsampleg, + bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, plot = TRUE, + draw_separators = draw_separators, + bar_titles = bar_titles, title_scale = units_scale, + label_scale = bar_label_scale, extra_margin = bar_extra_margin) + + } else { # one colorbar + ColorBar(brks = colorbar$brks, cols = 
colorbar$cols, vertical = vertical, subsampleg = subsampleg, + bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, col_inf = colorbar$col_inf, + col_sup = colorbar$col_sup, color_fun = color_fun, plot = TRUE, draw_ticks = draw_bar_ticks, + draw_separators = draw_separators, triangle_ends_scale = triangle_ends_scale, + extra_labels = bar_extra_labels, + title = units, title_scale = units_scale, label_scale = bar_label_scale, tick_scale = bar_tick_scale, + extra_margin = bar_extra_margin, label_digits = bar_label_digits) + + } + } + + # Draw titles + if (toptitle != '' || length(col_titles) > 0 || length(row_titles) > 0) { + plot(0, type = 'n', ann = FALSE, axes = FALSE, xaxs = 'i', yaxs = 'i', + xlim = c(0, 1), ylim = c(0, 1)) + width_lines <- par('fin')[1] / par('csi') + plot_lines <- par('pin')[1] / par('csi') + plot_range <- par('xaxp')[2] - par('xaxp')[1] + size_units_per_line <- plot_range / plot_lines + if (toptitle != '') { + title_x_center <- par('xaxp')[1] - par('mar')[2] * size_units_per_line + + ncol * width_lines * size_units_per_line / 2 + if (length(row_titles) > 0) { + title_x_center <- title_x_center - (1 - title_left_shift_scale) * + (subtitle_cex + subtitle_margin) / 2 * size_units_per_line + } + title_y_center <- par('mar')[3] + (title_margin + title_cex) / 2 + if (length(col_titles > 0)) { + title_y_center <- title_y_center + (subtitle_margin + subtitle_cex) + } + mtext(toptitle, cex = title_cex, line = title_y_center, at = title_x_center, + padj = 0.5) + } + if (length(col_titles) > 0) { + t_x_center <- par('xaxp')[1] - par('mar')[2] * size_units_per_line + for (t in 1:ncol) { + mtext(col_titles[t], cex = subtitle_cex, + line = par('mar')[3] + (subtitle_margin + subtitle_cex) / 2, + at = t_x_center + (t - 0.5) * width_lines * size_units_per_line, + padj = 0.5) + } + } + height_lines <- par('fin')[2] / par('csi') + plot_lines <- par('pin')[2] / par('csi') + plot_range <- par('yaxp')[2] - par('yaxp')[1] + 
size_units_per_line <- plot_range / plot_lines + if (length(row_titles) > 0) { + t_y_center <- par('yaxp')[1] - par('mar')[1] * size_units_per_line + for (t in 1:nrow) { + mtext(row_titles[t], cex = subtitle_cex, + line = par('mar')[2] + (subtitle_margin + subtitle_cex) / 2, + at = t_y_center - (t - 1.5) * height_lines * size_units_per_line, + padj = 0.5, side = 2) + } + } + par(new = TRUE) + } + + array_number <- 1 + plot_number <- 1 + # For each array provided in var + lapply(var, function(x) { + if (is_single_na(x)) { + if (!all(sapply(var[array_number:length(var)], is_single_na))) { + plot.new() + par(new = FALSE) + } + plot_number <<- plot_number + 1 + } else { + if (is.character(plot_dims[[array_number]])) { + plot_dim_indices <- which(names(dim(x)) %in% plot_dims[[array_number]]) + } else { + plot_dim_indices <- plot_dims[[array_number]] + } + # For each of the arrays provided in that array + apply(x, (1:length(dim(x)))[-plot_dim_indices], + function(y) { + # Do the plot. colorbar is not drew. + fun_args <- c(list(y, toptitle = titles[plot_number], drawleg = FALSE), list(...), + special_args[[array_number]]) +# funct <- fun[[array_number]] + if (fun[[array_number]] %in% c('PlotEquiMap', 'PlotStereoMap', 'PlotSection')) { + fun_args <- c(fun_args, list(brks = colorbar$brks, cols = colorbar$cols, + col_inf = colorbar$col_inf, + col_sup = colorbar$col_sup)) + } else if (fun[[array_number]] %in% 'PlotMostLikelyQuantileMap') { + #TODO: pre-generate colorbar params? 
like above + fun_args <- c(fun_args, list(brks = brks, cols = cols)) + } + do.call(fun[[array_number]], fun_args) + plot_number <<- plot_number + 1 + }) + } + array_number <<- array_number + 1 + }) + + # If the graphic was saved to file, close the connection with the device + if (!is.null(fileout) && close_device) dev.off() + + invisible(list(brks = colorbar$brks, cols = colorbar$cols, + col_inf = colorbar$col_inf, col_sup = colorbar$col_sup, + layout_matrix = mat_layout)) +} diff --git a/modules/Visualization/PlotMostLikelyQuantileMap.R b/modules/Visualization/PlotMostLikelyQuantileMap.R new file mode 100644 index 00000000..9f9f1914 --- /dev/null +++ b/modules/Visualization/PlotMostLikelyQuantileMap.R @@ -0,0 +1,196 @@ +#'Plot Maps of Most Likely Quantiles +#' +#'@author Veronica Torralba, \email{veronica.torralba@bsc.es}, Nicolau Manubens, \email{nicolau.manubens@bsc.es} +#'@description This function receives as main input (via the parameter \code{probs}) a collection of longitude-latitude maps, each containing the probabilities (from 0 to 1) of the different grid cells of belonging to a category. As many categories as maps provided as inputs are understood to exist. The maps of probabilities must be provided on a common rectangular regular grid, and a vector with the longitudes and a vector with the latitudes of the grid must be provided. The input maps can be provided in two forms, either as a list of multiple two-dimensional arrays (one for each category) or as a three-dimensional array, where one of the dimensions corresponds to the different categories. +#' +#'@param probs a list of bi-dimensional arrays with the named dimensions 'latitude' (or 'lat') and 'longitude' (or 'lon'), with equal size and in the same order, or a single tri-dimensional array with an additional dimension (e.g. 'bin') for the different categories. 
The arrays must contain probability values between 0 and 1, and the probabilities for all categories of a grid cell should not exceed 1 when added. +#'@param lon a numeric vector with the longitudes of the map grid, in the same order as the values along the corresponding dimension in \code{probs}. +#'@param lat a numeric vector with the latitudes of the map grid, in the same order as the values along the corresponding dimension in \code{probs}. +#'@param cat_dim the name of the dimension along which the different categories are stored in \code{probs}. This only applies if \code{probs} is provided in the form of 3-dimensional array. The default expected name is 'bin'. +#'@param bar_titles vector of character strings with the names to be drawn on top of the color bar for each of the categories. As many titles as categories provided in \code{probs} must be provided. +#'@param col_unknown_cat character string with a colour representation of the colour to be used to paint the cells for which no category can be clearly assigned. Takes the value 'white' by default. +#'@param drawleg Where to draw the common colour bar. Can take values TRUE, +#' FALSE or:\cr +#' 'up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N'\cr +#' 'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr +#' 'right', 'r', 'R', 'east', 'e', 'E'\cr +#' 'left', 'l', 'L', 'west', 'w', 'W' +#'@param ... additional parameters to be sent to \code{PlotCombinedMap} and \code{PlotEquiMap}. 
+#'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} +#' +#'@importFrom maps map +#'@importFrom graphics box image layout mtext par plot.new +#'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff +#'@examples +#'# Simple example +#'x <- array(1:(20 * 10), dim = c(lat = 10, lon = 20)) / 200 +#'a <- x * 0.6 +#'b <- (1 - x) * 0.6 +#'c <- 1 - (a + b) +#'lons <- seq(0, 359.5, length = 20) +#'lats <- seq(-89.5, 89.5, length = 10) +#'PlotMostLikelyQuantileMap(list(a, b, c), lons, lats, +#' toptitle = 'Most likely tercile map', +#' bar_titles = paste('% of belonging to', c('a', 'b', 'c')), +#' brks = 20, width = 10, height = 8) +#' +#'# More complex example +#'n_lons <- 40 +#'n_lats <- 20 +#'n_timesteps <- 100 +#'n_bins <- 4 +#' +#'# 1. Generation of sample data +#'lons <- seq(0, 359.5, length = n_lons) +#'lats <- seq(-89.5, 89.5, length = n_lats) +#' +#'# This function builds a 3-D gaussian at a specified point in the map. +#'make_gaussian <- function(lon, sd_lon, lat, sd_lat) { +#' w <- outer(lons, lats, function(x, y) dnorm(x, lon, sd_lon) * dnorm(y, lat, sd_lat)) +#' min_w <- min(w) +#' w <- w - min_w +#' w <- w / max(w) +#' w <- t(w) +#' names(dim(w)) <- c('lat', 'lon') +#' w +#'} +#' +#'# This function generates random time series (with values ranging 1 to 5) +#'# according to 2 input weights. +#'gen_data <- function(w1, w2, n) { +#' r <- sample(1:5, n, +#' prob = c(.05, .9 * w1, .05, .05, .9 * w2), +#' replace = TRUE) +#' r <- r + runif(n, -0.5, 0.5) +#' dim(r) <- c(time = n) +#' r +#'} +#' +#'# We build two 3-D gaussians. +#'w1 <- make_gaussian(120, 80, 20, 30) +#'w2 <- make_gaussian(260, 60, -10, 40) +#' +#'# We generate sample data (with dimensions time, lat, lon) according +#'# to the generated gaussians +#'sample_data <- multiApply::Apply(list(w1, w2), NULL, +#' gen_data, n = n_timesteps)$output1 +#' +#'# 2. 
Binning sample data
#'prob_thresholds <- 1:n_bins / n_bins
#'prob_thresholds <- prob_thresholds[1:(n_bins - 1)]
#'thresholds <- quantile(sample_data, prob_thresholds)
#'
#'binning <- function(x, thresholds) {
#'  n_samples <- length(x)
#'  n_bins <- length(thresholds) + 1
#'
#'  thresholds <- c(thresholds, max(x))
#'  result <- 1:n_bins
#'  lower_threshold <- min(x) - 1
#'  for (i in 1:n_bins) {
#'    result[i] <- sum(x > lower_threshold & x <= thresholds[i]) / n_samples
#'    lower_threshold <- thresholds[i]
#'  }
#'
#'  dim(result) <- c(bin = n_bins)
#'  result
#'}
#'
#'bins <- multiApply::Apply(sample_data, 'time', binning, thresholds)$output1
#'
#'# 3. Plotting most likely quantile/bin
#'PlotMostLikelyQuantileMap(bins, lons, lats,
#'                          toptitle = 'Most likely quantile map',
#'                          bar_titles = paste('% of belonging to', letters[1:n_bins]),
#'                          mask = 1 - (w1 + w2 / max(c(w1, w2))),
#'                          brks = 20, width = 10, height = 8)
#'
#'@export
PlotMostLikelyQuantileMap <- function(probs, lon, lat, cat_dim = 'bin',
                                      bar_titles = NULL,
                                      col_unknown_cat = 'white', drawleg = TRUE,
                                      ...) {
  # Check probs: accept either a list of 2-D lat/lon arrays (one per
  # category) or a single 3-D array with a category dimension.
  error <- FALSE
  if (is.list(probs)) {
    if (length(probs) < 1) {
      stop("Parameter 'probs' must be of length >= 1 if provided as a list.")
    }
    check_fun <- function(x) {
      is.numeric(x) && (length(dim(x)) == 2)
    }
    # vapply instead of sapply: type-stable even for odd input.
    if (!all(vapply(probs, check_fun, logical(1)))) {
      error <- TRUE
    }
    ref_dims <- dim(probs[[1]])
    equal_dims <- all(vapply(probs, function(x) identical(dim(x), ref_dims),
                             logical(1)))
    if (!equal_dims) {
      stop("All arrays in parameter 'probs' must have the same dimension ",
           "sizes and names when 'probs' is provided as a list of arrays.")
    }
    # Stack the list into a 3-D array with a trailing 'map' category dim.
    num_probs <- length(probs)
    probs <- unlist(probs)
    dim(probs) <- c(ref_dims, map = num_probs)
    cat_dim <- 'map'
  }
  if (!is.numeric(probs)) {
    error <- TRUE
  }
  if (is.null(dim(probs))) {
    error <- TRUE
  }
  if (length(dim(probs)) != 3) {
    error <- TRUE
  }
  if (error) {
    stop("Parameter 'probs' must be either a numeric array with 3 dimensions ",
         " or a list of numeric arrays of the same size with the 'lon' and ",
         "'lat' dimensions.")
  }
  # Named 'dim_names' (not 'dimnames') to avoid masking base::dimnames().
  dim_names <- names(dim(probs))

  # Check cat_dim: resolve a dimension name to its index.
  if (is.character(cat_dim)) {
    if (is.null(dim_names)) {
      stop("Specified a dimension name in 'cat_dim' but no dimension names provided ",
           "in 'probs'.")
    }
    cat_dim <- which(dim_names == cat_dim)
    if (length(cat_dim) < 1) {
      stop("Dimension 'cat_dim' not found in 'probs'.")
    }
    cat_dim <- cat_dim[1]
  } else if (!is.numeric(cat_dim)) {
    stop("Parameter 'cat_dim' must be either a numeric value or a ",
         "dimension name.")
  }
  if (length(cat_dim) != 1) {
    stop("Parameter 'cat_dim' must be of length 1.")
  }
  cat_dim <- round(cat_dim)
  nprobs <- dim(probs)[cat_dim]

  # Check bar_titles: default labels for the common tercile/quintile cases.
  if (is.null(bar_titles)) {
    if (nprobs == 3) {
      bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)")
    } else if (nprobs == 5) {
      bar_titles <- c("Low (%)", "Below normal (%)",
                      "Normal (%)", "Above normal (%)", "High (%)")
    } else {
      bar_titles <- paste0("Cat. ", 1:nprobs, " (%)")
    }
  }

  # A category can only "win" with probability > 1/nprobs, so start the
  # colour scale slightly above that value (in %, rounded up to the next
  # multiple of 10; e.g. 40 for terciles).
  minimum_value <- ceiling(1 / nprobs * 10 * 1.1) * 10

  # PlotCombinedMap is sourced from modules/Visualization/PlotCombinedMap.R
  # until it is released in s2dv/CSTools.
  PlotCombinedMap(probs * 100, lon, lat, map_select_fun = max,
                  display_range = c(minimum_value, 100),
                  map_dim = cat_dim,
                  bar_titles = bar_titles,
                  col_unknown_map = col_unknown_cat,
                  drawleg = drawleg, ...)
}
plot_data <- function(recipe, diff --git a/modules/Visualization/s2s.plots.R b/modules/Visualization/s2s.plots.R deleted file mode 100644 index d387979c..00000000 --- a/modules/Visualization/s2s.plots.R +++ /dev/null @@ -1,103 +0,0 @@ - - -library(easyNCDF) -source("../data-analysis/R_Reorder.R") -library(s2dverification) -library(CSTools) -library(RColorBrewer) library(multiApply) - -plot_corr <- function(file.name, var, sdate, outdir, type, s2s,project=NULL){ - - var <- "WSDI" - filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", - var,"/",var,"-corr_month03.nc") - outfile <- paste0("/esarchive/scratch/lpalma/", - var,"-corr_month03.png") - - corr <- NcToArray(filename, vars_to_read='corr') - lon <- NcToArray(filename, vars_to_read='longitude') - lat <- NcToArray(filename, vars_to_read='latitude') - time <- NcToArray(filename, vars_to_read='time') - - corr <- Reorder(corr, c("var","time","longitude","latitude")) - - - col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", "#64AAFF", - "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", - "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") - - brks <- seq(-1,1,by=0.1) - color <- colorRampPalette(col2)(length(brks)-1) - options(bitmapType="cairo") - - PlotLayout(PlotEquiMap, c('longitude','latitude'), - corr, lon, lat, filled.continents=F, - brks=brks, - #rxow_titles=row_titles, - cols=col2, - fileout=outfile , bar_label_digits=1) - -} - -plot_skill <- function(file.name, var, sdate, outdir, type, s2s,project=NULL){ - - var <- "WSDI" - filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", - var,"/",var,"-skill_month02.nc") - outfile <- paste0("/esarchive/scratch/lpalma/", - var,"-skill_month02.png") - - corr <- NcToArray(filename, vars_to_read='corr') - lon <- NcToArray(filename, vars_to_read='longitude') - lat <- NcToArray(filename, vars_to_read='latitude') - time <- NcToArray(filename, vars_to_read='time') - - corr <- 
Reorder(corr, c("var","time","longitude","latitude")) - - - col2 <- c("#0C046E", "#1914BE", "#2341F7", "#2F55FB", "#3E64FF", "#528CFF", "#64AAFF", - "#82C8FF", "#A0DCFF", "#B4F0FF", "#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", - "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") - - brks <- seq(-100,100,by=10) - color <- colorRampPalette(col2)(length(brks)-1) - options(bitmapType="cairo") - - PlotLayout(PlotEquiMap, c('longitude','latitude'), - corr, lon, lat, filled.continents=F, - brks=brks, - #rxow_titles=row_titles, - cols=col2, - fileout=outfile , bar_label_digits=1) - -} -plot_ensemblemean <- function(files.path, var, date, outdir, type, s2s){ - - - dimnames <- c('var','time', 'longitude', 'latitude') - var <- "WSDI" - filename <- paste0("/esarchive/oper/MEDGOLD-data/monthly_statistics/", - var,"/",var,"_20210301_03.nc") - outfile <- paste0("/esarchive/scratch/lpalma/", - var,"_20210301.png") - - data <- NcToArray(filename, vars_to_read=var) - data <- Apply(data, 'ensemble', mean, na.rm=T)[[1]] - #data <- aperm(data,c(1,4,2,3)) - data <- Reorder(data,dimnames) - names(dim(data)) <- dimnames - - lon <- NcToArray(filename, vars_to_read='longitude') - lat <- NcToArray(filename, vars_to_read='latitude') - - cols <- c("#FFFBAF", "#FFDD9A", "#FFBF87", "#FFA173", - "#FF7055", "#FE6346", "#F7403B", "#E92D36", "#C80F1E", "#A50519") - - PlotLayout(PlotEquiMap, c('longitude','latitude'), data, lon, - lat, filled.continents=F, fileout=outfile, - bar_label_digits=2) - - -} - - -- GitLab From 8ff512d0317795610039a599110200324dee9d8e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 4 Oct 2022 10:42:28 +0200 Subject: [PATCH 53/81] Tidy up compute_probs() and compute_quants(), adapt code --- modules/Skill/Skill.R | 2 + modules/Skill/compute_probs.R | 34 +++++++++++++++++ modules/Skill/compute_quants.R | 30 +++++++++++++++ modules/Skill/s2s.metrics.R | 43 ++++++++++++--------- modules/Skill/s2s.probs.R | 69 ---------------------------------- 
# Compute the probability of each category (bin) delimited by 'quantiles'
# for every grid point / time step in 'data'.
#
#   data:          numeric array with the ensemble data; must contain the
#                  dimensions named in 'probs_dims'.
#   quantiles:     numeric array of category thresholds with a 'bin'
#                  dimension (e.g. the output of compute_quants()).
#   ncores:        number of cores for multiApply::Apply().
#   quantile_dims: kept for backward compatibility; not used here.
#   probs_dims:    target dimensions passed to Apply(): the ensemble
#                  dimension of 'data' and the 'bin' dimension of
#                  'quantiles'.
#   split_factor:  passed through to Apply().
#   na.rm:         if FALSE, any NA in a cell yields all-NA probabilities;
#                  if TRUE, only an all-NA cell does.
#
# Returns an array of probabilities with a 'bin' output dimension.
compute_probs <- function(data, quantiles,
                          ncores = 1, quantile_dims = c('syear', 'ensemble'),
                          probs_dims = list('ensemble', 'bin'),
                          split_factor = 1, na.rm = FALSE) {
  # Counts-to-probabilities kernel. The two NA policies only differ in when
  # a cell is considered invalid, so a single closure replaces the original
  # duplicated pair.
  c2p <- function(x, t) {
    invalid <- if (na.rm) all(is.na(x)) else anyNA(x)
    if (invalid) {
      # One NA per probability bin (one more category than thresholds).
      rep(NA, dim(t)[['bin']] + 1)
    } else {
      # convert2prob() returns one column per category; average over members.
      colMeans(convert2prob(as.vector(x), threshold = as.vector(t)))
    }
  }

  probs <- Apply(data = list(x = data, t = quantiles),
                 target_dims = probs_dims,
                 c2p,
                 output_dims = "bin",
                 split_factor = split_factor,
                 ncores = ncores)[[1]]
  return(probs)
}

# Compute the quantiles given by 'thresholds' over the 'quantile_dims'
# dimensions of 'data'.
#
#   data:          numeric array (e.g. hindcast or observations).
#   thresholds:    numeric vector of probabilities in [0, 1],
#                  e.g. c(1/3, 2/3) for terciles.
#   ncores:        number of cores for multiApply::Apply().
#   quantile_dims: dimensions over which the quantiles are computed.
#   probs_dims:    kept for backward compatibility; not used here.
#   split_factor:  passed through to Apply().
#   na.rm:         if FALSE, any NA in a cell yields all-NA quantiles;
#                  if TRUE, NAs are removed before computing the quantiles.
#
# Returns an array of quantiles with a 'bin' output dimension.
compute_quants <- function(data, thresholds,
                           ncores = 1, quantile_dims = c('syear', 'ensemble'),
                           probs_dims = list('ensemble', 'bin'),
                           split_factor = 1, na.rm = FALSE) {
  # Quantile kernel. The original defined two near-identical closures plus an
  # anonymous wrapper whose 't' argument was ignored (it captured
  # 'thresholds' from the enclosing scope); fold the NA policy into one
  # function that captures 'thresholds' directly.
  get_quantiles <- function(x) {
    if (!na.rm && anyNA(x)) {
      rep(NA, length(thresholds))
    } else {
      quantile(as.vector(x), thresholds, na.rm = na.rm)
    }
  }

  quantiles <- Apply(data,
                     target_dims = quantile_dims,
                     fun = get_quantiles,
                     output_dims = "bin",
                     ncores = ncores,
                     split_factor = split_factor)[[1]]
  return(quantiles)
}
compute_probs(obs, terciles_obs, + quantile_dims=c(time.dim), + ncores=ncores, + split_factor=1, + na.rm=na.rm) - terciles_exp <- Compute_probs(exp, c(1/3, 2/3), - quantile_dims=c(time.dim, 'ensemble'), - ncores=ncores, - split_factor=1, - na.rm=na.rm) + probs_exp <- compute_probs(exp, terciles_exp, + quantile_dims=c(time.dim), + ncores=ncores, + split_factor=1, + na.rm=na.rm) - probs_clim = array(data = 1/3, dim = dim(terciles_obs$probs)) + probs_clim = array(data = 1/3, dim = dim(probs_obs)) frps <- NULL n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][] frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim, - probs_obs = terciles_obs$probs), + probs_obs = probs_obs), target_dims = 'bin', fun = .rps_from_probs, n_categories = 3, @@ -152,8 +161,8 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, Fair = TRUE, ncores = ncores)$output1 - frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs, - probs_obs = terciles_obs$probs), + frps$exp <- multiApply::Apply(data = list(probs_exp = probs_exp, + probs_obs = probs_obs), target_dims = 'bin', fun = .rps_from_probs, n_categories = 3, diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R deleted file mode 100644 index 5921705c..00000000 --- a/modules/Skill/s2s.probs.R +++ /dev/null @@ -1,69 +0,0 @@ - - -compute_quants <- function(data, thresholds, - ncores=1, quantile_dims=c('syear', 'ensemble'), - probs_dims=list('ensemble', 'bin'), - split_factor=1, na.rm=FALSE) { - - ## TODO: Adapt to the case where the forecast probability bins need - ## to be computed. The quantiles should be the hcst quantiles, and then - ## c2p() can be used to compute fcst probabilities for most likely terciles - ## map. 
- - if (na.rm == FALSE) { - get_quantiles <- function(x, t) { - if (any(is.na(x))) { - rep(NA, length(t)) - } else { - quantile(as.vector(x), t, na.rm = FALSE) - } - } - } else { - get_quantiles <- function(x, t) { - quantile(as.vector(x), t, na.rm = TRUE) - } - } - - quantiles <- Apply(data, - target_dims = quantile_dims, - function(x, t) {get_quantiles(as.vector(x), thresholds)}, - output_dims = "bin", - ncores = ncores, - split_factor = split_factor)[[1]] - - return(quantiles) -} - -compute_probs <- function(data, quantiles, - ncores=1, quantile_dims=c('syear', 'ensemble'), - probs_dims=list('ensemble', 'bin'), - split_factor=1, na.rm=FALSE) { - - if (na.rm == FALSE) { - c2p <- function(x, t) { - # If the array contains any NA values, return NA - if (any(is.na(x))) { - rep(NA, dim(t)[['bin']] + 1) - } else { - colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) - } - } - } else { - c2p <- function(x, t) { - if (any(!is.na(x))) { # If the array contains some non-NA values - colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) - } else { # If the array contains NAs only - rep(NA, dim(t)[['bin']] + 1) # vector with as many NAs as prob bins. - } - } - } - - probs <- Apply(data = list(x = data, t = quantiles), - target_dims = probs_dims, - c2p, - output_dims = "bin", - split_factor = split_factor, - ncores = ncores)[[1]] - - return(probs) -} diff --git a/tools/libs.R b/tools/libs.R index 2b298359..a0767f76 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -12,6 +12,8 @@ library(easyNCDF) library(CSTools) library(lubridate) library(PCICt) +library(RColorBrewer) +library(grDevices) # # library(parallel) # library(pryr) # To check mem usage. 
-- GitLab From 82db3d49f002a6c86cb1f72c21333309c5dd8248 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 4 Oct 2022 10:55:12 +0200 Subject: [PATCH 54/81] Put temporary files in tmp/ subdirectories, delete old s2s4e script --- modules/Saving/export_2_nc-s2s4e.R | 583 ------------------ modules/Skill/Skill.R | 10 +- modules/Skill/{ => tmp}/AbsBiasSS.R | 0 modules/Skill/{ => tmp}/Bias.R | 0 modules/Skill/{ => tmp}/CRPS.R | 0 modules/Skill/{ => tmp}/CRPSS.R | 0 modules/Skill/{ => tmp}/RandomWalkTest.R | 0 modules/Visualization/Visualization.R | 6 +- .../Visualization/{ => tmp}/PlotCombinedMap.R | 0 modules/Visualization/{ => tmp}/PlotLayout.R | 0 .../{ => tmp}/PlotMostLikelyQuantileMap.R | 0 11 files changed, 8 insertions(+), 591 deletions(-) delete mode 100644 modules/Saving/export_2_nc-s2s4e.R rename modules/Skill/{ => tmp}/AbsBiasSS.R (100%) rename modules/Skill/{ => tmp}/Bias.R (100%) rename modules/Skill/{ => tmp}/CRPS.R (100%) rename modules/Skill/{ => tmp}/CRPSS.R (100%) rename modules/Skill/{ => tmp}/RandomWalkTest.R (100%) rename modules/Visualization/{ => tmp}/PlotCombinedMap.R (100%) rename modules/Visualization/{ => tmp}/PlotLayout.R (100%) rename modules/Visualization/{ => tmp}/PlotMostLikelyQuantileMap.R (100%) diff --git a/modules/Saving/export_2_nc-s2s4e.R b/modules/Saving/export_2_nc-s2s4e.R deleted file mode 100644 index abf526e5..00000000 --- a/modules/Saving/export_2_nc-s2s4e.R +++ /dev/null @@ -1,583 +0,0 @@ -library(easyNCDF) - -save_bias <- function(variable, - data, - fcst.sdate, - outfile, - leadtimes, - grid, - agg, - fcst.type) { - - lalo <- c('longitude', 'latitude') #decathlon subseasonal - - ## TODO: Sort out different aggregation cases - # if (tolower(agg) == "global") { - # data <- Reorder(data, c(lalo,'time')) - # } else { - # data <- Reorder(data, c('country', 'time')) - # } - - if (variable %in% c("tas", "tasmin", "tasmax")) { - obs <- data; units <- "ºC"; - var.longname <- "Temperature bias" - } else { - # Unit 
conversion - data.conv <- convert_data(list(fcst=data,test=data),variable,leadtimes,fcst.type,"forecast") - obs <- data.conv$data$fcst; units <- data.conv$units; - var.longname <- data.conv$var.longname - remove(data.conv) - } - - if (tolower(agg) == "country"){ - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.sdname <- paste0("Country-Aggregated ", var.longname) - } else { - dims <- c(lalo,'time') - var.expname <- get_outname(variable,VARS_DICT) - var.sdname <- var.longname - } - - metadata <- list(obs = list(name = var.expname, - standard_name = var.sdname, - units = units)) - attr(obs, 'variables') <- metadata - names(dim(obs)) <- dims - - times <- get_times(fcst.type, leadtimes, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(list(country, time, time_step, obs), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - ArrayToNc(list(latlon$lat, latlon$lon, time, obs, time_step), outfile) - - } -} - -save_obs_country_file <- - function(variable, - obs, - fcst.sdate, - outfile, - fcst.type, - monthnames) { - - if (fcst.type == 'seasonal'){ - mask.path <- 'masks/mask_europe_system5_2.Rdata' - } else { - mask.path <- 'masks/mask_europe_S2S_ecmwf.Rdata' - } - - load(mask.path) - - ifelse(exists("lonlat_dctln"), - lalo <- c('longitude','latitude'), #decathlon subseasonal - lalo <- c('latitude','longitude')) #no decathlon - - obs <- Reorder(obs, c(lalo,'time')) -# obs <- Reorder(obs, c('latitude','longitude','time')) - - obs.country <-Apply(data=list(pointdata=obs), -# target_dims=c('latitude','longitude'), - target_dims=lalo, - output_dims=c('country'), - mask.path=mask.path, - ncores=2, - split_factor=1, - fun = Country_mean)[[1]] - - vars <- yaml.load_file(VARS_DICT)$vars - units <- vars[[variable]]$units - var.longname <- vars[[variable]]$longname - - metadata <- list(obs.country = - list( - name = paste0(variable, 
"_country"), - standard_name = paste0(var.longname, " (Country-Aggregated)"), - units = units - ) - ) - - attr(obs.country, 'variables') <- metadata - names(dim(obs.country)) <- c('Country', 'time') - - times <- get_times(fcst.type, monthnames, fcst.sdate) - time <- times$time - time_step <- times$time_step - - country <- 1:length(europe.countries.iso) - dim(country) <- length(country) - metadata <- list( country = list( - standard_name = paste(europe.countries.iso, collapse=" "), - units = 'Country ISO 3166-1 alpha 3 Code')) #if region, these units are incorrect. - attr(country, 'variables') <- metadata - names(dim(country)) <- 'Country' - - - ArrayToNc(list(country,time,time_step,obs.country), - outfile) - - } - -save_obs <- function(variable, - data, - fcst.sdate, - outfile, - leadtimes, - grid, - agg, - fcst.type) { - - lalo <- c('longitude','latitude') #decathlon subseasonal - - if (tolower(agg) == "global"){ - data <- Reorder(data, c(lalo,'time')) - } else { - data <- Reorder(data, c('country', 'time')) - } - - data.conv <- convert_data(list(fcst=data,test=data),variable,leadtimes,fcst.type,"forecast") - obs <- data.conv$data$fcst; units <- data.conv$units; - var.longname <- data.conv$var.longname - remove(data.conv) - - if (tolower(agg) == "country"){ - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.sdname <- paste0("Country-Aggregated ", var.longname) - } else { - dims <- c(lalo,'time') - var.expname <- get_outname(variable,VARS_DICT) - var.sdname <- var.longname - } - - metadata <- list(obs = list(name = var.expname, - standard_name = var.sdname, - units = units)) - attr(obs, 'variables') <- metadata - names(dim(obs)) <- dims - - times <- get_times(fcst.type, leadtimes, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(list(country, time, time_step, obs), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - 
ArrayToNc(list(latlon$lat, latlon$lon, time, obs, time_step), outfile) - - } -} - - -save_forecast <- function(variable, - fcst, - fcst.sdate, - outfile, - leadtimes, - grid, - agg, - fcst.type) { - - lalo <- c('longitude','latitude') #decathlon subseasonal - - if (tolower(agg) == "global"){ - fcst <- Reorder(fcst, c(lalo,'member', - 'time')) - } else { - fcst <- Reorder(fcst, c('country','member', 'time')) - } - - # Unit conversion - fcst.conv <- convert_data(list(fcst=fcst,test=fcst),variable,leadtimes,fcst.type,"forecast") - fcst <- fcst.conv$data$fcst; units <- fcst.conv$units; - var.longname <- fcst.conv$var.longname - remove(fcst.conv) - - if (tolower(agg) == "country"){ - dims <- c('Country', 'ensemble', 'time') - var.expname <- paste0(variable, '_country') - var.sdname <- paste0("Country-Aggregated ", var.longname) - } else { - dims <- c(lalo,'ensemble', 'time') - var.expname <- get_outname(variable,VARS_DICT) - var.sdname <- var.longname - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - units = units)) - attr(fcst, 'variables') <- metadata - names(dim(fcst)) <- dims - - times <- get_times(fcst.type, leadtimes, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(list(country, time, time_step, fcst), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - ArrayToNc(list(latlon$lat, latlon$lon, time, fcst, time_step), outfile) - - } -} - -save_probs <- function(variable, - probs, - fcst.sdate, - outfile, - monthnames, - grid, - agg, - fcst.type) { - - lalo <- c('longitude','latitude') #decathlon subseasonal - - if (tolower(agg) == "global"){ - probs <- lapply(probs, function(x){ - Reorder(x, c('bin',lalo, 'time'))}) - } - - pbn <- Subset(probs$tercile, 'bin', list(1), drop='selected') - pn <- Subset(probs$tercile, 'bin', list(2), drop='selected') - pan <- Subset(probs$tercile, 'bin', list(3), drop='selected') - 
p10 <- Subset(probs$extreme, 'bin', list(1), drop='selected') - p90 <- Subset(probs$extreme, 'bin', list(3), drop='selected') - - pn.sdname <- paste('Probability below normal category ', sep=''); - pan.sdname <- paste('Probability above normal category ', sep=''); - pbn.sdname <- paste('Probability normal category ', sep=''); - p10.sdname <- paste('Probability below extreme category ', sep=''); - p90.sdname <- paste('Probability above extreme category ', sep=''); - - if (tolower(agg) == "country"){ - dims <- c('Country', 'time') - pn.sdanme <- paste0('Country-Aggregated ', pn.sdname) - pbn.sdanme <- paste0('Country-Aggregated ', pbn.sdname) - pan.sdanme <- paste0('Country-Aggregated ', pan.sdname) - p10.sdanme <- paste0('Country-Aggregated ', p10.sdname) - p90.sdanme <- paste0('Country-Aggregated ', p90.sdname) - } else { - dims <- c(lalo, 'time') - pn.sdanme <- paste0('Global ', pn.sdname) - pbn.sdanme <- paste0('Global ', pbn.sdname) - pan.sdanme <- paste0('Global ', pan.sdname) - p10.sdanme <- paste0('Global ', p10.sdname) - p90.sdanme <- paste0('Global ', p90.sdname) - } - - metadata <- list(pbn = list(name = 'prob_bn', - standard_name = pbn.sdname ), - pn = list(name = 'prob_n', - standard_name = pn.sdname), - pan = list(name = 'prob_an', - standard_name = pan.sdname), - p10 = list(name = 'prob_bp10', - standard_name = p10.sdname), - p90 = list(name = 'prob_ap90', - standard_name = p90.sdname)) - - attr(pbn, 'variables') <- metadata[1] - attr(pn, 'variables') <- metadata[2] - attr(pan, 'variables') <- metadata[3] - attr(p10, 'variables') <- metadata[4] - attr(p90, 'variables') <- metadata[5] - - names(dim(pbn)) <- dims - names(dim(pn)) <- dims - names(dim(pan)) <- dims - names(dim(p10)) <- dims - names(dim(p90)) <- dims - - times <- get_times(fcst.type, monthnames, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(list(country, time, pbn, pn, pan, p10, p90, 
time_step), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - latitude <- latlon$lat; longitude <- latlon$lon - ArrayToNc(list(latitude, longitude, time, pbn, pn, pan, p10, p90, - time_step), outfile) - } - - } - -get_times <- function (fcst.type, leadtimes, sdate){ - - switch(tolower(fcst.type), - "seasonal" = {len <- length(leadtimes); ref <- 'months since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "sub_obs" = {len <- 52; ref <- 'week of the year '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- 4; ref <- 'weeks since '; - stdname <- ''} - ) - - time <- 1:len - dim(time) <- length(time) - #metadata <- list(time = list(standard_name = stdname, - metadata <- list(time = list( - units = paste0(ref, sdate, ' 00:00:00'))) - attr(time, 'variables') <- metadata - names(dim(time)) <- 'time' - - time_step <- 1 - dim(time_step) <- length(time_step) - metadata <- list(time_step = list(units = paste0( - ref, sdate, ' 00:00:00'))) - attr(time_step, 'variables') <- metadata - names(dim(time_step)) <- 'time_step' - - sdate <- 1:length(sdate) - dim(sdate) <- length(sdate) - metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), collapse=", "), - units = paste0('Init date'))) - attr(sdate, 'variables') <- metadata - names(dim(sdate)) <- 'sdate' - - return(list(time_step=time_step, time=time, sdate=sdate)) - -} - -get_countries <- function(europe.countries.iso){ - - country <- 1:length(europe.countries.iso) - dim(country) <- length(country) - metadata <- list( country = list( - standard_name = paste(europe.countries.iso, collapse=" "), - units = 'Country ISO 3166-1 alpha 3 Code')) - attr(country, 'variables') <- metadata - names(dim(country)) <- 'Country' - return(country) - -} - -get_latlon <- function(lat, lon){ - - longitude <- lon - dim(longitude) <- length(longitude) - metadata <- list(longitude = list(units = 'degrees_east')) - attr(longitude, 'variables') <- metadata - names(dim(longitude)) <- 
'longitude' - - latitude <- lat - dim(latitude) <- length(latitude) - metadata <- list(latitude = list(units = 'degrees_north')) - attr(latitude, 'variables') <- metadata - names(dim(latitude)) <- 'latitude' - - return(list(lat=latitude,lon=longitude)) - -} - -save_metrics <- function(variable, - skill, - fcst.sdate, - grid, - outfile, - monthnames, - fcst.type, - agg) -{ - - lalo <- c('longitude', 'latitude') - - ## TODO: Sort out aggregation - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x){ - Reorder(x[[1]], c(lalo, 'time'))}) - } - - for (i in 1:length(skill)) { - - metric <- names(skill[i]) - if (tolower(agg) == "country"){ - sdname <- paste0(names(metric), " region-aggregated metric") - dims <- c('Country', 'time') - } else { - sdname <- paste0(names(metric), " grid point metric") - dims <- c(lalo, 'time') - } - metadata <- list(name = metric, standard_name = sdname) - - attr(skill[i], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } - - times <- get_times(fcst.type, monthnames, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(append(country, time, skill, time_step), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill, list(time_step)) - ArrayToNc(vars, outfile) - } -} - -convert_seasosnal_prlr <- function(data2convert, leadtimes,filetype){ - - ind <- 1:length(leadtimes) - #dates <- paste0(leadtimes,"01") - # computes the last day of the month - lastday <- sapply(ind, function(x) - {as.integer(substr((seq(as.Date(leadtimes[x],"%Y%m%d"), - length=2,by="months")-1)[2], - 9, 10))}) - - if (filetype == "terciles"){ - ter <- sapply(ind, function(x) { - Subset(data2convert$tercile, along='time', - indices=x,drop='selected')*1000*3600*24*lastday[x]}, - simplify='array') - ext <- sapply(ind, function(x) { - Subset(data2convert$extreme, along='time', - 
indices=x,drop='selected')*1000*3600*24*lastday[x]}, - simplify='array') - - data2convert <- list(tercile=ter, extreme=ext) - } else { - - ens <- sapply(ind, function(x) { - Subset(data2convert$fcst, along='time', - indices=x,drop='selected')*1000*3600*24*lastday[x]}, - simplify='array') - - data2convert <- list(fcst=ens) - } - - return(data2convert) -} - -convert_data <- function(data,variable, leadtimes, fcst.type,filetype){ - - vars <- yaml.load_file(VARS_DICT)$vars - units <- vars[[variable]]$units - var.longname <- vars[[variable]]$longname - - if (variable %in% c("tas","tasmin","tasmax")){ - data <- lapply(data, function(x){ x - 273.15}) - } else if (variable %in% c("psl")){ - data <- lapply(data, function(x){ x/100}) - } else { - print("WARNING: NO DATA CONVERSION APPLIED") - } - - - return(list(data=data, units=units, var.longname=var.longname)) - -} - - -## TODO: implement lists as in save_metrics -save_terciles <- function(variable, - terciles, - fcst.sdate, - grid, - outfile, - leadtimes, - fcst.type, - agg) { - - lalo <- c('longitude','latitude') #decathlon subseasonal - - if (tolower(agg) == "global"){ - terciles <- lapply(terciles, function(x){ - Reorder(x, c('bin',lalo, 'time'))}) - } - - terciles.conv <- convert_data(terciles,variable,leadtimes,fcst.type,"terciles") - terciles <- terciles.conv$data; units <- terciles.conv$units; - var.longname <- terciles.conv$var.longname - remove(terciles.conv) - - p33 <- Subset(terciles$tercile, 'bin', list(1), drop='selected') - - p66 <- Subset(terciles$tercile, 'bin', list(2), drop='selected') - p10 <- Subset(terciles$extreme, 'bin', list(1), drop='selected') - p90 <- Subset(terciles$extreme, 'bin', list(2), drop='selected') - - p33.sdname <- paste('Lower Tercile ', sep=''); - p66.sdname <- paste('Upper Tercile ', sep=''); - p10.sdname <- paste('Lower extreme', sep=''); - p90.sdname <- paste('Upper extreme', sep=''); - - if (tolower(agg) == "country"){ - dims <- c('Country', 'time') - p33.sdanme <- 
paste0('Country-Aggregated ', p33.sdname) - p66.sdanme <- paste0('Country-Aggregated ', p66.sdname) - p10.sdanme <- paste0('Country-Aggregated ', p10.sdname) - p90.sdanme <- paste0('Country-Aggregated ', p90.sdname) - } else { - dims <- c(lalo, 'time') - p33.sdanme <- paste0('Global ', p33.sdname) - p66.sdanme <- paste0('Global ', p66.sdname) - p10.sdanme <- paste0('Gloabl ', p10.sdname) - p90.sdanme <- paste0('Global ', p90.sdname) - } - - metadata <- list(pbn = list(name = 'p33', - standard_name = p33.sdname ), - pn = list(name = 'p66', - standard_name = p66.sdname), - pan = list(name = 'p10', - standard_name = p10.sdname), - p10 = list(name = 'p90', - standard_name = p90.sdname)) - - attr(p33, 'variables') <- metadata[1] - attr(p66, 'variables') <- metadata[2] - attr(p10, 'variables') <- metadata[3] - attr(p90, 'variables') <- metadata[4] - - names(dim(p33)) <- dims - names(dim(p66)) <- dims - names(dim(p10)) <- dims - names(dim(p90)) <- dims - - times <- get_times(fcst.type, leadtimes, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(list(country, time, p33, p66, p10, p90, time_step), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - ArrayToNc(list(latlon$lat, latlon$lon, time, p33, p66, p10, p90, time_step), outfile) - } -} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 368bb581..fb5498e6 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -11,11 +11,11 @@ source("modules/Skill/compute_quants.R") source("modules/Skill/compute_probs.R") source("modules/Skill/s2s.metrics.R") ## TODO: Remove once the new version of s2dv is released -source("modules/Skill/CRPS.R") -source("modules/Skill/CRPSS.R") -source("modules/Skill/RandomWalkTest.R") -source("modules/Skill/Bias.R") -source("modules/Skill/AbsBiasSS.R") +source("modules/Skill/tmp/CRPS.R") +source("modules/Skill/tmp/CRPSS.R") 
+source("modules/Skill/tmp/RandomWalkTest.R") +source("modules/Skill/tmp/Bias.R") +source("modules/Skill/tmp/AbsBiasSS.R") ## TODO: Implement this in the future ## Which parameter are required? diff --git a/modules/Skill/AbsBiasSS.R b/modules/Skill/tmp/AbsBiasSS.R similarity index 100% rename from modules/Skill/AbsBiasSS.R rename to modules/Skill/tmp/AbsBiasSS.R diff --git a/modules/Skill/Bias.R b/modules/Skill/tmp/Bias.R similarity index 100% rename from modules/Skill/Bias.R rename to modules/Skill/tmp/Bias.R diff --git a/modules/Skill/CRPS.R b/modules/Skill/tmp/CRPS.R similarity index 100% rename from modules/Skill/CRPS.R rename to modules/Skill/tmp/CRPS.R diff --git a/modules/Skill/CRPSS.R b/modules/Skill/tmp/CRPSS.R similarity index 100% rename from modules/Skill/CRPSS.R rename to modules/Skill/tmp/CRPSS.R diff --git a/modules/Skill/RandomWalkTest.R b/modules/Skill/tmp/RandomWalkTest.R similarity index 100% rename from modules/Skill/RandomWalkTest.R rename to modules/Skill/tmp/RandomWalkTest.R diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 34aacf67..b07d6f1f 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,7 +1,7 @@ ## TODO: Remove once released in s2dv/CSTools -source("modules/Visualization/PlotMostLikelyQuantileMap.R") -source("modules/Visualization/PlotCombinedMap.R") -source("modules/Visualization/PlotLayout.R") +source("modules/Visualization/tmp/PlotMostLikelyQuantileMap.R") +source("modules/Visualization/tmp/PlotCombinedMap.R") +source("modules/Visualization/tmp/PlotLayout.R") ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Adapt to multi-model case diff --git a/modules/Visualization/PlotCombinedMap.R b/modules/Visualization/tmp/PlotCombinedMap.R similarity index 100% rename from modules/Visualization/PlotCombinedMap.R rename to modules/Visualization/tmp/PlotCombinedMap.R diff --git a/modules/Visualization/PlotLayout.R 
b/modules/Visualization/tmp/PlotLayout.R similarity index 100% rename from modules/Visualization/PlotLayout.R rename to modules/Visualization/tmp/PlotLayout.R diff --git a/modules/Visualization/PlotMostLikelyQuantileMap.R b/modules/Visualization/tmp/PlotMostLikelyQuantileMap.R similarity index 100% rename from modules/Visualization/PlotMostLikelyQuantileMap.R rename to modules/Visualization/tmp/PlotMostLikelyQuantileMap.R -- GitLab From 3322db7fc404c5c5ad43477852b2b88a19671701 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 4 Oct 2022 12:09:46 +0200 Subject: [PATCH 55/81] Add visualization test to seasonal unit tests --- tests/testthat/test-seasonal_monthly.R | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 0f9149b9..90938d62 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -4,6 +4,7 @@ source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") recipe_file <- "tests/recipes/recipe-seasonal_monthly_1.yml" recipe <- read_yaml(recipe_file) @@ -33,6 +34,12 @@ save_data(recipe = recipe, data = data, calibrated_data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs, archive = archive) ))}) +# Plotting +suppressWarnings({invisible(capture.output( +plot_data(recipe = recipe, archive = archive, data = data, + calibrated_data = calibrated_data, skill_metrics = skill_metrics, + probabilities = probs, significance = T) +))}) outdir <- get_dir(recipe) # ------- TESTS -------- @@ -208,7 +215,7 @@ test_that("4. 
Saving", { expect_equal( list.files(outdir), -c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "tas_20201101.nc", +c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", "tas-skill_month11.nc") @@ -216,5 +223,14 @@ c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "t }) +test_that("5. Visualization", { +expect_equal( +list.files(paste0(outdir, "/plots/")), +c("crpss.png", "enscorr_specs.png", "enscorr.png", "forecast_ensemble_mean.png", + "forecast_most_likely_tercile.png", "rpss.png") +) + +}) + # Delete files -unlink(paste0(outdir, list.files(outdir))) +unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) -- GitLab From 555a5ae42e6f031c29a668570492c9b341e9f728 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 6 Oct 2022 10:41:36 +0200 Subject: [PATCH 56/81] Add viz test --- tests/testthat/test-decadal_monthly_1.R | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 4267eb63..7bb5031e 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -6,6 +6,7 @@ source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_1.yml" recipe <- read_yaml(recipe_file) @@ -35,6 +36,14 @@ save_data(recipe = recipe, data = data, calibrated_data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs, archive = archive) ))}) +# Plotting 
+suppressWarnings({invisible(capture.output( +plot_data(recipe = recipe, archive = archive, data = data, + calibrated_data = calibrated_data, skill_metrics = skill_metrics, + probabilities = probs, significance = T) +))}) + + outdir <- get_dir(recipe) #====================================== @@ -251,7 +260,7 @@ test_that("4. Saving", { expect_equal( list.files(outdir), -c("tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", +c("plots", "tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-percentiles_month11.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-skill_month11.nc") @@ -261,7 +270,17 @@ c("tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "t #) +}) + + +test_that("5. Visualization", { +expect_equal( +list.files(paste0(outdir, "/plots/")), +c("forecast_ensemble_mean.png", "forecast_most_likely_tercile.png", + "rpss.png") +) + }) # Delete files -unlink(paste0(outdir, list.files(outdir))) +unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) -- GitLab From af5949b876853e4041abdbfebc6a04ae6811c062 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 10 Oct 2022 10:12:02 +0200 Subject: [PATCH 57/81] Rearrange na.rm conditiosn --- modules/Skill/compute_probs.R | 26 +++++++++++++++----------- modules/Skill/compute_quants.R | 17 ++++++++++------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/modules/Skill/compute_probs.R b/modules/Skill/compute_probs.R index a662df14..1c17b358 100644 --- a/modules/Skill/compute_probs.R +++ b/modules/Skill/compute_probs.R @@ -4,28 +4,32 @@ compute_probs <- function(data, quantiles, probs_dims=list('ensemble', 'bin'), split_factor=1, na.rm=FALSE) { - if (na.rm == FALSE) { - c2p <- function(x, t) { - # If the array contains any NA 
values, return NA - if (any(is.na(x))) { - rep(NA, dim(t)[['bin']] + 1) + # Define na.rm behavior + if (na.rm) { + .c2p <- function(x, t) { + if (any(!is.na(x))) { + # If the array contains any non-NA values, call convert2prob + colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) } else { - colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) + # If the array only contains NAs, return NA vector + rep(NA, dim(t)[['bin']] + 1) # vector with as many NAs as prob bins. } } } else { - c2p <- function(x, t) { - if (any(!is.na(x))) { # If the array contains some non-NA values + .c2p <- function(x, t) { + if (any(is.na(x))) { + # If the array contains any NA values, return NA vector + rep(NA, dim(t)[['bin']] + 1) + } else { + # If there are no NAs, call convert2prob colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) - } else { # If the array contains NAs only - rep(NA, dim(t)[['bin']] + 1) # vector with as many NAs as prob bins. } } } probs <- Apply(data = list(x = data, t = quantiles), target_dims = probs_dims, - c2p, + .c2p, output_dims = "bin", split_factor = split_factor, ncores = ncores)[[1]] diff --git a/modules/Skill/compute_quants.R b/modules/Skill/compute_quants.R index 60ad981f..8c89e87e 100644 --- a/modules/Skill/compute_quants.R +++ b/modules/Skill/compute_quants.R @@ -5,23 +5,26 @@ compute_quants <- function(data, thresholds, probs_dims=list('ensemble', 'bin'), split_factor=1, na.rm=FALSE) { - if (na.rm == FALSE) { - get_quantiles <- function(x, t) { + # Define na.rm behavior + if (na.rm) { + .get_quantiles <- function(x, t) { + quantile(as.vector(x), t, na.rm = TRUE) + } + } else { + .get_quantiles <- function(x, t) { if (any(is.na(x))) { + # If the array contains any NA values, return NA vector rep(NA, length(t)) } else { + # If there are no NAs, call quantile() quantile(as.vector(x), t, na.rm = FALSE) } } - } else { - get_quantiles <- function(x, t) { - quantile(as.vector(x), t, na.rm = TRUE) - } } quantiles <- Apply(data, 
target_dims = quantile_dims, - function(x, t) {get_quantiles(as.vector(x), thresholds)}, + function(x, t) {.get_quantiles(as.vector(x), thresholds)}, output_dims = "bin", ncores = ncores, split_factor = split_factor)[[1]] -- GitLab From 0a12514bac034051bf2a17dc29a540fdf7dd6cde Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 10 Oct 2022 15:00:21 +0200 Subject: [PATCH 58/81] Add check for skill_metrics in plot_data() --- modules/Visualization/Visualization.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index b07d6f1f..5605f9d4 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -35,8 +35,12 @@ plot_data <- function(recipe, # Plot skill metrics if (!is.null(skill_metrics)) { - plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, - significance) + if (is.list(skill_metrics)) { + plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, + significance) + } else { + stop("The element 'skill_metrics' must be a list of named arrays.") + } } # Plot forecast ensemble mean -- GitLab From ecbf1c2a3eda270df5952469b5dac4e192f89eb2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 10 Oct 2022 15:42:00 +0200 Subject: [PATCH 59/81] Put check inside plot_skill_metrics() --- modules/Visualization/Visualization.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 5605f9d4..61efe4a2 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,4 +1,4 @@ -## TODO: Remove once released in s2dv/CSTools +#G# TODO: Remove once released in s2dv/CSTools source("modules/Visualization/tmp/PlotMostLikelyQuantileMap.R") source("modules/Visualization/tmp/PlotCombinedMap.R") source("modules/Visualization/tmp/PlotLayout.R") @@ -35,12 
+35,8 @@ plot_data <- function(recipe, # Plot skill metrics if (!is.null(skill_metrics)) { - if (is.list(skill_metrics)) { - plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, - significance) - } else { - stop("The element 'skill_metrics' must be a list of named arrays.") - } + plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, + significance) } # Plot forecast ensemble mean @@ -71,6 +67,10 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, if (recipe$Analysis$Variables$freq == "daily_mean") { stop("Visualization functions not yet implemented for daily data.") } + # Abort if skill_metrics is not list + if (!is.list(skill_metrics)) { + stop("The element 'skill_metrics' must be a list of named arrays.") + } latitude <- data_cube$lat longitude <- data_cube$lon -- GitLab From bbf0e7953ad43ff1fcacaa200f1545a020156741 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 11 Oct 2022 14:55:02 +0200 Subject: [PATCH 60/81] Bugfix: Plot correct significance field --- modules/Visualization/Visualization.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 61efe4a2..c1bf9121 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -145,6 +145,12 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, skill_significance <- Reorder(skill_significance, c("time", "longitude", "latitude")) + # Split skill significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. 
+ skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + dim = 'time', + level = "sublist", + names = "dots") } else { skill_significance <- NULL } @@ -156,9 +162,11 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, label = T, abb = F)) titles <- as.vector(months) # Plot + suppressWarnings( PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - skill, longitude, latitude, - dots = skill_significance, + asplit(skill, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = skill_significance, dot_symbol = 20, toptitle = toptitle, title_scale = 0.6, @@ -168,6 +176,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, cols = col2, fileout = outfile, bar_label_digits = 3) + ) } } -- GitLab From b0bf948dfb9452724eb77322e4dd8d727777de9d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 13 Oct 2022 08:45:08 +0200 Subject: [PATCH 61/81] Rearrange function arguments for consistency, make archive optional --- modules/Calibration/Calibration.R | 2 +- modules/Saving/Saving.R | 19 +++++++++++++------ modules/Skill/Skill.R | 4 ++-- modules/Visualization/Visualization.R | 12 +++++++++++- tests/testthat/test-decadal_monthly_1.R | 6 +++--- tests/testthat/test-decadal_monthly_2.R | 6 +++--- tests/testthat/test-decadal_monthly_3.R | 6 +++--- tests/testthat/test-seasonal_daily.R | 4 ++-- tests/testthat/test-seasonal_monthly.R | 14 +++++++------- 9 files changed, 45 insertions(+), 28 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 85b1b007..d49dd9d9 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -3,7 +3,7 @@ source("tools/tmp/CST_Calibration.R") ## Entry params data and recipe? -calibrate_datasets <- function(data, recipe) { +calibrate_datasets <- function(recipe, data) { # Function that calibrates the hindcast using the method stated in the # recipe. 
If the forecast is not null, it calibrates it as well. # diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 713741fb..cf2ec1d0 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -2,10 +2,11 @@ source("modules/Saving/paths2save.R") -save_data <- function(recipe, archive, data, +save_data <- function(recipe, data, calibrated_data = NULL, skill_metrics = NULL, - probabilities = NULL) { + probabilities = NULL, + archive = NULL) { # Wrapper for the saving functions. # recipe: The auto-s2s recipe @@ -19,14 +20,20 @@ save_data <- function(recipe, archive, data, if (is.null(recipe)) { stop("The 'recipe' parameter is mandatory.") } - if (is.null(archive)) { - stop("The 'archive' parameter is mandatory.") - } + if (is.null(data)) { stop("The 'data' parameter is mandatory. It should be the output of", "load_datasets().") } - + if (is.null(archive)) { + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive.yml"))$archive + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive_decadal.yml"))$archive + } + } dict <- read_yaml("conf/variable-dictionary.yml") # Create output directory diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index fb5498e6..726cd605 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -53,7 +53,7 @@ source("modules/Skill/tmp/AbsBiasSS.R") # " running Skill module ", "\n", # " it can call ", metric_fun )) -compute_skill_metrics <- function(exp, obs, recipe) { +compute_skill_metrics <- function(recipe, exp, obs) { # exp: s2dv_cube containing the hindcast # obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml @@ -207,7 +207,7 @@ compute_skill_metrics <- function(exp, obs, recipe) { return(skill_metrics) } -compute_probabilities <- function(data, recipe) { +compute_probabilities <- function(recipe, data) { if 
(is.null(recipe$Analysis$ncores)) { ncores <- 1 diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index c1bf9121..4b63de8c 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -8,11 +8,11 @@ source("modules/Visualization/tmp/PlotLayout.R") ## TODO: Add param 'raw'? plot_data <- function(recipe, - archive, data, calibrated_data = NULL, skill_metrics = NULL, probabilities = NULL, + archive = NULL, significance = F) { # Try to produce and save several basic plots. @@ -33,6 +33,16 @@ plot_data <- function(recipe, "that can be plotted.") } + if (is.null(archive)) { + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive.yml"))$archive + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive_decadal.yml"))$archive + } + } + # Plot skill metrics if (!is.null(skill_metrics)) { plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 7bb5031e..39c8d900 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -19,15 +19,15 @@ data <- load_datasets(recipe_file) # Calibrate datasets suppressWarnings({invisible(capture.output( - calibrated_data <- calibrate_datasets(data, recipe) + calibrated_data <- calibrate_datasets(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(calibrated_data$hcst, recipe) +probs <- compute_probabilities(recipe, calibrated_data$hcst) ))}) # Saving diff --git a/tests/testthat/test-decadal_monthly_2.R 
b/tests/testthat/test-decadal_monthly_2.R index ac4f2fff..98fa66cb 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -18,15 +18,15 @@ data <- load_datasets(recipe_file) # Calibrate datasets suppressWarnings({invisible(capture.output( - calibrated_data <- calibrate_datasets(data, recipe) + calibrated_data <- calibrate_datasets(recipe, data) ))}) # Compute skill metrics suppressMessages({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(calibrated_data$hcst, recipe) +probs <- compute_probabilities(recipe, calibrated_data$hcst) ))}) #====================================== diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 21665f6e..22f47d4c 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -18,15 +18,15 @@ data <- load_datasets(recipe_file) # Calibrate datasets suppressWarnings({invisible(capture.output( - calibrated_data <- calibrate_datasets(data, recipe) + calibrated_data <- calibrate_datasets(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(calibrated_data$hcst, recipe) +probs <- compute_probabilities(recipe, calibrated_data$hcst) ))}) #====================================== diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index c37b5514..11c01f19 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -15,12 +15,12 @@ data <- 
load_datasets(recipe_file) recipe <- read_yaml(recipe_file) suppressWarnings({invisible(capture.output( -calibrated_data <- calibrate_datasets(data, recipe) +calibrated_data <- calibrate_datasets(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) ))}) test_that("1. Loading", { diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 90938d62..9423cde9 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -16,29 +16,29 @@ data <- load_datasets(recipe_file) ))}) suppressWarnings({invisible(capture.output( -calibrated_data <- calibrate_datasets(data, recipe) +calibrated_data <- calibrate_datasets(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(calibrated_data$hcst, recipe) +probs <- compute_probabilities(recipe, calibrated_data$hcst) ))}) # Saving suppressWarnings({invisible(capture.output( save_data(recipe = recipe, data = data, calibrated_data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs, archive = archive) + skill_metrics = skill_metrics, probabilities = probs) ))}) # Plotting suppressWarnings({invisible(capture.output( -plot_data(recipe = recipe, archive = archive, data = data, - calibrated_data = calibrated_data, skill_metrics = skill_metrics, - probabilities = probs, significance = T) +plot_data(recipe = recipe, data = data, calibrated_data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T) ))}) outdir <- get_dir(recipe) -- GitLab From 
8de3ad947480896a0c08cbd36f4dce946c321aa8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 18 Oct 2022 11:14:35 +0200 Subject: [PATCH 62/81] Refine longitude and latitude mismatch error messages --- modules/Loading/Loading.R | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index a93be8ca..39208d01 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -298,10 +298,31 @@ load_datasets <- function(recipe_file) { # Check for consistency between hcst and obs grid if (!(recipe$Analysis$Regrid$type == 'none')) { if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { - stop("hcst and obs don't share the same latitude.") + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") } if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { - stop("hcst and obs don't share the same longitude.") + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } } -- GitLab From a033072b6887c79b8451ac3f64bd4921b93ec24a Mon Sep 17 00:00:00 2001 From: Victoria 
Agudetse Roures Date: Tue, 18 Oct 2022 13:06:10 +0200 Subject: [PATCH 63/81] Remove local copies of s2dv functions after release of s2dv v1.3.0 --- modules/Skill/Skill.R | 6 - modules/Skill/tmp/AbsBiasSS.R | 280 ---------- modules/Skill/tmp/Bias.R | 189 ------- modules/Skill/tmp/CRPS.R | 119 ---- modules/Skill/tmp/CRPSS.R | 172 ------ modules/Skill/tmp/RandomWalkTest.R | 82 --- modules/Visualization/Visualization.R | 1 - modules/Visualization/tmp/PlotLayout.R | 732 ------------------------- 8 files changed, 1581 deletions(-) delete mode 100644 modules/Skill/tmp/AbsBiasSS.R delete mode 100644 modules/Skill/tmp/Bias.R delete mode 100644 modules/Skill/tmp/CRPS.R delete mode 100644 modules/Skill/tmp/CRPSS.R delete mode 100644 modules/Skill/tmp/RandomWalkTest.R delete mode 100644 modules/Visualization/tmp/PlotLayout.R diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index fb5498e6..9a1df4f8 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -10,12 +10,6 @@ source("modules/Skill/compute_quants.R") source("modules/Skill/compute_probs.R") source("modules/Skill/s2s.metrics.R") -## TODO: Remove once the new version of s2dv is released -source("modules/Skill/tmp/CRPS.R") -source("modules/Skill/tmp/CRPSS.R") -source("modules/Skill/tmp/RandomWalkTest.R") -source("modules/Skill/tmp/Bias.R") -source("modules/Skill/tmp/AbsBiasSS.R") ## TODO: Implement this in the future ## Which parameter are required? diff --git a/modules/Skill/tmp/AbsBiasSS.R b/modules/Skill/tmp/AbsBiasSS.R deleted file mode 100644 index 0838f251..00000000 --- a/modules/Skill/tmp/AbsBiasSS.R +++ /dev/null @@ -1,280 +0,0 @@ -#'Compute the Absolute Mean Bias Skill Score -#' -#'The Absolute Mean Bias Skill Score is based on the Absolute Mean Error (Wilks, -#' 2011) between the ensemble mean forecast and the observations. 
It measures -#'the accuracy of the forecast in comparison with a reference forecast to assess -#'whether the forecast presents an improvement or a worsening with respect to -#'that reference. The Mean Bias Skill Score ranges between minus infinite and 1. -#'Positive values indicate that the forecast has higher skill than the reference -#'forecast, while negative values indicate that it has a lower skill. Examples -#'of reference forecasts are the climatological forecast (average of the -#'observations), a previous model version, or another model. It is computed as -#'\code{AbsBiasSS = 1 - AbsBias_exp / AbsBias_ref}. The statistical significance -#'is obtained based on a Random Walk test at the 95% confidence level (DelSole -#'and Tippett, 2016). If there is more than one dataset, the result will be -#'computed for each pair of exp and obs data. -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -#' 'dat_dim'. -#'@param ref A named numerical array of the reference forecast data with at -#' least time dimension. The dimensions must be the same as 'exp' except -#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should -#' not have dataset dimension. If there is corresponding reference for each -#' experiement, the dataset dimension must have the same length as in 'exp'. If -#' 'ref' is NULL, the climatological forecast is used as reference forecast. -#' The default value is NULL. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' -#' and 'ref' are already the ensemble mean. The default value is NULL. 
-#'@param dat_dim A character string indicating the name of dataset dimension. -#' The length of this dimension can be different between 'exp' and 'obs'. -#' The default value is NULL. -#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or -#' kept (FALSE) for computation. The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'\item{$biasSS}{ -#' A numerical array of BiasSS with dimensions nexp, nobs and the rest -#' dimensions of 'exp' except 'time_dim' and 'memb_dim'. -#'} -#'\item{$sign}{ -#' A logical array of the statistical significance of the BiasSS -#' with the same dimensions as $biasSS. nexp is the number of -#' experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation -#' (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. -#'} -#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) -#'ref <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) -#'biasSS1 <- AbsBiasSS(exp = exp, obs = obs, ref = ref, memb_dim = 'member') -#'biasSS2 <- AbsBiasSS(exp = exp, obs = obs, ref = NULL, memb_dim = 'member') -#' -#'@import multiApply -#'@export -AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL, - dat_dim = NULL, na.rm = FALSE, ncores = NULL) { - - # Check inputs - ## exp, obs, and ref (1) - if (!is.array(exp) | !is.numeric(exp)) { - stop("Parameter 'exp' must be a numeric array.") - } - if (!is.array(obs) | !is.numeric(obs)) { - stop("Parameter 'obs' must be a numeric array.") - } - if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - 
any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names.") - } - if (!is.null(ref)) { - if (!is.array(ref) | !is.numeric(ref)) - stop("Parameter 'ref' must be a numeric array.") - if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { - stop("Parameter 'ref' must have dimension names.") - } - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - if (!is.null(ref) & !time_dim %in% names(dim(ref))) { - stop("Parameter 'time_dim' is not found in 'ref' dimension.") - } - ## memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", - "but it should be of length = 1).") - } - } - } - ## dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim) | length(dat_dim) > 1) { - stop("Parameter 'dat_dim' must be a character string.") - } - if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", - " Set it as NULL if there is no dataset dimension.") - } - } - ## exp, obs, and ref (2) - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] - } - if (!is.null(dat_dim)) { - name_exp <- 
name_exp[-which(name_exp == dat_dim)] - name_obs <- name_obs[-which(name_obs == dat_dim)] - } - if (!identical(length(name_exp), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) - } - if (!is.null(ref)) { - name_ref <- sort(names(dim(ref))) - if (!is.null(memb_dim) && memb_dim %in% name_ref) { - name_ref <- name_ref[-which(name_ref == memb_dim)] - } - if (!is.null(dat_dim)) { - if (dat_dim %in% name_ref) { - if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be", - " equal to dataset dimension of 'exp'.")) - } - name_ref <- name_ref[-which(name_ref == dat_dim)] - } - } - if (!identical(length(name_exp), length(name_ref)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) - } - } - ## na.rm - if (!is.logical(na.rm) | length(na.rm) > 1) { - stop("Parameter 'na.rm' must be one logical value.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################ - - ## Ensemble mean - if (!is.null(memb_dim)) { - exp <- MeanDims(exp, memb_dim, na.rm = na.rm) - if (!is.null(ref) & memb_dim %in% names(dim(ref))) { - ref <- MeanDims(ref, memb_dim, na.rm = na.rm) - } - } - - ## Mean bias skill score - if (!is.null(ref)) { # use "ref" as reference forecast - if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { - target_dims_ref <- c(time_dim, dat_dim) - } else { - target_dims_ref <- c(time_dim) - } - data <- list(exp = exp, obs = obs, ref = ref) - target_dims = list(exp = c(time_dim, dat_dim), - obs = 
c(time_dim, dat_dim), - ref = target_dims_ref) - } else { - data <- list(exp = exp, obs = obs) - target_dims = list(exp = c(time_dim, dat_dim), - obs = c(time_dim, dat_dim)) - } - - output <- Apply(data, - target_dims = target_dims, - fun = .AbsBiasSS, - dat_dim = dat_dim, - na.rm = na.rm, - ncores = ncores) - - return(output) -} - -.AbsBiasSS <- function(exp, obs, ref = NULL, dat_dim = NULL, na.rm = FALSE) { - # exp and obs: [sdate, (dat_dim)] - # ref: [sdate, (dat_dim)] or NULL - - # Adjust exp, obs, ref to have dat_dim temporarily - if (is.null(dat_dim)) { - nexp <- 1 - nobs <- 1 - exp <- InsertDim(exp, posdim = 2, lendim = 1, name = 'dataset') - obs <- InsertDim(obs, posdim = 2, lendim = 1, name = 'dataset') - if (!is.null(ref)) { - ref <- InsertDim(ref, posdim = 2, lendim = 1, name = 'dataset') - } - ref_dat_dim <- FALSE - } else { - nexp <- as.numeric(dim(exp)[dat_dim]) - nobs <- as.numeric(dim(obs)[dat_dim]) - if (length(dim(ref)) == 1) { # ref: [sdate] - ref_dat_dim <- FALSE - } else { - ref_dat_dim <- TRUE - } - } - - biasSS <- array(dim = c(nexp = nexp, nobs = nobs)) - sign <- array(dim = c(nexp = nexp, nobs = nobs)) - - for (i in 1:nexp) { - exp_data <- exp[, i] - if (isTRUE(ref_dat_dim)) { - ref_data <- ref[, i] - } else { - ref_data <- ref - } - for (j in 1:nobs) { - obs_data <- obs[, j] - - if (isTRUE(na.rm)) { - if (is.null(ref)) { - good_values <- !is.na(exp_data) & !is.na(obs_data) - exp_data <- exp_data[good_values] - obs_data <- obs_data[good_values] - } else { - good_values <- !is.na(exp_data) & !is.na(ref_data) & !is.na(obs_data) - exp_data <- exp_data[good_values] - ref_data <- ref_data[good_values] - obs_data <- obs_data[good_values] - } - } - ## Bias of the exp - bias_exp <- .Bias(exp = exp_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = TRUE) - ## Bias of the ref - if (is.null(ref)) { ## Climatological forecast - ref_data <- mean(obs_data, na.rm = na.rm) - } - bias_ref <- .Bias(exp = ref_data, obs = obs_data, na.rm = 
na.rm, absolute = TRUE, time_mean = TRUE) - ## Skill score and significance - biasSS[i, j] <- 1 - bias_exp / bias_ref - sign[i, j] <- .RandomWalkTest(skill_A = bias_exp, skill_B = bias_ref)$signif - } - } - - if (is.null(dat_dim)) { - dim(biasSS) <- NULL - dim(sign) <- NULL - } - - - return(list(biasSS = biasSS, sign = sign)) -} diff --git a/modules/Skill/tmp/Bias.R b/modules/Skill/tmp/Bias.R deleted file mode 100644 index 0319a0f0..00000000 --- a/modules/Skill/tmp/Bias.R +++ /dev/null @@ -1,189 +0,0 @@ -#'Compute the Mean Bias -#' -#'The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference -#'between the ensemble mean forecast and the observations. It is a deterministic -#'metric. Positive values indicate that the forecasts are on average too high -#'and negative values indicate that the forecasts are on average too low. -#'It also allows to compute the Absolute Mean Bias or bias without temporal -#'mean. If there is more than one dataset, the result will be computed for each -#'pair of exp and obs data. -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -#' 'dat_dim'. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param dat_dim A character string indicating the name of dataset dimension. -#' The length of this dimension can be different between 'exp' and 'obs'. -#' The default value is NULL. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean; it should be set to NULL if the parameter -#' 'exp' is already the ensemble mean. The default value is NULL. -#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or -#' kept (FALSE) for computation. The default value is FALSE. 
-#'@param absolute A logical value indicating whether to compute the absolute -#' bias. The default value is FALSE. -#'@param time_mean A logical value indicating whether to compute the temporal -#' mean of the bias. The default value is TRUE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of -#''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number -#'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation -#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. -#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) -#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') -#' -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#'@export -Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE, - absolute = FALSE, time_mean = TRUE, ncores = NULL) { - - # Check inputs - ## exp and obs (1) - if (!is.array(exp) | !is.numeric(exp)) - stop("Parameter 'exp' must be a numeric array.") - if (!is.array(obs) | !is.numeric(obs)) - stop("Parameter 'obs' must be a numeric array.") - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop("Parameter 'time_dim' must be a character string.") - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - ## memb_dim - if 
(!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", - "but it should be of length = 1).") - } - } - } - ## dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim) | length(dat_dim) > 1) { - stop("Parameter 'dat_dim' must be a character string.") - } - if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", - " Set it as NULL if there is no dataset dimension.") - } - } - ## exp and obs (2) - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] - } - if (!is.null(dat_dim)) { - name_exp <- name_exp[-which(name_exp == dat_dim)] - name_obs <- name_obs[-which(name_obs == dat_dim)] - } - if (!identical(length(name_exp), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) - } - ## na.rm - if (!is.logical(na.rm) | length(na.rm) > 1) { - stop("Parameter 'na.rm' must be one logical value.") - } - ## absolute - if (!is.logical(absolute) | length(absolute) > 1) { - stop("Parameter 'absolute' must be one logical value.") - } - ## time_mean - if (!is.logical(time_mean) | length(time_mean) > 1) { - stop("Parameter 'time_mean' must be one logical value.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - 
stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - - ## Ensemble mean - if (!is.null(memb_dim)) { - exp <- MeanDims(exp, memb_dim, na.rm = na.rm) - } - - ## (Mean) Bias - bias <- Apply(data = list(exp, obs), - target_dims = c(time_dim, dat_dim), - fun = .Bias, - time_dim = time_dim, - dat_dim = dat_dim, - na.rm = na.rm, - absolute = absolute, - time_mean = time_mean, - ncores = ncores)$output1 - - return(bias) -} - - -.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE, - absolute = FALSE, time_mean = TRUE) { - # exp and obs: [sdate, (dat)] - - if (is.null(dat_dim)) { - bias <- exp - obs - - if (isTRUE(absolute)) { - bias <- abs(bias) - } - - if (isTRUE(time_mean)) { - bias <- mean(bias, na.rm = na.rm) - } - - } else { - nexp <- as.numeric(dim(exp)[dat_dim]) - nobs <- as.numeric(dim(obs)[dat_dim]) - bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) - - for (i in 1:nexp) { - for (j in 1:nobs) { - bias[, i, j] <- exp[, i] - obs[, j] - } - } - - if (isTRUE(absolute)) { - bias <- abs(bias) - } - - if (isTRUE(time_mean)) { - bias <- MeanDims(bias, time_dim, na.rm = na.rm) - } - } - - return(bias) -} diff --git a/modules/Skill/tmp/CRPS.R b/modules/Skill/tmp/CRPS.R deleted file mode 100644 index 942ec9e4..00000000 --- a/modules/Skill/tmp/CRPS.R +++ /dev/null @@ -1,119 +0,0 @@ -#'Compute the Continuous Ranked Probability Score -#' -#'The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous -#'version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill metric -#'to evaluate the full distribution of probabilistic forecasts. It has a negative -#'orientation (i.e., the higher-quality forecast the smaller CRPS) and it rewards -#'the forecast that has probability concentration around the observed value. In case -#'of a deterministic forecast, the CRPS is reduced to the mean absolute error. It has -#'the same units as the data. 
The function is based on enscrps_cpp from SpecsVerification. -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast. The default value is 'member'. -#'@param Fair A logical indicating whether to compute the FairCRPS (the -#' potential RPSS that the forecast would have with an infinite ensemble size). -#' The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A numerical array of CRPS with the same dimensions as "exp" except the -#''time_dim' and 'memb_dim' dimensions. 
-#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#'res <- CRPS(exp = exp, obs = obs) -#' -#'@import multiApply -#'@importFrom SpecsVerification enscrps_cpp -#'@importFrom ClimProjDiags Subset -#'@export -CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', - Fair = FALSE, ncores = NULL) { - - # Check inputs - ## exp and obs (1) - if (!is.array(exp) | !is.numeric(exp)) - stop('Parameter "exp" must be a numeric array.') - if (!is.array(obs) | !is.numeric(obs)) - stop('Parameter "obs" must be a numeric array.') - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - ## exp and obs (2) - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]),1)){ - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else {stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1).")} - } - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (!identical(length(name_exp), length(name_obs)) | - 
!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim'.")) - } - ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { - stop("Parameter 'Fair' must be either TRUE or FALSE.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - - crps <- Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim), - output_dims = time_dim, - fun = .CRPS, Fair = Fair, - ncores = ncores)$output1 - - # Return only the mean RPS - crps <- MeanDims(crps, time_dim, na.rm = FALSE) - - return(crps) -} - -.CRPS <- function(exp, obs, Fair = FALSE) { - # exp: [sdate, memb] - # obs: [sdate] - - if (Fair) { # FairCRPS - R_new <- Inf - } else {R_new <- NA} - - crps <- SpecsVerification::enscrps_cpp(ens = exp, obs = obs, R_new = R_new) - - return(crps) -} diff --git a/modules/Skill/tmp/CRPSS.R b/modules/Skill/tmp/CRPSS.R deleted file mode 100644 index 9f5edbd5..00000000 --- a/modules/Skill/tmp/CRPSS.R +++ /dev/null @@ -1,172 +0,0 @@ -#'Compute the Continuous Ranked Probability Skill Score -#' -#'The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the skill score -#'based on the Continuous Ranked Probability Score (CRPS; Wilks, 2011). It can be used to -#'assess whether a forecast presents an improvement or worsening with respect to -#'a reference forecast. The CRPSS ranges between minus infinite and 1. If the -#'CRPSS is positive, it indicates that the forecast has higher skill than the -#'reference forecast, while a negative value means that it has a lower skill. 
-#'Examples of reference forecasts are the climatological forecast (same -#'probabilities for all categories for all time steps), persistence, a previous -#'model version, and another model. It is computed as CRPSS = 1 - CRPS_exp / CRPS_ref. -#'The statistical significance is obtained based on a Random Walk test at the -#'95% confidence level (DelSole and Tippett, 2016). -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. -#'@param ref A named numerical array of the reference forecast data with at -#' least time dimension. The dimensions must be the same as 'exp' except -#' 'memb_dim'. If it is NULL, the climatological forecast is used as reference -#' forecast. The default value is NULL. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast and the reference forecast. The -#' default value is 'member'. -#'@param Fair A logical indicating whether to compute the FairCRPSS (the -#' potential CRPSS that the forecast would have with an infinite ensemble size). -#' The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'\item{$crpss}{ -#' A numerical array of the CRPSS with the same dimensions as "exp" except the -#' 'time_dim' and 'memb_dim' dimensions. -#'} -#'\item{$sign}{ -#' A logical array of the statistical significance of the CRPSS with the same -#' dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. 
-#'} -#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'res <- CRPSS(exp = exp, obs = obs) ## climatology as reference forecast -#'res <- CRPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast -#' -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#'@export -CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', - Fair = FALSE, ncores = NULL) { - - # Check inputs - ## exp, obs, and ref (1) - if (!is.array(exp) | !is.numeric(exp)) - stop('Parameter "exp" must be a numeric array.') - if (!is.array(obs) | !is.numeric(obs)) - stop('Parameter "obs" must be a numeric array.') - if (!is.null(ref)) { - if (!is.array(ref) | !is.numeric(ref)) - stop('Parameter "ref" must be a numeric array.') - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - if (!is.null(ref) & !time_dim %in% names(dim(ref))) { - stop("Parameter 'time_dim' is not found in 'ref' dimension.") - } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { - stop("Parameter 'memb_dim' is not found in 'ref' dimension.") - } - ## exp and obs (2) - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]),1)){ - obs <- 
ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else {stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1).")} - } - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (!identical(length(name_exp), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) - } - if (!is.null(ref)) { - name_ref <- sort(names(dim(ref))) - name_ref <- name_ref[-which(name_ref == memb_dim)] - if (!identical(length(name_exp), length(name_ref)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) - } - } - ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { - stop("Parameter 'Fair' must be either TRUE or FALSE.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - - # Compute CRPSS - if (!is.null(ref)) { # use "ref" as reference forecast - data <- list(exp = exp, obs = obs, ref = ref) - target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim, - ref = c(time_dim, memb_dim)) - } else { - data <- list(exp = exp, obs = obs) - target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim) - } - output <- Apply(data, - target_dims = target_dims, - fun = .CRPSS, - Fair = Fair, - ncores = ncores) - - return(output) -} - -.CRPSS <- function(exp, obs, ref = NULL, Fair = FALSE) { - # exp: [sdate, memb] - # obs: [sdate] - # ref: [sdate, memb] or NULL - - # CRPS of the forecast - crps_exp <- .CRPS(exp = exp, obs = obs, Fair = Fair) - - # CRPS of the reference forecast - if (is.null(ref)){ - ## 
using climatology as reference forecast - ## all the time steps are used as if they were members - ## then, ref dimensions are [sdate, memb], both with length(sdate) - ref <- array(data = obs, dim = c(member = length(obs))) - ref <- InsertDim(data = ref, posdim = 1, lendim = length(obs), name = 'sdate') - } - crps_ref <- .CRPS(exp = ref, obs = obs, Fair = Fair) - - # CRPSS - crpss <- 1 - mean(crps_exp) / mean(crps_ref) - - # Significance - sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref)$signif - - return(list(crpss = crpss, sign = sign)) -} diff --git a/modules/Skill/tmp/RandomWalkTest.R b/modules/Skill/tmp/RandomWalkTest.R deleted file mode 100644 index adeadc1e..00000000 --- a/modules/Skill/tmp/RandomWalkTest.R +++ /dev/null @@ -1,82 +0,0 @@ -#'Random walk test for skill differences -#' -#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a -#'common reference) based on Random Walks, with significance estimate at the 95% -#'confidence level, as in DelSole and Tippett (2016). -#' -#'@param skill_A A numerical array of the time series of the skill with the -#' forecaster A's. -#'@param skill_B A numerical array of the time series of the skill with the -#' forecaster B's. The dimensions should be identical as parameter 'skill_A'. -#'@param time_dim A character string indicating the name of the dimension along -#' which the tests are computed. The default value is 'sdate'. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return A list of 2: -#'\item{$score}{ -#' A numerical array with the same dimensions as the input arrays except -#' 'time_dim'. The number of times that forecaster A has been better than -#' forecaster B minus the number of times that forecaster B has been better -#' than forecaster A (for skill positively oriented). 
If $score is positive -#' forecaster A is better than forecaster B, and if $score is negative -#' forecaster B is better than forecaster B. -#'} -#'\item{$signif}{ -#' A logical array with the same dimensions as the input arrays except -#' 'time_dim'. Whether the difference is significant or not at the 5% -#' significance level. -#'} -#' -#'@examples -#' fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) -#' fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2)) -#' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) -#' skill_A <- abs(fcst_A - reference) -#' skill_B <- abs(fcst_B - reference) -#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) -#' -#'@import multiApply -#'@export -RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){ - - ## Check inputs - if (is.null(skill_A) | is.null(skill_B)){ - stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") - } - if(!is.numeric(skill_A) | !is.numeric(skill_B)){ - stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") - } - if (!identical(dim(skill_A),dim(skill_B))){ - stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") - } - if(!is.character(time_dim)){ - stop("Parameter 'time_dim' must be a character string.") - } - if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){ - stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") - } - if (!is.null(ncores)){ - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){ - stop("Parameter 'ncores' must be a positive integer.") - } - } - - ## Compute the Random Walk Test - res <- multiApply::Apply(data = list(skill_A, skill_B), - target_dims = time_dim, - fun = .RandomWalkTest, - ncores = ncores) - return(res) -} - -.RandomWalkTest <- function(skill_A, skill_B){ - - score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) - - ## TRUE if significant (if last value 
is above or below 2*sqrt(N)) - signif<- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(length(skill_A))) | (score[length(skill_A)] > 2*sqrt(length(skill_A))), - yes = TRUE, no = FALSE) - - return(list("score"=score[length(skill_A)],"signif"=signif)) -} diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index c1bf9121..a4450d14 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,7 +1,6 @@ #G# TODO: Remove once released in s2dv/CSTools source("modules/Visualization/tmp/PlotMostLikelyQuantileMap.R") source("modules/Visualization/tmp/PlotCombinedMap.R") -source("modules/Visualization/tmp/PlotLayout.R") ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Adapt to multi-model case diff --git a/modules/Visualization/tmp/PlotLayout.R b/modules/Visualization/tmp/PlotLayout.R deleted file mode 100644 index e5ae9800..00000000 --- a/modules/Visualization/tmp/PlotLayout.R +++ /dev/null @@ -1,732 +0,0 @@ -#'Arrange and Fill Multi-Pannel Layouts With Optional Colour Bar -#' -#'This function takes an array or list of arrays and loops over each of them -#'to plot all the sub-arrays they contain on an automatically generated -#'multi-pannel layout. A different plot function (not necessarily from -#'s2dv) can be applied over each of the provided arrays. The input -#'dimensions of each of the functions have to be specified, either with the -#'names or the indices of the corresponding input dimensions. It is possible -#'to draw a common colour bar at any of the sides of the multi-pannel for all -#'the s2dv plots that use a colour bar. Common plotting arguments -#'for all the arrays in 'var' can be specified via the '...' parameter, and -#'specific plotting arguments for each array can be fully adjusted via -#''special_args'. It is possible to draw titles for each of the figures, -#'layout rows, layout columns and for the whole figure. 
A number of parameters -#'is provided in order to adjust the position, size and colour of the -#'components. Blank cells can be forced to appear and later be filled in -#'manually with customized plots.\cr -#'This function pops up a blank new device and fills it in, so it cannot be -#'nested in complex layouts. -#' -#'@param fun Plot function (or name of the function) to be called on the -#' arrays provided in 'var'. If multiple arrays are provided in 'var', a -#' vector of as many function names (character strings!) can be provided in -#' 'fun', one for each array in 'var'. -#'@param plot_dims Numeric or character string vector with identifiers of the -#' input plot dimensions of the plot function specified in 'fun'. If -#' character labels are provided, names(dim(var)) or attr('dimensions', var) -#' will be checked to locate the dimensions. As many plots as -#' prod(dim(var)[-plot_dims]) will be generated. If multiple arrays are -#' provided in 'var', 'plot_dims' can be sent a list with a vector of plot -#' dimensions for each. If a single vector is provided, it will be used for -#' all the arrays in 'var'. -#'@param var Multi-dimensional array with at least the dimensions expected by -#' the specified plot function in 'fun'. The dimensions reqired by the -#' function must be specified in 'plot_dims'. The dimensions can be -#' disordered and will be reordered automatically. Dimensions can optionally -#' be labelled in order to refer to them with names in 'plot_dims'. All the -#' available plottable sub-arrays will be automatically plotted and arranged -#' in consecutive cells of an automatically arranged layout. A list of -#' multiple (super-)arrays can be specified. The process will be repeated for -#' each of them, by default applying the same plot function to all of them -#' or, if properly specified in 'fun', a different plot function will be -#' applied to each of them. 
NAs can be passed to the list: a NA will yield a -#' blank cell in the layout, which can be populated after -#' (see .SwitchToFigure). -#'@param \dots Parameters to be sent to the plotting function 'fun'. If -#' multiple arrays are provided in 'var' and multiple functions are provided -#' in 'fun', the parameters provided through \dots will be sent to all the -#' plot functions, as common parameters. To specify concrete arguments for -#' each of the plot functions see parameter 'special_args'. -#'@param special_args List of sub-lists, each sub-list having specific extra -#' arguments for each of the plot functions provided in 'fun'. If you want to -#' fix a different value for each plot in the layout you can do so by -#' a) splitting your array into a list of sub-arrays (each with the data for -#' one plot) and providing it as parameter 'var', -#' b) providing a list of named sub-lists in 'special_args', where the names -#' of each sub-list match the names of the parameters to be adjusted, and -#' each value in a sub-list contains the value of the corresponding parameter. -#' For example, if the plots are two maps with different arguments, the -#' structure would be like:\cr -#' var:\cr -#' List of 2\cr -#' $ : num [1:360, 1:181] 1 3.82 5.02 6.63 8.72 ...\cr -#' $ : num [1:360, 1:181] 2.27 2.82 4.82 7.7 10.32 ...\cr -#' special_args:\cr -#' List of 2\cr -#' $ :List of 2\cr -#' ..$ arg1: ...\cr -#' ..$ arg2: ...\cr -#' $ :List of 1\cr -#' ..$ arg1: ...\cr -#'@param nrow Numeric value to force the number of rows in the automatically -#' generated layout. If higher than the required, this will yield blank cells -#' in the layout (which can then be populated). If lower than the required -#' the function will stop. By default it is configured to arrange the layout -#' in a shape as square as possible. Blank cells can be manually populated -#' after with customized plots (see SwitchTofigure). 
-#'@param ncol Numeric value to force the number of columns in the -#' automatically generated layout. If higher than the required, this will -#' yield blank cells in the layout (which can then be populated). If lower -#' than the required the function will stop. By default it is configured to -#' arrange the layout in a shape as square as possible. Blank cells can be -#' manually populated after with customized plots (see SwitchTofigure). -#'@param toptitle Topt title for the multi-pannel. Blank by default. -#'@param row_titles Character string vector with titles for each of the rows -#' in the layout. Blank by default. -#'@param col_titles Character string vector with titles for each of the -#' columns in the layout. Blank by default. -#'@param bar_scale Scale factor for the common colour bar. Takes 1 by default. -#'@param title_scale Scale factor for the multi-pannel title. Takes 1 by -#' default. -#'@param title_margin_scale Scale factor for the margins surrounding the top -#' title. Takes 1 by default. -#'@param title_left_shift_scale When plotting row titles, a shift is added -#' to the horizontal positioning of the top title in order to center it to -#' the region of the figures (without taking row titles into account). This -#' shift can be reduced. A value of 0 will remove the shift completely, -#' centering the title to the total width of the device. This parameter will -#' be disregarded if no 'row_titles' are provided. -#'@param subtitle_scale Scale factor for the row titles and column titles -#' (specified in 'row_titles' and 'col_titles'). Takes 1 by default. -#'@param subtitle_margin_scale Scale factor for the margins surrounding the -#' subtitles. Takes 1 by default. -#'@param units Title at the top of the colour bar, most commonly the units of -#' the variable provided in parameter 'var'. -#'@param brks,cols,bar_limits,triangle_ends Usually only providing 'brks' is -#' enough to generate the desired colour bar. 
These parameters allow to -#' define n breaks that define n - 1 intervals to classify each of the values -#' in 'var'. The corresponding grid cell of a given value in 'var' will be -#' coloured in function of the interval it belongs to. These parameters are -#' sent to \code{ColorBar()} to generate the breaks and colours. Additional -#' colours for values beyond the limits of the colour bar are also generated -#' and applied to the plot if 'bar_limits' or 'brks' and 'triangle_ends' are -#' properly provided to do so. See ?ColorBar for a full explanation. -#'@param col_inf,col_sup Colour identifiers to colour the values in 'var' that -#' go beyond the extremes of the colour bar and to colour NA values, -#' respectively. 'colNA' takes 'white' by default. 'col_inf' and 'col_sup' -#' will take the value of 'colNA' if not specified. See ?ColorBar for a full -#' explanation on 'col_inf' and 'col_sup'. -#'@param color_fun,subsampleg,bar_extra_labels,draw_bar_ticks,draw_separators,triangle_ends_scale,bar_label_digits,bar_label_scale,units_scale,bar_tick_scale,bar_extra_margin Set of parameters to control the visual aspect of the drawn colour bar. See ?ColorBar for a full explanation. -#'@param drawleg Where to draw the common colour bar. Can take values TRUE, -#' FALSE or:\cr -#' 'up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N'\cr -#' 'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr -#' 'right', 'r', 'R', 'east', 'e', 'E'\cr -#' 'left', 'l', 'L', 'west', 'w', 'W' -#'@param titles Character string vector with titles for each of the figures in -#' the multi-pannel, from top-left to bottom-right. Blank by default. -#'@param bar_left_shift_scale When plotting row titles, a shift is added to -#' the horizontal positioning of the colour bar in order to center it to the -#' region of the figures (without taking row titles into account). This shift -#' can be reduced. 
A value of 0 will remove the shift completely, centering -#' the colour bar to the total width of the device. This parameter will be -#' disregarded if no 'row_titles' are provided. -#'@param extra_margin Extra margins to be added around the layout, in the -#' format c(y1, x1, y2, x2). The units are margin lines. Takes rep(0, 4) -#' by default. -#'@param layout_by_rows Logical indicating wether the panels should be filled -#' by columns (FALSE) or by raws (TRUE, default). -#'@param fileout File where to save the plot. If not specified (default) a -#' graphics device will pop up. Extensions allowed: eps/ps, jpeg, png, pdf, -#' bmp and tiff. -#'@param width Width in inches of the multi-pannel. 7 by default, or 11 if -#' 'fielout' has been specified. -#'@param height Height in inches of the multi-pannel. 7 by default, or 11 if -#' 'fileout' has been specified. -#'@param size_units Units of the size of the device (file or window) to plot -#' in. Inches ('in') by default. See ?Devices and the creator function of -#' the corresponding device. -#'@param res Resolution of the device (file or window) to plot in. See -#' ?Devices and the creator function of the corresponding device. -#'@param close_device Whether to close the graphics device after plotting -#' the layout and a 'fileout' has been specified. This is useful to avoid -#' closing the device when saving the layout into a file and willing to add -#' extra elements or figures. Takes TRUE by default. Disregarded if no -#' 'fileout' has been specified. -#' -#'@return -#'\item{brks}{ -#' Breaks used for colouring the map (and legend if drawleg = TRUE). -#'} -#'\item{cols}{ -#' Colours used for colouring the map (and legend if drawleg = TRUE). -#' Always of length length(brks) - 1. -#'} -#'\item{col_inf}{ -#' Colour used to draw the lower triangle end in the colour bar -#' (NULL if not drawn at all). -#'} -#'\item{col_sup}{ -#' Colour used to draw the upper triangle end in the colour bar -#' (NULL if not drawn at all). 
-#'} -#'\item{layout_matrix}{ -#' Underlying matrix of the layout. Useful to later set any of the layout -#' cells as current figure to add plot elements. See .SwitchToFigure. -#'} -#' -#'@examples -#'# See examples on Load() to understand the first lines in this example -#' \dontrun{ -#'data_path <- system.file('sample_data', package = 's2dv') -#'expA <- list(name = 'experiment', path = file.path(data_path, -#' 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', -#' '$VAR_NAME$_$START_DATE$.nc')) -#'obsX <- list(name = 'observation', path = file.path(data_path, -#' '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', -#' '$VAR_NAME$_$YEAR$$MONTH$.nc')) -#' -#'# Now we are ready to use Load(). -#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- Load('tos', list(expA), list(obsX), startDates, -#' leadtimemin = 1, leadtimemax = 4, output = 'lonlat', -#' latmin = 27, latmax = 48, lonmin = -12, lonmax = 40) -#' } -#' \dontshow{ -#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), -#' c('observation'), startDates, -#' leadtimemin = 1, -#' leadtimemax = 4, -#' output = 'lonlat', -#' latmin = 27, latmax = 48, -#' lonmin = -12, lonmax = 40) -#' } -#'PlotLayout(PlotEquiMap, c('lat', 'lon'), sampleData$mod[1, , 1, 1, , ], -#' sampleData$lon, sampleData$lat, -#' toptitle = 'Predicted tos for Nov 1960 from 1st Nov', -#' titles = paste('Member', 1:15)) -#' -#'@importFrom grDevices dev.cur dev.new dev.off -#'@export -PlotLayout <- function(fun, plot_dims, var, ..., special_args = NULL, - nrow = NULL, ncol = NULL, toptitle = NULL, - row_titles = NULL, col_titles = NULL, bar_scale = 1, - title_scale = 1, title_margin_scale = 1, - title_left_shift_scale = 1, - subtitle_scale = 1, subtitle_margin_scale = 1, - brks = NULL, cols = NULL, drawleg = 'S', titles = NULL, - subsampleg = NULL, bar_limits = NULL, - triangle_ends = NULL, col_inf = NULL, col_sup = 
NULL, - color_fun = clim.colors, - draw_bar_ticks = TRUE, draw_separators = FALSE, - triangle_ends_scale = 1, bar_extra_labels = NULL, - units = NULL, units_scale = 1, bar_label_scale = 1, - bar_tick_scale = 1, bar_extra_margin = rep(0, 4), - bar_left_shift_scale = 1, bar_label_digits = 4, - extra_margin = rep(0, 4), layout_by_rows = TRUE, - fileout = NULL, width = NULL, height = NULL, - size_units = 'in', res = 100, close_device = TRUE) { - # If there is any filenames to store the graphics, process them - # to select the right device - if (!is.null(fileout)) { - deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, units = size_units, res = res) - saveToFile <- deviceInfo$fun - fileout <- deviceInfo$files - } - - is_single_na <- function(x) ifelse(length(x) > 1, FALSE, is.na(x)) - # Check var - if (!is.list(var) & (is.array(var) || (is_single_na(var)))) { - var <- list(var) - } else if (is.list(var)) { - if (!all(sapply(var, is.array) | sapply(var, is_single_na))) { - stop("Parameter 'var' must be an array or a list of arrays (or NA values).") - } - } else { - stop("Parameter 'var' must be an array or a list of arrays.") - } - - # Check fun - if (length(fun) == 1) { - if (is.function(fun)) { - fun <- as.character(substitute(fun)) - } - if (is.character(fun)) { - fun <- rep(fun, length(var)) - } - } - if (!is.character(fun) || (length(fun) != length(var))) { - stop("Parameter 'fun' must be a single function or a vector of function names, one for each array provided in parameter 'var'.") - } - - # Check special_args - if (!is.null(special_args)) { - if (!is.list(special_args) || any(!sapply(special_args, is.list))) { - stop("Parameter 'special_args' must be a list of lists.") - } else if (length(special_args) != length(var)) { - stop("Parameter 'special_args' must contain a list of special arguments for each array provided in 'var'.") - } - } - - # Check plot_dims - if (is.character(plot_dims) || is.numeric(plot_dims)) { - plot_dims <- 
replicate(length(var), plot_dims, simplify = FALSE) - } - if (!is.list(plot_dims) || !all(sapply(plot_dims, is.character) | sapply(plot_dims, is.numeric)) || - (length(plot_dims) != length(var))) { - stop("Parameter 'plot_dims' must contain a single numeric or character vector with dimension identifiers or a vector for each array provided in parameter 'var'.") - } - - # Check nrow - if (!is.null(nrow)) { - if (!is.numeric(nrow)) { - stop("Parameter 'nrow' must be numeric or NULL.") - } - nrow <- round(nrow) - } - - # Check ncol - if (!is.null(ncol)) { - if (!is.numeric(ncol)) { - stop("Parameter 'ncol' must be numeric or NULL.") - } - ncol <- round(ncol) - } - # Check layout_by_rows - if (!is.logical(layout_by_rows)) { - stop("Parameter 'layout_by_rows' must be logical.") - } - - # Check toptitle - if (is.null(toptitle) || is.na(toptitle)) { - toptitle <- '' - } - if (!is.character(toptitle)) { - stop("Parameter 'toptitle' must be a character string.") - } - - # Check row_titles - if (!is.null(row_titles)) { - if (!is.character(row_titles)) { - stop("Parameter 'row_titles' must be a vector of character strings.") - } - } - - # Check col_titles - if (!is.null(row_titles)) { - if (!is.character(row_titles)) { - stop("Parameter 'row_titles' must be a vector of character strings.") - } - } - - # Check drawleg - if (is.character(drawleg)) { - if (drawleg %in% c('up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N')) { - drawleg <- 'N' - } else if (drawleg %in% c('down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S')) { - drawleg <- 'S' - } else if (drawleg %in% c('right', 'r', 'R', 'east', 'e', 'E')) { - drawleg <- 'E' - } else if (drawleg %in% c('left', 'l', 'L', 'west', 'w', 'W')) { - drawleg <- 'W' - } else { - stop("Parameter 'drawleg' must be either TRUE, FALSE or a valid identifier of a position (see ?PlotMultiMap).") - } - } else if (!is.logical(drawleg)) { - stop("Parameter 'drawleg' must be either TRUE, FALSE or a valid identifier of a position (see 
?PlotMultiMap).") - } - if (drawleg != FALSE && all(sapply(var, is_single_na)) && - (is.null(brks) || length(brks) < 2)) { - stop("Either data arrays in 'var' or breaks in 'brks' must be provided if 'drawleg' is requested.") - } - - # Check the rest of parameters (unless the user simply wants to build an empty layout) - var_limits <- NULL - if (!all(sapply(var, is_single_na))) { - var_limits <- c(min(unlist(var), na.rm = TRUE), max(unlist(var), na.rm = TRUE)) - if ((any(is.infinite(var_limits)) || var_limits[1] == var_limits[2])) { - stop("Arrays in parameter 'var' must contain at least 2 different values.") - } - } - colorbar <- ColorBar(brks, cols, FALSE, subsampleg, bar_limits, - var_limits, triangle_ends, col_inf, col_sup, color_fun, - plot = FALSE, draw_bar_ticks, - draw_separators, triangle_ends_scale, bar_extra_labels, - units, units_scale, bar_label_scale, bar_tick_scale, - bar_extra_margin, bar_label_digits) - - # Check bar_scale - if (!is.numeric(bar_scale)) { - stop("Parameter 'bar_scale' must be numeric.") - } - - # Check bar_left_shift_scale - if (!is.numeric(bar_left_shift_scale)) { - stop("Parameter 'bar_left_shift_scale' must be numeric.") - } - - # Check title_scale - if (!is.numeric(title_scale)) { - stop("Parameter 'title_scale' must be numeric.") - } - - # Check title_margin_scale - if (!is.numeric(title_margin_scale)) { - stop("Parameter 'title_margin_scale' must be numeric.") - } - - # Check title_left_shift_scale - if (!is.numeric(title_left_shift_scale)) { - stop("Parameter 'title_left_shift_scale' must be numeric.") - } - - # Check subtitle_scale - if (!is.numeric(subtitle_scale)) { - stop("Parameter 'subtite_scale' must be numeric.") - } - - # Check subtitle_margin_scale - if (!is.numeric(subtitle_margin_scale)) { - stop("Parameter 'subtite_margin_scale' must be numeric.") - } - - # Check titles - if (!all(sapply(titles, is.character))) { - stop("Parameter 'titles' must be a vector of character strings.") - } - - # Check extra_margin - if 
(!is.numeric(extra_margin) || length(extra_margin) != 4) { - stop("Parameter 'extra_margin' must be a numeric vector with 4 elements.") - } - - # Check width - if (is.null(width)) { - if (is.null(fileout)) { - width <- 7 - } else { - width <- 11 - } - } - if (!is.numeric(width)) { - stop("Parameter 'width' must be numeric.") - } - - # Check height - if (is.null(height)) { - if (is.null(fileout)) { - height <- 7 - } else { - height <- 8 - } - } - if (!is.numeric(height)) { - stop("Parameter 'height' must be numeric.") - } - - # Check close_device - if (!is.logical(close_device)) { - stop("Parameter 'close_device' must be logical.") - } - - # Count the total number of maps and reorder each array of maps to have the lat and lon dimensions at the end. - n_plots <- 0 - plot_array_i <- 1 - for (plot_array in var) { - if (is_single_na(plot_array)) { - n_plots <- n_plots + 1 - } else { - dim_ids <- plot_dims[[plot_array_i]] - if (is.character(dim_ids)) { - dimnames <- NULL - if (!is.null(names(dim(plot_array)))) { - dimnames <- names(dim(plot_array)) - } else if (!is.null(attr(plot_array, 'dimensions'))) { - dimnames <- attr(plot_array, 'dimensions') - } - if (!is.null(dimnames)) { - if (any(!sapply(dim_ids, `%in%`, dimnames))) { - stop("All arrays provided in parameter 'var' must have all the dimensions in 'plot_dims'.") - } - dim_ids <- sapply(dim_ids, function(x) which(dimnames == x)[1]) - var[[plot_array_i]] <- Reorder(var[[plot_array_i]], c((1:length(dim(plot_array)))[-dim_ids], dim_ids)) - } else { - .warning(paste0("Assuming the ", plot_array_i, "th array provided in 'var' has 'plot_dims' as last dimensions (right-most).")) - dims <- tail(c(rep(1, length(dim_ids)), dim(plot_array)), length(dim_ids)) - dim_ids <- tail(1:length(dim(plot_array)), length(dim_ids)) - if (length(dim(var[[plot_array_i]])) < length(dims)) { - dim(var[[plot_array_i]]) <- dims - } - } - } else if (any(dim_ids > length(dim(plot_array)))) { - stop("Parameter 'plot_dims' contains dimension 
identifiers out of range.") - } - n_plots <- n_plots + prod(dim(plot_array)[-dim_ids]) - #n_plots <- n_plots + prod(head(c(rep(1, length(dim_ids)), dim(plot_array)), length(dim(plot_array)))) - if (length(dim(var[[plot_array_i]])) == length(dim_ids)) { - dim(var[[plot_array_i]]) <- c(1, dim(var[[plot_array_i]])) - dim_ids <- dim_ids + 1 - } - plot_dims[[plot_array_i]] <- dim_ids - } - plot_array_i <- plot_array_i + 1 - } - if (is.null(nrow) && is.null(ncol)) { - ncol <- ceiling(sqrt(n_plots)) - nrow <- ceiling(n_plots/ncol) - } else if (is.null(ncol)) { - ncol <- ceiling(n_plots/nrow) - } else if (is.null(nrow)) { - nrow <- ceiling(n_plots/ncol) - } else if (nrow * ncol < n_plots) { - stop("There are more arrays to plot in 'var' than cells defined by 'nrow' x 'ncol'.") - } - - if (is.logical(drawleg) && drawleg) { - if (nrow > ncol) { - drawleg <- 'S' - } else { - drawleg <- 'E' - } - } - vertical <- drawleg %in% c('E', 'W') - - # Open connection to graphical device - if (!is.null(fileout)) { - saveToFile(fileout) - } else if (names(dev.cur()) == 'null device') { - dev.new(units = size_units, res = res, width = width, height = height) - } else if (prod(par('mfrow')) > 1) { - dev.new(units = units, res = res, width = width, height = height) - } - - # Take size of device and set up layout: - # --------------------------------------------- - # |0000000000000000000000000000000000000000000| - # |0000000000000000 TOP TITLE 0000000000000000| - # |0000000000000000000000000000000000000000000| - # |-------------------------------------------| - # |00000|0000000000000000000000000000000000000| - # |00000|000000000000 ROW TITLES 0000000000000| - # |00000|0000000000000000000000000000000000000| - # |00000|-------------------------------------| - # |0 0|222222222222222222|333333333333333333| - # |0 C 0|222222222222222222|333333333333333333| - # |0 O 0|222222222222222222|333333333333333333| - # |0 L 0|2222 FIGURE 1 2222|3333 FIGURE 2 3333| - # |0 
0|222222222222222222|333333333333333333| - # |0 T 0|222222222222222222|333333333333333333| - # |0 I 0|222222222222222222|333333333333333333| - # |0 T 0|-------------------------------------| - # |0 L 0|444444444444444444|555555555555555555| - # |0 S 0|444444444444444444|555555555555555555| - # |0 0|444444444444444444|555555555555555555| - # |00000|4444 FIGURE 3 4444|5555 FIGURE 4 5555| - # |00000|444444444444444444|555555555555555555| - # |00000|444444444444444444|555555555555555555| - # |00000|444444444444444444|555555555555555555| - # |-------------------------------------------| - # |1111111111111111111111111111111111111111111| - # |1111111111111111 COLOR BAR 1111111111111111| - # |1111111111111111111111111111111111111111111| - # --------------------------------------------- - device_size <- par('din') - device_size[1] <- device_size[1] - sum(extra_margin[c(2, 4)]) - device_size[2] <- device_size[2] - sum(extra_margin[c(1, 3)]) - cs <- char_size <- par('csi') - title_cex <- 2.5 * title_scale - title_margin <- 0.5 * title_cex * title_margin_scale - subtitle_cex <- 1.5 * subtitle_scale - subtitle_margin <- 0.5 * sqrt(nrow * ncol) * subtitle_cex * subtitle_margin_scale - mat_layout <- 1:(nrow * ncol) - if (drawleg != FALSE) { - if (fun == 'PlotMostLikelyQuantileMap') { #multi_colorbar - multi_colorbar <- TRUE - cat_dim <- list(...)$cat_dim - nmap <- as.numeric(dim(var[[1]])[cat_dim]) - mat_layout <- mat_layout + nmap - } else { - multi_colorbar <- FALSE - mat_layout <- mat_layout + 1 - } - } - mat_layout <- matrix(mat_layout, nrow, ncol, byrow = layout_by_rows) - fsu <- figure_size_units <- 10 # unitless - widths <- rep(fsu, ncol) - heights <- rep(fsu, nrow) - # Useless -# n_figures <- nrow * ncol - - if (drawleg != FALSE) { - if (drawleg == 'N') { - mat_layout <- rbind(rep(1, dim(mat_layout)[2]), mat_layout) - heights <- c(round(bar_scale * 2 * nrow), heights) - } else if (drawleg == 'S') { - if (multi_colorbar) { - new_mat_layout <- c() - for (i_col in 1:ncol) { 
- new_mat_layout <- c(new_mat_layout, rep(mat_layout[, i_col], nmap)) - } - new_mat_layout <- matrix(new_mat_layout, nrow, nmap * ncol) - colorbar_row <- rep(1:nmap, each = ncol) - mat_layout <- rbind(new_mat_layout, as.numeric(colorbar_row)) - widths <- rep(widths, nmap) - } else { - mat_layout <- rbind(mat_layout, rep(1, dim(mat_layout)[2])) - } - heights <- c(heights, round(bar_scale * 2 * nrow)) - } else if (drawleg == 'W') { - mat_layout <- cbind(rep(1, dim(mat_layout)[1]), mat_layout) - widths <- c(round(bar_scale * 3 * ncol), widths) - } else if (drawleg == 'E') { - mat_layout <- cbind(mat_layout, rep(1, dim(mat_layout)[1])) - widths <- c(widths, round(bar_scale * 3 * ncol)) - } - # Useless -# n_figures <- n_figures + 1 - } - - # row and col titles - if (length(row_titles) > 0) { - mat_layout <- cbind(rep(0, dim(mat_layout)[1]), mat_layout) - widths <- c(((subtitle_cex + subtitle_margin / 2) * cs / device_size[1]) * ncol * fsu, widths) - } - if (length(col_titles) > 0) { - mat_layout <- rbind(rep(0, dim(mat_layout)[2]), mat_layout) - heights <- c(((subtitle_cex + subtitle_margin) * cs / device_size[2]) * nrow * fsu, heights) - } - # toptitle - if (toptitle != '') { - mat_layout <- rbind(rep(0, dim(mat_layout)[2]), mat_layout) - heights <- c(((title_cex + title_margin) * cs / device_size[2]) * nrow * fsu, heights) - } - par(oma = extra_margin) - layout(mat_layout, widths, heights) - # Draw the color bar - if (drawleg != FALSE) { - if (length(row_titles) > 0) { - bar_extra_margin[2] <- bar_extra_margin[2] + (subtitle_cex + subtitle_margin / 2) * - bar_left_shift_scale - } - - if (multi_colorbar) { # multiple colorbar - if (!is.null(list(...)$bar_titles)) { - bar_titles <- list(...)$bar_titles - } else { - bar_titles <- NULL - } - multi_ColorBar(nmap = nmap, - brks = brks, cols = cols, vertical = vertical, subsampleg = subsampleg, - bar_limits = bar_limits, var_limits = var_limits, - triangle_ends = triangle_ends, plot = TRUE, - draw_separators = 
draw_separators, - bar_titles = bar_titles, title_scale = units_scale, - label_scale = bar_label_scale, extra_margin = bar_extra_margin) - - } else { # one colorbar - ColorBar(brks = colorbar$brks, cols = colorbar$cols, vertical = vertical, subsampleg = subsampleg, - bar_limits = bar_limits, var_limits = var_limits, - triangle_ends = triangle_ends, col_inf = colorbar$col_inf, - col_sup = colorbar$col_sup, color_fun = color_fun, plot = TRUE, draw_ticks = draw_bar_ticks, - draw_separators = draw_separators, triangle_ends_scale = triangle_ends_scale, - extra_labels = bar_extra_labels, - title = units, title_scale = units_scale, label_scale = bar_label_scale, tick_scale = bar_tick_scale, - extra_margin = bar_extra_margin, label_digits = bar_label_digits) - - } - } - - # Draw titles - if (toptitle != '' || length(col_titles) > 0 || length(row_titles) > 0) { - plot(0, type = 'n', ann = FALSE, axes = FALSE, xaxs = 'i', yaxs = 'i', - xlim = c(0, 1), ylim = c(0, 1)) - width_lines <- par('fin')[1] / par('csi') - plot_lines <- par('pin')[1] / par('csi') - plot_range <- par('xaxp')[2] - par('xaxp')[1] - size_units_per_line <- plot_range / plot_lines - if (toptitle != '') { - title_x_center <- par('xaxp')[1] - par('mar')[2] * size_units_per_line + - ncol * width_lines * size_units_per_line / 2 - if (length(row_titles) > 0) { - title_x_center <- title_x_center - (1 - title_left_shift_scale) * - (subtitle_cex + subtitle_margin) / 2 * size_units_per_line - } - title_y_center <- par('mar')[3] + (title_margin + title_cex) / 2 - if (length(col_titles > 0)) { - title_y_center <- title_y_center + (subtitle_margin + subtitle_cex) - } - mtext(toptitle, cex = title_cex, line = title_y_center, at = title_x_center, - padj = 0.5) - } - if (length(col_titles) > 0) { - t_x_center <- par('xaxp')[1] - par('mar')[2] * size_units_per_line - for (t in 1:ncol) { - mtext(col_titles[t], cex = subtitle_cex, - line = par('mar')[3] + (subtitle_margin + subtitle_cex) / 2, - at = t_x_center + (t - 0.5) * 
width_lines * size_units_per_line, - padj = 0.5) - } - } - height_lines <- par('fin')[2] / par('csi') - plot_lines <- par('pin')[2] / par('csi') - plot_range <- par('yaxp')[2] - par('yaxp')[1] - size_units_per_line <- plot_range / plot_lines - if (length(row_titles) > 0) { - t_y_center <- par('yaxp')[1] - par('mar')[1] * size_units_per_line - for (t in 1:nrow) { - mtext(row_titles[t], cex = subtitle_cex, - line = par('mar')[2] + (subtitle_margin + subtitle_cex) / 2, - at = t_y_center - (t - 1.5) * height_lines * size_units_per_line, - padj = 0.5, side = 2) - } - } - par(new = TRUE) - } - - array_number <- 1 - plot_number <- 1 - # For each array provided in var - lapply(var, function(x) { - if (is_single_na(x)) { - if (!all(sapply(var[array_number:length(var)], is_single_na))) { - plot.new() - par(new = FALSE) - } - plot_number <<- plot_number + 1 - } else { - if (is.character(plot_dims[[array_number]])) { - plot_dim_indices <- which(names(dim(x)) %in% plot_dims[[array_number]]) - } else { - plot_dim_indices <- plot_dims[[array_number]] - } - # For each of the arrays provided in that array - apply(x, (1:length(dim(x)))[-plot_dim_indices], - function(y) { - # Do the plot. colorbar is not drew. - fun_args <- c(list(y, toptitle = titles[plot_number], drawleg = FALSE), list(...), - special_args[[array_number]]) -# funct <- fun[[array_number]] - if (fun[[array_number]] %in% c('PlotEquiMap', 'PlotStereoMap', 'PlotSection')) { - fun_args <- c(fun_args, list(brks = colorbar$brks, cols = colorbar$cols, - col_inf = colorbar$col_inf, - col_sup = colorbar$col_sup)) - } else if (fun[[array_number]] %in% 'PlotMostLikelyQuantileMap') { - #TODO: pre-generate colorbar params? 
like above - fun_args <- c(fun_args, list(brks = brks, cols = cols)) - } - do.call(fun[[array_number]], fun_args) - plot_number <<- plot_number + 1 - }) - } - array_number <<- array_number + 1 - }) - - # If the graphic was saved to file, close the connection with the device - if (!is.null(fileout) && close_device) dev.off() - - invisible(list(brks = colorbar$brks, cols = colorbar$cols, - col_inf = colorbar$col_inf, col_sup = colorbar$col_sup, - layout_matrix = mat_layout)) -} -- GitLab From 8ca31a17a907a31aa91ae563f6550e9a329726e5 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 18 Oct 2022 15:23:16 +0200 Subject: [PATCH 64/81] Rearrange function arguments in seasonal sample script --- modules/test_seasonal.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 5f59794f..eb7fbf41 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -12,13 +12,13 @@ archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets data <- load_datasets(recipe_file) # Calibrate datasets -calibrated_data <- calibrate_datasets(data, recipe) +calibrated_data <- calibrate_datasets(recipe, data) # Compute skill metrics -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) # Compute percentiles and probability bins -probabilities <- compute_probabilities(calibrated_data$hcst, recipe) +probabilities <- compute_probabilities(recipe, calibrated_data$hcst) # Export all data to netCDF -save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) +save_data(recipe, data, calibrated_data, skill_metrics, probabilities) # Plot data -plot_data(recipe, archive, data, calibrated_data, skill_metrics, - probabilities, significance = T) +plot_data(recipe, data, calibrated_data, skill_metrics, probabilities, + significance = T) -- GitLab 
From 6d4f80f2a158b94dd94179e46113aa9bef0506d4 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 18 Oct 2022 15:25:12 +0200 Subject: [PATCH 65/81] Rearrange function args in decadal sample script, comment archive line --- modules/test_decadal.R | 14 +++++++------- modules/test_seasonal.R | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/test_decadal.R b/modules/test_decadal.R index 01cf2d92..686d91bb 100644 --- a/modules/test_decadal.R +++ b/modules/test_decadal.R @@ -7,24 +7,24 @@ source("modules/Visualization/Visualization.R") recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml" recipe <- read_yaml(recipe_file) -archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive +# archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets data <- load_datasets(recipe_file) # Calibrate datasets -calibrated_data <- calibrate_datasets(data, recipe) +calibrated_data <- calibrate_datasets(recipe, data) # Compute skill metrics -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) # Compute percentiles and probability bins -probabilities <- compute_probabilities(calibrated_data$hcst, recipe) +probabilities <- compute_probabilities(recipe, calibrated_data$hcst) # Export all data to netCDF -save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) +save_data(recipe, data, calibrated_data, skill_metrics, probabilities) # Plot data -plot_data(recipe, archive, data, calibrated_data, skill_metrics, - probabilities, significance = T) +plot_data(recipe, data, calibrated_data, skill_metrics, probabilities, + significance = T) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index eb7fbf41..436f8c9e 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -7,7 +7,7 @@ 
source("modules/Visualization/Visualization.R") recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" recipe <- read_yaml(recipe_file) -archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive +# archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets data <- load_datasets(recipe_file) -- GitLab From 27598a8a7785094d9633f54ad92244f8454604aa Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 21 Oct 2022 14:38:51 +0200 Subject: [PATCH 66/81] Incorporate logger --- modules/Calibration/Calibration.R | 20 ++++---- modules/Loading/Loading.R | 41 ++++++---------- .../testing_recipes/recipe_test-logging.yml | 47 +++++++++++++++++++ modules/Saving/Saving.R | 29 +++++++----- modules/Skill/Skill.R | 15 ++++-- modules/Visualization/Visualization.R | 19 ++++---- modules/test_seasonal.R | 7 ++- tools/data_summary.R | 24 +++++----- tools/prepare_outputs.R | 30 +++++++----- 9 files changed, 145 insertions(+), 87 deletions(-) create mode 100644 modules/Loading/testing_recipes/recipe_test-logging.yml diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index d49dd9d9..3320a7f5 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -13,9 +13,9 @@ calibrate_datasets <- function(recipe, data) { method <- tolower(recipe$Analysis$Workflow$Calibration$method) if (method == "raw") { - warning("The Calibration module has been called, but the calibration ", - "method in the recipe is 'raw'. The hcst and fcst will not be ", - "calibrated.") + warn(recipe$Run$logger, "The Calibration module has been called, + but the calibration method in the recipe is 'raw'. 
+ The hcst and fcst will not be calibrated.") fcst_calibrated <- data$fcst hcst_calibrated <- data$hcst CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####" @@ -53,8 +53,9 @@ calibrate_datasets <- function(recipe, data) { ## TODO: implement other calibration methods ## TODO: Restructure the code? if (!(method %in% CST_CALIB_METHODS)) { - stop("Calibration method in the recipe is not available for monthly", - " data.") + error(recipe$Run$logger, "Calibration method in the recipe is not + available for monthly data.") + stop() } else { ## Alba's version of CST_Calibration (pending merge) is being used # Calibrate the hindcast @@ -89,8 +90,10 @@ calibrate_datasets <- function(recipe, data) { } else if (recipe$Analysis$Variables$freq == "daily_mean") { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { - stop("Calibration method in the recipe is not available at daily ", - "frequency. Only quantile mapping 'qmap' is implemented.") + error(recipe$Run$logger, "Calibration method in the recipe is not + available for daily data. Only quantile mapping 'qmap is + implemented.") + stop() } # Calibrate the hindcast hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, @@ -121,7 +124,6 @@ calibrate_datasets <- function(recipe, data) { } } } -print(CALIB_MSG) - ## TODO: Return observations too? 
+ info(recipe$Run$logger, CALIB_MSG) return(list(hcst = hcst_calibrated, fcst = fcst_calibrated)) } diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index a93be8ca..2493af35 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -5,24 +5,8 @@ source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") source("tools/libs.R") -# RECIPE FOR TESTING -# -------------------------------------------------------------------------------- -# recipe_file <- "modules/Loading/testing_recipes/recipe_3.yml" -# recipe_file <- "modules/Loading/testing_recipes/recipe_2.yml" -# recipe_file <- "modules/Loading/testing_recipes/recipe_1.yml" -load_datasets <- function(recipe_file) { - - recipe <- read_yaml(recipe_file) - recipe$filepath <- recipe_file - recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) - - ## TODO: this should come from the main script - # Create output folder and log: - logger <- prepare_outputs(recipe = recipe) - folder <- logger$foldername - log_file <- logger$logname - logger <- logger$logger +load_datasets <- function(recipe) { # ------------------------------------------- # Set params ----------------------------------------- @@ -40,7 +24,8 @@ load_datasets <- function(recipe_file) { store.freq <- recipe$Analysis$Variables$freq # get sdates array - sdates <- dates2load(recipe, logger) + ## LOGGER: Change dates2load to extract logger from recipe? 
+ sdates <- dates2load(recipe, recipe$Run$logger) idxs <- NULL idxs$hcst <- get_timeidx(sdates$hcst, @@ -308,7 +293,8 @@ load_datasets <- function(recipe_file) { # Remove negative values in accumulative variables dictionary <- read_yaml("conf/variable-dictionary.yml") if (dictionary$vars[[variable]]$accum) { - info(logger, "Accumulated variable: setting negative values to zero.") + info(recipe$Run$logger, + "Accumulated variable: setting negative values to zero.") obs$data[obs$data < 0] <- 0 hcst$data[hcst$data < 0] <- 0 if (!is.null(fcst)) { @@ -324,7 +310,7 @@ load_datasets <- function(recipe_file) { attr(hcst$Variable, "variable")$units) && (attr(obs$Variable, "variable")$units == "m s-1")) { - info(logger, "Converting precipitation from m/s to mm/day.") + info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") obs$data <- obs$data*84000*1000 attr(obs$Variable, "variable")$units <- "mm/day" hcst$data <- hcst$data*84000*1000 @@ -337,13 +323,14 @@ load_datasets <- function(recipe_file) { } # Print a summary of the loaded data for the user, for each object - data_summary(hcst, store.freq) - data_summary(obs, store.freq) + data_summary(hcst, recipe) + data_summary(obs, recipe) if (!is.null(fcst)) { - data_summary(fcst, store.freq) + data_summary(fcst, recipe) } - print("##### DATA LOADING COMPLETED SUCCESSFULLY #####") + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") ############################################################################ # @@ -360,7 +347,7 @@ load_datasets <- function(recipe_file) { # freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") # #if (any(is.na(hcst))){ - # fatal(logger, + # fatal(recipe$Run$logger, # paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", # " ################################################# ", # " ###### MISSING FILES #### ", @@ -374,7 +361,7 @@ load_datasets <- function(recipe_file) { #} # #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ - # 
fatal(logger, + # fatal(recipe$logger, # paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", # " ################################################# ", # " ###### MISSING FILES #### ", @@ -387,7 +374,7 @@ load_datasets <- function(recipe_file) { # quit(status=1) #} # - #info(logger, + #info(recipe$logger, # "######### DATA LOADING COMPLETED SUCCESFULLY ##############") ############################################################################ diff --git a/modules/Loading/testing_recipes/recipe_test-logging.yml b/modules/Loading/testing_recipes/recipe_test-logging.yml new file mode 100644 index 00000000..372f6d83 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_test-logging.yml @@ -0,0 +1,47 @@ +Description: + Author: V. Agudetse + Info: Light recipe to raise some errors/warnings and test the logging system + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 1 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: qmap + Skill: + metric: mean_bias bias_SS + Probabilities: + percentiles: + Indicators: + index: no + ncores: 1 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index cf2ec1d0..ed0933f2 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -215,7 +215,8 @@ save_forecast <- function(data_cube, leadtimes <- as.numeric(dates - init_date)/3600 syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) # expect dim = [sday = 1, sweek = 1, syear, 
time] + # expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) for (i in syears) { # Select year from array and rearrange dimensions fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) @@ -254,8 +255,9 @@ save_forecast <- function(data_cube, # Select start date if (fcst.horizon == 'decadal') { - #NOTE: Not good to use data_cube$load_parameters$dat1 because decadal data has been reshaped -# fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') + ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal + ## data has been reshaped + # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') # init_date is like "1990-11-01" init_date <- as.POSIXct(init_date) @@ -289,7 +291,7 @@ save_forecast <- function(data_cube, ArrayToNc(vars, outfile) } } - print("##### FCST SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, "##### FCST SAVED TO NETCDF FILE #####") } @@ -337,7 +339,8 @@ save_observations <- function(data_cube, leadtimes <- as.numeric(dates - init_date)/3600 syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) # expect dim = [sday = 1, sweek = 1, syear, time] + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) for (i in syears) { # Select year from array and rearrange dimensions fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) @@ -424,11 +427,11 @@ save_observations <- function(data_cube, ArrayToNc(vars, outfile) } } - print("##### OBS SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") } ## TODO: Place inside a function somewhere -# if (tolower(agg) == "country"){ +# if (tolower(agg) == "country") { # load(mask.path) # grid <- europe.countries.iso # } else { @@ -541,7 +544,7 @@ save_metrics <- function(skill, vars <- c(vars, skill) ArrayToNc(vars, outfile) } - 
print("##### SKILL METRICS SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") } save_corr <- function(skill, @@ -648,7 +651,8 @@ save_corr <- function(skill, vars <- c(vars, skill) ArrayToNc(vars, outfile) } - print("##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, + "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") } save_percentiles <- function(percentiles, @@ -747,7 +751,7 @@ save_percentiles <- function(percentiles, vars <- c(vars, percentiles) ArrayToNc(vars, outfile) } - print("##### PERCENTILES SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") } save_probabilities <- function(probs, @@ -793,7 +797,8 @@ save_probabilities <- function(probs, leadtimes <- as.numeric(dates - init_date)/3600 syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) # expect dim = [sday = 1, sweek = 1, syear, time] + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) for (i in syears) { # Select year from array and rearrange dimensions probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') @@ -855,5 +860,5 @@ save_probabilities <- function(probs, ArrayToNc(vars, outfile) } } - print("##### PROBABILITIES SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, "##### PROBABILITIES SAVED TO NETCDF FILE #####") } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 726cd605..11365ba8 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -186,7 +186,9 @@ compute_skill_metrics <- function(recipe, exp, obs) { metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', 'rpss'))) { - stop("Some of the requested metrics are not available.") + ## TODO: Test this scenario + warn(recipe$Run$logger, + 
"Some of the requested metrics are not available.") } capture.output( skill <- Compute_verif_metrics(exp$data, obs$data, @@ -203,7 +205,7 @@ compute_skill_metrics <- function(recipe, exp, obs) { skill_metrics[[ metric ]] <- skill } } - print("##### SKILL METRIC COMPUTATION COMPLETE #####") + info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####") return(skill_metrics) } @@ -224,8 +226,9 @@ compute_probabilities <- function(recipe, data) { named_probs <- list() named_quantiles <- list() if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { - stop("Quantiles and probability bins have been requested, but no ", - "thresholds are provided in the recipe.") + error(recipe$Run$logger, "Quantiles and probability bins have been + requested, but no thresholds are provided in the recipe.") + stop() } else { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { # Parse thresholds in recipe @@ -261,10 +264,12 @@ compute_probabilities <- function(recipe, data) { named_probs <- lapply(named_probs, function(x) {.drop_dims(x)}) named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)}) } - print("##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") + info(recipe$Run$logger, + "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") return(list(probs=named_probs, percentiles=named_quantiles)) } +## TODO: Replace with ClimProjDiags::Subset .drop_dims <- function(metric_array) { # Drop all singleton dimensions metric_array <- drop(metric_array) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 4b63de8c..28879742 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -53,8 +53,8 @@ plot_data <- function(recipe, if (!is.null(calibrated_data$fcst)) { plot_ensemble_mean(recipe, archive, calibrated_data$fcst, outdir) } else if (!is.null(data$fcst)) { - warning("Only the uncalibrated forecast was provided. 
Using this data ", - "to plot the forecast ensemble mean.") + warn(recipe$Run$logger, "Only the uncalibrated forecast was provided. + Using this data to plot the forecast ensemble mean.") plot_ensemble_mean(recipe, archive, data$fcst, outdir) } @@ -63,8 +63,8 @@ plot_data <- function(recipe, plot_most_likely_terciles(recipe, archive, calibrated_data$fcst, probabilities$percentiles, outdir) } else if ((!is.null(probabilities)) && (!is.null(data$fcst))) { - warning("Only the uncalibrated forecast was provided. Using this data ", - "to plot the most likely terciles.") + warn(recipe$Run$logger, "Only the uncalibrated forecast was provided. + Using this data to plot the most likely terciles.") plot_most_likely_terciles(recipe, archive, data$fcst, probabilities$percentiles, outdir) } @@ -78,7 +78,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, stop("Visualization functions not yet implemented for daily data.") } # Abort if skill_metrics is not list - if (!is.list(skill_metrics)) { + if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { stop("The element 'skill_metrics' must be a list of named arrays.") } @@ -190,7 +190,8 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, } } - print("##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") + info(recipe$Run$logger, + "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") } plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { @@ -263,7 +264,8 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { bar_label_digits = 4) } - print("##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") + info(recipe$Run$logger, + "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") } plot_most_likely_terciles <- function(recipe, archive, @@ -347,5 +349,6 @@ plot_most_likely_terciles <- function(recipe, archive, ) } - print("##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") + info(recipe$Run$logger, + "##### 
MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 436f8c9e..4071b648 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -1,16 +1,15 @@ - source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" -recipe <- read_yaml(recipe_file) +recipe_file <- "modules/Loading/testing_recipes/recipe_test-logging.yml" +recipe <- prepare_outputs(recipe_file) # archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) # Calibrate datasets calibrated_data <- calibrate_datasets(recipe, data) # Compute skill metrics diff --git a/tools/data_summary.R b/tools/data_summary.R index e211e202..34b6bd6e 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -4,27 +4,29 @@ ## TODO: Adapt to daily/subseasonal cases ## TODO: Add check for missing files/NAs by dimension -data_summary <- function(object, frequency) { +data_summary <- function(data_cube, recipe) { # Get name, leadtime months and date range - object_name <- deparse(substitute(object)) - if (tolower(frequency) == "monthly_mean") { + object_name <- deparse(substitute(data_cube)) + if (recipe$Analysis$Variables$freq == "monthly_mean") { date_format <- '%b %Y' - } else if (tolower(frequency) == "daily_mean") { + } else if (recipe$Analysis$Variables$freq == "daily_mean") { date_format <- '%b %d %Y' } - months <- unique(format(as.Date(object$Dates[[1]]), format = '%B')) + months <- unique(format(as.Date(data_cube$Dates[[1]]), format = '%B')) months <- paste(as.character(months), collapse=", ") - sdate_min <- format(min(as.Date(object$Dates[[1]])), format = date_format) - sdate_max <- format(max(as.Date(object$Dates[[1]])), 
format = date_format) + sdate_min <- format(min(as.Date(data_cube$Dates[[1]])), format = date_format) + sdate_max <- format(max(as.Date(data_cube$Dates[[1]])), format = date_format) - print("DATA SUMMARY:") + # Create log instance and sink output to logfile and terminal + info(recipe$Run$logger, "DATA SUMMARY:") + sink(recipe$Run$logfile, append = TRUE, split = TRUE) print(paste0(object_name, " months: ", months)) print(paste0(object_name, " range: ", sdate_min, " to ", sdate_max)) print(paste0(object_name, " dimensions: ")) - print(dim(object$data)) + print(dim(data_cube$data)) print(paste0("Statistical summary of the data in ", object_name, ":")) - print(summary(object$data)) + print(summary(data_cube$data)) print("---------------------------------------------") - + sink() } diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 18cc2e58..9c557046 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -5,28 +5,30 @@ #'the recipe. It returns an object of class logger that stores information on #'the recipe configuration and errors. 
#' -#'@param recipe Auto-S2S configuration recipe as returned by read_yaml() +#'@param recipe_file path to a YAML file with Auto-S2S configuration recipe #' -#'@return list contaning logger object, log filename and log directory name +#'@return list contaning recipe with logger, log file name and log dir name #' #'@import log4r +#'@import yaml #' #'@examples #'setwd("/esarchive/scratch/vagudets/repos/auto-s2s/") #'library(yaml) -#'recipe <- read_yaml("modules/data_load/recipe_1.yml") -#'logger <- prepare_outputs(recipe) -#'folder <- logger$foldername -#'log_file <- logger$logname -#'logger <- logger$logger +#'recipe <- prepare_outputs("modules/data_load/recipe_1.yml") +#'info(recipe$Run$logger, "This is an info message") #' #'@export -prepare_outputs <- function(recipe) { +prepare_outputs <- function(recipe_file) { # recipe: the content of the readed recipe # file: the recipe file name + recipe <- read_yaml(recipe_file) + recipe$recipe_path <- recipe_file + recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) + output_dir = recipe$Run$output_dir # Create output folders: folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)), "_", @@ -43,6 +45,7 @@ prepare_outputs <- function(recipe) { file.copy(recipe$filepath, file.path(output_dir, folder_name, 'logs')) logfile <- file.path(output_dir, folder_name, 'logs', 'log.txt') + file.create(logfile) # Set default behaviour of log output file: if (is.null(recipe$Run)) { @@ -51,6 +54,7 @@ prepare_outputs <- function(recipe) { if (is.null(recipe$Run$Loglevel)) { recipe$Run$Loglevel <- 'INFO' } + if (!is.logical(recipe$Run$Terminal)) { recipe$Run$Terminal <- TRUE } @@ -61,9 +65,13 @@ prepare_outputs <- function(recipe) { layout = default_log_layout()))) } else { logger <- logger(threshold = recipe$Run$Loglevel, - appenders = list(file_appender(logfile, append = TRUE, + appenders = list(file_appende(logfile, append = TRUE, layout = default_log_layout()))) } - return(list(logger = logger, logname = 
logfile, - foldername = file.path(output_dir, folder_name))) + + recipe$Run$output_dir <- file.path(output_dir, folder_name) + recipe$Run$logger <- logger + recipe$Run$logfile <- logfile + + return(recipe) } -- GitLab From dce9ddff5d7241c3f89a8318cfb403d4cff9522f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 21 Oct 2022 15:00:29 +0200 Subject: [PATCH 67/81] Update AbsBiasSS() with bugfix --- modules/Skill/Skill.R | 4 + modules/Skill/tmp/AbsBiasSS.R | 281 +++++++++++++++++++++++++++++ modules/Skill/tmp/Bias.R | 189 +++++++++++++++++++ modules/Skill/tmp/RandomWalkTest.R | 82 +++++++++ 4 files changed, 556 insertions(+) create mode 100644 modules/Skill/tmp/AbsBiasSS.R create mode 100644 modules/Skill/tmp/Bias.R create mode 100644 modules/Skill/tmp/RandomWalkTest.R diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 9a1df4f8..253553ca 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -10,6 +10,10 @@ source("modules/Skill/compute_quants.R") source("modules/Skill/compute_probs.R") source("modules/Skill/s2s.metrics.R") +## TODO: Remove when new version of s2dv is released +source("modules/Skill/tmp/RandomWalkTest.R") +source("modules/Skill/tmp/Bias.R") +source("modules/Skill/tmp/AbsBiasSS.R") ## TODO: Implement this in the future ## Which parameter are required? diff --git a/modules/Skill/tmp/AbsBiasSS.R b/modules/Skill/tmp/AbsBiasSS.R new file mode 100644 index 00000000..0ceb009c --- /dev/null +++ b/modules/Skill/tmp/AbsBiasSS.R @@ -0,0 +1,281 @@ +#'Compute the Absolute Mean Bias Skill Score +#' +#'The Absolute Mean Bias Skill Score is based on the Absolute Mean Error (Wilks, +#' 2011) between the ensemble mean forecast and the observations. It measures +#'the accuracy of the forecast in comparison with a reference forecast to assess +#'whether the forecast presents an improvement or a worsening with respect to +#'that reference. The Mean Bias Skill Score ranges between minus infinite and 1. 
+#'Positive values indicate that the forecast has higher skill than the reference +#'forecast, while negative values indicate that it has a lower skill. Examples +#'of reference forecasts are the climatological forecast (average of the +#'observations), a previous model version, or another model. It is computed as +#'\code{AbsBiasSS = 1 - AbsBias_exp / AbsBias_ref}. The statistical significance +#'is obtained based on a Random Walk test at the 95% confidence level (DelSole +#'and Tippett, 2016). If there is more than one dataset, the result will be +#'computed for each pair of exp and obs data. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +#' 'dat_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension. The dimensions must be the same as 'exp' except +#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +#' not have dataset dimension. If there is corresponding reference for each +#' experiement, the dataset dimension must have the same length as in 'exp'. If +#' 'ref' is NULL, the climatological forecast is used as reference forecast. +#' The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' +#' and 'ref' are already the ensemble mean. The default value is NULL. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. 
+#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or +#' kept (FALSE) for computation. The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'\item{$biasSS}{ +#' A numerical array of BiasSS with dimensions nexp, nobs and the rest +#' dimensions of 'exp' except 'time_dim' and 'memb_dim'. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance of the BiasSS +#' with the same dimensions as $biasSS. nexp is the number of +#' experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +#' (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. +#'} +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) +#'ref <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) +#'biasSS1 <- AbsBiasSS(exp = exp, obs = obs, ref = ref, memb_dim = 'member') +#'biasSS2 <- AbsBiasSS(exp = exp, obs = obs, ref = NULL, memb_dim = 'member') +#' +#'@import multiApply +#'@export +AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL, + dat_dim = NULL, na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (!is.array(obs) | !is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (!is.null(ref)) { + if (!is.array(ref) | 
!is.numeric(ref)) + stop("Parameter 'ref' must be a numeric array.") + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp, obs, and ref (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) 
{ + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) + } + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################ + + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = na.rm) + if (!is.null(ref) & memb_dim %in% names(dim(ref))) { + ref <- MeanDims(ref, memb_dim, na.rm = na.rm) + } + } + + ## Mean bias skill score + if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { + target_dims_ref <- c(time_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim) + } + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = target_dims_ref) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim)) + } + + output <- Apply(data, + 
target_dims = target_dims, + fun = .AbsBiasSS, + dat_dim = dat_dim, + na.rm = na.rm, + ncores = ncores) + + return(output) +} + +.AbsBiasSS <- function(exp, obs, ref = NULL, dat_dim = NULL, na.rm = FALSE) { + # exp and obs: [sdate, (dat_dim)] + # ref: [sdate, (dat_dim)] or NULL + + # Adjust exp, obs, ref to have dat_dim temporarily + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + exp <- InsertDim(exp, posdim = 2, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = 'dataset') + if (!is.null(ref)) { + ref <- InsertDim(ref, posdim = 2, lendim = 1, name = 'dataset') + } + ref_dat_dim <- FALSE + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + if (length(dim(ref)) == 1) { # ref: [sdate] + ref_dat_dim <- FALSE + } else { + ref_dat_dim <- TRUE + } + } + + biasSS <- array(dim = c(nexp = nexp, nobs = nobs)) + sign <- array(dim = c(nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + exp_data <- exp[, i] + if (isTRUE(ref_dat_dim)) { + ref_data <- ref[, i] + } else { + ref_data <- ref + } + for (j in 1:nobs) { + obs_data <- obs[, j] + + if (isTRUE(na.rm)) { + if (is.null(ref)) { + good_values <- !is.na(exp_data) & !is.na(obs_data) + exp_data <- exp_data[good_values] + obs_data <- obs_data[good_values] + } else { + good_values <- !is.na(exp_data) & !is.na(ref_data) & !is.na(obs_data) + exp_data <- exp_data[good_values] + ref_data <- ref_data[good_values] + obs_data <- obs_data[good_values] + } + } + + ## Bias of the exp + bias_exp <- .Bias(exp = exp_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = FALSE) + ## Bias of the ref + if (is.null(ref)) { ## Climatological forecast + ref_data <- rep(mean(obs_data, na.rm = na.rm), length(obs_data)) + } + bias_ref <- .Bias(exp = ref_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = FALSE) + ## Skill score and significance + biasSS[i, j] <- 1 - mean(bias_exp) / mean(bias_ref) + sign[i, j] <- .RandomWalkTest(skill_A = bias_exp, 
skill_B = bias_ref)$signif + } + } + + if (is.null(dat_dim)) { + dim(biasSS) <- NULL + dim(sign) <- NULL + } + + + return(list(biasSS = biasSS, sign = sign)) +} diff --git a/modules/Skill/tmp/Bias.R b/modules/Skill/tmp/Bias.R new file mode 100644 index 00000000..0319a0f0 --- /dev/null +++ b/modules/Skill/tmp/Bias.R @@ -0,0 +1,189 @@ +#'Compute the Mean Bias +#' +#'The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference +#'between the ensemble mean forecast and the observations. It is a deterministic +#'metric. Positive values indicate that the forecasts are on average too high +#'and negative values indicate that the forecasts are on average too low. +#'It also allows to compute the Absolute Mean Bias or bias without temporal +#'mean. If there is more than one dataset, the result will be computed for each +#'pair of exp and obs data. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +#' 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter +#' 'exp' is already the ensemble mean. The default value is NULL. +#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or +#' kept (FALSE) for computation. The default value is FALSE. +#'@param absolute A logical value indicating whether to compute the absolute +#' bias. The default value is FALSE. 
+#'@param time_mean A logical value indicating whether to compute the temporal +#' mean of the bias. The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number +#'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) +#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE, + absolute = FALSE, time_mean = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop("Parameter 'exp' must be a numeric array.") + if (!is.array(obs) | !is.numeric(obs)) + stop("Parameter 'obs' must be a numeric array.") + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop("Parameter 'time_dim' must be a character string.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character 
string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.")) + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## absolute + if (!is.logical(absolute) | length(absolute) > 1) { + stop("Parameter 'absolute' must be one logical value.") + } + ## time_mean + if (!is.logical(time_mean) | length(time_mean) > 1) { + stop("Parameter 'time_mean' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + ## 
Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = na.rm) + } + + ## (Mean) Bias + bias <- Apply(data = list(exp, obs), + target_dims = c(time_dim, dat_dim), + fun = .Bias, + time_dim = time_dim, + dat_dim = dat_dim, + na.rm = na.rm, + absolute = absolute, + time_mean = time_mean, + ncores = ncores)$output1 + + return(bias) +} + + +.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE, + absolute = FALSE, time_mean = TRUE) { + # exp and obs: [sdate, (dat)] + + if (is.null(dat_dim)) { + bias <- exp - obs + + if (isTRUE(absolute)) { + bias <- abs(bias) + } + + if (isTRUE(time_mean)) { + bias <- mean(bias, na.rm = na.rm) + } + + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + bias[, i, j] <- exp[, i] - obs[, j] + } + } + + if (isTRUE(absolute)) { + bias <- abs(bias) + } + + if (isTRUE(time_mean)) { + bias <- MeanDims(bias, time_dim, na.rm = na.rm) + } + } + + return(bias) +} diff --git a/modules/Skill/tmp/RandomWalkTest.R b/modules/Skill/tmp/RandomWalkTest.R new file mode 100644 index 00000000..adeadc1e --- /dev/null +++ b/modules/Skill/tmp/RandomWalkTest.R @@ -0,0 +1,82 @@ +#'Random walk test for skill differences +#' +#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a +#'common reference) based on Random Walks, with significance estimate at the 95% +#'confidence level, as in DelSole and Tippett (2016). +#' +#'@param skill_A A numerical array of the time series of the skill with the +#' forecaster A's. +#'@param skill_B A numerical array of the time series of the skill with the +#' forecaster B's. The dimensions should be identical as parameter 'skill_A'. +#'@param time_dim A character string indicating the name of the dimension along +#' which the tests are computed. The default value is 'sdate'. 
+#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of 2: +#'\item{$score}{ +#' A numerical array with the same dimensions as the input arrays except +#' 'time_dim'. The number of times that forecaster A has been better than +#' forecaster B minus the number of times that forecaster B has been better +#' than forecaster A (for skill positively oriented). If $score is positive +#' forecaster A is better than forecaster B, and if $score is negative +#' forecaster B is better than forecaster B. +#'} +#'\item{$signif}{ +#' A logical array with the same dimensions as the input arrays except +#' 'time_dim'. Whether the difference is significant or not at the 5% +#' significance level. +#'} +#' +#'@examples +#' fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) +#' fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2)) +#' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) +#' skill_A <- abs(fcst_A - reference) +#' skill_B <- abs(fcst_B - reference) +#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) +#' +#'@import multiApply +#'@export +RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){ + + ## Check inputs + if (is.null(skill_A) | is.null(skill_B)){ + stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") + } + if(!is.numeric(skill_A) | !is.numeric(skill_B)){ + stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") + } + if (!identical(dim(skill_A),dim(skill_B))){ + stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") + } + if(!is.character(time_dim)){ + stop("Parameter 'time_dim' must be a character string.") + } + if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){ + stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") + } + if (!is.null(ncores)){ + if (!is.numeric(ncores) | ncores %% 
1 != 0 | ncores <= 0 | length(ncores) > 1){ + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ## Compute the Random Walk Test + res <- multiApply::Apply(data = list(skill_A, skill_B), + target_dims = time_dim, + fun = .RandomWalkTest, + ncores = ncores) + return(res) +} + +.RandomWalkTest <- function(skill_A, skill_B){ + + score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) + + ## TRUE if significant (if last value is above or below 2*sqrt(N)) + signif<- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(length(skill_A))) | (score[length(skill_A)] > 2*sqrt(length(skill_A))), + yes = TRUE, no = FALSE) + + return(list("score"=score[length(skill_A)],"signif"=signif)) +} -- GitLab From 9f25dac89166c146d8039de6c3f9c27e4175c765 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 24 Oct 2022 14:59:04 +0200 Subject: [PATCH 68/81] Modify decadal scripts, fix pipeline attempt --- modules/Loading/Loading_decadal.R | 23 +++++++++-------------- modules/test_decadal.R | 4 ++-- modules/test_seasonal.R | 2 +- tests/testthat/test-decadal_daily_1.R | 4 ++-- tests/testthat/test-decadal_monthly_1.R | 4 ++-- tests/testthat/test-decadal_monthly_2.R | 5 ++--- tests/testthat/test-decadal_monthly_3.R | 4 ++-- tests/testthat/test-seasonal_daily.R | 7 +++---- tests/testthat/test-seasonal_monthly.R | 5 +++-- tools/prepare_outputs.R | 9 +++++---- 10 files changed, 31 insertions(+), 36 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 9c4bb33d..2f6d0310 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -13,18 +13,14 @@ source("modules/Loading/check_latlon.R") source("tools/libs.R") ## TODO: Remove once the fun is included in CSTools source("tools/tmp/as.s2dv_cube.R") - +## TODO: Change stops to logger error messages #==================================================================== # recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml" # 
recipe_file <- "modules/Loading/testing_recipes/recipe_decadal_daily.yml" -load_datasets <- function(recipe_file) { - - recipe <- read_yaml(recipe_file) - recipe$filepath <- recipe_file - recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) +load_datasets <- function(recipe) { archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive @@ -33,11 +29,7 @@ load_datasets <- function(recipe_file) { ## TODO: this should come from the main script # Create output folder and log: - logger <- prepare_outputs(recipe = recipe) - folder <- logger$foldername - log_file <- logger$logname - logger <- logger$logger - + #------------------------- # Read from recipe: #------------------------- @@ -413,7 +405,8 @@ load_datasets <- function(recipe_file) { # Remove negative values in accumulative variables dictionary <- read_yaml("conf/variable-dictionary.yml") if (dictionary$vars[[variable]]$accum) { - info(logger, " Accumulated variable: setting negative values to zero.") + info(recipe$Run$logger, + " Accumulated variable: setting negative values to zero.") obs$data[obs$data < 0] <- 0 hcst$data[hcst$data < 0] <- 0 if (!is.null(fcst)) { @@ -428,7 +421,8 @@ load_datasets <- function(recipe_file) { attr(hcst$Variable, "variable")$units) && (attr(obs$Variable, "variable")$units == "m s-1")) { - info(logger, "Converting precipitation from m/s to mm/day.") + info(recipe$Run$logger, + "Converting precipitation from m/s to mm/day.") obs$data <- obs$data*84000*1000 attr(obs$Variable, "variable")$units <- "mm/day" hcst$data <- hcst$data*84000*1000 @@ -451,7 +445,8 @@ load_datasets <- function(recipe_file) { data_summary(fcst, store.freq) } - print("##### DATA LOADING COMPLETED SUCCESSFULLY #####") + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") return(list(hcst = hcst, fcst = fcst, obs = obs)) diff --git a/modules/test_decadal.R b/modules/test_decadal.R index 686d91bb..80304f97 100644 --- a/modules/test_decadal.R +++ 
b/modules/test_decadal.R @@ -6,11 +6,11 @@ source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml" -recipe <- read_yaml(recipe_file) +recipe <- prepare_outputs(recipe_file) # archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) # Calibrate datasets calibrated_data <- calibrate_datasets(recipe, data) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 4071b648..d8eb5c4e 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -4,7 +4,7 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_test-logging.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" recipe <- prepare_outputs(recipe_file) # archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index a78fd135..c9833d2b 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -8,12 +8,12 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_daily_1.yml" -recipe <- read_yaml(recipe_file) +recipe <- prepare_outputs(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) ## Calibrate datasets diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 39c8d900..5cf1922e 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -9,12 +9,12 @@ 
source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_1.yml" -recipe <- read_yaml(recipe_file) +recipe <- prepare_outputs(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) # Calibrate datasets diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 98fa66cb..4dd72ebf 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -8,12 +8,11 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml" -recipe <- read_yaml(recipe_file) -archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive +recipe <- prepare_outputs(recipe_file) # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) # Calibrate datasets diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 22f47d4c..2a5f7ef9 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -8,12 +8,12 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_3.yml" -recipe <- read_yaml(recipe_file) +recipe <- prepate_outputs(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) # Calibrate datasets diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 11c01f19..4fb4a71e 100644 --- a/tests/testthat/test-seasonal_daily.R +++ 
b/tests/testthat/test-seasonal_daily.R @@ -6,14 +6,13 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-seasonal_daily_1.yml" - +recipe <- prepare_outputs(recipe_file) # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) -recipe <- read_yaml(recipe_file) - +# Calibrate data suppressWarnings({invisible(capture.output( calibrated_data <- calibrate_datasets(recipe, data) ))}) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 9423cde9..86feedfb 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -7,14 +7,15 @@ source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") recipe_file <- "tests/recipes/recipe-seasonal_monthly_1.yml" -recipe <- read_yaml(recipe_file) +recipe <- prepare_outputs(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) +# Calibrate data suppressWarnings({invisible(capture.output( calibrated_data <- calibrate_datasets(recipe, data) ))}) diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 9c557046..8a683178 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -34,15 +34,16 @@ prepare_outputs <- function(recipe_file) { folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)), "_", gsub(" ", "", gsub(":", "", gsub("-", "", Sys.time())))) + print("Saving all outputs to:") print(output_dir) print(folder_name) - dir.create(file.path(output_dir, folder_name, 'plots'), recursive = TRUE) - dir.create(file.path(output_dir, folder_name, 'outputs')) + dir.create(file.path(output_dir, folder_name, 'outputs'), recursive = TRUE) dir.create(file.path(output_dir, folder_name, 'logs')) 
dir.create(file.path(output_dir, folder_name, 'logs', 'recipes')) - file.copy(recipe$filepath, file.path(output_dir, folder_name, 'logs')) + file.copy(recipe$recipe_path, file.path(output_dir, folder_name, 'logs', + 'recipes')) logfile <- file.path(output_dir, folder_name, 'logs', 'log.txt') file.create(logfile) @@ -51,7 +52,7 @@ prepare_outputs <- function(recipe_file) { if (is.null(recipe$Run)) { recipe$Run <- list(Loglevel = 'INFO', Terminal = TRUE) } - if (is.null(recipe$Run$Loglevel)) { + if (is.null(recipe$Run$Loglevel)) { recipe$Run$Loglevel <- 'INFO' } -- GitLab From f78b9a769c5afb3de0b82a70bf68830656f1ccee Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 25 Oct 2022 09:39:43 +0200 Subject: [PATCH 69/81] fix decadal pipeline --- modules/Loading/Loading_decadal.R | 6 +++--- tests/testthat/test-decadal_monthly_3.R | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 2f6d0310..7b6b352f 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -439,10 +439,10 @@ load_datasets <- function(recipe) { #------------------------------------------- # Print a summary of the loaded data for the user, for each object - data_summary(hcst, store.freq) - data_summary(obs, store.freq) + data_summary(hcst, recipe) + data_summary(obs, recipe) if (!is.null(fcst)) { - data_summary(fcst, store.freq) + data_summary(fcst, recipe) } info(recipe$Run$logger, diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 2a5f7ef9..7535e8dc 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -8,7 +8,7 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_3.yml" -recipe <- prepate_outputs(recipe_file) +recipe <- prepare_outputs(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, 
"conf/archive_decadal.yml"))$archive # Load datasets -- GitLab From 1901998f50636690cb452dec9b904acd188f3f77 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 25 Oct 2022 09:46:24 +0200 Subject: [PATCH 70/81] Save outputs inside generated directory --- modules/Saving/paths2save.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 70f6cc92..f48ebe7b 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -39,7 +39,7 @@ get_dir <- function(recipe, agg = "global") { ## TODO: Get aggregation from recipe ## TODO: Add time frequency - outdir <- recipe$Run$output_dir + outdir <- paste0(recipe$Run$output_dir, "/outputs/") variable <- recipe$Analysis$Variables$name if (!is.null(recipe$Analysis$Time$fcst_year)) { if (tolower(recipe$Analysis$Horizon) == 'decadal') { -- GitLab From f8ac6fffd30965c85ff63006f9cf8da4670840f8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 25 Oct 2022 10:29:22 +0200 Subject: [PATCH 71/81] Change stop() messages to log4r::error --- modules/Loading/Loading_decadal.R | 57 +++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 7b6b352f..e6f19fac 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -13,7 +13,6 @@ source("modules/Loading/check_latlon.R") source("tools/libs.R") ## TODO: Remove once the fun is included in CSTools source("tools/tmp/as.s2dv_cube.R") -## TODO: Change stops to logger error messages #==================================================================== @@ -165,7 +164,9 @@ load_datasets <- function(recipe) { # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { - stop("hcst 
has problem in matching data and time attr dimension.") + error(recipe$Run$logger, + "hcst has problem in matching data and time attr dimension.") + stop() } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) @@ -237,7 +238,9 @@ load_datasets <- function(recipe) { # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { - stop("fcst has problem in matching data and time attr dimension.") + error(recipe$Run$logger, + "fcst has problem in matching data and time attr dimension.") + stop() } dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) @@ -253,7 +256,9 @@ load_datasets <- function(recipe) { # Only syear could be different if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { - stop("hcst and fcst do not share the same dimension structure.") + error(recipe$Run$logger, + "hcst and fcst do not share the same dimension structure.") + stop() } } else { @@ -341,7 +346,9 @@ load_datasets <- function(recipe) { # Only ensemble dim could be different if (!identical(dim(obs), dim(hcst$data)[-9])) { - stop("obs and hcst dimensions do not match.") + error(recipe$Run$logger, + "obs and hcst dimensions do not match.") + stop() } # Add ensemble dim to obs dim(obs) <- c(dim(obs), ensemble = 1) @@ -357,45 +364,65 @@ load_datasets <- function(recipe) { #------------------------------------------- # dimension if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { - stop("hcst and obs don't share the same dimension names.") + error(recipe$Run$logger, + "hcst and obs don't share the same dimension names.") + stop() } else { ens_ind <- which(names(dim(obs$data)) == 'ensemble') match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data))) - if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) stop("hcst and obs don't 
share the same dimension length.") + if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) { + error(recipe$Run$logger, + "hcst and obs don't share the same dimension length.") + stop() + } } # time attribute if (!identical(format(hcst$Dates$start, '%Y%m'), format(obs$Dates$start, '%Y%m'))) - stop("hcst and obs don't share the same time.") + error(recipe$Run$logger, + "hcst and obs don't share the same time.") + stop() # lat and lon attributes if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) - stop("hcst and obs don't share the same latitude.") + error(recipe$Run$logger, + "hcst and obs don't share the same latitude.") + stop() if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) - stop("hcst and obs don't share the same longitude.") + error(recipe$Run$logger, + "hcst and obs don't share the same longitude.") + stop() # Check fcst if (!is.null(fcst)) { # dimension if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { - stop("hcst and fcst don't share the same dimension names.") + error(recipe$Run$logger, + "hcst and fcst don't share the same dimension names.") + stop() } else { ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) - if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) - stop("hcst and fcst don't share the same dimension length.") + if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) + error(recipe$Run$logger, + "hcst and fcst don't share the same dimension length.") + stop() } # lat and lon attributes if (!identical(as.vector(hcst$lat), as.vector(fcst$lat))) - stop("hcst and fcst don't share the same latitude.") + error(recipe$Run$logger, + "hcst and fcst don't share the same latitude.") + stop() if (!identical(as.vector(hcst$lon), as.vector(fcst$lon))) - stop("hcst and fcst don't share the same longitude.") + error(recipe$Run$logger, + "hcst and fcst don't share the same longitude.") + stop() } -- GitLab 
From 5bf48230d504a47f0aac6dcb4b2e2d5bb95099a0 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 25 Oct 2022 11:56:06 +0200 Subject: [PATCH 72/81] Fix pipeline --- modules/Loading/Loading_decadal.R | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index e6f19fac..2aadfb95 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -379,22 +379,25 @@ load_datasets <- function(recipe) { # time attribute if (!identical(format(hcst$Dates$start, '%Y%m'), - format(obs$Dates$start, '%Y%m'))) + format(obs$Dates$start, '%Y%m'))) { error(recipe$Run$logger, "hcst and obs don't share the same time.") stop() + } # lat and lon attributes if (!identical(as.vector(hcst$lat), - as.vector(obs$lat))) + as.vector(obs$lat))) { error(recipe$Run$logger, "hcst and obs don't share the same latitude.") stop() + } if (!identical(as.vector(hcst$lon), - as.vector(obs$lon))) + as.vector(obs$lon))) { error(recipe$Run$logger, "hcst and obs don't share the same longitude.") stop() + } # Check fcst if (!is.null(fcst)) { @@ -406,23 +409,26 @@ load_datasets <- function(recipe) { } else { ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) - if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) - error(recipe$Run$logger, - "hcst and fcst don't share the same dimension length.") - stop() + if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) { + error(recipe$Run$logger, + "hcst and fcst don't share the same dimension length.") + stop() + } } # lat and lon attributes if (!identical(as.vector(hcst$lat), - as.vector(fcst$lat))) + as.vector(fcst$lat))) { error(recipe$Run$logger, "hcst and fcst don't share the same latitude.") stop() + } if (!identical(as.vector(hcst$lon), - as.vector(fcst$lon))) + as.vector(fcst$lon))) { 
error(recipe$Run$logger, "hcst and fcst don't share the same longitude.") stop() + } } -- GitLab From d40b8d58b1463c1ae75f1c7c435692fdffcf54f5 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 25 Oct 2022 16:34:04 +0200 Subject: [PATCH 73/81] Bugfix: change logger to recipe --- modules/Loading/Loading.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 4436edc6..f78bd144 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -286,26 +286,26 @@ load_datasets <- function(recipe) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") - error(logger, lat_error_msg) + error(recipe$Run$logger, lat_error_msg) hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) - info(logger, hcst_lat_msg) + info(recipe$Run$logger, hcst_lat_msg) obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], "; Last obs lat: ", obs$lat[length(obs$lat)]) - info(logger, obs_lat_msg) + info(recipe$Run$logger, obs_lat_msg) stop("hcst and obs don't share the same latitudes.") } if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") - error(logger, lon_error_msg) + error(recipe$Run$logger, lon_error_msg) hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) - info(logger, hcst_lon_msg) + info(recipe$Run$logger, hcst_lon_msg) obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], "; Last obs lon: ", obs$lon[length(obs$lon)]) - info(logger, obs_lon_msg) + info(recipe$Run$logger, obs_lon_msg) stop("hcst and obs don't share the same longitudes.") } -- GitLab From c08be573d7c9b4f46e7fe2ef59a8ce3d0dd39bf2 Mon Sep 17 00:00:00 2001 From: vagudets Date: 
Wed, 26 Oct 2022 09:49:29 +0200 Subject: [PATCH 74/81] Update Calibration module to the new version of CST_QuantileMapping() --- conf/archive.yml | 2 +- modules/Calibration/Calibration.R | 29 +-- tests/testthat/test-seasonal_daily.R | 8 +- tools/tmp/CST_QuantileMapping.R | 325 +++++++++++++++++++++++++++ 4 files changed, 346 insertions(+), 18 deletions(-) create mode 100644 tools/tmp/CST_QuantileMapping.R diff --git a/conf/archive.yml b/conf/archive.yml index 9d994ee5..c50909c3 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -19,7 +19,7 @@ archive: calendar: "proleptic_gregorian" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" system7c3s: - name: "Méteo-France System 7" + name: "Meteo-France System 7" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/meteofrance/system7c3s/" monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 85b1b007..15e51e82 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -1,7 +1,7 @@ ## TODO: Remove once Alba's fun is merged in CSTools source("tools/tmp/CST_Calibration.R") - +source("tools/tmp/CST_QuantileMapping.R") ## Entry params data and recipe? calibrate_datasets <- function(data, recipe) { # Function that calibrates the hindcast using the method stated in the @@ -93,29 +93,32 @@ calibrate_datasets <- function(data, recipe) { "frequency. 
Only quantile mapping 'qmap' is implemented.") } # Calibrate the hindcast + dim_order <- names(dim(data$hcst$data)) hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, exp_cor = NULL, - sample_dims = c("syear", - "time", - "ensemble"), - sample_length = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", method = "QUANT", - wet.day = FALSE, ncores = ncores, - na.rm = na.rm) + na.rm = na.rm, + wet.day = F) + # Restore dimension order + hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) if (!is.null(data$fcst)) { # Calibrate the forecast fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, exp_cor = data$fcst, - sample_dims = c("syear", - "time", - "ensemble"), - sample_length = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", method = "QUANT", - wet.day = FALSE, ncores = ncores, - na.rm = na.rm) + na.rm = na.rm, + wet.day = F) + # Restore dimension order + fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) } else { fcst_calibrated <- NULL } diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index c37b5514..237674e0 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -123,17 +123,17 @@ c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, lon ) expect_equal( mean(calibrated_data$hcst$data), -289.6468, +289.5354, tolerance = 0.0001 ) expect_equal( as.vector(drop(calibrated_data$hcst$data)[1, 1:4, 2, 3, 4]), -c(295.1077, 294.2161, 294.5801, 292.6326), +c(291.5555, 291.9029, 293.2685, 290.7782), tolerance = 0.0001 ) expect_equal( range(calibrated_data$hcst$data), -c(283.9447, 297.7496), +c(284.2823, 296.7545), tolerance = 0.0001 ) }) @@ -160,7 +160,7 @@ c(time = 31, latitude = 4, longitude = 4) ) expect_equal( skill_metrics$enscorr_specs[1:3, 1, 1], -c(0.8159317, 0.8956195, 0.8355627), +c(0.7509920, 0.6514916, 0.5118371), tolerance=0.0001 ) }) diff --git 
a/tools/tmp/CST_QuantileMapping.R b/tools/tmp/CST_QuantileMapping.R new file mode 100644 index 00000000..a4e4a9d6 --- /dev/null +++ b/tools/tmp/CST_QuantileMapping.R @@ -0,0 +1,325 @@ +#'Quantile Mapping for seasonal or decadal forecast data +#' +#'@description This function is a wrapper of fitQmap and doQmap from package +#''qmap' to be applied on the object of class 's2dv_cube'. The quantile mapping +#'adjustment between an experiment, typically a hindcast, and observation is +#'applied to the experiment itself or to a provided forecast. + +#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +#'@param exp An object of class \code{s2dv_cube}. +#'@param obs An object of class \code{s2dv_cube}. +#'@param exp_cor An object of class \code{s2dv_cube} in which the quantile +#' mapping correction should be applied. If it is not specified, the correction +#' is applied in object 'exp'. +#'@param sdate_dim A character string indicating the dimension name in which +#' cross-validation would be applied when exp_cor is not provided. 'sdate' by +#' default. +#'@param memb_dim A character string indicating the dimension name where +#' ensemble members are stored in the experimental arrays. 'member' by default. +#'@param window_dim A character string indicating the dimension name where +#' samples have been stored. It can be NULL (default) in case all samples are +#' used. +#'@param method A character string indicating the method to be used:'PTF', +#' 'DIST','RQUANT','QUANT','SSPLIN'. By default, the empirical quantile mapping +#' 'QUANT' is used. +#'@param na.rm A logical value indicating if missing values should be removed +#' (FALSE by default). +#'@param ncores An integer indicating the number of cores for parallel +#' computation using multiApply function. The default value is NULL (1). +#'@param ... Additional parameters to be used by the method choosen. See qmap +#' package for details. 
+#' +#'@return An object of class \code{s2dv_cube} containing the experimental data +#'after applying the quantile mapping correction. +#' +#'@seealso \code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}} +#'@examples +#'# Use synthetic data +#'exp <- NULL +#'exp$data <- 1 : c(1 * 10 * 20 * 60 * 6 * 7) +#'dim(exp$data) <- c(dataset = 1, member = 10, sdate = 20, ftime = 60 , +#' lat = 6, lon = 7) +#'class(exp) <- 's2dv_cube' +#'obs$data <- 101 : c(100 + 1 * 1 * 20 * 60 * 6 * 7) +#'dim(obs$data) <- c(dataset = 1, member = 1, sdate = 20, ftime = 60 , +#' lat = 6, lon = 7) +#'class(obs) <- 's2dv_cube' +#'res <- CST_QuantileMapping(exp, obs) +#' +#'# Use data in package +#'exp <- lonlat_temp$exp +#'exp$data <- exp$data[, , 1:4, , 1:2, 1:3] +#'dim(exp$data) <- c(dataset = 1, member = 15, sdate = 4, ftime = 3, +#' lat = 2, lon = 3) +#'obs <- lonlat_temp$obs +#'obs$data <- obs$data[, , 1:4, , 1:2, 1:3] +#'dim(obs$data) <- c(dataset = 1, member = 1, sdate = 4, ftime = 3, +#' lat = 2, lon = 3) +#'exp_cor <- lonlat_temp$exp +#'exp_cor$data <- exp_cor$data[, 1, 5:6, , 1:2, 1:3] +#'dim(exp_cor$data) <- c(dataset = 1, member = 1, sdate = 2, ftime = 3, +#' lat = 2, lon = 3) +#'res <- CST_QuantileMapping(exp, obs, exp_cor, window_dim = 'ftime') +#' +#'@import qmap +#'@import multiApply +#'@import s2dv +#'@export +CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', + memb_dim = 'member', window_dim = NULL, + method = 'QUANT', na.rm = FALSE, + ncores = NULL, ...) 
{ + if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) { + stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + if (!is.null(exp_cor)) { + if (!inherits(exp_cor, 's2dv_cube')) { + stop("Parameter 'exp_cor' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + } + + QMapped <- QuantileMapping(exp = exp$data, obs = obs$data, + exp_cor = exp_cor$data, + sdate_dim = sdate_dim, memb_dim = memb_dim, + window_dim = window_dim, method = method, + na.rm = na.rm, ncores = ncores, ...) + if (is.null(exp_cor)) { + exp$data <- QMapped + exp$Datasets <- c(exp$Datasets, obs$Datasets) + exp$source_files <- c(exp$source_files, obs$source_files) + return(exp) + + } else { + exp_cor$data <- QMapped + exp_cor$Datasets <- c(exp_cor$Datasets, exp$Datasets, obs$Datasets) + exp_cor$source_files <- c(exp_cor$source_files, exp$source_files, obs$source_files) + return(exp_cor) + } + + +} + +#'Quantile Mapping for seasonal or decadal forecast data +#' +#'@description This function is a wrapper of fitQmap and doQmap from package +#''qmap' to be applied on multi-dimensional arrays. The quantile mapping +#'adjustment between an experiment, typically a hindcast, and observation is +#'applied to the experiment itself or to a provided forecast. +#' +#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +#'@param exp A multidimensional array with named dimensions containing the +#' hindcast. +#'@param obs A multidimensional array with named dimensions containing the +#' reference dataset. +#'@param exp_cor A multidimensional array with named dimensions in which the +#' quantile mapping correction should be applied. If it is not specified, the +#' correction is applied on object 'exp'. +#'@param sdate_dim A character string indicating the dimension name in which +#' cross-validation would be applied when exp_cor is not provided. 'sdate' by +#' default. 
+#'@param memb_dim A character string indicating the dimension name where +#' ensemble members are stored in the experimental arrays. 'member' by +#' default. +#'@param window_dim A character string indicating the dimension name where +#' samples have been stored. It can be NULL (default) in case all samples are +#' used. +#'@param method A character string indicating the method to be used: 'PTF', +#' 'DIST','RQUANT','QUANT','SSPLIN'. By default, the empirical quantile mapping +#' 'QUANT' is used. +#'@param na.rm A logical value indicating if missing values should be removed +#' (FALSE by default). +#'@param ncores An integer indicating the number of cores for parallel +#' computation using multiApply function. The default value is NULL (1). +#'@param ... Additional parameters to be used by the method choosen. See qmap +#' package for details. +#' +#'@return An array containing the experimental data after applying the quantile +#'mapping correction. +#' +#'@seealso \code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}} +#'@examples +#'# Use synthetic data +#'exp <- 1 : c(1 * 10 * 20 * 60 * 6 * 7) +#'dim(exp) <- c(dataset = 1, member = 10, sdate = 20, ftime = 60 , +#' lat = 6, lon = 7) +#'class(exp) <- 's2dv_cube' +#'obs <- 101 : c(100 + 1 * 1 * 20 * 60 * 6 * 7) +#'dim(obs) <- c(dataset = 1, member = 1, sdate = 20, ftime = 60 , +#' lat = 6, lon = 7) +#'res <- QuantileMapping(exp, obs) +#'# Use data in package +#'exp <- lonlat_temp$exp$data[, , 1:4, , 1:2, 1:3] +#'dim(exp) <- c(dataset = 1, member = 15, sdate = 4, ftime = 3, +#' lat = 2, lon = 3) +#'obs <- lonlat_temp$obs$data[, , 1:4, , 1:2, 1:3] +#'dim(obs) <- c(dataset = 1, member = 1, sdate = 4, ftime = 3, +#' lat = 2, lon = 3) +#'exp_cor <- lonlat_temp$exp$data[, 1, 5:6, , 1:2, 1:3] +#'dim(exp_cor) <- c(dataset = 1, member = 1, sdate = 2, ftime = 3, +#' lat = 2, lon = 3) +#'res <- QuantileMapping(exp, obs, exp_cor, window_dim = 'ftime') +#' +#'@import qmap +#'@import multiApply +#'@import s2dv +#'@export 
+QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', + memb_dim = 'member', window_dim = NULL, + method = 'QUANT', + na.rm = FALSE, ncores = NULL, ...) { + # exp and obs + obsdims <- names(dim(obs)) + expdims <- names(dim(exp)) + if (!is.array(exp) || !is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (!is.array(obs) || !is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (is.null(expdims)) { + stop("Parameter 'exp' must have dimension names.") + } + if (is.null(obsdims)) { + stop("Parameter 'obs' must have dimension names.") + } + # sdate_dim + if (!is.character(sdate_dim) | length(sdate_dim) != 1) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (!sdate_dim %in% expdims | !sdate_dim %in% obsdims) { + stop("Parameter 'sdate_dim' is not found in 'exp' or 'obs' dimension.") + } + if (dim(exp)[sdate_dim] == 1 || dim(obs)[sdate_dim] == 1) { + stop("Parameter 'exp' and 'obs' must have dimension length of 'sdate_dim' bigger than 1.") + } + # exp_cor + if (!is.null(exp_cor)) { + if (is.null(names(dim(exp_cor)))) { + stop("Parameter 'exp_cor' must have dimension names.") + } + if (!sdate_dim %in% names(dim(exp_cor))) { + stop("Parameter 'sdate_dim' is not found in 'exp_cor' dimension.") + } + } + # method + if (!(method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) | length(method) != 1) { + stop("Parameter 'method' must be one of the following methods: ", + "'PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN'.") + } + # memb_dim + if (!all(memb_dim %in% obsdims)) { + obs <- InsertDim(obs, posdim = 1, lendim = 1, + name = memb_dim[!(memb_dim %in% obsdims)]) + } + if (any(!memb_dim %in% expdims)) { + stop("Parameter 'memb_dim' is not found in 'exp' dimensions.") + } + sample_dims <- c(memb_dim, sdate_dim) + # window_dim + if (!is.null(window_dim)) { + if (!(window_dim %in% obsdims)) { + stop("Parameter 'window_dim' is not found in 'obs'.") + } + obs <- CSTools::MergeDims(obs, 
c(memb_dim, window_dim)) + if (window_dim %in% expdims) { + exp <- CSTools::MergeDims(exp, c(memb_dim, window_dim)) + warning("Parameter 'window_dim' is found in exp and is merged to 'memb_dim'.") + } + } + # na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + # ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + if (!is.null(exp_cor)) { + qmaped <- Apply(list(exp, obs, exp_cor), target_dims = sample_dims, + fun = .qmapcor, method = method, sdate_dim = sdate_dim, + na.rm = na.rm, ..., + ncores = ncores)$output1 + } else { + qmaped <- Apply(list(exp, obs), target_dims = sample_dims, + fun = .qmapcor, exp_cor = NULL, method = method, + sdate_dim = sdate_dim, na.rm = na.rm, ..., + ncores = ncores)$output1 + } + return(qmaped) +} + +.qmapcor <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', method = 'QUANT', + na.rm = FALSE, ...) { + + # exp: [memb (+ window), sdate] + # obs: [memb (+ window), sdate] + # exp_cor: NULL or [memb, sdate] + + if (is.null(exp_cor)) { + applied <- exp * NA + for (sd in 1:dim(exp)[sdate_dim]) { + if (na.rm) { + # select start date for cross-val + nas_pos <- which(!is.na(exp[, sd])) + obs2 <- as.vector(obs[, -sd]) + exp2 <- as.vector(exp[, -sd]) + exp_cor2 <- as.vector(exp[, sd]) + # remove NAs + obs2 <- obs2[!is.na(obs2)] + exp2 <- exp2[!is.na(exp2)] + exp_cor2 <- exp_cor2[!is.na(exp_cor2)] + tryCatch({ + adjust <- fitQmap(obs2, exp2, method = method, ...) + applied[nas_pos, sd] <- doQmap(exp_cor2, adjust, ...) 
+ }, + error = function(error_message) { + return(applied[, sd]) + }) + } else { + # na.rm = FALSE shouldn't fail, just return NA + if (anyNA(obs[, -sd]) | anyNA(exp[, -sd])) { + applied[, sd] <- NA + } else { + adjust <- fitQmap(as.vector(obs[, -sd]), as.vector(exp[, -sd]), + method = method, ...) + exp2 <- exp[, sd] + if (sum(is.na(exp2)) >= 1) { + app <- rep(NA, length(exp2)) + nas_pos <- which(is.na(exp2)) + exp2 <- exp2[!is.na(exp2)] + app[-nas_pos] <- doQmap(as.vector(exp2), adjust, ...) + } else { + app <- doQmap(as.vector(exp2), adjust, ...) + } + applied[, sd] <- app + } + } + } + } else { + applied <- exp_cor * NA + if (na.rm) { + tryCatch({ + adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], + method = method, ...) + applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], + adjust, ...) + }, + error = function(error_message) { + return(applied) + }) + } else { + adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...) + applied <- doQmap(as.vector(exp_cor), adjust, ...) 
+ } + dim(applied) <- dim(exp_cor) + } + return(applied) +} -- GitLab From 32dfaca00bb74352c379e7f4b3aeb692b5ce1871 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 26 Oct 2022 11:04:49 +0200 Subject: [PATCH 75/81] Update README --- README.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e540516a..7802cfd5 100644 --- a/README.md +++ b/README.md @@ -9,13 +9,21 @@ The main developers of the tool are Victòria Agudetse (@vagudets), An-Chi Ho (@ Resources --------- -Here you can access a presentation containing information relevant to the tool: -[ESS Verification Suite](https://docs.google.com/presentation/d/1R8Gcz5R_NTgcBQvXBkCPG3jY31BVPDur/edit#slide=id.p1?target=_blank) +You can access the documentation of the Verification Suite through the wiki: +[Auto-s2s Wiki](https://earth.bsc.es/gitlab/es/auto-s2s/-/wikis/home?target=_blank) + +You may also find useful information in the slides from past user meetings: +[User meeting September 2022](https://docs.google.com/presentation/d/14-qq__fblMt7xvJDaqS5UqfQMXWCf3Ju/edit#slide=id.p1?target=_blank) +[User meeting June 2022](https://docs.google.com/presentation/d/1R8Gcz5R_NTgcBQvXBkCPG3jY31BVPDur/edit#slide=id.p1?target=_blank) Branching strategy ------------------ Branches containing developments that are to be merged into the tool must contain "dev-" at the beginning of the name, followed by a short, meaningful description of the development in question. E.g. "dev-loading-subseasonal" for the branch containing developments related to the loading of subseasonal datasets. -Users may have their own branches for personal use. These branches should start with "user-\", which can optionally be followed by a brief description. E.g. "user-vagudets-FOCUS". +Users that wish to incorporate their own developments into the core of the tool are encouraged to create a personal fork of the Auto-S2S repository to work on their projects. 
Please contact Victòria Agudetse at victoria.agudetse@bsc.es to discuss the first steps. + +Mailing list +------------ +User meetings, internal releases and news are announced through the mailing list. You can send an email to victoria.agudetse@bsc.es or an.ho@bsc.es to request subscription. -- GitLab From e8080cb5f12ba2ba15d68899c202d44ffa98a23b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 26 Oct 2022 11:11:41 +0200 Subject: [PATCH 76/81] Fix typo in README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 7802cfd5..4df05ecc 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,9 @@ You can access the documentation of the Verification Suite through the wiki: [Auto-s2s Wiki](https://earth.bsc.es/gitlab/es/auto-s2s/-/wikis/home?target=_blank) You may also find useful information in the slides from past user meetings: + [User meeting September 2022](https://docs.google.com/presentation/d/14-qq__fblMt7xvJDaqS5UqfQMXWCf3Ju/edit#slide=id.p1?target=_blank) + [User meeting June 2022](https://docs.google.com/presentation/d/1R8Gcz5R_NTgcBQvXBkCPG3jY31BVPDur/edit#slide=id.p1?target=_blank) Branching strategy -- GitLab From 73693d881b441dc46259c6c05ebb1b757f1a479e Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 26 Oct 2022 15:41:51 +0200 Subject: [PATCH 77/81] Refine lat/lon error message --- modules/Loading/Loading_decadal.R | 81 ++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 23 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 2aadfb95..e9f8b274 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -386,18 +386,35 @@ load_datasets <- function(recipe) { } # lat and lon attributes - if (!identical(as.vector(hcst$lat), - as.vector(obs$lat))) { - error(recipe$Run$logger, - "hcst and obs don't share the same latitude.") - stop() - } - if (!identical(as.vector(hcst$lon), - as.vector(obs$lon))) { - 
error(recipe$Run$logger, - "hcst and obs don't share the same longitude.") - stop() - } + if (!(recipe$Analysis$Regrid$type == 'none')) { + if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + + if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + } # Check fcst if (!is.null(fcst)) { @@ -417,18 +434,36 @@ load_datasets <- function(recipe) { } # lat and lon attributes - if (!identical(as.vector(hcst$lat), - as.vector(fcst$lat))) { - error(recipe$Run$logger, - "hcst and fcst don't share the same latitude.") - stop() - } - if (!identical(as.vector(hcst$lon), - as.vector(fcst$lon))) { - error(recipe$Run$logger, - "hcst and fcst don't share the same longitude.") - stop() + if (!(recipe$Analysis$Regrid$type == 'none')) { + if (!identical(as.vector(hcst$lat), as.vector(fcst$lat))) { + lat_error_msg <- paste("Latitude mismatch between hcst and fcst.", + "Please check the original grids 
and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + fcst_lat_msg <- paste0("First fcst lat: ", fcst$lat[1], + "; Last fcst lat: ", fcst$lat[length(fcst$lat)]) + info(recipe$Run$logger, fcst_lat_msg) + stop("hcst and fcst don't share the same latitudes.") + } + + if (!identical(as.vector(hcst$lon), as.vector(fcst$lon))) { + lon_error_msg <- paste("Longitude mismatch between hcst and fcst.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + fcst_lon_msg <- paste0("First fcst lon: ", fcst$lon[1], + "; Last fcst lon: ", fcst$lon[length(fcst$lon)]) + info(recipe$Run$logger, fcst_lon_msg) + stop("hcst and fcst don't share the same longitudes.") + } } + } -- GitLab From fe09a2c58ef50d249a24ed56a5df5db429d21e2d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 27 Oct 2022 13:21:45 +0200 Subject: [PATCH 78/81] Remove local copies CSTools functions after new release --- modules/Calibration/Calibration.R | 4 - tools/tmp/CST_Calibration.R | 563 ------------------------------ tools/tmp/CST_QuantileMapping.R | 325 ----------------- 3 files changed, 892 deletions(-) delete mode 100644 tools/tmp/CST_Calibration.R delete mode 100644 tools/tmp/CST_QuantileMapping.R diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 15e51e82..ec4aabf2 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -1,8 +1,4 @@ -## TODO: Remove once Alba's fun is merged in CSTools -source("tools/tmp/CST_Calibration.R") -source("tools/tmp/CST_QuantileMapping.R") -## Entry params data and recipe? 
calibrate_datasets <- function(data, recipe) { # Function that calibrates the hindcast using the method stated in the # recipe. If the forecast is not null, it calibrates it as well. diff --git a/tools/tmp/CST_Calibration.R b/tools/tmp/CST_Calibration.R deleted file mode 100644 index 79b41951..00000000 --- a/tools/tmp/CST_Calibration.R +++ /dev/null @@ -1,563 +0,0 @@ -#'Forecast Calibration -#' -#'@author Verónica Torralba, \email{veronica.torralba@bsc.es} -#'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be} -#'@description Equivalent to function \code{Calibration} but for objects of class \code{s2dv_cube}. -#' -#'@param exp an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal hindcast experiment data in the element named \code{$data}. The hindcast is used to calibrate the forecast in case the forecast is provided; if not, the same hindcast will be calibrated instead. -#'@param obs an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the observed data in the element named \code{$data}. -#'@param exp_cor an optional object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}. If the forecast is provided, it will be calibrated using the hindcast and observations; if not, the hindcast will be calibrated instead. -#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}. -#'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. In case the forecast is provided, any chosen eval.method is over-ruled and a third option is used. -#'@param multi.model is a boolean that is used only for the \code{mse_min} method. 
If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. -#'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. -#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}. -#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. -#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. -#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. -#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. -#'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. 
-#'@return an object of class \code{s2dv_cube} containing the calibrated forecasts in the element \code{$data} with the same dimensions as the one in the exp object. -#' -#'@importFrom s2dv InsertDim -#'@import abind -#' -#'@seealso \code{\link{CST_Load}} -#' -#'@examples -#'# Example 1: -#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) -#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) -#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'lon <- seq(0, 30, 5) -#'lat <- seq(0, 25, 5) -#'exp <- list(data = mod1, lat = lat, lon = lon) -#'obs <- list(data = obs1, lat = lat, lon = lon) -#'attr(exp, 'class') <- 's2dv_cube' -#'attr(obs, 'class') <- 's2dv_cube' -#'a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample") -#'str(a) -#' -#'# Example 2: -#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) -#'mod2 <- 1 : (1 * 3 * 1 * 5 * 6 * 7) -#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'dim(mod2) <- c(dataset = 1, member = 3, sdate = 1, ftime = 5, lat = 6, lon = 7) -#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) -#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'lon <- seq(0, 30, 5) -#'lat <- seq(0, 25, 5) -#'exp <- list(data = mod1, lat = lat, lon = lon) -#'obs <- list(data = obs1, lat = lat, lon = lon) -#'exp_cor <- list(data = mod2, lat = lat, lon = lon) -#'attr(exp, 'class') <- 's2dv_cube' -#'attr(obs, 'class') <- 's2dv_cube' -#'attr(exp_cor, 'class') <- 's2dv_cube' -#'a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, cal.method = "evmos") -#'str(a) -#'@export - -CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", - eval.method = "leave-one-out", multi.model = FALSE, - na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, - memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { - - if(!missing(multi.model) & !(cal.method == "mse_min")){ - 
warning(paste0("The multi.model parameter is ignored when using the calibration method ", cal.method)) - } - - if(is.null(exp_cor)){ #exp will be used to calibrate and will also be calibrated: "calibrate hindcast" - if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) { - stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - exp$data <- Calibration(exp = exp$data, obs = obs$data, exp_cor = NULL, - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - memb_dim = memb_dim, sdate_dim = sdate_dim, - ncores = ncores) - exp$Datasets <- c(exp$Datasets, obs$Datasets) - exp$source_files <- c(exp$source_files, obs$source_files) - - return(exp) - - }else{ #if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast" - eval.method = "hindcast-vs-forecast" #if exp_cor is provided, eval.method is overrruled (because if exp_cor is provided, the train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor"; no need for "leave-one-out" or "in-sample") - if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube") || !inherits(exp_cor, "s2dv_cube")) { - stop("Parameter 'exp', 'obs' and 'exp_cor' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - exp_cor$data <- Calibration(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - memb_dim = memb_dim, sdate_dim = sdate_dim, - ncores = ncores) - exp_cor$Datasets <- c(exp_cor$Datasets, obs$Datasets) - exp_cor$source_files <- c(exp_cor$source_files, exp$source_files, obs$source_files) - - return(exp_cor) - - } -} - - - -#'Forecast Calibration -#' -#'@author Verónica Torralba, \email{veronica.torralba@bsc.es} -#'@author Bert 
Van Schaeybroeck, \email{bertvs@meteo.be} -#'@description Five types of member-by-member bias correction can be performed. The \code{"bias"} method corrects the bias only, the \code{"evmos"} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014). -#'@description Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible. -#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x -#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate predictions underestimate the predictability of the read world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 -#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011 -#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. 
Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. doi:10.1002/qj.2397 -#' -#'@param exp a multidimensional array with named dimensions (at least 'sdate' and 'member') containing the seasonal hindcast experiment data. The hindcast is used to calibrate the forecast in case the forecast is provided; if not, the same hindcast will be calibrated instead. -#'@param obs a multidimensional array with named dimensions (at least 'sdate') containing the observed data. -#'@param exp_cor an optional multidimensional array with named dimensions (at least 'sdate' and 'member') containing the seasonal forecast experiment data. If the forecast is provided, it will be calibrated using the hindcast and observations; if not, the hindcast will be calibrated instead. -#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}. -#'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. In case the forecast is provided, any chosen eval.method is over-ruled and a third option is used. -#'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. -#'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. 
By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. -#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. -#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. -#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. -#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. -#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. -#'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. -#'@return an array containing the calibrated forecasts with the same dimensions as the \code{exp} array. -#' -#'@importFrom s2dv InsertDim MeanDims Reorder -#'@import abind -#'@import multiApply -#'@importFrom s2dverification Subset -#' -#'@seealso \code{\link{CST_Load}} -#' -#'@details -#'Both the \code{na.fill} and \code{na.rm} parameters can be used to indicate how the function has to handle the NA values. The \code{na.fill} parameter checks whether there are more than three forecast-observations pairs to perform the computation. In case there are three or less pairs, the computation is not carried out, and the value returned by the function depends on the value of this parameter (either NA if \code{na.fill == TRUE} or the uncorrected value if \code{na.fill == TRUE}). 
On the other hand, \code{na.rm} is used to indicate the function whether to remove the missing values during the computation of the parameters needed to perform the calibration. -#' -#'@examples -#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) -#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) -#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'a <- Calibration(exp = mod1, obs = obs1) -#'str(a) -#'@export -Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", - eval.method = "leave-one-out", - multi.model = FALSE, na.fill = TRUE, - na.rm = TRUE, apply_to = NULL, alpha = NULL, - memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { - - dim.exp <- dim(exp) - amt.dims.exp <- length(dim.exp) - dim.obs <- dim(obs) - amt.dims.obs <- length(dim.obs) - dim.names.exp <- names(dim.exp) - dim.names.obs <- names(dim.obs) - if(!is.null(exp_cor)){ - dim.exp_cor <- dim(exp_cor) - amt.dims.exp_cor <- length(dim.exp_cor) - dim.names.exp_cor <- names(dim.exp_cor) - } - if (is.null(memb_dim) || !is.character(memb_dim)) { - stop("Parameter 'memb_dim' should be a character string indicating the", - "name of the dimension where members are stored in 'exp'.") - } - if (length(memb_dim) > 1) { - memb_dim <- memb_dim[1] - warning("Parameter 'memb_dim' has length greater than 1 and only", - " the first element will be used.") - } - - if (is.null(sdate_dim) || !is.character(sdate_dim)) { - stop("Parameter 'sdate_dim' should be a character string indicating the", - "name of the dimension where start dates are stored in 'exp'.") - } - if (length(sdate_dim) > 1) { - sdate_dim <- sdate_dim[1] - warning("Parameter 'sdate_dim' has length greater than 1 and only", - " the first element will be used.") - } - target.dim.names.exp <- c(memb_dim, sdate_dim) - target.dim.names.obs <- sdate_dim - - if (!all(target.dim.names.exp %in% dim.names.exp)) { - stop("Parameter 'exp' must have the 
dimensions defined in memb_dim ", - "and sdate_dim.") - } - - if(!is.null(exp_cor)){ - if (!all(target.dim.names.exp %in% dim.names.exp_cor)) { - stop("Parameter 'exp_cor' must have the dimensions defined in memb_dim ", - "and sdate_dim.") - } - } - - if (!all(c(sdate_dim) %in% dim.names.obs)) { - stop("Parameter 'obs' must have the dimension defined in sdate_dim ", - "parameter.") - } - - if (any(is.na(exp))) { - warning("Parameter 'exp' contains NA values.") - } - - if(!is.null(exp_cor)){ - if (any(is.na(exp_cor))) { - warning("Parameter 'exp_cor' contains NA values.") - } - } - - if (any(is.na(obs))) { - warning("Parameter 'obs' contains NA values.") - } - - if (memb_dim %in% names(dim(obs))) { - obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected") - } - data.set.sufficiently.large.out <- - Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), - ncores = ncores, - fun = .data.set.sufficiently.large)$output1 - - if(!all(data.set.sufficiently.large.out)){ - if(na.fill){ - warning("Some forecast data could not be corrected due to data lack", - " and is replaced with NA values") - } else { - warning("Some forecast data could not be corrected due to data lack", - " and is replaced with uncorrected values") - } - } - - if (!na.rm %in% c(TRUE,FALSE)) { - stop("Parameter 'na.rm' must be TRUE or FALSE.") - } - if (cal.method == 'rpc-based') { - if (is.null(apply_to)) { - apply_to <- 'sign' - warning("'apply_to' cannot be NULL for 'rpc-based' method so it has been set to 'sign', as in Eade et al. (2014).") - } else if (!apply_to %in% c('all','sign')) { - stop("'apply_to' must be either 'all' or 'sign' when 'rpc-based' method is used.") - } - if (apply_to == 'sign') { - if (is.null(alpha)) { - alpha <- 0.1 - warning("'alpha' cannot be NULL for 'rpc-based' method so it has been set to 0.1, as in Eade et al. 
(2014).") - } else if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1) { - stop("'alpha' must be a number between 0 and 1.") - } - } - } - - if(is.null(exp_cor)){ - calibrated <- Apply(data = list(exp = exp, obs = obs), - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), - ncores = ncores, output_dims = target.dim.names.exp, - fun = .cal)$output1 - dexes <- match(names(dim(exp)), names(dim(calibrated))) - calibrated <- aperm(calibrated, dexes) - dimnames(calibrated) <- dimnames(exp)[dexes] - }else{ - calibrated <- Apply(data = list(exp = exp, obs = obs, exp_cor = exp_cor), - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs, exp_cor = target.dim.names.exp), - ncores = ncores, output_dims = target.dim.names.exp, - fun = .cal)$output1 - dexes <- match(names(dim(exp_cor)), names(dim(calibrated))) - calibrated <- aperm(calibrated, dexes) - dimnames(calibrated) <- dimnames(exp_cor)[dexes] - } - - return(calibrated) -} - - -.data.set.sufficiently.large <- function(exp, obs){ - amt.min.samples <- 3 - amt.good.pts <- sum(!is.na(obs) & !apply(exp, c(2), function(x) all(is.na(x)))) - return(amt.good.pts > amt.min.samples) -} - -.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor){ - if(eval.method == "leave-one-out"){ - dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x, - train.dexes = seq(1, amt.points)[-x]))) - } else if (eval.method == "in-sample"){ - dexes.lst <- list(list(eval.dexes = seq(1, amt.points), - train.dexes = seq(1, amt.points))) - } else if (eval.method == "hindcast-vs-forecast"){ - dexes.lst <- list(list(eval.dexes = seq(1,amt.points_cor), - 
train.dexes = seq(1, amt.points))) - } else { - stop(paste0("unknown sampling method: ",eval.method)) - } - return(dexes.lst) -} - -.cal <- function(exp, obs, exp_cor = NULL, cal.method, eval.method, multi.model, na.fill, na.rm, apply_to, alpha) { - if(is.null(exp_cor)){ - exp_cor <- exp ## generate a copy of exp so that the same function can run - ## when exp_cor is provided and when it's not - } - obs <- as.vector(obs) - dims.fc <- dim(exp) - dims.fc_cor <- dim(exp_cor) ## new line - amt.mbr <- dims.fc[1] - amt.sdate <- dims.fc[2] - amt.sdate_cor <- dims.fc_cor[2] ## new line - var.cor.fc <- NA * exp_cor ## modified line (exp_cor instead of exp); - ## in case of exp_cor not provided, at this point exp_cor - ## is already the same as exp so no change - names(dim(var.cor.fc)) <- dims.fc - - if(!.data.set.sufficiently.large(exp = exp, obs = obs)){ - if(na.fill){ - return(var.cor.fc) - } else { - var.cor.fc[] <- exp[] - return(var.cor.fc) - } - } - eval.train.dexeses <- .make.eval.train.dexes(eval.method, amt.points = amt.sdate, - amt.points_cor = amt.sdate_cor) - amt.resamples <- length(eval.train.dexeses) - for (i.sample in seq(1, amt.resamples)) { - # defining training (tr) and evaluation (ev) subsets - eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes - train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes - - fc.ev <- exp_cor[ , eval.dexes, drop = FALSE] ## modified line (exp_cor instead of exp) - ## fc.ev is used to evaluate (not train; train should be done with exp (hindcast)) - fc.tr <- exp[ , train.dexes] - obs.tr <- obs[train.dexes , drop = FALSE] - - if(cal.method == "bias"){ - var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) - } else if(cal.method == "evmos"){ # forecast correction implemented - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) - #calculate value for regression parameters - init.par <- 
c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) - } else if (cal.method == "mse_min"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) - #calculate value for regression parameters - init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) - } else if (cal.method == "crps_min"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) - #calculate initial value for regression parameters - init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) - init.par[3] <- sqrt(init.par[3]) - #calculate regression parameters on training dataset - optim.tmp <- optim(par = init.par, - fn = .calc.crps.opt, - gr = .calc.crps.grad.opt, - quant.obs.fc = quant.obs.fc.tr, - na.rm = na.rm, - method = "BFGS") - - mbm.par <- optim.tmp$par - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) - } else if (cal.method == 'rpc-based') { - ens_mean.ev <- multiApply::Apply(data = fc.ev, target_dims = names(amt.mbr), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean - ens_mean.tr <- multiApply::Apply(data = fc.tr, target_dims = names(amt.mbr), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean - ens_spread.tr <- multiApply::Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(amt.sdate), fun = "-")$output1 ## Ensemble spread - exp_mean.tr <- mean(fc.tr, na.rm = na.rm) ## Mean (climatology) - var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) ## Ensemble mean variance - var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) ## Variance of ensemble members about ensemble mean (= spread) - var_obs.tr <- 
var(obs.tr, na.rm = na.rm) ## Variance in the observations - r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) ## Correlation between observations and the ensemble mean - if ((apply_to == 'all') || (apply_to == 'sign' && cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { - ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr - var.cor.fc[ , eval.dexes] <- s2dv::Reorder(data = multiApply::Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, ens_mean_cal = ens_mean_cal), target_dims = names(amt.sdate), fun = .CalibrationMembersRPC, var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, - order = names(dims.fc)) - dim(var.cor.fc) <- dims.fc - } else { ## no significant -> replacing with observed climatology - var.cor.fc[ , eval.dexes] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev)) - } - } else { - stop("unknown calibration method: ",cal.method) - } - } - return(var.cor.fc) -} - -.calc.obs.fc.quant <- function(obs, fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations - amt.mbr <- dim(fc)[1] - obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) - fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) - cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") - obs.av <- mean(obs, na.rm = na.rm) - obs.sd <- sd(obs, na.rm = na.rm) - return( - append( - .calc.fc.quant(fc = fc, na.rm = na.rm), - list( - obs.per.ens = obs.per.ens, - cor.obs.fc = cor.obs.fc, - obs.av = obs.av, - obs.sd = obs.sd - ) - ) - ) -} - -.calc.obs.fc.quant.ext <- function(obs, fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations - amt.mbr <- dim(fc)[1] - obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) - fc.ens.av <- apply(fc, 
c(2), mean, na.rm = na.rm) - cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") - obs.av <- mean(obs, na.rm = na.rm) - obs.sd <- sd(obs, na.rm = na.rm) - - return( - append( - .calc.fc.quant.ext(fc = fc, na.rm = na.rm), - list( - obs.per.ens = obs.per.ens, - cor.obs.fc = cor.obs.fc, - obs.av = obs.av, - obs.sd = obs.sd - ) - ) - ) -} - - -.calc.fc.quant <- function(fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts - amt.mbr <- dim(fc)[1] - fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) - fc.ens.av.av <- mean(fc.ens.av, na.rm = na.rm) - fc.ens.av.sd <- sd(fc.ens.av, na.rm = na.rm) - fc.ens.av.per.ens <- InsertDim(fc.ens.av, posdim = 1, lendim = amt.mbr) - fc.ens.sd <- apply(fc, c(2), sd, na.rm = na.rm) - fc.ens.var.av.sqrt <- sqrt(mean(fc.ens.sd^2, na.rm = na.rm)) - fc.dev <- fc - fc.ens.av.per.ens - fc.dev.sd <- sd(fc.dev, na.rm = na.rm) - fc.av <- mean(fc, na.rm = na.rm) - fc.sd <- sd(fc, na.rm = na.rm) - return( - list( - fc.ens.av = fc.ens.av, - fc.ens.av.av = fc.ens.av.av, - fc.ens.av.sd = fc.ens.av.sd, - fc.ens.av.per.ens = fc.ens.av.per.ens, - fc.ens.sd = fc.ens.sd, - fc.ens.var.av.sqrt = fc.ens.var.av.sqrt, - fc.dev = fc.dev, - fc.dev.sd = fc.dev.sd, - fc.av = fc.av, - fc.sd = fc.sd - ) - ) -} - -.calc.fc.quant.ext <- function(fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts - - amt.mbr <- dim(fc)[1] - repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr) - repmat2.tmp <- aperm(repmat1.tmp, c(2, 1, 3)) - spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = na.rm) - spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr) - - return( - append(.calc.fc.quant(fc, na.rm = na.rm), - list(spr.abs = spr.abs, spr.abs.per.ens = spr.abs.per.ens)) - ) -} - -#Below are the core or elementary functions to calculate the regression parameters for the different methods -.calc.mse.min.par <- function(quant.obs.fc, multi.model = F, na.rm){ - 
par.out <- rep(NA, 3) - - if(multi.model){ - par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.ens.var.av.sqrt) - } else { - par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.dev.sd) - } - par.out[2] <- with(quant.obs.fc, abs(cor.obs.fc) * obs.sd / fc.ens.av.sd) - par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm) - - return(par.out) -} -.calc.evmos.par <- function(quant.obs.fc, na.rm){ - par.out <- rep(NA, 2) - par.out[2] <- with(quant.obs.fc, obs.sd / fc.sd) - par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm) - return(par.out) -} -#Below are the core or elementary functions to calculate the functions necessary for the minimization of crps -.calc.crps.opt <- function(par, quant.obs.fc, na.rm){ - return( - with(quant.obs.fc, - mean(abs(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens + - ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)), na.rm = na.rm) - - mean(abs((par[3])^2 * spr.abs + par[4]) / 2., na.rm = na.rm) - ) - ) -} - -.calc.crps.grad.opt <- function(par, quant.obs.fc, na.rm){ - sgn1 <- with(quant.obs.fc,sign(obs.per.ens - - (par[1] + par[2] * fc.ens.av.per.ens + - ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev))) - sgn2 <- with(quant.obs.fc, sign((par[3])^2 + par[4] / spr.abs.per.ens)) - sgn3 <- with(quant.obs.fc,sign((par[3])^2 * spr.abs + par[4])) - deriv.par1 <- mean(sgn1, na.rm = na.rm) - deriv.par2 <- with(quant.obs.fc, mean(sgn1 * fc.dev, na.rm = na.rm)) - deriv.par3 <- with(quant.obs.fc, - mean(2* par[3] * sgn1 * sgn2 * fc.ens.av.per.ens, na.rm = na.rm) - - mean(spr.abs * sgn3, na.rm = na.rm) / 2.) - deriv.par4 <- with(quant.obs.fc, - mean(sgn1 * sgn2 * fc.ens.av.per.ens / spr.abs.per.ens, na.rm = na.rm) - - mean(sgn3, na.rm = na.rm) / 2.) 
- return(c(deriv.par1, deriv.par2, deriv.par3, deriv.par4)) -} - -#Below are the core or elementary functions to correct the evaluation set based on the regression parameters -.correct.evmos.fc <- function(fc, par, na.rm){ - quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm) - return(with(quant.fc.mp, par[1] + par[2] * fc)) -} -.correct.mse.min.fc <- function(fc, par, na.rm){ - quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm) - return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * par[3])) -} -.correct.crps.min.fc <- function(fc, par, na.rm){ - quant.fc.mp <- .calc.fc.quant.ext(fc = fc, na.rm = na.rm) - return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * abs((par[3])^2 + par[4] / spr.abs))) -} - -# Function to calibrate the individual members with the RPC-based method -.CalibrationMembersRPC <- function(exp, ens_mean, ens_mean_cal, var_obs, var_noise, r){ - member_cal <- (exp - ens_mean) * sqrt(var_obs) * sqrt(1 - r^2) / sqrt(var_noise) + ens_mean_cal - return(member_cal) -} diff --git a/tools/tmp/CST_QuantileMapping.R b/tools/tmp/CST_QuantileMapping.R deleted file mode 100644 index a4e4a9d6..00000000 --- a/tools/tmp/CST_QuantileMapping.R +++ /dev/null @@ -1,325 +0,0 @@ -#'Quantile Mapping for seasonal or decadal forecast data -#' -#'@description This function is a wrapper of fitQmap and doQmap from package -#''qmap' to be applied on the object of class 's2dv_cube'. The quantile mapping -#'adjustment between an experiment, typically a hindcast, and observation is -#'applied to the experiment itself or to a provided forecast. - -#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} -#'@param exp An object of class \code{s2dv_cube}. -#'@param obs An object of class \code{s2dv_cube}. -#'@param exp_cor An object of class \code{s2dv_cube} in which the quantile -#' mapping correction should be applied. If it is not specified, the correction -#' is applied in object 'exp'. 
-#'@param sdate_dim A character string indicating the dimension name in which -#' cross-validation would be applied when exp_cor is not provided. 'sdate' by -#' default. -#'@param memb_dim A character string indicating the dimension name where -#' ensemble members are stored in the experimental arrays. 'member' by default. -#'@param window_dim A character string indicating the dimension name where -#' samples have been stored. It can be NULL (default) in case all samples are -#' used. -#'@param method A character string indicating the method to be used:'PTF', -#' 'DIST','RQUANT','QUANT','SSPLIN'. By default, the empirical quantile mapping -#' 'QUANT' is used. -#'@param na.rm A logical value indicating if missing values should be removed -#' (FALSE by default). -#'@param ncores An integer indicating the number of cores for parallel -#' computation using multiApply function. The default value is NULL (1). -#'@param ... Additional parameters to be used by the method choosen. See qmap -#' package for details. -#' -#'@return An object of class \code{s2dv_cube} containing the experimental data -#'after applying the quantile mapping correction. 
-#' -#'@seealso \code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}} -#'@examples -#'# Use synthetic data -#'exp <- NULL -#'exp$data <- 1 : c(1 * 10 * 20 * 60 * 6 * 7) -#'dim(exp$data) <- c(dataset = 1, member = 10, sdate = 20, ftime = 60 , -#' lat = 6, lon = 7) -#'class(exp) <- 's2dv_cube' -#'obs$data <- 101 : c(100 + 1 * 1 * 20 * 60 * 6 * 7) -#'dim(obs$data) <- c(dataset = 1, member = 1, sdate = 20, ftime = 60 , -#' lat = 6, lon = 7) -#'class(obs) <- 's2dv_cube' -#'res <- CST_QuantileMapping(exp, obs) -#' -#'# Use data in package -#'exp <- lonlat_temp$exp -#'exp$data <- exp$data[, , 1:4, , 1:2, 1:3] -#'dim(exp$data) <- c(dataset = 1, member = 15, sdate = 4, ftime = 3, -#' lat = 2, lon = 3) -#'obs <- lonlat_temp$obs -#'obs$data <- obs$data[, , 1:4, , 1:2, 1:3] -#'dim(obs$data) <- c(dataset = 1, member = 1, sdate = 4, ftime = 3, -#' lat = 2, lon = 3) -#'exp_cor <- lonlat_temp$exp -#'exp_cor$data <- exp_cor$data[, 1, 5:6, , 1:2, 1:3] -#'dim(exp_cor$data) <- c(dataset = 1, member = 1, sdate = 2, ftime = 3, -#' lat = 2, lon = 3) -#'res <- CST_QuantileMapping(exp, obs, exp_cor, window_dim = 'ftime') -#' -#'@import qmap -#'@import multiApply -#'@import s2dv -#'@export -CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', - memb_dim = 'member', window_dim = NULL, - method = 'QUANT', na.rm = FALSE, - ncores = NULL, ...) { - if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) { - stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - if (!is.null(exp_cor)) { - if (!inherits(exp_cor, 's2dv_cube')) { - stop("Parameter 'exp_cor' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - } - - QMapped <- QuantileMapping(exp = exp$data, obs = obs$data, - exp_cor = exp_cor$data, - sdate_dim = sdate_dim, memb_dim = memb_dim, - window_dim = window_dim, method = method, - na.rm = na.rm, ncores = ncores, ...) 
- if (is.null(exp_cor)) { - exp$data <- QMapped - exp$Datasets <- c(exp$Datasets, obs$Datasets) - exp$source_files <- c(exp$source_files, obs$source_files) - return(exp) - - } else { - exp_cor$data <- QMapped - exp_cor$Datasets <- c(exp_cor$Datasets, exp$Datasets, obs$Datasets) - exp_cor$source_files <- c(exp_cor$source_files, exp$source_files, obs$source_files) - return(exp_cor) - } - - -} - -#'Quantile Mapping for seasonal or decadal forecast data -#' -#'@description This function is a wrapper of fitQmap and doQmap from package -#''qmap' to be applied on multi-dimensional arrays. The quantile mapping -#'adjustment between an experiment, typically a hindcast, and observation is -#'applied to the experiment itself or to a provided forecast. -#' -#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} -#'@param exp A multidimensional array with named dimensions containing the -#' hindcast. -#'@param obs A multidimensional array with named dimensions containing the -#' reference dataset. -#'@param exp_cor A multidimensional array with named dimensions in which the -#' quantile mapping correction should be applied. If it is not specified, the -#' correction is applied on object 'exp'. -#'@param sdate_dim A character string indicating the dimension name in which -#' cross-validation would be applied when exp_cor is not provided. 'sdate' by -#' default. -#'@param memb_dim A character string indicating the dimension name where -#' ensemble members are stored in the experimental arrays. 'member' by -#' default. -#'@param window_dim A character string indicating the dimension name where -#' samples have been stored. It can be NULL (default) in case all samples are -#' used. -#'@param method A character string indicating the method to be used: 'PTF', -#' 'DIST','RQUANT','QUANT','SSPLIN'. By default, the empirical quantile mapping -#' 'QUANT' is used. -#'@param na.rm A logical value indicating if missing values should be removed -#' (FALSE by default). 
-#'@param ncores An integer indicating the number of cores for parallel -#' computation using multiApply function. The default value is NULL (1). -#'@param ... Additional parameters to be used by the method choosen. See qmap -#' package for details. -#' -#'@return An array containing the experimental data after applying the quantile -#'mapping correction. -#' -#'@seealso \code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}} -#'@examples -#'# Use synthetic data -#'exp <- 1 : c(1 * 10 * 20 * 60 * 6 * 7) -#'dim(exp) <- c(dataset = 1, member = 10, sdate = 20, ftime = 60 , -#' lat = 6, lon = 7) -#'class(exp) <- 's2dv_cube' -#'obs <- 101 : c(100 + 1 * 1 * 20 * 60 * 6 * 7) -#'dim(obs) <- c(dataset = 1, member = 1, sdate = 20, ftime = 60 , -#' lat = 6, lon = 7) -#'res <- QuantileMapping(exp, obs) -#'# Use data in package -#'exp <- lonlat_temp$exp$data[, , 1:4, , 1:2, 1:3] -#'dim(exp) <- c(dataset = 1, member = 15, sdate = 4, ftime = 3, -#' lat = 2, lon = 3) -#'obs <- lonlat_temp$obs$data[, , 1:4, , 1:2, 1:3] -#'dim(obs) <- c(dataset = 1, member = 1, sdate = 4, ftime = 3, -#' lat = 2, lon = 3) -#'exp_cor <- lonlat_temp$exp$data[, 1, 5:6, , 1:2, 1:3] -#'dim(exp_cor) <- c(dataset = 1, member = 1, sdate = 2, ftime = 3, -#' lat = 2, lon = 3) -#'res <- QuantileMapping(exp, obs, exp_cor, window_dim = 'ftime') -#' -#'@import qmap -#'@import multiApply -#'@import s2dv -#'@export -QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', - memb_dim = 'member', window_dim = NULL, - method = 'QUANT', - na.rm = FALSE, ncores = NULL, ...) 
{ - # exp and obs - obsdims <- names(dim(obs)) - expdims <- names(dim(exp)) - if (!is.array(exp) || !is.numeric(exp)) { - stop("Parameter 'exp' must be a numeric array.") - } - if (!is.array(obs) || !is.numeric(obs)) { - stop("Parameter 'obs' must be a numeric array.") - } - if (is.null(expdims)) { - stop("Parameter 'exp' must have dimension names.") - } - if (is.null(obsdims)) { - stop("Parameter 'obs' must have dimension names.") - } - # sdate_dim - if (!is.character(sdate_dim) | length(sdate_dim) != 1) { - stop("Parameter 'sdate_dim' must be a character string.") - } - if (!sdate_dim %in% expdims | !sdate_dim %in% obsdims) { - stop("Parameter 'sdate_dim' is not found in 'exp' or 'obs' dimension.") - } - if (dim(exp)[sdate_dim] == 1 || dim(obs)[sdate_dim] == 1) { - stop("Parameter 'exp' and 'obs' must have dimension length of 'sdate_dim' bigger than 1.") - } - # exp_cor - if (!is.null(exp_cor)) { - if (is.null(names(dim(exp_cor)))) { - stop("Parameter 'exp_cor' must have dimension names.") - } - if (!sdate_dim %in% names(dim(exp_cor))) { - stop("Parameter 'sdate_dim' is not found in 'exp_cor' dimension.") - } - } - # method - if (!(method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) | length(method) != 1) { - stop("Parameter 'method' must be one of the following methods: ", - "'PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN'.") - } - # memb_dim - if (!all(memb_dim %in% obsdims)) { - obs <- InsertDim(obs, posdim = 1, lendim = 1, - name = memb_dim[!(memb_dim %in% obsdims)]) - } - if (any(!memb_dim %in% expdims)) { - stop("Parameter 'memb_dim' is not found in 'exp' dimensions.") - } - sample_dims <- c(memb_dim, sdate_dim) - # window_dim - if (!is.null(window_dim)) { - if (!(window_dim %in% obsdims)) { - stop("Parameter 'window_dim' is not found in 'obs'.") - } - obs <- CSTools::MergeDims(obs, c(memb_dim, window_dim)) - if (window_dim %in% expdims) { - exp <- CSTools::MergeDims(exp, c(memb_dim, window_dim)) - warning("Parameter 'window_dim' is found in exp and is 
merged to 'memb_dim'.") - } - } - # na.rm - if (!is.logical(na.rm) | length(na.rm) > 1) { - stop("Parameter 'na.rm' must be one logical value.") - } - # ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - - if (!is.null(exp_cor)) { - qmaped <- Apply(list(exp, obs, exp_cor), target_dims = sample_dims, - fun = .qmapcor, method = method, sdate_dim = sdate_dim, - na.rm = na.rm, ..., - ncores = ncores)$output1 - } else { - qmaped <- Apply(list(exp, obs), target_dims = sample_dims, - fun = .qmapcor, exp_cor = NULL, method = method, - sdate_dim = sdate_dim, na.rm = na.rm, ..., - ncores = ncores)$output1 - } - return(qmaped) -} - -.qmapcor <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate', method = 'QUANT', - na.rm = FALSE, ...) { - - # exp: [memb (+ window), sdate] - # obs: [memb (+ window), sdate] - # exp_cor: NULL or [memb, sdate] - - if (is.null(exp_cor)) { - applied <- exp * NA - for (sd in 1:dim(exp)[sdate_dim]) { - if (na.rm) { - # select start date for cross-val - nas_pos <- which(!is.na(exp[, sd])) - obs2 <- as.vector(obs[, -sd]) - exp2 <- as.vector(exp[, -sd]) - exp_cor2 <- as.vector(exp[, sd]) - # remove NAs - obs2 <- obs2[!is.na(obs2)] - exp2 <- exp2[!is.na(exp2)] - exp_cor2 <- exp_cor2[!is.na(exp_cor2)] - tryCatch({ - adjust <- fitQmap(obs2, exp2, method = method, ...) - applied[nas_pos, sd] <- doQmap(exp_cor2, adjust, ...) - }, - error = function(error_message) { - return(applied[, sd]) - }) - } else { - # na.rm = FALSE shouldn't fail, just return NA - if (anyNA(obs[, -sd]) | anyNA(exp[, -sd])) { - applied[, sd] <- NA - } else { - adjust <- fitQmap(as.vector(obs[, -sd]), as.vector(exp[, -sd]), - method = method, ...) 
- exp2 <- exp[, sd] - if (sum(is.na(exp2)) >= 1) { - app <- rep(NA, length(exp2)) - nas_pos <- which(is.na(exp2)) - exp2 <- exp2[!is.na(exp2)] - app[-nas_pos] <- doQmap(as.vector(exp2), adjust, ...) - } else { - app <- doQmap(as.vector(exp2), adjust, ...) - } - applied[, sd] <- app - } - } - } - } else { - applied <- exp_cor * NA - if (na.rm) { - tryCatch({ - adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)], - method = method, ...) - applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)], - adjust, ...) - }, - error = function(error_message) { - return(applied) - }) - } else { - adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...) - applied <- doQmap(as.vector(exp_cor), adjust, ...) - } - dim(applied) <- dim(exp_cor) - } - return(applied) -} -- GitLab From b548380f61eb0ff2669cebd1594260c8433101db Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 27 Oct 2022 15:46:56 +0200 Subject: [PATCH 79/81] Fix pipeline --- modules/Calibration/Calibration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index df96789f..59e5451a 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -1,5 +1,5 @@ -calibrate_datasets <- function(data, recipe) { +calibrate_datasets <- function(recipe, data) { # Function that calibrates the hindcast using the method stated in the # recipe. If the forecast is not null, it calibrates it as well. 
# -- GitLab From 69c770b0345910e396bc7dbb79ff13938bd366dd Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 28 Oct 2022 13:34:42 +0200 Subject: [PATCH 80/81] Update gitignore, remove old dict --- .gitignore | 1 + conf/vars-dict.yml | 114 --------------------------------------------- 2 files changed, 1 insertion(+), 114 deletions(-) delete mode 100644 conf/vars-dict.yml diff --git a/.gitignore b/.gitignore index 8fec461d..d17d7634 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ out-logs/ /modules/Calibration/test_victoria.R modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml +conf/vitigeoss-vars-dict.yml diff --git a/conf/vars-dict.yml b/conf/vars-dict.yml deleted file mode 100644 index 04549d36..00000000 --- a/conf/vars-dict.yml +++ /dev/null @@ -1,114 +0,0 @@ - -vars: -# ECVs - tas: - units: "°C" - longname: "Daily mean temperature at surface" - outname: ~ - tasmin: - units: "°C" - longname: "Minimum daily temperature at surface" - outname: ~ - tasmax: - units: "°C" - longname: "Maximum daily temperature at surface" - outname: ~ - sfcwind: - units: "m/s" - longname: "Surface wind speed module" - outname: ~ - rsds: - units: "W/m2" - longname: "Surface solar radiation downwards" - outname: ~ - psl: - units: "hPa" - longname: "Mean sea level pressure" - outname: ~ - prlr: - units: "mm" - longname: "Total precipitation" - outname: ~ -# CFs - cfwnd1: - units: "%" - longname: "Wind Capacity factor IEC1" - outname: ~ - cfwnd2: - units: "%" - longname: "Wind Capacity factor IEC2" - outname: ~ - cfwnd3: - units: "%" - longname: "Wind Capacity factor IEC3" - outname: ~ - cfslr: - units: "%" - longname: "Solar Capacity factor" - outname: ~ -# Energy - edmnd: - units: "GW" - longname: "Electricity Demmand" - outname: ~ - wndpwo: - units: "GW" - longname: "Wind Power" - outname: ~ - dmndnetwnd: - units: "GW" - longname: "Demmand-net-Wind" - outname: ~ -# Indices - Spr32: 
- units: "days" - longname: > - Total count of days when daily maximum temp exceeded 32°C - from April 21st to June 21st - outname: ~ - SU35: - units: "days" - longname: > - Total count of days when daily maximum temp exceeded 35°C - from June 21st to September 21st - outname: ~ - SU36: - units: "days" - longname: > - Total count of days when daily maximum temp exceeded 36°C - from June 21st to September 21st - outname: ~ - SU40: - units: "days" - longname: > - Total count of days when daily maximum temp exceeded 40°C - from June 21st to September 21st - outname: ~ - GDD: - units: "days" - longname: > - The sum of the daily differences between daily mean - temperature and 10°C from April 1st to October 31st - outname: ~ - GST: - units: "°C" - longname: "The average temperature from April 1st to October 31st" - outname: ~ - SprTX: - units: "°C" - longname: "The average daily maximum temperature from April 1st to October 31st" - outname: ~ - WSDI: - units: "" - longname: > - The total count of days with at least 6 consecutives days - when the daily temperature maximum exceeds its 90th percentile - outname: ~ - SprR: - units: "mm" - longname: 'Total precipitation from April 21st to June 21st' - outname: ~ - HarR: - units: "mm" - longname: 'Total precipitation from August 21st to September 21st' - outname: ~ -- GitLab From 2cebd66393d5305933d13cfadb5c25a2712ec455 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 28 Oct 2022 14:44:47 +0200 Subject: [PATCH 81/81] Display data summary when log level is INFO or lower --- modules/Loading/Loading.R | 10 ++++++---- modules/Loading/Loading_decadal.R | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index f78bd144..8d54d63d 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -344,10 +344,12 @@ load_datasets <- function(recipe) { } # Print a summary of the loaded data for the user, for each object - 
data_summary(hcst, recipe) - data_summary(obs, recipe) - if (!is.null(fcst)) { - data_summary(fcst, recipe) + if (recipe$Run$logger$threshold <= 2) { + data_summary(hcst, recipe) + data_summary(obs, recipe) + if (!is.null(fcst)) { + data_summary(fcst, recipe) + } } info(recipe$Run$logger, diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index e9f8b274..8046344b 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -507,10 +507,12 @@ load_datasets <- function(recipe) { #------------------------------------------- # Print a summary of the loaded data for the user, for each object - data_summary(hcst, recipe) - data_summary(obs, recipe) - if (!is.null(fcst)) { - data_summary(fcst, recipe) + if (recipe$Run$logger$threshold <= 2) { + data_summary(hcst, recipe) + data_summary(obs, recipe) + if (!is.null(fcst)) { + data_summary(fcst, recipe) + } } info(recipe$Run$logger, -- GitLab