diff --git a/MODULES b/MODULES index ce379c23eb6f1bee1c611f9e073f5366a55612c0..24ae65641cd24503f2bab3f7f17629c4c9ea92af 100644 --- a/MODULES +++ b/MODULES @@ -40,6 +40,16 @@ elif [[ $HOSTNAME == "bsceshub02.bsc.es" ]]; then module load GDAL/3.5.2-foss-2021b-Python-3.9.6 module load PROJ/9.1.0-foss-2021b module load Phantomjs/2.1.1 + +elif [[ $BSC_MACHINE == "amd" ]]; then + + module purge + module load CDO/1.9.10-foss-2019b + module load R/4.1.2-foss-2019b + module load GEOS/3.7.2-foss-2019b-Python-3.7.4 + module load GDAL/3.5.0-foss-2019b-Python-3.7.4 + module load PROJ/9.0.0-GCCcore-8.3.0 + module load Phantomjs/2.1.1 else diff --git a/bsc_logo_small.png b/bsc_logo_small.png new file mode 100644 index 0000000000000000000000000000000000000000..56c7dcf3b559caf94defd7544446e3fbfd1e1a8d Binary files /dev/null and b/bsc_logo_small.png differ diff --git a/build_compute_workflow_crossval.R b/build_compute_workflow_crossval.R index 9b1cbb3d5c8bf06bdd171b86eab3e76739f5cc22..0519e1a58f0de748cfbbd9303375cad4c1f1785a 100644 --- a/build_compute_workflow_crossval.R +++ b/build_compute_workflow_crossval.R @@ -11,7 +11,10 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, appenders = list(console_appender(layout = .custom_log_layout()))) modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) - return_datasets <- strsplit(recipe$Run$startR_workflow$return, ", | |,")[[1]] + return_datasets <- NULL + if (!is.null(recipe$Run$startR_workflow$return)) { + return_datasets <- strsplit(recipe$Run$startR_workflow$return, ", | |,")[[1]] + } ## TODO: Decide on file path # for (module in modules) { @@ -51,8 +54,12 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, data <- Units(recipe, data, retrieve = F) # Call workflows for (module in modules) { - if (module == "aggregation") { - data <- Aggregation(recipe, data, retrieve = F, nchunks = nchunks) + if (module == "orography" && recipe$Analysis$Variables$name == 'tas') { + ## TODO: This should work differently + source("modules/Loading/R/orography_correction.R") + data <- orography_correction(recipe = recipe, data = data) + } else if (module == "aggregation") { + data <- Aggregation(recipe = recipe, data = data, retrieve = F, nchunks = nchunks) } else if (module == "crossval_anomalies") { data <- Crossval_anomalies(recipe = recipe, data = data, retrieve = FALSE) } else if (module == "crossval_metrics") { @@ -71,7 +78,8 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, return_list <- list(hcst = data$hcst$data, fcst = data$fcst$data, obs = data$obs$data, - skill = skill_metrics, + skill = skill_metrics$skill, + skill_syear = skill_metrics$skill_syear, statistics = statistics, probabilities = probabilities) # Eliminate NULL elements from the return list @@ -103,7 +111,10 @@ run_compute_workflow <- function(recipe, data, last_module = NULL) { # Step 1: Retrieve the modules that will be called inside the workflow modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) - return_datasets <- strsplit(recipe$Run$startR_workflow$return, ", | |,")[[1]] + return_datasets <- NULL + if (!is.null(recipe$Run$startR_workflow$return)) { + return_datasets <- strsplit(recipe$Run$startR_workflow$return, ", | |,")[[1]] + } # --------------------------------------------------------------------------- # Step 2: Define the inputs and outputs for Compute() @@ -156,6 +167,12 @@ run_compute_workflow <- function(recipe, data, last_module = NULL) { skill_output_dims <- skill_dims[!skill_dims %in% 
chunking_dims] output_dims <- c(output_dims, list(skill = skill_output_dims)) + if (grepl("syear", recipe$Analysis$Workflow$Skill$metric)) { + skill_syear_dims <- c('metric', 'syear', 'var', 'time', spatial_output_dims) + skill_syear_output_dims <- skill_syear_dims[!skill_syear_dims %in% chunking_dims] + output_dims <- c(output_dims, + list(skill_syear = skill_syear_output_dims)) + } } # Add statistics if statistics module is called if ("statistics" %in% modules) { @@ -277,35 +294,35 @@ run_compute_workflow <- function(recipe, data, last_module = NULL) { } ## TODO: Replicate for probabilities and other modules; refactor - if (!is.null(res$skill)) { - tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Skill") - metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) - res$skill <- ClimProjDiags::ArrayToList(res$skill, - dim = 'metric', - level = 'list', - names = metric_list) - # Put chunked metadata back together - res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, - chunks = recipe$Run$startR_workflow$chunk_along, - array_dims = dim(res$skill[[1]]), - metadata_file_pattern = "metric_metadata") + atomic_name <- "" + if (!is.null(recipe$Run$atomic_name)) { + atomic_name <- recipe$Run$atomic_name } - if (!is.null(res$statistics)) { - tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Statistics") - metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) - res$statistics <- ClimProjDiags::ArrayToList(res$statistics, - dim = 'metric', - level = 'list', - names = metric_list) - res$statistics$metadata <- retrieve_metadata(tmp_dir = tmp_dir, - chunks = recipe$Run$startR_workflow$chunk_along, - array_dims = dim(res$statistics[[1]]), - metadata_file_pattern = "metric_metadata") + for (metrics in c("skill", "skill_syear", "statistics")) { + if (!is.null(res[[metrics]])) { + tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", + atomic_name, str_to_title(metrics)) + metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) + res[[metrics]] <- ClimProjDiags::ArrayToList(res[[metrics]], + dim = 'metric', + level = 'list', + names = metric_list) + res[[metrics]]$metadata <- retrieve_metadata(tmp_dir = tmp_dir, + chunks = recipe$Run$startR_workflow$chunk_along, + array_dims = dim(res[[metrics]][[1]]), + metadata_file_pattern = "metric_metadata") + } + } + if (!is.null(res$skill_syear)) { + res$skill_syear$metadata <- NULL + res$skill <- c(res$skill[-which(names(res$skill) == "metadata")], + res$skill_syear, res$skill["metadata"]) + res$skill_syear <- NULL } - + # --------------------------------------------------------------------------- # Step 5: Remove temporary files - unlink(file.path(recipe$Run$output_dir, "outputs", "tmp"), recursive = TRUE) + unlink(file.path(recipe$Run$output_dir, "outputs", "tmp", atomic_name), recursive = TRUE) # --------------------------------------------------------------------------- # Step 6: Save data diff --git a/conf/archive_reference.yml b/conf/archive_reference.yml index ac7bf27fd8045331122dbdde64de64d49f737f2a..a7dd541324f0c3a9fa981fc8321082f2691865e5 100644 --- a/conf/archive_reference.yml +++ b/conf/archive_reference.yml @@ -12,6 +12,7 @@ gpfs: calendar: "standard" reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/orography.nc" esarchive: src_ref: 
"/esarchive/" @@ -53,6 +54,7 @@ esarchive: calendar: "gregorian" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + orography: "/esarchive/recon/ecmwf/era5/constant/orography.nc" ERA5-Land: name: "ERA5-Land" institution: "European Centre for Medium-Range Weather Forecasts" diff --git a/conf/archive_seasonal.yml b/conf/archive_seasonal.yml index 3ce0db5cb886f4443f98345dbe328f8a7e505968..2332bdad11ae650950ebe86333b5389070f5750a 100644 --- a/conf/archive_seasonal.yml +++ b/conf/archive_seasonal.yml @@ -35,6 +35,23 @@ gpfs: time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_system51c3s.txt" land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/orography.nc" + ECMWF-SEAS5: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ecmwf/system5c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system5c3s/constant/lsm/lsm.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system5c3s/constant/orography.nc" CMCC-SPS3.5: name: "CMCC-SPS3.5" institution: "European Centre for Medium-Range Weather Forecasts" @@ -49,6 +66,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system35c3s.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/cmcc/system35c3s/constant/orography.nc" Meteo-France-System8: name: "Meteo-France System 8" institution: "European Centre for Medium-Range Weather Forecasts" @@ -63,6 +81,8 @@ gpfs: time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/meteofrance/system8c3s/constant/lsm/lsm.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/meteofrance/system8c3s/constant/orography.nc" UK-MetOffice-Glosea601: name: "UK MetOffice GloSea 6 (v6.01)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -77,6 +97,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo600.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ukmo/glosea6_system601-c3s/constant/orography.nc" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? 
@@ -91,6 +112,7 @@ gpfs: calendar: "gregorian" time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/ncep/cfs-v2/constant/orography.nc" DWD-GCFS2.1: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" @@ -107,6 +129,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system21_m1.txt" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/dwd/system21c3s/constant/orography.nc" ECCC-CanCM4i: name: "ECCC CanCM4i (v3)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -121,18 +144,7 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" - Reference: - ERA5: - name: "ERA5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/ecmwf/era5/" - monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", - "psl":"monthly_mean/psl_f1h-r1440x721cds/", - "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", - "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} - calendar: "standard" - reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" - land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + orography: "/gpfs/projects/bsc32/esarchive_cache/exp/eccc/eccc3/constant/orography.nc" ######################################################################### esarchive: @@ -166,6 +178,7 @@ esarchive: time_stamp_lag: "0" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" land_sea_mask: "/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc" + orography: "/esarchive/exp/ecmwf/system5c3s/constant/orography.nc" ECMWF-SEAS5.1: name: "ECMWF SEAS5 (v5.1)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -185,6 +198,8 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_system51c3s.txt" + land_sea_mask: "/esarchive/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" + orography: "/esarchive/exp/ecmwf/system51c3s/constant/orography.nc" Meteo-France-System7: name: "Meteo-France System 7" institution: "European Centre for Medium-Range Weather Forecasts" @@ -199,6 +214,21 @@ esarchive: time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" + Meteo-France-System8: + name: "Meteo-France System 8" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/meteofrance/system8c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "g500":"monthly_mean/g500_f12h/", + "prlr":"monthly_mean/prlr_f24h/", "sfcWind": "monthly_mean/sfcWind_f6h/", + "tasmax":"monthly_mean/tasmax_f24h/", "tasmin": "monthly_mean/tasmin_f24h/", + "tos":"monthly_mean/tos_f6h/", "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system8c3s.txt" + orography: "/esarchive/exp/meteofrance/system8c3s/constant/orography.nc" DWD-GCFS2.1: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" @@ -212,6 +242,7 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system21_m1.txt" + orography: "/esarchive/exp/dwd/system21_m1/constant/orography.nc" CMCC-SPS3.5: name: "CMCC-SPS3.5" 
institution: "European Centre for Medium-Range Weather Forecasts" @@ -225,6 +256,7 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system35c3s.txt" + orography: "/esarchive/exp/cmcc/system35c3s/constant/orography.nc" JMA-CPS2: name: "JMA System 2" institution: "European Centre for Medium-Range Weather Forecasts" @@ -240,15 +272,17 @@ esarchive: ECCC-CanCM4i: name: "ECCC CanCM4i" institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/eccc/eccc1/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", - "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} + src: "exp/eccc/eccc3/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "tasmin":"monthly_mean/tasmin_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 10 hcst: 10 calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" + orography: "/esarchive/exp/eccc/eccc3/constant/orography.nc" UK-MetOffice-Glosea600: name: "UK MetOffice GloSea 6 (v6.0)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -261,6 +295,34 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo600.txt" + UK-MetOffice-Glosea601: + name: "UK MetOffice GloSea 6 (v6.01)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system601-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo601.txt" + orography: "/esarchive/exp/ukmo/glosea6_system601-c3s/constant/orography.nc" + UK-MetOffice-Glosea602: + name: "UK MetOffice GloSea 602" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system602-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo602.txt" + orography: "/esarchive/exp/ukmo/glosea6_system602-c3s/constant/orography.nc" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? 
@@ -273,6 +335,7 @@ esarchive: calendar: "gregorian" time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + orography: "/esarchive/exp/ncep/cfs-v2/constant/orography.nc" mars: src_sys: "/esarchive/scratch/aho/tmp/GRIB/" #"/mars/" System: diff --git a/conf/grid_description/griddes_system8c3s.txt b/conf/grid_description/griddes_system8c3s.txt new file mode 100644 index 0000000000000000000000000000000000000000..942238c6cd86a6f981d82a3c73ae67df95ac7abe --- /dev/null +++ b/conf/grid_description/griddes_system8c3s.txt @@ -0,0 +1,17 @@ +# +# gridID 30 +# +gridtype = lonlat +gridsize = 64800 +xname = lon +xlongname = longitude +xunits = degrees_east +yname = lat +ylongname = latitude +yunits = degrees_north +xsize = 360 +ysize = 180 +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_ukmo601.txt b/conf/grid_description/griddes_ukmo601.txt new file mode 100644 index 0000000000000000000000000000000000000000..942238c6cd86a6f981d82a3c73ae67df95ac7abe --- /dev/null +++ b/conf/grid_description/griddes_ukmo601.txt @@ -0,0 +1,17 @@ +# +# gridID 30 +# +gridtype = lonlat +gridsize = 64800 +xname = lon +xlongname = longitude +xunits = degrees_east +yname = lat +ylongname = latitude +yunits = degrees_north +xsize = 360 +ysize = 180 +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_ukmo602.txt b/conf/grid_description/griddes_ukmo602.txt new file mode 100644 index 0000000000000000000000000000000000000000..05a49ea654aa4e283b88b57116cc55148e8d301d --- /dev/null +++ b/conf/grid_description/griddes_ukmo602.txt @@ -0,0 +1,17 @@ +# Grid description file for UKMO602 (CDS) +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index 78b19b1a3fbd08002cd59e18da6786cd7dc3f33b..466653b2e55475312c677270ec1032f434b3b4cd 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -219,6 +219,17 @@ vars: long_name: "NAO index" standard_name: "nao_index" accum: no + spei: + units: "adim" + long_name: "Standardized Precipitation-Evapotranspiration Index" + standard_name: "SPEI" + accum: no + spi: + units: "adim" + long_name: "Standardized Precipitation Index" + standard_name: "SPI" + accum: no + # Coordinates coords: diff --git a/example_scripts/exec_units.R b/example_scripts/exec_units.R index 081038d220bf8bdff914d2ea1ef48cb9963420e3..e0c49958782ff059c46bdd9ef69f791bfba755b1 100644 --- a/example_scripts/exec_units.R +++ b/example_scripts/exec_units.R @@ -1,6 +1,5 @@ rm(list=ls()) gc() -setwd("/esarchive/scratch/nperez/git/auto-s2s") source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") @@ -11,12 +10,13 @@ source("modules/Visualization/Visualization.R") source("tools/prepare_outputs.R") # Read recipe -#args = commandArgs(trailingOnly = TRUE) -#recipe_file <- args[1] -#recipe <- read_atomic_recipe(recipe_file) -## to test a single recipe: - # recipe_file <- "recipes/examples/recipe_tas_seasonal_units.yml" - # recipe_file <- "recipes/examples/recipe_prlr_seasonal_units.yml" +## To run many recipes using launch_SUNSET.sh: +# args = commandArgs(trailingOnly = TRUE) +# recipe_file <- args[1] +# recipe <- read_atomic_recipe(recipe_file) +## To test a single recipe: +# recipe_file <- 
"recipes/examples/recipe_tas_seasonal_units.yml" +# recipe_file <- "recipes/examples/recipe_prlr_seasonal_units.yml" recipe <- prepare_outputs(recipe_file) @@ -31,8 +31,53 @@ data <- Calibration(recipe, test) skill_metrics <- Skill(recipe, data) # Compute percentiles and probability bins probabilities <- Probabilities(recipe, data) -# Export all data to netCDF -## TODO: Fix plotting -# save_data(recipe, data, skill_metrics, probabilities) + # Plot data -plot_data(recipe, data, skill_metrics, probabilities, significance = T) +# Below, many examples of different ways to plot the data are shown, using +# different projections and ways of adding statistical significance masking. +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = F) +list.files(paste0(recipe$Run$output_dir, "/plots/ECMWF-SEAS51/ERA5/evmos/tas/")) +# signif TRUE: +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = T) +list.files(paste0(recipe$Run$output_dir, "/plots/ECMWF-SEAS51/ERA5/evmos/tas/")) +# signif mask: this option is not available in PlotEquiMap, it return dots +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = 'mask') +list.files(paste0(recipe$Run$output_dir, "/plots/ECMWF-SEAS51/ERA5/evmos/tas/")) +## The difference between TRUE and mask for PlotEquiMap is the file name + +# signif dots: +visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = 'dots') +list.files(paste0(recipe$run$output_dir, "/plots/ecmwf-seas51/era5/evmos/tas/")) + +# In this case duplicates files but simplifying the code makes it very complex +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = 'both') + +recipe$Analysis$Workflow$Visualization$multi_panel <- TRUE +recipe$Analysis$Workflow$Visualization$mask_terciles <- NULL +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = F) + +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = T) + +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = 'mask') + +recipe$Analysis$Workflow$Visualization$multi_panel <- FALSE +recipe$Analysis$Workflow$Visualization$projection <- 'Robinson' +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = F) +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = T) +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = 'dots') +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = 'mask') +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = 'both') + diff --git a/example_scripts/test_compute_crossval.R b/example_scripts/test_compute_crossval.R index 4751dcf673eed9456cbce0bf3b17eec629e74c2d..0f90da9949b40b1d76cf8ac51d2df430ddedebbd 100644 --- a/example_scripts/test_compute_crossval.R +++ b/example_scripts/test_compute_crossval.R @@ -6,18 +6,24 @@ 
source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") source("build_compute_workflow_crossval.R") -recipe_file <- "../vic-personal-code/auto-s2s-tests/cross-validation-global/recipe_scorecards_example.yml" +# recipe_file <- "/gpfs/projects/bsc32/bsc032762/git/vic-personal-code/auto-s2s-tests/cross-validation-global/recipe_scorecards_example.yml" # recipe_file <- "/home/Earth/vagudets/oldhome/git/victoria-personal-code/auto-s2s-tests/cross-validation-global/recipe_scorecards_example.yml" -recipe <- prepare_outputs(recipe_file) +# recipe <- prepare_outputs(recipe_file) + +args <- commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) + +#### LOAD MODULES #### ## TODOs: ## 1. Statistics -> DONE ## 2. Do not return hcst, fcst and obs -> DONE-ish (Should be generalized) ## 3. Recipe checks -> DONE-ish (should be generalized) -## 4. Test without orography correction (full workflow) +## 4. Test without orography correction (full workflow) -> DONE ## 5. Fix everything to make sure retrieve = TRUE works well (full workflow) -## 6. orography correction -## 7. enssprerr and SprErr do not have the same name: problem? Check Nadia's branch +## 6. orography correction -> DONE +## 7. enssprerr and SprErr do not have the same name: problem? Check Nadia's branch -> DONE-ish (needs merging) ## 8. Workflow requirements (check multimodel case) # Load datasets @@ -39,8 +45,8 @@ if (recipe$Analysis$Workflow$Scorecards$metric_aggregation == 'score') { } ## Add logo to plots -# source("tools/add_logo.R") -# add_logo(recipe, "rsz_rsz_bsc_logo.png") +source("tools/add_logo.R") +add_logo(recipe, "rsz_rsz_bsc_logo.png") if (recipe$Analysis$Dataset$System$name == 'UK-MetOffice-Glosea601' & recipe$Analysis$Time$sdate == '0101' diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index 6149a9639604942f58897363dd0c854f010b79a2..9722010e3342e7478c57397f7a8d95d8afe4df4b 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -128,6 +128,9 @@ if [[ $run_method == "sbatch" ]]; then mkdir -p $logdir echo "Slurm job logs will be stored in $logdir" + # Get corrected recipe + recipe=${outdir}/logs/recipes/$(basename $recipe) + # Launch one job per atomic recipe cd $codedir job_number=0 diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index 5becdac8393a400a4674b6e0d394fa23d4ea5591..02acc31b9a4079df425dd98396f3a8e2932aa202 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -9,6 +9,7 @@ source("modules/Crossval/R/tmp/Corr.R") source("modules/Crossval/R/tmp/Bias.R") source("modules/Crossval/R/tmp/SprErr.R") source("modules/Crossval/R/tmp/Eno.R") +source("modules/Skill/R/tmp/CRPS.R") ## data_crossval is the result from function full_crossval_anomalies or similar. 
## this is a list with the required elements: @@ -70,7 +71,7 @@ Crossval_metrics <- function(recipe, data_crossval, rps_clim <- Apply(list(data_crossval$probs$obs_ev[[1]]), target_dims = c('cat', 'syear'), RPS_clim, bin_dim_abs = 'cat', Fair = fair, - cross.val = FALSE, ncores = ncores)$output1 + cross.val = FALSE, ncores = ncores)$output1 skill_metrics$rps <- rps skill_metrics$rps_clim <- rps_clim # names based on the categories: @@ -78,6 +79,22 @@ Crossval_metrics <- function(recipe, data_crossval, #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps #skill_metrics[[paste0('rps_clim', # exe_rps[ps])]] <- rps_clim + } + if ('rps_syear' %in% requested_metrics) { + rps_syear <- RPS(exp = data_crossval$probs$hcst_ev[[ps]], + obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL, + cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', + Fair = fair, nmemb = nmemb, return_mean = FALSE, + ncores = ncores) + skill_metrics$rps_syear <- rps_syear + } + if ('rps_clim_syear' %in% requested_metrics) { + rps_clim_syear <- Apply(list(data_crossval$probs$obs_ev[[1]]), + target_dims = c('cat', 'syear'), + RPS_clim, bin_dim_abs = 'cat', Fair = fair, + cross.val = FALSE, return_mean = FALSE, + output_dims = 'syear', ncores = ncores)$output1 + skill_metrics$rps_clim_syear <- rps_clim_syear } if ('rpss' %in% requested_metrics) { rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[1]], @@ -92,6 +109,7 @@ Crossval_metrics <- function(recipe, data_crossval, cross.val = FALSE, na.rm = na.rm, sig_method.type = 'two.sided.approx', alpha = alpha, ncores = ncores) + ## TODO: These lines are probably not needed skill_metrics$rpss <- rpss$rpss skill_metrics$rpss_significance <- rpss$sign # TO USE IT when visualization works for more rpsss @@ -114,6 +132,21 @@ Crossval_metrics <- function(recipe, data_crossval, Fair = fair, ncores = ncores) skill_metrics$crps_clim <- crps_clim } + if ('crps_syear' %in% requested_metrics) { + crps_syear <- CRPS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, + time_dim = 'syear', memb_dim = 'ensemble', + Fair = fair, return_mean = FALSE, + ncores = ncores) + skill_metrics$crps_syear <- crps_syear + } + if ('crps_clim_syear' %in% requested_metrics) { + crps_clim_syear <- CRPS(exp = data_crossval$ref_obs_tr, + obs = data_crossval$obs$data, + time_dim = 'syear', memb_dim = 'ensemble', + Fair = fair, return_mean = FALSE, ncores = ncores) + skill_metrics$crps_clim_syear <- crps_clim_syear + } if ('crpss' %in% requested_metrics) { crpss <- CRPSS(exp = data_crossval$hcst$data, obs = data_crossval$obs$data, @@ -162,8 +195,8 @@ Crossval_metrics <- function(recipe, data_crossval, memb_dim = 'ensemble', dat_dim = NULL, time_dim = 'syear', pval = TRUE, ncores = ncores) - skill_metrics$SprErr <- enssprerr$ratio - skill_metrics$SprErr_significance <- enssprerr$p.val <= alpha + skill_metrics$enssprerr <- enssprerr$ratio + skill_metrics$enssprerr_significance <- enssprerr$p.val <= alpha } if ('rms' %in% requested_metrics) { rms <- RMS(exp = data_crossval$hcst$data, @@ -248,6 +281,12 @@ Crossval_metrics <- function(recipe, data_crossval, # reduce dimension to work with Visualization module: skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)}) + ## TODO: Generalize this or find a better way to handle it + # Separate skill metrics that have syear dimension + syear_metrics <- grepl("syear", names(skill_metrics)) + skill_metrics_syear <- skill_metrics[syear_metrics] + skill_metrics <- skill_metrics[!syear_metrics] + # Save metrics if (retrieve) { ## TODO: Update this part @@ 
-259,11 +298,26 @@ Crossval_metrics <- function(recipe, data_crossval, outdir = recipe$Run$output_dir) } else { source("tools/save_metadata_chunks.R") - skill_metrics <- save_metadata_chunks(recipe = recipe, - metrics = skill_metrics, - data_cube = data_crossval$hcst, - module = "Skill", - nchunks = nchunks) + if (length(skill_metrics) > 0) { + skill_metrics <- save_metadata_chunks(recipe = recipe, + metrics = skill_metrics, + data_cube = data_crossval$hcst, + module = "Skill", + nchunks = nchunks) + } else { + skill_metrics <- NULL + } + if (length(skill_metrics_syear) > 0) { + skill_metrics_syear <- save_metadata_chunks(recipe = recipe, + metrics = skill_metrics_syear, + data_cube = data_crossval$hcst, + module = "Skill_syear", + nchunks = nchunks) + } else { + skill_metrics_syear <- NULL + } + skill_metrics <- list(skill = skill_metrics, + skill_syear = skill_metrics_syear) } return(skill_metrics) } diff --git a/modules/Indicators/Indicators.R b/modules/Indicators/Indicators.R new file mode 100644 index 0000000000000000000000000000000000000000..24b25884f0337679995b69ec28db0017a326a5b1 --- /dev/null +++ b/modules/Indicators/Indicators.R @@ -0,0 +1,118 @@ +# Load functions +source("modules/Indicators/R/data_format_csindicators.R") +source("modules/Indicators/R/spei_spi.R") +source("modules/Indicators/R/threshold.R") +source("modules/Indicators/R/data_transformation.R") +source("modules/Indicators/R/malaria_or_ticks.R") + +source("modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R") #### tmp!!! + +Indicators <- function(recipe, data) { + var.list <- strsplit(recipe$Analysis$Variables$name, ", ")[[1]] + ncores <- recipe$Analysis$ncores + + # SPEI + if (!is.null(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) { + if (recipe$Analysis$Workflow$Indicators$SPEI$return_spei) { + # read recipe parameters + pet_method <- recipe$Analysis$Workflow$Indicators$SPEI$PET_method + if (pet_method == "none") { + pet_method <- NULL + } + spei_accum <- recipe$Analysis$Workflow$Indicators$SPEI$Nmonths_accum + spei_standardization <- recipe$Analysis$Workflow$Indicators$SPEI$standardization + spei_stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPEI$standardization_ref_period + if (is.null(spei_stand_refperiod)) { + spei_stand_refperiod <- c(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$hcst_end) + } + spei_stand_refperiod <- as.numeric(spei_stand_refperiod) + spei_stand_handleinf <- recipe$Analysis$Workflow$Indicators$SPEI$standardization_handle_infinity + + # call spei_spi function from modules/Indicators/R + result_spei <- spei_spi( + data = data, + indicator = "spei", + var.list = var.list, + pet_method = pet_method, + accum = spei_accum, + standardization = spei_standardization, + stand_refperiod = spei_stand_refperiod, + stand_handleinf = spei_stand_handleinf, + ncores = ncores + ) + } + } + + # SPI + if (!is.null(recipe$Analysis$Workflow$Indicators$SPI$return_spi)) { + if (recipe$Analysis$Workflow$Indicators$SPI$return_spi) { + # read recipe parameters + spi_accum <- recipe$Analysis$Workflow$Indicators$SPI$Nmonths_accum + spi_standardization <- recipe$Analysis$Workflow$Indicators$SPI$standardization + spi_stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPI$standardization_ref_period + if (is.null(spi_stand_refperiod)) { + spi_stand_refperiod <- c(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$hcst_end) + } + spi_stand_refperiod <- as.numeric(spi_stand_refperiod) + spi_stand_handleinf <- 
recipe$Analysis$Workflow$Indicators$SPI$standardization_handle_infinity + + # call spei_spi function from modules/Indicators/R + result_spi <- spei_spi( + data = data, + indicator = "spi", + var.list = var.list, + pet_method = NULL, + accum = spi_accum, + standardization = spi_standardization, + stand_refperiod = spi_stand_refperiod, + stand_handleinf = spi_stand_handleinf, + ncores = ncores + ) + } + } + + # Threshold based: + if (!is.null(recipe$Analysis$Workflow$Indicators$SelectedThreshold$return_thresholdbased)) { + if (recipe$Analysis$Workflow$Indicators$SelectedThreshold$return_thresholdbased) { + thrs <- recipe$Analysis$Workflow$Indicators$SelectedThreshold$threshold + return.values <- recipe$Analysis$Workflow$Indicators$SelectedThreshold$returnValues + result_threshold <- threshold(data, thrs, var.list, return.values) + } + } + + # Malaria indicator: + if (!is.null(recipe$Analysis$Workflow$Indicators$Malaria$return_climate_suitability)) { + if (recipe$Analysis$Workflow$Indicators$Malaria$return_climate_suitability) { + result_malaria <- list() + for (ssp in recipe$Analysis$Workflow$Indicators$Malaria$ssp) { + result_malaria[[ssp]] <- malaria_or_ticks(data, var.list, ssp) + } + } + } + # Tick-borne disease indicator: + if (!is.null(recipe$Analysis$Workflow$Indicators$Ticks$return_climate_suitability)) { + if (recipe$Analysis$Workflow$Indicators$Ticks$return_climate_suitability) { + result_ticks <- list() + for (ssp in recipe$Analysis$Workflow$Indicators$Ticks$ssp) { + result_ticks[[ssp]] <- malaria_or_ticks(data, var.list, ssp) + } + } + } + + indicators.list <- c( + "SPEI" = "result_spei", + "SPI" = "result_spi", + "ThresholdBased" = "result_threshold", + "Malaria" = "result_malaria", + "Ticks" = "result_ticks" + ) + + result <- list() + for (ind in indicators.list) { + if (exists(ind)) { + result[[names(indicators.list)[which(indicators.list == ind)]]] <- eval(parse(text = ind)) + } + } + + return(result) +} diff --git a/modules/Indicators/R/data_format_csindicators.R b/modules/Indicators/R/data_format_csindicators.R new file mode 100644 index 0000000000000000000000000000000000000000..15fe06a40568e95c7365aacbec56658109ddc426 --- /dev/null +++ b/modules/Indicators/R/data_format_csindicators.R @@ -0,0 +1,36 @@ +# data format for input in CSIndicators SPEI functions +data_format_csindicators <- function(data, vars, var.list) { + dict <- c('tas' = 'tmean', + 'tasmax' = 'tmax', + 'tasmin' = 'tmin', + 'prlr' = 'pr', + 'pet' = 'pet', + 'hurs' = 'hurs', + 'tdps' = 'tdps') + dim.names <- names(dim(data$data)) + + result <- list() + for (var in vars) { + if (var %in% var.list) { + data.var <- CST_Subset(x = data, + var_dim = "var", + dat_dim = "dat", + along = "var", + indices = list(which(var.list == var)), + drop = FALSE) + + # read original units + var.metadata.pos <- which(names(data$attrs$Variable$metadata) == var) + units <- data$attrs$Variable$metadata[[var.metadata.pos]]$units + + ## TODO: Is this needed in any way? 
+      data.var$attrs$Units <- units
+    } else {
+      data.var <- NULL
+    }
+    # append all variables in a list of s2dv_cubes
+    result <- append(result, list(data.var))
+  }
+  names(result) <- dict[vars]
+  return(result)
+}
diff --git a/modules/Indicators/R/data_transformation.R b/modules/Indicators/R/data_transformation.R
new file mode 100644
index 0000000000000000000000000000000000000000..cc9397b1ccaa509e1b912bb8141e3e10268ef144
--- /dev/null
+++ b/modules/Indicators/R/data_transformation.R
@@ -0,0 +1,15 @@
+data_transform_hurs <- function(tas, tdps) {
+  # obtain relative humidity (in %; tas and tdps need to be in degrees C)
+  # ref: https://www.osti.gov/servlets/purl/548871&lang=en
+  # Alduchov OA, Eskridge RE.
+  # Improved Magnus form approximation of saturation vapor pressure.
+  # Journal of Applied Meteorology and Climatology. 1996 Apr;35(4):601-9
+
+  b <- 17.625
+  c <- 243.04
+  hurs <- tas # reuse the cube for its metadata (lon, lat, and attrs$Dates)
+  hurs$data <- 100 * exp(b * c * (tdps$data - tas$data) / ((c + tdps$data) * (c + tas$data)))
+  hurs$attrs$Units <- '%'
+
+  return(hurs) # metadata: coords and dates
+}
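+
+# Illustrative check of the formula above (example values, not from the
+# reference): at saturation (tdps == tas) the exponent is 0, so
+# hurs = 100 * exp(0) = 100%; with tas = 20 C and tdps = 10 C,
+# hurs = 100 * exp(17.625 * 243.04 * -10 / (253.04 * 263.04)) ~ 52.5%.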
diff --git a/modules/Indicators/R/malaria_or_ticks.R b/modules/Indicators/R/malaria_or_ticks.R
new file mode 100644
index 0000000000000000000000000000000000000000..9113ea25c9441cda2598877ef47ae494168c4714
--- /dev/null
+++ b/modules/Indicators/R/malaria_or_ticks.R
@@ -0,0 +1,83 @@
+malaria_or_ticks <- function(data, var.list, ssp) {
+  # define thresholds:
+  if (tolower(ssp) == "p.falciparum") {
+    thrs_tas <- c(18, 32)
+    thrs_prlr <- c(80, Inf)
+    thrs_hurs <- c(60, Inf)
+    var.thrs.list <- c("tas" = "thrs_tas",
+                       "prlr" = "thrs_prlr",
+                       "hurs" = "thrs_hurs")
+    # check in prepare_outputs that the variables in names(var.thrs.list) are requested
+  } else if (tolower(ssp) == "p.vivax") {
+    thrs_tas <- c(14.5, 33)
+    thrs_prlr <- c(80, Inf)
+    thrs_hurs <- c(60, Inf)
+    var.thrs.list <- c("tas" = "thrs_tas",
+                       "prlr" = "thrs_prlr",
+                       "hurs" = "thrs_hurs")
+    # check in prepare_outputs that the variables in names(var.thrs.list) are requested
+  } else if (tolower(ssp) == "i.ricinus") {
+    thrs_tas <- c(10, 26)
+    thrs_hurs <- c(45, Inf)
+    var.thrs.list <- c("tas" = "thrs_tas",
+                       "hurs" = "thrs_hurs")
+    # check in prepare_outputs that the variables in names(var.thrs.list) are requested
+  } else {
+    var.thrs.list <- c() # no variables with thresholds to apply
+  }
+
+  # obtain climatic suitability for obs, hcst and fcst (or elements of "data")
+  result <- data
+  for (ll in names(result)) {
+    data_element <- data[[ll]]
+    if (!is.null(data_element)) {
+      # prepare data
+      ## TODO: There is probably a way to simplify this into a single call
+      tas <- data_format_csindicators(data_element,
+                                      vars = c("tas"),
+                                      var.list = var.list)[[1]]
+      if ("hurs" %in% var.list) {
+        hurs <- data_format_csindicators(data_element,
+                                         vars = c("hurs"),
+                                         var.list = var.list)[[1]]
+      } else {
+        # derive relative humidity from the dew point if not requested directly
+        tdps <- data_format_csindicators(data_element,
+                                         vars = c("tdps"),
+                                         var.list = var.list)[[1]]
+        hurs <- data_transform_hurs(tas, tdps)
+      }
+      if ("prlr" %in% var.list) {
+        prlr <- data_format_csindicators(data_element,
+                                         vars = c("prlr"),
+                                         var.list = var.list)[[1]]
+      }
+
+      # create result object
+      if (length(var.thrs.list) > 0) {
+        result_element <- eval(parse(text = names(var.thrs.list)[1])) # tas
+        result_element$attrs$Units <- NULL
+        result_element$data[which(!is.na(result_element$data))] <- 1
+      } else {
+        result_element <- NULL
+      }
+
+      # apply thresholds
+      if (length(var.thrs.list) > 0) {
+        for (var in names(var.thrs.list)) {
+          thrs <- eval(parse(text = var.thrs.list[which(names(var.thrs.list) == var)]))
+          var.data <- eval(parse(text = var))
+          new.data <- array(0, dim = dim(result_element$data))
+          new.data[which(var.data$data >= thrs[1] & var.data$data <= thrs[2])] <- 1
+          result_element$data <- result_element$data * new.data
+          ## TODO: Change s2dv_cube metadata here?
+          result_element$metadata <- append(result_element$metadata,
+                                            paste0(var, " suitability threshold: ",
+                                                   thrs[1], " to ", thrs[2],
+                                                   " (", var.data$attrs$Units, ")"))
+        }
+      }
+      result[[ll]] <- result_element
+    } # end "if (!is.null...)"
+  } # end "for"
+  return(result)
+}
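+
+# Illustrative call (hypothetical 'data' list with $obs/$hcst/$fcst cubes;
+# tas assumed in degrees C and hurs in %, prlr in the dataset's own units):
+#   suit <- malaria_or_ticks(data, var.list = c("tas", "prlr", "hurs"),
+#                            ssp = "P.falciparum")
+# Each returned element contains 1 where all thresholds hold at once
+# (tas in [18, 32], prlr >= 80, hurs >= 60) and 0 elsewhere.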
diff --git a/modules/Indicators/R/spei_spi.R b/modules/Indicators/R/spei_spi.R
new file mode 100644
index 0000000000000000000000000000000000000000..ebe701264f93f2ff0da97140f4d8f5e4a939973f
--- /dev/null
+++ b/modules/Indicators/R/spei_spi.R
@@ -0,0 +1,193 @@
+spei_spi <- function(data, indicator,
+                     var.list,
+                     pet_method = NULL,
+                     accum,
+                     standardization,
+                     stand_refperiod,
+                     stand_handleinf,
+                     ncores = NULL) {
+
+  if (indicator == "spei") {
+    long_name <- "Standardized Precipitation-Evapotranspiration Index"
+    # obtain PET
+    if (is.null(pet_method)) { # already checked (prepare_outputs) that PET exists
+      # in the data when SPEI is requested without a PET method
+      data_obs <- data_format_csindicators(data$obs,
+                                           vars = c("pet", "prlr"),
+                                           var.list = var.list)
+      if (!is.null(data$hcst)) {
+        data_hcst <- data_format_csindicators(data$hcst,
+                                              vars = c("pet", "prlr"),
+                                              var.list = var.list)
+      } else {
+        data_hcst <- NULL
+      }
+      if (!is.null(data$fcst)) {
+        data_fcst <- data_format_csindicators(data$fcst,
+                                              vars = c("pet", "prlr"),
+                                              var.list = var.list)
+      } else {
+        data_fcst <- NULL
+      }
+    } else {
+      if (pet_method == "hargreaves") {
+        vars <- c("tasmax", "tasmin")
+      } else if (pet_method == "hargreaves_modified") {
+        vars <- c("tasmax", "tasmin", "prlr")
+      } else if (pet_method == "thornthwaite") {
+        vars <- c("tas")
+      }
+
+      # add prlr to the data for prlr-pet
+      if (!("prlr" %in% vars)) {
+        vars <- c(vars, "prlr")
+      }
+
+      # call CST_PeriodPET from CSIndicators
+      data_obs <- data_format_csindicators(data$obs,
+                                           vars = vars,
+                                           var.list = var.list)
+      data_obs$pet <- CST_PeriodPET(data = data_obs,
+                                    pet_method = pet_method,
+                                    ncores = ncores)
+
+      if (!is.null(data$hcst)) {
+        data_hcst <- data_format_csindicators(data$hcst,
+                                              vars = vars,
+                                              var.list = var.list)
+        data_hcst$pet <- CST_PeriodPET(data = data_hcst,
+                                       pet_method = pet_method,
+                                       ncores = ncores)
+      }
+
+      if (!is.null(data$fcst)) {
+        data_fcst <- data_format_csindicators(data$fcst,
+                                              vars = vars,
+                                              var.list = var.list)
+        data_fcst$pet <- CST_PeriodPET(data = data_fcst,
+                                       pet_method = pet_method,
+                                       ncores = ncores)
+      }
+    }
+
+    # Obtain difference Precipitation - PET
+    data_obs_diff <- data_obs$pr
+    data_obs_diff$data <- data_obs$pr$data - data_obs$pet$data
+
+    if (!is.null(data$hcst)) {
+      data_hcst_diff <- data_hcst$pr
+      data_hcst_diff$data <- data_hcst$pr$data - data_hcst$pet$data
+    }
+
+    if (!is.null(data$fcst)) {
+      data_fcst_diff <- data_fcst$pr
+      data_fcst_diff$data <- data_fcst$pr$data - data_fcst$pet$data
+    }
+  } else if (indicator == "spi") {
+    # spi: no PET calculation; precipitation is used directly instead of
+    # Precipitation - PET
+    long_name <- "Standardized Precipitation Index"
+    data_obs <- data_format_csindicators(data$obs,
+                                         vars = "prlr",
+                                         var.list = var.list)
+    data_obs_diff <- data_obs$pr
+
+    if (!is.null(data$hcst)) {
+      data_hcst <- data_format_csindicators(data$hcst,
+                                            vars = "prlr",
+                                            var.list = var.list)
+      data_hcst_diff <- data_hcst$pr
+    }
+
+    if (!is.null(data$fcst)) {
+      data_fcst <- data_format_csindicators(data$fcst,
+                                            vars = "prlr",
+                                            var.list = var.list)
+      data_fcst_diff <- data_fcst$pr
+    }
+  }
+
+  #### same workflow for SPEI and SPI starting here
+
+  # call CST_PeriodAccumulation function from CSIndicators
+  data_obs_accum <- CST_PeriodAccumulation(
+    data = data_obs_diff,
+    rollwidth = accum,
+    sdate_dim = "syear",
+    ncores = ncores
+  )
+  if (!is.null(data$hcst)) {
+    data_hcst_accum <- CST_PeriodAccumulation(
+      data = data_hcst_diff,
+      rollwidth = accum,
+      sdate_dim = "syear",
+      ncores = ncores
+    )
+  } else {
+    data_hcst_accum <- NULL
+  }
+  if (!is.null(data$fcst)) {
+    data_fcst_accum <- CST_PeriodAccumulation(
+      data = data_fcst_diff,
+      rollwidth = accum,
+      sdate_dim = "syear",
+      ncores = ncores
+    )
+  } else {
+    data_fcst_accum <- NULL
+  }
+
+  # call CST_PeriodStandardization function from CSIndicators
+  if (standardization) {
+    data_obs_ind <- CST_PeriodStandardization(
+      data = data_obs_accum,
+      data_cor = NULL,
+      ref_period = stand_refperiod,
+      handle_infinity = stand_handleinf,
+      ncores = ncores
+    )
+    if (!is.null(data$hcst)) {
+      data_hcst_ind <- CST_PeriodStandardization(
+        data = data_hcst_accum,
+        data_cor = NULL,
+        ref_period = stand_refperiod,
+        handle_infinity = stand_handleinf,
+        ncores = ncores
+      )
+    } else {
+      data_hcst_ind <- NULL
+    }
+
+    if (!is.null(data$fcst)) {
+      # standardize the forecast with the fitting parameters from the hindcast
+      data_fcst_ind <- CST_PeriodStandardization(
+        data = data_hcst_accum,
+        data_cor = data_fcst_accum,
+        ref_period = stand_refperiod,
+        handle_infinity = stand_handleinf,
+        ncores = ncores
+      )
+    } else {
+      data_fcst_ind <- NULL
+    }
+  } else {
+    data_obs_ind <- data_obs_accum
+    data_hcst_ind <- data_hcst_accum
+    data_fcst_ind <- data_fcst_accum
+  }
+
+  # result: spi or spei (create list of previous data s2dv_cubes)
+  result <- list(hcst = data_hcst_ind,
+                 fcst = data_fcst_ind,
+                 obs = data_obs_ind)
+  for (element in names(result)[!sapply(result, is.null)]) {
+    old_var_name <- result[[element]]$attrs$Variable$varName
+    old_metadata <- result[[element]]$attrs$Variable$metadata[[old_var_name]]
+    result[[element]]$attrs$Variable$metadata[[old_var_name]] <- NULL
+    result[[element]]$coords$var <- indicator
+    result[[element]]$attrs$Variable$varName <- indicator
+    result[[element]]$attrs$Variable$metadata[[indicator]] <- old_metadata
+    result[[element]]$attrs$Variable$metadata[[indicator]] <-
+      list(long_name = long_name, units = "")
+  }
+  return(result)
+}
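+
+# Minimal sketch of the SPEI chain implemented above (illustrative, assuming
+# monthly s2dv_cubes; 'pet_method' and the variable names follow
+# data_format_csindicators()):
+#   pet   <- CST_PeriodPET(data = data_obs, pet_method = "hargreaves")
+#   diff  <- prlr - pet   # water balance, computed on the $data arrays
+#   accum <- CST_PeriodAccumulation(diff, rollwidth = accum, sdate_dim = "syear")
+#   spei  <- CST_PeriodStandardization(accum, ref_period = stand_refperiod,
+#                                      handle_infinity = stand_handleinf)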
diff --git a/modules/Indicators/R/threshold.R b/modules/Indicators/R/threshold.R
new file mode 100644
index 0000000000000000000000000000000000000000..7a2768a18b82bceadba9197477a80d74c8bebd5b
--- /dev/null
+++ b/modules/Indicators/R/threshold.R
@@ -0,0 +1,61 @@
+threshold <- function(data, thrs, var.list, return.values = TRUE) {
+  if (!is.null(data$obs)) {
+    result_obs <- c()
+    dim_var <- which(names(dim(data$obs$data)) == "var")
+    for (var in var.list) {
+      data_tmp <- Subset(data$obs$data, along = "var", indices = which(var.list == var), drop = FALSE)
+      data_threshold <- data_tmp
+      data_threshold[which(data_tmp >= thrs[[which(var.list == var)]][[1]] & data_tmp <= thrs[[which(var.list == var)]][[2]])] <- TRUE
+      data_threshold[which(data_tmp < thrs[[which(var.list == var)]][[1]] | data_tmp > thrs[[which(var.list == var)]][[2]])] <- FALSE
+      data_tmp[which(!data_threshold)] <- NA
+      if (return.values) {
+        result_obs <- abind(result_obs, data_tmp, along = dim_var)
+      } else {
+        result_obs <- abind(result_obs, data_threshold, along = dim_var)
+      }
+    }
+    names(dim(result_obs)) <- names(dim(data$obs$data))
+    data$obs$data <- result_obs
+  }
+
+  if (!is.null(data$hcst)) {
+    result_hcst <- c()
+    dim_var <- which(names(dim(data$hcst$data)) == "var")
+    for (var in var.list) {
+      data_tmp <- Subset(data$hcst$data, along = "var", indices = which(var.list == var), drop = FALSE)
+      data_threshold <- data_tmp
+      data_threshold[which(data_tmp >= thrs[[which(var.list == var)]][[1]] & data_tmp <= thrs[[which(var.list == var)]][[2]])] <- TRUE
+      data_threshold[which(data_tmp < thrs[[which(var.list == var)]][[1]] | data_tmp > thrs[[which(var.list == var)]][[2]])] <- FALSE
+      data_tmp[which(!data_threshold)] <- NA
+      if (return.values) {
+        result_hcst <- abind(result_hcst, data_tmp, along = dim_var)
+      } else {
+        result_hcst <- abind(result_hcst, data_threshold, along = dim_var)
+      }
+    }
+    names(dim(result_hcst)) <- names(dim(data$hcst$data))
+    data$hcst$data <- result_hcst
+  }
+
+  if (!is.null(data$fcst)) {
+    result_fcst <- c()
+    dim_var <- which(names(dim(data$fcst$data)) == "var")
+    for (var in var.list) {
+      data_tmp <- Subset(data$fcst$data, along = "var", indices = which(var.list == var), drop = FALSE)
+      data_threshold <- data_tmp
+      data_threshold[which(data_tmp >= thrs[[which(var.list == var)]][[1]] & data_tmp <= thrs[[which(var.list == var)]][[2]])] <- TRUE
+      data_threshold[which(data_tmp < thrs[[which(var.list == var)]][[1]] | data_tmp > thrs[[which(var.list == var)]][[2]])] <- FALSE
+      data_tmp[which(!data_threshold)] <- NA
+      if (return.values) {
+        result_fcst <- abind(result_fcst, data_tmp, along = dim_var)
+      } else {
+        result_fcst <- abind(result_fcst, data_threshold, along = dim_var)
+      }
+    }
+    names(dim(result_fcst)) <- names(dim(data$fcst$data))
+    data$fcst$data <- result_fcst
+  }
+
+  # result <- list(obs = result_obs, hcst = result_hcst, fcst = result_fcst)
+  return(data)
+}
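+
+# Illustrative use (hypothetical values, in the input data's own units): keep
+# only the values of 'tas' inside [278, 298] and set the rest to NA:
+#   thrs <- list(list(278, 298))
+#   data <- threshold(data, thrs, var.list = c("tas"), return.values = TRUE)
+# With return.values = FALSE, a 1/0 mask is returned instead of the values.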
diff --git a/modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R b/modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R
new file mode 100644
index 0000000000000000000000000000000000000000..b71270f14086d2db804f4a046b0766b6f1952edc
--- /dev/null
+++ b/modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R
@@ -0,0 +1,647 @@
+#'Compute the Standardization of Precipitation-Evapotranspiration Index
+#'
+#'The Standardization of the data is the last step of computing the SPEI
+#'(Standardized Precipitation-Evapotranspiration Index). With this function the
+#'data is fit to a probability distribution to transform the original values to
+#'standardized units that are comparable in space and time and at different SPEI
+#'time scales.
+#'
+#'Next, some specifications for the calculation of the standardization will be
+#'discussed. If there are NAs in the data and they are not removed with the
+#'parameter 'na.rm', the standardization cannot be carried out for those
+#'coordinates and therefore, the result will be filled with NA for the
+#'specific coordinates. When NAs are not removed, if the length of the data for
+#'a computational step is smaller than 4, there will not be enough data for
+#'standardization and the result will also be filled with NAs for those
+#'coordinates. About the distribution used to fit the data, there are only two
+#'possibilities: 'log-Logistic' and 'Gamma'. The 'Gamma' method works only when
+#'precipitation is the sole variable provided, and all other variables are 0,
+#'because the distribution is only defined for positive values (SPI indicator).
+#'When only 'data' is provided ('data_cor' is NULL) the standardization is
+#'computed with cross validation. This function is built to be compatible with
+#'other tools that work with the 's2dv_cube' object class. The input data must
+#'be of this object class. If you don't work with 's2dv_cube', see
+#'PeriodStandardization. For more information on the SPEI indicator
+#'calculation, see CST_PeriodPET and CST_PeriodAccumulation.
+#'
+#'@param data An 's2dv_cube' whose element 'data' stores a multidimensional
+#' array containing the data to be standardized.
+#'@param data_cor An 's2dv_cube' whose element 'data' stores a multidimensional
+#' array containing the data to which the standardization should be applied
+#' using the fitting parameters from 'data'.
+#'@param time_dim A character string indicating the name of the temporal
+#' dimension. By default, it is set to 'syear'.
+#'@param leadtime_dim A character string indicating the name of the leadtime
+#' dimension. By default, it is set to 'time'.
+#'@param memb_dim A character string indicating the name of the dimension in
+#' which the ensemble members are stored. When set to NULL, the threshold is
+#' computed for individual members.
+#'@param ref_period A list with two numeric values with the starting and end
+#' points of the reference period used for computing the index. The default
+#' value is NULL, indicating that the first and end values in data will be
+#' used as starting and end points.
+#'@param params An optional parameter that needs to be a multidimensional array
+#' with named dimensions. This option overrides the computation of fitting
+#' parameters. It needs to have the same time dimensions (specified in
+#' 'time_dim' and 'leadtime_dim') as 'data' and a dimension named 'coef' with
+#' the length of the coefficients needed for the used distribution (for 'Gamma'
+#' the 'coef' dimension is of length 2, for 'log-Logistic' it is of length 3).
+#' It also needs to have a leadtime dimension (specified in 'leadtime_dim') of
+#' length 1. It will only be used if 'data_cor' is not provided.
+#'@param handle_infinity A logical value indicating whether to return infinite
+#' values (TRUE) or not (FALSE). When it is TRUE, the positive infinite values
+#' (negative infinite) are substituted by the maximum (minimum) values of each
+#' computation step, a subset of the array of dimensions time_dim, leadtime_dim
+#' and memb_dim.
+#'@param method A character string indicating the standardization method used.
+#' It can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by
+#' default.
+#'@param distribution A character string indicating the name of the distribution
+#' function to be used for computing the SPEI. The accepted names are:
+#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The
+#' 'Gamma' method works only when precipitation is the sole variable provided,
+#' and all other variables are 0, because the distribution is only defined for
+#' positive values (SPI indicator).
+#'@param return_params A logical value indicating whether to return the
+#' parameters array (TRUE) or not (FALSE). It is FALSE by default.
+#'@param na.rm A logical value indicating whether NA values should be removed
+#' from the data. It is FALSE by default. If it is FALSE and there are NA
+#' values, standardization cannot be carried out for those coordinates and
+#' therefore, the result will be filled with NA for the specific coordinates.
+#' If it is TRUE and the data from the dimensions other than time_dim and
+#' leadtime_dim do not reach 4 values, there are not enough values to estimate
+#' the parameters and the result will include NA.
+#'@param ncores An integer value indicating the number of cores to use in
+#' parallel computation.
+#'
+#'@return An object of class \code{s2dv_cube} containing the standardized data.
+#'If 'data_cor' is provided, the array stored in element data will be of the
+#'same dimensions as 'data_cor'. If 'data_cor' is not provided, the array
+#'stored in element data will be of the same dimensions as 'data'. The
+#'parameters of the standardization will only be returned if 'return_params' is
+#'TRUE; in that case, the output will be a list of two objects, one for the
+#'standardized data and one for the parameters.
+#'
+#'@examples
+#'dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25)
+#'data <- NULL
+#'data$data <- array(rnorm(900, -204.1, 78.1), dim = dims)
+#'class(data) <- 's2dv_cube'
+#'SPEI <- CST_PeriodStandardization(data = data)
+#'@export
+CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear',
+                                      leadtime_dim = 'time', memb_dim = 'ensemble',
+                                      ref_period = NULL,
+                                      handle_infinity = FALSE,
+                                      method = 'parametric',
+                                      distribution = 'log-Logistic',
+                                      params = NULL, return_params = FALSE,
+                                      na.rm = FALSE, ncores = NULL) {
+  # Check 's2dv_cube'
+  if (is.null(data)) {
+    stop("Parameter 'data' cannot be NULL.")
+  }
+  if (!inherits(data, 's2dv_cube')) {
+    stop("Parameter 'data' must be of 's2dv_cube' class.")
+  }
+  if (!is.null(data_cor)) {
+    if (!inherits(data_cor, 's2dv_cube')) {
+      stop("Parameter 'data_cor' must be of 's2dv_cube' class.")
+    }
+  }
+  res <- PeriodStandardization(data = data$data, data_cor = data_cor$data,
+                               dates = data$attrs$Dates,
+                               time_dim = time_dim, leadtime_dim = leadtime_dim,
+                               memb_dim = memb_dim,
+                               ref_period = ref_period,
+                               handle_infinity = handle_infinity, method = method,
+                               distribution = distribution,
+                               params = params, return_params = return_params,
+                               na.rm = na.rm, ncores = ncores)
+  if (return_params) {
+    std <- res$spei
+    params <- res$params
+  } else {
+    std <- res
+  }
+
+  if (is.null(data_cor)) {
+    data$data <- std
+    data$attrs$Variable$varName <- paste0(data$attrs$Variable$varName, ' standardized')
+    if (return_params) {
+      return(list(spei = data, params = params))
+    } else {
+      return(data)
+    }
+  } else {
+    data_cor$data <- std
+    data_cor$attrs$Variable$varName <- paste0(data_cor$attrs$Variable$varName, ' standardized')
+    data_cor$attrs$Datasets <- c(data_cor$attrs$Datasets, data$attrs$Datasets)
+    data_cor$attrs$source_files <- c(data_cor$attrs$source_files, data$attrs$source_files)
+    return(data_cor)
+  }
+}
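+
+# Illustrative forecast-mode call (hypothetical object names), mirroring the
+# use in modules/Indicators/R/spei_spi.R: the fitting parameters are computed
+# from the hindcast cube and applied to the forecast cube passed as 'data_cor':
+#   spei_fcst <- CST_PeriodStandardization(data = hcst_accum,
+#                                          data_cor = fcst_accum,
+#                                          handle_infinity = TRUE)
+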
+#'Compute the Standardization of Precipitation-Evapotranspiration Index
+#'
+#'The Standardization of the data is the last step of computing the SPEI
+#'indicator. With this function the data is fit to a probability distribution
+#'to transform the original values to standardized units that are comparable
+#'in space and time and at different SPEI time scales.
+#'
+#'Next, some specifications for the calculation of the standardization will be
+#'discussed. If there are NAs in the data and they are not removed with the
+#'parameter 'na.rm', the standardization cannot be carried out for those
+#'coordinates and therefore, the result will be filled with NA for the
+#'specific coordinates. When NAs are not removed, if the length of the data for
+#'a computational step is smaller than 4, there will not be enough data for
+#'standardization and the result will also be filled with NAs for those
+#'coordinates. About the distribution used to fit the data, there are only two
+#'possibilities: 'log-Logistic' and 'Gamma'.
+ +#'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'The Standardization of the data is the last step of computing the SPEI +#'indicator. With this function the data is fitted to a probability distribution +#'to transform the original values to standardized units that are comparable in +#'space and time and at different SPEI time scales. +#' +#'Next, some specifications for the calculation of the standardization will be +#'discussed. If there are NAs in the data and they are not removed with the +#'parameter 'na.rm', the standardization cannot be carried out for those +#'coordinates and, therefore, the result will be filled with NA for those +#'coordinates. When NAs are not removed, if the length of the data for a +#'computational step is smaller than 4, there will not be enough data to +#'standardize and the result will also be filled with NAs for those coordinates. +#'About the distribution used to fit the data, there are only two possibilities: +#''log-Logistic' and 'Gamma'. The 'Gamma' method only works when only +#'precipitation is provided and the other variables are 0 because the +#'distribution is only defined for positive values (SPI indicator). When only +#''data' is provided ('data_cor' is NULL) the standardization is computed with +#'cross validation. For more information about SPEI, see the functions PeriodPET +#'and PeriodAccumulation. +#' +#'@param data A multidimensional array containing the data to be standardized. +#'@param data_cor A multidimensional array containing the data to which the +#' standardization should be applied using the fitting parameters from 'data'. +#'@param dates An array containing the dates of the data with the same time +#' dimensions as the data. It is optional and only necessary for using the +#' parameter 'ref_period' to select a reference period directly from dates. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the lead-time +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When it is set to NULL, the +#' computation is carried out for each member individually. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL, indicating that the first and last values in 'data' will be +#' used as the start and end points. +#'@param params An optional parameter that needs to be a multidimensional array +#' with named dimensions. This option overrides the computation of the fitting +#' parameters. It needs to have the same time dimensions (specified in 'time_dim' +#' and 'leadtime_dim') as 'data' and a dimension named 'coef' with the length +#' of the coefficients needed for the chosen distribution (for 'Gamma' the +#' 'coef' dimension is of length 2, for 'log-Logistic' it is of length 3). It +#' will only be used if 'data_cor' is not provided. +#'@param handle_infinity A logical value indicating whether to handle infinite +#' values (TRUE) or not (FALSE). When it is TRUE, the positive infinite values +#' (negative infinite) are substituted by the maximum (minimum) values of each +#' computation step, a subset of the array of dimensions time_dim, leadtime_dim +#' and memb_dim. +#'@param method A character string indicating the standardization method used. +#' It can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and the other +#' variables are 0 because the distribution is only defined for positive values +#' (SPI indicator). +#'@param return_params A logical value indicating whether to return the +#' parameters array (TRUE) or not (FALSE). It is FALSE by default. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from the data. It is FALSE by default. If it is FALSE and there are NA values, +#' standardization cannot be carried out for those coordinates and, therefore, +#' the result will be filled with NA for those coordinates.
If it is +#' TRUE and fewer than 4 values remain for a computation step (across all +#' dimensions except time_dim and leadtime_dim), there are not enough values +#' to estimate the parameters and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return A multidimensional array containing the standardized data. +#'If 'data_cor' is provided the array will be of the same dimensions as +#''data_cor'. If 'data_cor' is not provided, the array will be of the same +#'dimensions as 'data'. The parameters of the standardization will only be +#'returned if 'return_params' is TRUE; in this case, the output will be a list +#'of two objects: one for the standardized data and one for the parameters. +#' +#'@examples +#'dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) +#'dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) +#'data <- array(rnorm(600, -194.5, 64.8), dim = dims) +#'datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) +#' +#'SPEI <- PeriodStandardization(data = data) +#'SPEIcor <- PeriodStandardization(data = data, data_cor = datacor) +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom lmomco pwm.pp pwm.ub pwm2lmom are.lmom.valid parglo pargam parpe3 +#'@importFrom lmom cdfglo cdfgam cdfpe3 pelglo pelgam pelpe3 +#'@importFrom SPEI parglo.maxlik +#'@importFrom stats qnorm sd window +#'@export +PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', + ref_period = NULL, handle_infinity = FALSE, + method = 'parametric', + distribution = 'log-Logistic', + params = NULL, return_params = FALSE, + na.rm = FALSE, ncores = NULL) { + # Check inputs + ## data + if (!is.array(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have dimension names.") + } + ## data_cor + if (!is.null(data_cor)) { + if (!is.array(data_cor)) { + stop("Parameter 'data_cor' must be a numeric array.") + } + if (is.null(names(dim(data_cor)))) { + stop("Parameter 'data_cor' must have dimension names.") + } + } + ## dates + if (!is.null(dates)) { + if (!any(inherits(dates, 'Date'), inherits(dates, 'POSIXct'))) { + stop("Parameter 'dates' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates)) | !leadtime_dim %in% names(dim(dates))) { + stop("Parameter 'dates' must have 'time_dim' and 'leadtime_dim' ", + "dimensions.") + } + if (dim(data)[c(time_dim)] != dim(dates)[c(time_dim)]) { + stop("Parameter 'dates' needs to have the same length of 'time_dim' ", + "as 'data'.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!time_dim %in% names(dim(data_cor))) { + stop("Parameter 'time_dim' is not found in 'data_cor' dimension.") + } + } + ## leadtime_dim + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!leadtime_dim %in% names(dim(data))) { + stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!leadtime_dim %in% names(dim(data_cor))) { + stop("Parameter 'leadtime_dim' is not found in 'data_cor' dimension.") + } + }
+ ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) != 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!memb_dim %in% names(dim(data_cor))) { + stop("Parameter 'memb_dim' is not found in 'data_cor' dimension.") + } + } + ## data_cor (2) + if (!is.null(data_cor)) { + if (dim(data)[leadtime_dim] != dim(data_cor)[leadtime_dim]) { + stop("Parameter 'data' and 'data_cor' have dimension 'leadtime_dim' ", + "of different length.") + } + } + ## ref_period + if (!is.null(ref_period)) { + if (is.null(dates)) { + warning("Parameter 'dates' is not provided so 'ref_period' can't be ", + "used.") + ref_period <- NULL + } else if (length(ref_period) != 2) { + warning("Parameter 'ref_period' must be of length two indicating the ", + "start and end years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (!all(sapply(ref_period, is.numeric))) { + warning("Parameter 'ref_period' must be a numeric vector indicating the ", + "'start' and 'end' years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (ref_period[[1]] > ref_period[[2]]) { + warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", + "will not be used.") + ref_period <- NULL + } else if (!all(unlist(ref_period) %in% format(dates, "%Y"))) { + warning("Parameter 'ref_period' contains years outside the dates. ", + "It will not be used.") + ref_period <- NULL + } else { + years <- format(Subset(dates, along = leadtime_dim, indices = 1), "%Y") + ref_period[[1]] <- which(ref_period[[1]] == years) + ref_period[[2]] <- which(ref_period[[2]] == years) + } + } + ## handle_infinity + if (!is.logical(handle_infinity)) { + stop("Parameter 'handle_infinity' must be a logical value.") + } + ## method + if (!(method %in% c('parametric', 'non-parametric'))) { + stop("Parameter 'method' must be a character string containing one of ", + "the following methods: 'parametric' or 'non-parametric'.") + } + ## distribution + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop("Parameter 'distribution' must be a character string containing one ", + "of the following distributions: 'log-Logistic', 'Gamma' or ", + "'PearsonIII'.") + } + ## params + if (!is.null(params)) { + if (!is.numeric(params)) { + stop("Parameter 'params' must be numeric.") + } + if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { + stop("Parameter 'params' must be a multidimensional array with named ", + "dimensions: '", time_dim, "', '", leadtime_dim, "' and 'coef'.") + } + dims_data <- dim(data)[-which(names(dim(data)) == memb_dim)] + dims_params <- dim(params)[-which(names(dim(params)) == 'coef')] + if (!all(dims_data == dims_params)) { + stop("Parameter 'data' and 'params' must have the same common dimensions ", + "except 'memb_dim' and 'coef'.") + } + + if (distribution == "Gamma") { + if (dim(params)['coef'] != 2) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 2.") + } + } else { + if (dim(params)['coef'] != 3) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 3.") + } + } + } + ## return_params + if (!is.logical(return_params)) { + stop("Parameter 'return_params' must be logical.") + } + ## na.rm + if (!is.logical(na.rm)) { + stop("Parameter 'na.rm' must be logical.") + }
logical.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + if (is.null(ref_period)) { + ref_start <- NULL + ref_end <- NULL + } else { + ref_start <- ref_period[[1]] + ref_end <- ref_period[[2]] + } + + # Standardization + if (is.null(data_cor)) { + if (is.null(params)) { + res <- Apply(data = list(data), + target_dims = c(leadtime_dim, time_dim, memb_dim), + fun = .standardization, data_cor = NULL, params = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } else { + res <- Apply(data = list(data = data, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(leadtime_dim, time_dim, 'coef')), + fun = .standardization, data_cor = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } + } else { + res <- Apply(data = list(data = data, data_cor = data_cor), + target_dims = c(leadtime_dim, time_dim, memb_dim), + fun = .standardization, params = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } + if (return_params) { + spei <- res$spei + params <- res$params + } else { + spei <- res$output1 + } + + if (is.null(data_cor)) { + pos <- match(names(dim(data)), names(dim(spei))) + spei <- aperm(spei, pos) + } else { + pos <- match(names(dim(data_cor)), names(dim(spei))) + spei <- aperm(spei, pos) + } + + if (return_params) { + pos <- match(c(names(dim(spei))[-which(names(dim(spei)) == memb_dim)], 'coef'), + names(dim(params))) + params <- aperm(params, pos) + return(list('spei' = spei, 'params' = params)) + } else { + return(spei) + } +} + +.standardization <- function(data, data_cor = NULL, params = NULL, + leadtime_dim = 'time', time_dim = 'syear', + ref_start = NULL, ref_end = NULL, handle_infinity = FALSE, + method = 'parametric', distribution = 'log-Logistic', + return_params = FALSE, na.rm = FALSE) { + # data (data_cor): [leadtime_dim, time_dim, memb_dim] + dims <- dim(data)[-1] + fit = 'ub-pwm' + + coef = switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) + + if (is.null(data_cor)) { + # cross_val = TRUE + spei_mod <- data*NA + if (return_params) { + params_result <- array(dim = c(dim(data)[-length(dim(data))], coef = length(coef))) + } + for (ff in 1:dim(data)[leadtime_dim]) { + data2 <- data[ff, , ] + dim(data2) <- dims + if (method == 'non-parametric') { + bp <- matrix(0, length(data2), 1) + for (i in 1:length(data2)) { + bp[i,1] = sum(data2[] <= data2[i], na.rm = na.rm); # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data2) + 0.12)) + dim(std_index) <- dims + spei_mod[ff, , ] <- std_index + } else { + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- 
+ +.standardization <- function(data, data_cor = NULL, params = NULL, + leadtime_dim = 'time', time_dim = 'syear', + ref_start = NULL, ref_end = NULL, handle_infinity = FALSE, + method = 'parametric', distribution = 'log-Logistic', + return_params = FALSE, na.rm = FALSE) { + # data (data_cor): [leadtime_dim, time_dim, memb_dim] + dims <- dim(data)[-1] + fit <- 'ub-pwm' + + coef <- switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) + + if (is.null(data_cor)) { + # cross_val = TRUE + spei_mod <- data * NA + if (return_params) { + params_result <- array(dim = c(dim(data)[-length(dim(data))], coef = length(coef))) + } + for (ff in 1:dim(data)[leadtime_dim]) { + data2 <- data[ff, , ] + dim(data2) <- dims + if (method == 'non-parametric') { + bp <- matrix(0, length(data2), 1) + for (i in 1:length(data2)) { + bp[i, 1] <- sum(data2[] <= data2[i], na.rm = na.rm) # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data2) + 0.12)) + dim(std_index) <- dims + spei_mod[ff, , ] <- std_index + } else { + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- window(data2, ref_start, ref_end) + } else { + data_fit <- data2 + } + for (nsd in 1:dim(data)[time_dim]) { + if (is.null(params)) { + acu <- as.vector(data_fit[-nsd, ]) + if (na.rm) { + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) + } + f_params <- NA + if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { + acu_sd <- sd(acu_sorted) + if (!is.na(acu_sd) & acu_sd != 0) { + if (distribution != "log-Logistic") { + acu_sorted <- acu_sorted[acu_sorted > 0] + } + if (length(acu_sorted) >= 4) { + f_params <- .std(data = acu_sorted, fit = fit, + distribution = distribution) + } + } + } + } else { + f_params <- params[ff, nsd, ] + } + if (all(is.na(f_params))) { + cdf_res <- NA + } else { + f_params <- f_params[which(!is.na(f_params))] + cdf_res <- switch(distribution, + "log-Logistic" = lmom::cdfglo(data2, f_params), + "Gamma" = lmom::cdfgam(data2, f_params), + "PearsonIII" = lmom::cdfpe3(data2, f_params)) + } + std_index_cv <- array(qnorm(cdf_res), dim = dims) + spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] + if (return_params) params_result[ff, nsd, ] <- f_params + } + } + } + } else { + # cross_val = FALSE + spei_mod <- data_cor * NA + dimscor <- dim(data_cor)[-1] + if (return_params) { + params_result <- array(dim = c(dim(data_cor)[-length(dim(data_cor))], coef = length(coef))) + } + for (ff in 1:dim(data)[leadtime_dim]) { + data_cor2 <- data_cor[ff, , ] + dim(data_cor2) <- dimscor + if (method == 'non-parametric') { + bp <- matrix(0, length(data_cor2), 1) + for (i in 1:length(data_cor2)) { + bp[i, 1] <- sum(data_cor2[] <= data_cor2[i], na.rm = na.rm) # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data_cor2) + 0.12)) + dim(std_index) <- dimscor + spei_mod[ff, , ] <- std_index + } else { + data2 <- data[ff, , ] + dim(data2) <- dims + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- window(data2, ref_start, ref_end) + } else { + data_fit <- data2 + } + acu <- as.vector(data_fit) + if (na.rm) { + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) + } + if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { + acu_sd <- sd(acu_sorted) + if (!is.na(acu_sd) & acu_sd != 0) { + if (distribution != "log-Logistic") { + acu_sorted <- acu_sorted[acu_sorted > 0] + } + f_params <- NA + if (length(acu_sorted) >= 4) { + f_params <- .std(data = acu_sorted, fit = fit, + distribution = distribution) + } + if (all(is.na(f_params))) { + cdf_res <- NA + } else { + f_params <- f_params[which(!is.na(f_params))] + cdf_res <- switch(distribution, + "log-Logistic" = lmom::cdfglo(data_cor2, f_params), + "Gamma" = lmom::cdfgam(data_cor2, f_params), + "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) + } + std_index_cv <- array(qnorm(cdf_res), dim = dimscor) + spei_mod[ff, , ] <- std_index_cv + if (return_params) params_result[ff, , ] <- f_params + } + } + } + } + } + if (handle_infinity) { + # Substitute +Inf/-Inf by the maximum/minimum finite values of the computation step + spei_mod[is.infinite(spei_mod) & spei_mod < 0] <- min(spei_mod[!is.infinite(spei_mod)]) + spei_mod[is.infinite(spei_mod) & spei_mod > 0] <- max(spei_mod[!is.infinite(spei_mod)]) + } + if (return_params) { + return(list(spei = spei_mod, params = params_result)) + } else { + return(spei_mod) + } +}
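+ +# A rough sketch (hypothetical values) of the fit performed by .std() below: +# sample probability-weighted moments are converted to L-moments and then to +# distribution parameters, mirroring the 'Gamma' branch with unbiased PWMs +# ('ub-pwm', the fit used above): +# x <- sort(rgamma(50, shape = 2, rate = 0.1)) +# lm3 <- lmomco::pwm2lmom(lmomco::pwm.ub(x, nmom = 3)) +# lmom::pelgam(c(lm3$lambdas[1:2], lm3$ratios[3])) # returns 'alpha' and 'beta' + +.std <- function(data, fit = 'pp-pwm', distribution = 'log-Logistic') { + pwm = switch(fit, + 'pp-pwm' = lmomco::pwm.pp(data, -0.35, 0, nmom = 3), + lmomco::pwm.ub(data, 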
nmom = 3) + # TLMoments::PWM(data, order = 0:2) + ) + lmom <- lmomco::pwm2lmom(pwm) + if (!any(!lmomco::are.lmom.valid(lmom), anyNA(lmom[[1]]), any(is.nan(lmom[[1]])))) { + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + params_result = switch(distribution, + 'log-Logistic' = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){lmomco::parglo(lmom)$para}), + 'Gamma' = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){lmomco::pargam(lmom)$para}), + 'PearsonIII' = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){lmomco::parpe3(lmom)$para})) + if (distribution == 'log-Logistic' && fit == 'max-lik') { + params_result = SPEI::parglo.maxlik(data, params_result)$para + } + return(params_result) + } else { + return(NA) + } +} \ No newline at end of file diff --git a/modules/Indices/R/tmp/EOF.R b/modules/Indices/R/tmp/EOF.R deleted file mode 100644 index 38c3fae0970f3e487441d64bea40311fbf02e37c..0000000000000000000000000000000000000000 --- a/modules/Indices/R/tmp/EOF.R +++ /dev/null @@ -1,292 +0,0 @@ -#'Area-weighted empirical orthogonal function analysis using SVD -#' -#'Perform an area-weighted EOF analysis using single value decomposition (SVD) -#'based on a covariance matrix or a correlation matrix if parameter 'corr' is -#'set to TRUE. -#' -#'@param ano A numerical array of anomalies with named dimensions to calculate -#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs -#' could exist but it should be consistent along time_dim. That is, if one grid -#' point has NAs, all the time steps at this point should be NAs. -#'@param lat A vector of the latitudes of 'ano'. -#'@param lon A vector of the longitudes of 'ano'. -#'@param time_dim A character string indicating the name of the time dimension -#' of 'ano'. The default value is 'sdate'. -#'@param space_dim A vector of two character strings. The first is the dimension -#' name of latitude of 'ano' and the second is the dimension name of longitude -#' of 'ano'. The default value is c('lat', 'lon'). -#'@param neofs A positive integer of the modes to be kept. The default value is -#' 15. If time length or the product of the length of space_dim is smaller than -#' neofs, neofs will be changed to the minimum of the three values. -#'@param corr A logical value indicating whether to base on a correlation (TRUE) -#' or on a covariance matrix (FALSE). The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A list containing: -#'\item{EOFs}{ -#' An array of EOF patterns normalized to 1 (unitless) with dimensions -#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). -#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed -#' field. -#'} -#'\item{PCs}{ -#' An array of principal components with the units of the original field to -#' the power of 2, with dimensions (time_dim, number of modes, rest of the -#' dimensions of 'ano' except 'space_dim'). -#' 'PCs' contains already the percentage of explained variance so, -#' to reconstruct the original field it's only needed to multiply 'EOFs' -#' by 'PCs'. -#'} -#'\item{var}{ -#' An array of the percentage (%) of variance fraction of total variance -#' explained by each mode (number of modes). The dimensions are (number of -#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). 
-#'} -#'\item{mask}{ -#' An array of the mask with dimensions (space_dim, rest of the dimensions of -#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that -#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to -#' replace NAs with 0s for EOF calculation and mask the result with NAs again -#' after the calculation. -#'} -#'\item{wght}{ -#' An array of the area weighting with dimensions 'space_dim'. It is calculated -#' by cosine of 'lat' and used to compute the fraction of variance explained by -#' each EOFs. -#'} -#'\item{tot_var}{ -#' A number or a numeric array of the total variance explained by all the modes. -#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. -#'} -#' -#'@seealso ProjectField, NAO, PlotBoxWhisker -#'@examples -#'# This example computes the EOFs along forecast horizons and plots the one -#'# that explains the greatest amount of variability. The example data has low -#'# resolution so the result may not be explanatory, but it displays how to -#'# use this function. -#'\dontshow{ -#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), -#' c('observation'), startDates, -#' leadtimemin = 1, -#' leadtimemax = 4, -#' output = 'lonlat', -#' latmin = 27, latmax = 48, -#' lonmin = -12, lonmax = 40) -#'} -#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) -#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) -#'ano <- tmp[1, , ,] -#'names(dim(ano)) <- names(dim(tmp))[-2] -#'eof <- EOF(ano, sampleData$lat, sampleData$lon) -#'\dontrun{ -#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) -#'} -#' -#'@import multiApply -#'@importFrom stats sd -#'@export -EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), - neofs = 15, corr = FALSE, ncores = NULL) { - - # Check inputs - ## ano - if (is.null(ano)) { - stop("Parameter 'ano' cannot be NULL.") - } - if (!is.numeric(ano)) { - stop("Parameter 'ano' must be a numeric array.") - } - if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { - stop("Parameter 'ano' must have dimension names.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) > 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(ano))) { - stop("Parameter 'time_dim' is not found in 'ano' dimension.") - } - ## space_dim - if (!is.character(space_dim) | length(space_dim) != 2) { - stop("Parameter 'space_dim' must be a character vector of 2.") - } - if (!all(space_dim %in% names(dim(ano)))) { - stop("Parameter 'space_dim' is not found in 'ano' dimension.") - } - ## lat - if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { - stop("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'ano'.") - } - if (any(lat > 90 | lat < -90)) { - stop("Parameter 'lat' must contain values within the range [-90, 90].") - } - ## lon - if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { - stop("Parameter 'lon' must be a numeric vector with the same ", - "length as the longitude dimension of 'ano'.") - } - if (any(lon > 360 | lon < -360)) { - .warning("Some 'lon' is out of the range [-360, 360].") - } - ## neofs - if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { - stop("Parameter 'neofs' must be a positive integer.") - } - ## corr - if (!is.logical(corr) | length(corr) > 1) { - stop("Parameter 'corr' must be one logical value.") - } - ## 
ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - - ############################### - # Calculate EOF - -# # Replace mask of NAs with 0s for EOF analysis. -# ano[!is.finite(ano)] <- 0 - - # Area weighting. Weights for EOF; needed to compute the - # fraction of variance explained by each EOFs - space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) - wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) - - # We want the covariance matrix to be weigthed by the grid - # cell area so the anomaly field is weighted by its square - # root since the covariance matrix equals transpose(ano) - # times ano. - wght <- sqrt(wght) - - # neofs is bounded - if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) { - neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs) - .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ", - "the length of time_dim, the production of the length of space_dim, ", - "and neofs.")) - } - - res <- Apply(ano, - target_dims = c(time_dim, space_dim), - output_dims = list(EOFs = c('mode', space_dim), - PCs = c(time_dim, 'mode'), - var = 'mode', - tot_var = NULL, - mask = space_dim), - fun = .EOF, - corr = corr, neofs = neofs, - wght = wght, - ncores = ncores) - - return(c(res, wght = list(wght))) - -} - -.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) { - # ano: [time, lat, lon] - - # Dimensions - nt <- dim(ano)[1] - ny <- dim(ano)[2] - nx <- dim(ano)[3] - - # Check if all the time steps at one grid point are NA-consistent. - # The grid point should have all NAs or no NA along time dim. - if (anyNA(ano)) { - ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon] - na_ind <- which(is.na(ano_latlon), arr.ind = T) - if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) { - stop("Detect certain grid points have NAs but not consistent across time ", - "dimension. If the grid point is NA, it should have NA at all time step.") - } - } - - # Build the mask - mask <- ano[1, , ] - mask[!is.finite(mask)] <- NA - mask[is.finite(mask)] <- 1 - dim(mask) <- c(ny, nx) - - # Replace mask of NAs with 0s for EOF analysis. - ano[!is.finite(ano)] <- 0 - - ano <- ano * InsertDim(wght, 1, nt) - - # The use of the correlation matrix is done under the option corr. - if (corr) { - stdv <- apply(ano, c(2, 3), sd, na.rm = T) - ano <- ano / InsertDim(stdv, 1, nt) - } - - # Time/space matrix for SVD - dim(ano) <- c(nt, ny * nx) - dim.dat <- dim(ano) - - # 'transpose' means the array needs to be transposed before - # calling La.svd for computational efficiency because the - # spatial dimension is larger than the time dimension. This - # goes with transposing the outputs of LA.svd also. - if (dim.dat[2] > dim.dat[1]) { - transpose <- TRUE - } else { - transpose <- FALSE - } - if (transpose) { - pca <- La.svd(t(ano)) - } else { - pca <- La.svd(ano) - } - - # La.svd conventions: decomposition X = U D t(V) La.svd$u - # returns U La.svd$d returns diagonal values of D La.svd$v - # returns t(V) !! The usual convention is PC=U and EOF=V. 
- # If La.svd is called for ano (transpose=FALSE case): EOFs: - # $v PCs: $u If La.svd is called for t(ano) (transposed=TRUE - # case): EOFs: t($u) PCs: t($v) - - if (transpose) { - pca.EOFs <- t(pca$u) - pca.PCs <- t(pca$v) - } else { - pca.EOFs <- pca$v - pca.PCs <- pca$u - } - - # The numbers of transposition is limited to neofs - PC <- pca.PCs[, 1:neofs] - EOF <- pca.EOFs[1:neofs, ] - dim(EOF) <- c(neofs, ny, nx) - - # To sort out crash when neofs=1. - if (neofs == 1) { - PC <- InsertDim(PC, 2, 1, name = 'new') - } - - # Computation of the % of variance associated with each mode - W <- pca$d[1:neofs] - tot.var <- sum(pca$d^2) - var.eof <- 100 * pca$d[1:neofs]^2 / tot.var - - for (e in 1:neofs) { - # Set all masked grid points to NA in the EOFs - # Divide patterns by area weights so that EOF * PC gives unweigthed (original) data - EOF[e, , ] <- EOF[e, , ] * mask / wght - # PC is multiplied by the explained variance, - # so that the reconstruction is only EOF * PC - PC[, e] <- PC[, e] * W[e] - } - - if (neofs == 1) { - var.eof <- as.array(var.eof) - } - - return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask))) -} diff --git a/modules/Indices/R/tmp/GetProbs.R b/modules/Indices/R/tmp/GetProbs.R new file mode 100644 index 0000000000000000000000000000000000000000..983d96654fe6f2c48ede669b967ddae132a8139d --- /dev/null +++ b/modules/Indices/R/tmp/GetProbs.R @@ -0,0 +1,346 @@ +#'Compute probabilistic forecasts or the corresponding observations +#' +#'Compute probabilistic forecasts from an ensemble based on the relative +#'thresholds, or the probabilistic observations (i.e., which probabilistic +#'category was observed). A reference period can be specified to calculate the +#'absolute thresholds between each probabilistic category. The absolute +#'thresholds can be computed in cross-validation mode. If data is an ensemble, +#'the probabilities are calculated as the percentage of members that fall into +#'each category. For observations (or forecast without member dimension), 1 +#'means that the event happened, while 0 indicates that the event did not +#'happen. Weighted probabilities can be computed if the weights are provided for +#'each ensemble member and time step. The absolute thresholds can also be +#'provided directly for probabilities calculation. +#' +#'@param data A named numerical array of the forecasts or observations with, at +#' least, time dimension. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast, or NULL if there is no member +#' dimension (e.g., for observations, or for forecast with only one ensemble +#' member). The default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param abs_thresholds A numeric array or vector of the absolute thresholds in +#' the same units as \code{data}. If an array is provided, it should have at +#' least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different +#' thresholds for different locations, i.e. lon and lat dimensions), they +#' should match the dimensions of \code{data}, except the member dimension +#' which should not be included. 
The default value is NULL and, in this case, +#' 'prob_thresholds' is used for calculating the probabilities. +#'@param bin_dim_abs A character string of the dimension name of the +#' 'abs_thresholds' array in which the category limits are stored. It will also +#' be the probabilistic category dimension name in the output. The default value +#' is 'bin'. +#'@param indices_for_quantiles A vector of the indices to be taken along +#' 'time_dim' for computing the absolute thresholds between the probabilistic +#' categories. If NULL (default), the whole period is used. It is only used +#' when 'prob_thresholds' is provided. +#'@param weights A named numerical array of the weights for 'data' with +#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value +#' is NULL. The ensemble should have at least 70 members or span at least 10 +#' time steps and have more than 45 members if consistency between the weighted +#' and unweighted methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation mode. The default value +#' is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest of +#'the dimensions of 'data' except 'memb_dim'). The 'bin' dimension has the +#'length of the probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. +#' +#'@examples +#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) +#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' indices_for_quantiles = 4:17) +#' +#'# abs_thresholds is provided +#'abs_thr1 <- c(-0.2, 0.3) +#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr1) +#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', + indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop('Parameter "time_dim" must be a character string.') + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimensions.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimensions. If no member ", + "dimension exists, set it as NULL.") + } + }
+ ## bin_dim_abs + if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { + stop('Parameter "bin_dim_abs" must be a character string.') + } + ## prob_thresholds, abs_thresholds + if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { + .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. ", + "Only the first one is used.")) + abs_thresholds <- NULL + } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { + stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") + } + if (!is.null(prob_thresholds)) { + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- 1:dim(data)[time_dim] + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + + } else { # abs_thresholds + + if (is.null(dim(abs_thresholds))) { # a vector + dim(abs_thresholds) <- length(abs_thresholds) + names(dim(abs_thresholds)) <- bin_dim_abs + } + # bin_dim_abs + if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { + stop("Parameter 'abs_thresholds' must be a vector or an array with the 'bin_dim_abs' dimension.") + } + if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { + stop("Parameter 'abs_thresholds' cannot have the member dimension.") + } + dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] + if (any(!dim_name_abs %in% names(dim(data)))) { + stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") + } else { + if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { + stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") + } + } + if (!is.null(indices_for_quantiles)) { + warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' is provided.") + } + abs_target_dims <- bin_dim_abs + if (time_dim %in% names(dim(abs_thresholds))) { + abs_target_dims <- c(bin_dim_abs, time_dim) + } + + } + + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) { + stop("Parameter 'weights' must be a named numeric array.") + } + + # if (is.null(dat_dim)) { + if (!is.null(memb_dim)) { + lendim_weights <- 2 + namesdim_weights <- c(time_dim, memb_dim) + } else { + lendim_weights <- 1 + namesdim_weights <- c(time_dim) + } + if (length(dim(weights)) != lendim_weights | + any(!names(dim(weights)) %in% namesdim_weights)) { + stop(paste0("Parameter 'weights' must have dimension ", + paste0(namesdim_weights, collapse = ' and '), ".")) + } + if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { + stop(paste0("Parameter 'weights' must have the same dimension length as ", + paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.")) + } + weights <- Reorder(weights, namesdim_weights) + + # } else { + # if (length(dim(weights)) != 3 | any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) + # stop("Parameter 
'weights' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") + # if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | + # dim(weights)[time_dim] != dim(exp)[time_dim] | + # dim(weights)[dat_dim] != dim(exp)[dat_dim]) { + # stop(paste0("Parameter 'weights' must have the same dimension lengths ", + # "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) + # } + # weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) + # } + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + if (is.null(abs_thresholds)) { + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + } else { + res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), + target_dims = list(c(time_dim, memb_dim), abs_target_dims), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = NULL, + indices_for_quantiles = NULL, + weights = NULL, cross.val = FALSE, ncores = ncores)$output1 + } + + return(res) +} + +.GetProbs <- function(data, indices_for_quantiles, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + weights = NULL, cross.val = FALSE) { + # .GetProbs() is used in RPS, RPSS, ROCSS + # data + ## if data is exp: [sdate, memb] + ## if data is obs: [sdate, (memb)] + # weights: [sdate, (memb)], same as data + # if abs_thresholds is not NULL: [bin, (sdate)] + + # Add dim [memb = 1] to data if it doesn't have memb_dim + if (length(dim(data)) == 1) { + dim(data) <- c(dim(data), 1) + if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) + } + + # Calculate absolute thresholds + if (is.null(abs_thresholds)) { + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in 1:dim(data)[1]) { + if (is.null(weights)) { + quantiles[, i_time] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ], + weights[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + } + + } else { + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]), + dim = c(bin = length(quantiles), dim(data)[1])) + } + + } else { # abs_thresholds provided + quantiles <- 
abs_thresholds + if (length(dim(quantiles)) == 1) { + quantiles <- InsertDim(quantiles, len = dim(data)[1], + pos = 2, name = names(dim(data))[1]) + } + } + # quantiles: [bin-1, sdate] + + # Probabilities + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] + for (i_time in 1:dim(data)[1]) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim(quantiles)[1] + 1) + } else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[, i_time])) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in 1:dim(quantiles)[1]) { + # for thresholds falling under the distribution + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 + } else { + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, + quantiles[i_quant, i_time], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs[, i_time], 1) - append(0, integrated_probs[, i_time]) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) + } + } + } + } + + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) +}
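+ +# Sketch (synthetic data, hypothetical weights) of cross-validated tercile +# probabilities with per-member weights; the weighted mode assumes s2dv's +# Reorder() is available: +# set.seed(1) +# dat <- array(rnorm(500), dim = c(sdate = 20, member = 25)) +# wts <- array(runif(500), dim = c(sdate = 20, member = 25)) +# pr <- GetProbs(dat, time_dim = 'sdate', memb_dim = 'member', +# weights = wts, cross.val = TRUE) +# dim(pr) # bin = 3, sdate = 20; each column sums to 1 + + + diff --git a/modules/Indices/R/tmp/NAO.R b/modules/Indices/R/tmp/NAO.R deleted file mode 100644 index 75731055c83d9330ca1b5b67056d3a1025856690..0000000000000000000000000000000000000000 --- a/modules/Indices/R/tmp/NAO.R +++ /dev/null @@ -1,434 +0,0 @@ -#'Compute the North Atlantic Oscillation (NAO) Index -#' -#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF -#'of the sea level pressure (SLP) anomalies over the north Atlantic region -#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and -#'observed anomalies onto the observed EOF pattern or the forecast -#'anomalies onto the EOF pattern of the other years of the forecast. -#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month -#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns -#'cross-validated PCs of the NAO index for forecast (exp) and observations -#'(obs) based on the leading EOF pattern. -#' -#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) -#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with -#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. -#' If only NAO of observational data needs to be computed, this parameter can -#' be left to NULL. The default value is NULL.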
-#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) -#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with -#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. -#' If only NAO of experimental data needs to be computed, this parameter can -#' be left to NULL. The default value is NULL. -#'@param lat A vector of the latitudes of 'exp' and 'obs'. -#'@param lon A vector of the longitudes of 'exp' and 'obs'. -#'@param time_dim A character string indicating the name of the time dimension -#' of 'exp' and 'obs'. The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member -#' dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length -#' must be 1. The default value is 'member'. -#'@param space_dim A vector of two character strings. The first is the dimension -#' name of latitude of 'ano' and the second is the dimension name of longitude -#' of 'ano'. The default value is c('lat', 'lon'). -#'@param ftime_dim A character string indicating the name of the forecast time -#' dimension of 'exp' and 'obs'. The default value is 'ftime'. -#'@param ftime_avg A numeric vector of the forecast time steps to average -#' across the target period. If average is not needed, set NULL. The default -#' value is 2:4, i.e., from 2nd to 4th forecast time steps. -#'@param obsproj A logical value indicating whether to compute the NAO index by -#' projecting the forecast anomalies onto the leading EOF of observational -#' reference (TRUE) or compute the NAO by first computing the leading -#' EOF of the forecast anomalies (in cross-validation mode, i.e. leaving the -#' year you are evaluating out), and then projecting forecast anomalies onto -#' this EOF (FALSE). The default value is TRUE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A list which contains: -#'\item{exp}{ -#' A numeric array of forecast NAO index in verification format with the same -#' dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is NULL, -#' ftime_dim remains. -#' } -#'\item{obs}{ -#' A numeric array of observed NAO index in verification format with the same -#' dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL, -#' ftime_dim remains. -#'} -#' -#'@references -#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of -#' multi-model seasonal forecasts of the wintertime North Atlantic -#' Oscillation. Climate Dynamics, 21, 501-514. -#' DOI: 10.1007/s00382-003-0350-4 -#' -#'@examples -#'# Make up synthetic data -#'set.seed(1) -#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) -#'set.seed(2) -#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) -#'lat <- seq(20, 80, length.out = 6) -#'lon <- seq(-80, 40, length.out = 9) -#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) -#' -#'# plot the NAO index -#' \dontrun{ -#'nao$exp <- Reorder(nao$exp, c(2, 1)) -#'nao$obs <- Reorder(nao$obs, c(2, 1)) -#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", -#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. 
X") -#' } -#' -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#'@export -NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', - memb_dim = 'member', space_dim = c('lat', 'lon'), - ftime_dim = 'ftime', ftime_avg = 2:4, - obsproj = TRUE, ncores = NULL) { - - # Check inputs - ## exp and obs (1) - if (is.null(obs) & is.null(exp)) { - stop("Parameter 'exp' and 'obs' cannot both be NULL.") - } - if (!is.null(exp)) { - if (!is.numeric(exp)) { - stop("Parameter 'exp' must be a numeric array.") - } - if (is.null(dim(exp))) { - stop("Parameter 'exp' must have at least dimensions ", - "time_dim, memb_dim, space_dim, and ftime_dim.") - } - if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { - stop("Parameter 'exp' must have dimension names.") - } - } - if (!is.null(obs)) { - if (!is.numeric(obs)) { - stop("Parameter 'obs' must be a numeric array.") - } - if (is.null(dim(obs))) { - stop("Parameter 'obs' must have at least dimensions ", - "time_dim, space_dim, and ftime_dim.") - } - if (any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'obs' must have dimension names.") - } - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) > 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!is.null(exp)) { - if (!time_dim %in% names(dim(exp))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - } - if (!is.null(obs)) { - if (!time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!is.null(exp)) { - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - } - if (!is.null(obs)) { - if (memb_dim %in% names(dim(obs))) { - if (dim(obs)[memb_dim] != 1) { - stop("The length of parameter 'memb_dim' in 'obs' must be 1.") - } else { - add_member_back <- TRUE - obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') - } - } else { - add_member_back <- FALSE - } - } - ## space_dim - if (!is.character(space_dim) | length(space_dim) != 2) { - stop("Parameter 'space_dim' must be a character vector of 2.") - } - if (!is.null(exp)) { - if (!all(space_dim %in% names(dim(exp)))) { - stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") - } - } - if (!is.null(obs)) { - if (!all(space_dim %in% names(dim(obs)))) { - stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") - } - } - ## ftime_dim - if (!is.character(ftime_dim) | length(ftime_dim) > 1) { - stop("Parameter 'ftime_dim' must be a character string.") - } - if (!is.null(exp) && !is.null(ftime_avg)) { - if (!ftime_dim %in% names(dim(exp))) { - stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") - } - } - if (!is.null(obs) && !is.null(ftime_avg)) { - if (!ftime_dim %in% names(dim(obs))) { - stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") - } - } - ## exp and obs (2) - if (!is.null(exp) & !is.null(obs)) { - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - throw_error <- FALSE - if (length(name_exp) != length(name_obs)) { - throw_error <- TRUE - } else if (any(name_exp != name_obs)) { - throw_error <- TRUE - } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - throw_error <- TRUE - } - if (throw_error) { - 
stop("Parameter 'exp' and 'obs' must have the same names and lengths ", - "of all the dimensions except 'memb_dim'.") - } - } - ## ftime_avg - if (!is.null(ftime_avg)) { - if (!is.vector(ftime_avg) | !is.numeric(ftime_avg)) { - stop("Parameter 'ftime_avg' must be an integer vector.") - } - if (!is.null(exp)) { - if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { - stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") - } - } else { - if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { - stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") - } - } - } - ## sdate >= 2 - if (!is.null(exp)) { - if (dim(exp)[time_dim] < 2) { - stop("The length of time_dim must be at least 2.") - } - } else { - if (dim(obs)[time_dim] < 2) { - stop("The length of time_dim must be at least 2.") - } - } - ## lat and lon - if (!is.null(exp)) { - if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { - stop("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'exp' and 'obs'.") - } - if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { - stop("Parameter 'lon' must be a numeric vector with the same ", - "length as the longitude dimension of 'exp' and 'obs'.") - } - } else { - if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { - stop("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'exp' and 'obs'.") - } - if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { - stop("Parameter 'lon' must be a numeric vector with the same ", - "length as the longitude dimension of 'exp' and 'obs'.") - } - } - stop_needed <- FALSE - if (max(lat) > 80 | min(lat) < 20) { - stop_needed <- TRUE - } - #NOTE: different from s2dverification - # lon is not used in the calculation actually. EOF only uses lat to do the - # weight. So we just need to ensure the data is in this region, regardless - # the order. - if (any(lon < 0)) { #[-180, 180] - if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { - stop_needed <- TRUE - } - } else { #[0, 360] - if (any(lon >= 50 & lon <= 270)) { - stop_needed <- TRUE - } else { - lon_E <- lon[which(lon < 50)] - lon_W <- lon[-which(lon < 50)] - if (max(lon_E) < 30 | min(lon_W) > 290) { - stop_needed <- TRUE - } - } - } - if (stop_needed) { - stop("The typical domain used to compute the NAO is 20N-80N, ", - "80W-40E. 'lat' or 'lon' is out of range.") - } - ## obsproj - if (!is.logical(obsproj) | length(obsproj) > 1) { - stop("Parameter 'obsproj' must be either TRUE or FALSE.") - } - if (obsproj) { - if (is.null(obs)) { - stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") - } - if (is.null(exp)) { - .warning("parameter 'obsproj' set to TRUE but no 'exp' provided.") - } - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores == 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - - # Average ftime - if (!is.null(ftime_avg)) { - if (!is.null(exp)) { - exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) - exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) - ## Cross-validated PCs. Fabian. This should be extended to - ## nmod and nlt by simple loops. 
Virginie - } - if (!is.null(obs)) { - obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) - obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) - } - } - - # wght - wght <- array(sqrt(cos(lat * pi / 180)), dim = c(length(lat), length(lon))) - - if (!is.null(exp) & !is.null(obs)) { - res <- Apply(list(exp, obs), - target_dims = list(exp = c(memb_dim, time_dim, space_dim), - obs = c(time_dim, space_dim)), - fun = .NAO, - lat = lat, wght = wght, - obsproj = obsproj, add_member_back = add_member_back, - ncores = ncores) - } else if (!is.null(exp)) { - res <- Apply(list(exp = exp), - target_dims = list(exp = c(memb_dim, time_dim, space_dim)), - fun = .NAO, - lat = lat, wght = wght, obs = NULL, - obsproj = obsproj, add_member_back = FALSE, - ncores = ncores) - } else if (!is.null(obs)) { - if (add_member_back) { - output_dims <- list(obs = c(time_dim, memb_dim)) - } else { - output_dims <- list(obs = time_dim) - } - res <- Apply(list(obs = obs), - target_dims = list(obs = c(time_dim, space_dim)), - output_dims = output_dims, - fun = .NAO, - lat = lat, wght = wght, exp = NULL, - obsproj = obsproj, add_member_back = add_member_back, - ncores = ncores) - } - return(res) -} - -.NAO <- function(exp = NULL, obs = NULL, lat, wght, obsproj = TRUE, add_member_back = FALSE) { - # exp: [memb_exp, sdate, lat, lon] - # obs: [sdate, lat, lon] - # wght: [lat, lon] - - if (!is.null(exp)) { - ntime <- dim(exp)[2] - nlat <- dim(exp)[3] - nlon <- dim(exp)[4] - nmemb_exp <- dim(exp)[1] - } else { - ntime <- dim(obs)[1] - nlat <- dim(obs)[2] - nlon <- dim(obs)[3] - } - - if (!is.null(obs)) NAOO.ver <- array(NA, dim = ntime) - if (!is.null(exp)) NAOF.ver <- array(NA, dim = c(ntime, nmemb_exp)) - - for (tt in 1:ntime) { #sdate - - if (!is.null(obs)) { - ## Calculate observation EOF. Excluding one forecast start year. - obs_sub <- obs[(1:ntime)[-tt], , , drop = FALSE] - obs_EOF <- .EOF(obs_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] - - ## Correct polarity of pattern. - # dim(obs_EOF$EOFs): [mode, lat, lon] - if (0 < mean(obs_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { - obs_EOF$EOFs <- obs_EOF$EOFs * (-1) -# obs_EOF$PCs <- obs_EOF$PCs * (-1) # not used - } - ## Project observed anomalies. - PF <- .ProjectField(obs, eof_mode = obs_EOF$EOFs[1, , ], wght = wght) # [sdate] - ## Keep PCs of excluded forecast start year. Fabian. - NAOO.ver[tt] <- PF[tt] - } - - if (!is.null(exp)) { - if (!obsproj) { - exp_sub <- exp[, (1:ntime)[-tt], , , drop = FALSE] - # Combine 'memb' and 'sdate' to calculate EOF - dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) - exp_EOF <- .EOF(exp_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] - - ## Correct polarity of pattern. - ##NOTE: different from s2dverification, which doesn't use mean(). -# if (0 < exp_EOF$EOFs[1, which.min(abs(lat - 65)), ]) { - if (0 < mean(exp_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { - exp_EOF$EOFs <- exp_EOF$EOFs * (-1) -# exp_EOF$PCs <- exp_EOF$PCs * sign # not used - } - - ### Lines below could be simplified further by computing - ### ProjectField() only on the year of interest... (though this is - ### not vital). 
Lauriane - for (imemb in 1:nmemb_exp) { - PF <- .ProjectField(exp[imemb, , , ], - eof_mode = exp_EOF$EOFs[1, , ], - wght = wght) # [sdate, memb] - NAOF.ver[tt, imemb] <- PF[tt] - } - } else { - ## Project forecast anomalies on obs EOF - for (imemb in 1:nmemb_exp) { - PF <- .ProjectField(exp[imemb, , , ], - eof_mode = obs_EOF$EOFs[1, , ], - wght = wght) # [sdate] - NAOF.ver[tt, imemb] <- PF[tt] - } - } - } - - } # for loop sdate - - # add_member_back - if (add_member_back) { - suppressWarnings({ - NAOO.ver <- InsertDim(NAOO.ver, 2, 1, name = names(dim(exp))[1]) - }) - } - - #NOTE: EOFs_obs is not returned because it's only the result of the last sdate - # (It is returned in s2dverification.) - if (!is.null(exp) & !is.null(obs)) { - return(list(exp = NAOF.ver, obs = NAOO.ver)) #, EOFs_obs = obs_EOF)) - } else if (!is.null(exp)) { - return(list(exp = NAOF.ver)) - } else if (!is.null(obs)) { - return(list(obs = NAOO.ver)) - } -} diff --git a/modules/Indices/R/tmp/ProjectField.R b/modules/Indices/R/tmp/ProjectField.R deleted file mode 100644 index 4fdf1892f2a8cd38a2f6492906700aa0cafb1cf2..0000000000000000000000000000000000000000 --- a/modules/Indices/R/tmp/ProjectField.R +++ /dev/null @@ -1,272 +0,0 @@ -#'Project anomalies onto modes of variability -#' -#'Project anomalies onto modes of variability to get the temporal evolution of -#'the EOF mode selected. It returns principal components (PCs) by area-weighted -#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from -#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and -#'returns NA if the whole spatial pattern is NA. -#' -#'@param ano A numerical array of anomalies with named dimensions. The -#' dimensions must have at least 'time_dim' and 'space_dim'. It can be -#' generated by Ano(). -#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which -#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and -#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated -#' by EOF() or REOF(). -#'@param time_dim A character string indicating the name of the time dimension -#' of 'ano'. The default value is 'sdate'. -#'@param space_dim A vector of two character strings. The first is the dimension -#' name of latitude of 'ano' and the second is the dimension name of longitude -#' of 'ano'. The default value is c('lat', 'lon'). -#'@param mode An integer of the variability mode number in the EOF to be -#' projected on. The default value is NULL, which means all the modes of 'eof' -#' is calculated. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return A numerical array of the principal components in the verification -#' format. The dimensions are the same as 'ano' except 'space_dim'. 
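# A self-contained sketch (invented toy data; a plain svd() stands in for the
# package's .EOF() helper) of the leave-one-out pattern implemented by the
# deleted .NAO() above: for each start date, the leading EOF is computed from
# the remaining years, its polarity is fixed near 65N, and the held-out year
# is projected onto it.
set.seed(1)
nsdate <- 10; nlat <- 5; nlon <- 8
lat <- seq(20, 80, length.out = nlat)
obs <- array(rnorm(nsdate * nlat * nlon), dim = c(nsdate, nlat, nlon))
wght <- sqrt(cos(lat * pi / 180))        # same latitude weighting as the code
nao <- numeric(nsdate)
for (tt in seq_len(nsdate)) {
  x <- obs[-tt, , , drop = FALSE] * rep(wght, each = nsdate - 1)
  eof1 <- matrix(svd(matrix(x, nrow = nsdate - 1))$v[, 1], nlat, nlon)
  if (mean(eof1[which.min(abs(lat - 65)), ]) > 0) eof1 <- -eof1  # fix polarity
  nao[tt] <- sum(obs[tt, , ] * wght * eof1)   # project the excluded year
}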
-#' -#'@seealso EOF, NAO, PlotBoxWhisker -#'@examples -#'\dontshow{ -#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), -#' c('observation'), startDates, -#' leadtimemin = 1, -#' leadtimemax = 4, -#' output = 'lonlat', -#' latmin = 27, latmax = 48, -#' lonmin = -12, lonmax = 40) -#'} -#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) -#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) -#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) -#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) -#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) -#' -#'\dontrun{ -#' # Plot the forecast and the observation of the first mode for the last year -#' # of forecast -#' sdate_dim_length <- dim(mode1_obs)['sdate'] -#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), -#' lwd = 2) -#' for (i in 1:dim(mode1_exp)['member']) { -#' par(new = TRUE) -#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], -#' ylim = c(-15000, 15000)) -#' } -#'} -#' -#'@import multiApply -#'@export -ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), - mode = NULL, ncores = NULL) { - - # Check inputs - ## ano (1) - if (is.null(ano)) { - stop("Parameter 'ano' cannot be NULL.") - } - if (!is.numeric(ano)) { - stop("Parameter 'ano' must be a numeric array.") - } - if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { - stop("Parameter 'ano' must have dimension names.") - } - ## eof (1) - if (is.null(eof)) { - stop("Parameter 'eof' cannot be NULL.") - } - if (!is.list(eof)) { - stop("Parameter 'eof' must be a list generated by EOF() or REOF().") - } - if ('EOFs' %in% names(eof)) { - EOFs <- "EOFs" - } else if ('REOFs' %in% names(eof)) { - EOFs <- "REOFs" - } else if ('patterns' %in% names(eof)) { - EOFs <- "patterns" - } else { - stop("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", - "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().") - } - if (!'wght' %in% names(eof)) { - stop("Parameter 'eof' must be a list that contains 'wght'. 
", - "It can be generated by EOF() or REOF().") - } - if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { - stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") - } - if (!is.numeric(eof$wght) || !is.array(eof$wght)) { - stop("The component 'wght' of parameter 'eof' must be a numeric array.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) > 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(ano))) { - stop("Parameter 'time_dim' is not found in 'ano' dimension.") - } - ## space_dim - if (!is.character(space_dim) | length(space_dim) != 2) { - stop("Parameter 'space_dim' must be a character vector of 2.") - } - if (!all(space_dim %in% names(dim(ano)))) { - stop("Parameter 'space_dim' is not found in 'ano' dimension.") - } - ## ano (2) - if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { - stop("Parameter 'ano' must be an array with dimensions named as ", - "parameter 'space_dim' and 'time_dim'.") - } - ## eof (2) - if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | - !'mode' %in% names(dim(eof[[EOFs]]))) { - stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", - "with dimensions named as parameter 'space_dim' and 'mode'.") - } - if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { - stop("The component 'wght' of parameter 'eof' must be an array ", - "with dimensions named as parameter 'space_dim'.") - } - ## mode - if (!is.null(mode)) { - if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { - stop("Parameter 'mode' must be NULL or a positive integer.") - } - if (mode > dim(eof[[EOFs]])['mode']) { - stop("Parameter 'mode' is greater than the number of available ", - "modes in 'eof'.") - } - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - -#------------------------------------------------------- - - # Keep the chosen mode - if (!is.null(mode)) { - eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') - } else { - eof_mode <- eof[[EOFs]] - } - - if ('mode' %in% names(dim(eof_mode))) { - dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] - } else { - dimnames_without_mode <- names(dim(eof_mode)) - } - - if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] - if ('mode' %in% names(dim(eof_mode))) { - eof_mode_target <- c('mode', space_dim) - output_dims <- c('mode', time_dim) - } else { - eof_mode_target <- space_dim - output_dims <- time_dim - } - res <- Apply(list(ano, eof_mode), - target_dims = list(c(time_dim, space_dim), - eof_mode_target), - output_dims = output_dims, - wght = eof$wght, - fun = .ProjectField, - ncores = ncores)$output1 - - } else { - - if (!all(dimnames_without_mode %in% names(dim(ano)))) { - stop("The array 'EOF' in parameter 'eof' has dimension not in parameter ", - "'ano'. Check if 'ano' and 'eof' are compatible.") - } - - common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] - if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != - dim(eof_mode)[dimnames_without_mode])) { - stop("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", - "with different length. Check if 'ano' and 'eof' are compatible.") - } - - # Enlarge eof/ano is needed. 
The margin_dims of Apply() must be consistent - # between ano and eof. - additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] - additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] - if (length(additional_dims) != 0) { - for (i in seq_along(additional_dims)) { - eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), - lendim = additional_dims[i], name = names(additional_dims)[i]) - } - } - if ('mode' %in% names(dim(eof_mode))) { - eof_mode_target <- c('mode', space_dim) - output_dims <- c('mode', time_dim) - } else { - eof_mode_target <- space_dim - output_dims <- time_dim - } - res <- Apply(list(ano, eof_mode), - target_dims = list(c(time_dim, space_dim), - eof_mode_target), - output_dims = output_dims, - wght = eof$wght, - fun = .ProjectField, - ncores = ncores)$output1 - } - - return(res) -} - - -.ProjectField <- function(ano, eof_mode, wght) { - # ano: [sdate, lat, lon] - # eof_mode: [lat, lon] or [mode, lat, lon] - # wght: [lat, lon] - - ntime <- dim(ano)[1] - - if (length(dim(eof_mode)) == 2) { # mode != NULL - # Initialization of pc.ver. - pc.ver <- array(NA, dim = ntime) #[sdate] - - # Weight - e.1 <- eof_mode * wght - ano <- ano * InsertDim(wght, 1, ntime) - #ano <- aaply(ano, 1, '*', wght) # much heavier - - na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA - #na <- apply(ano, 1, mean, na.rm = TRUE) # much heavier - tmp <- ano * InsertDim(e.1, 1, ntime) # [sdate, lat, lon] - rm(ano) - #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE) # much heavier - pc.ver <- rowSums(tmp, na.rm = TRUE) - pc.ver[which(is.na(na))] <- NA - - } else { # mode = NULL - # Weight - e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1]) - dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3])) # [mode, lat*lon] - ano <- ano * InsertDim(wght, 1, ntime) - dim(ano) <- c(ntime, prod(dim(ano)[2:3])) # [sdate, lat*lon] - - na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA - na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1)) - - # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate] - # Result: [mode, sdate] - pc.ver <- e.1 %*% t(ano) - pc.ver[which(is.na(na))] <- NA - -# # Change back dimensions to feet original input -# dim(projection) <- c(moredims, mode = unname(neofs)) -# return(projection) - } - - return(pc.ver) -} - diff --git a/modules/Indices/R/tmp/Utils.R b/modules/Indices/R/tmp/Utils.R deleted file mode 100644 index 77b7328f591c6e2b426b68514afa7767fc96b958..0000000000000000000000000000000000000000 --- a/modules/Indices/R/tmp/Utils.R +++ /dev/null @@ -1,1884 +0,0 @@ -#'@importFrom abind abind -#'@import plyr ncdf4 -#'@importFrom grDevices png jpeg pdf svg bmp tiff -#'@importFrom easyVerification convert2prob - -## Function to tell if a regexpr() match is a complete match to a specified name -.IsFullMatch <- function(x, name) { - x > 0 && attributes(x)$match.length == nchar(name) -} - -.ConfigReplaceVariablesInString <- function(string, replace_values, - allow_undefined_key_vars = FALSE) { - # This function replaces all the occurrences of a variable in a string by - # their corresponding string stored in the replace_values. 
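# A minimal sketch (invented data) of the core of the deleted .ProjectField()
# further above: per start date, an area-weighted dot product between the
# anomaly field and one EOF pattern, with both fields weighted by 'wght'.
ano  <- array(rnorm(3 * 4 * 5), dim = c(3, 4, 5))   # [sdate, lat, lon]
eofp <- matrix(rnorm(4 * 5), 4, 5)                  # [lat, lon]
wght <- matrix(runif(4 * 5), 4, 5)                  # [lat, lon]
pc <- sapply(seq_len(dim(ano)[1]),
             function(tt) sum(ano[tt, , ] * wght * eofp * wght, na.rm = TRUE))
pc  # one principal-component value per start date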
- if (length(strsplit(string, "\\$")[[1]]) > 1) { - parts <- strsplit(string, "\\$")[[1]] - output <- "" - i <- 0 - for (part in parts) { - if (i %% 2 == 0) { - output <- paste0(output, part) - } else { - if (part %in% names(replace_values)) { - output <- paste0(output, - .ConfigReplaceVariablesInString(replace_values[[part]], - replace_values, - allow_undefined_key_vars)) - } else if (allow_undefined_key_vars) { - output <- paste0(output, "$", part, "$") - } else { - stop('Error: The variable $', part, - '$ was not defined in the configuration file.', sep = '') - } - } - i <- i + 1 - } - output - } else { - string - } -} - -.KnownLonNames <- function() { - known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') - return(known_lon_names) -} - -.KnownLatNames <- function() { - known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') - return(known_lat_names) -} - -.t2nlatlon <- function(t) { - ## As seen in cdo's griddes.c: ntr2nlat() - nlats <- (t * 3 + 1) / 2 - if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) { - nlats <- ceiling(nlats) - } else { - nlats <- round(nlats) - } - if (nlats %% 2 > 0) { - nlats <- nlats + 1 - } - ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF - nlons <- 2 * nlats - keep_going <- TRUE - while (keep_going) { - n <- nlons - if (n %% 8 == 0) n <- trunc(n / 8) - while (n %% 6 == 0) n <- trunc(n / 6) - while (n %% 5 == 0) n <- trunc(n / 5) - while (n %% 4 == 0) n <- trunc(n / 4) - while (n %% 3 == 0) n <- trunc(n / 3) - if (n %% 2 == 0) n <- trunc(n / 2) - if (n <= 8) { - keep_going <- FALSE - } else { - nlons <- nlons + 2 - if (nlons > 9999) { - stop("Error: pick another gaussian grid truncation. ", - "It doesn't fulfill the standards to apply FFT.") - } - } - } - c(nlats, nlons) -} - -.nlat2t <- function(nlats) { - trunc((nlats * 2 - 1) / 3) -} - -# .LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) { -# # The purpose, working modes, inputs and outputs of this function are -# # explained in ?LoadDataFile -# #suppressPackageStartupMessages({library(ncdf4)}) -# #suppressPackageStartupMessages({library(bigmemory)}) -# #suppressPackageStartupMessages({library(plyr)}) -# # Auxiliar function to convert array indices to lineal indices -# arrayIndex2VectorIndex <- function(indices, dims) { -# if (length(indices) > length(dims)) { -# stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.") -# } -# position <- 1 -# dims <- rev(dims) -# indices <- rev(indices) -# for (i in seq_along(indices)) { -# position <- position + (indices[i] - 1) * prod(dims[-(1:i)]) -# } -# position -# } -# -# found_file <- NULL -# dims <- NULL -# grid_name <- units <- var_long_name <- NULL -# is_2d_var <- array_across_gw <- NULL -# data_across_gw <- NULL -# -# filename <- work_piece[['filename']] -# namevar <- work_piece[['namevar']] -# output <- work_piece[['output']] -# # The names of all data files in the directory of the repository that match -# # the pattern are obtained. -# if (any(grep("^http", filename))) { -# is_url <- TRUE -# files <- filename -# ## TODO: Check that the user is not using shell globbing exps. -# } else { -# is_url <- FALSE -# files <- Sys.glob(filename) -# } -# -# # If we don't find any, we leave the flag 'found_file' with a NULL value. -# if (length(files) > 0) { -# # The first file that matches the pattern is chosen and read. 
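# Invented usage examples for the helpers defined above:
.ConfigReplaceVariablesInString("$ROOT$/$VAR$/$VAR$_$SDATE$.nc",
                                list(ROOT = "/esarchive", VAR = "tas",
                                     SDATE = "19901101"))
# -> "/esarchive/tas/tas_19901101.nc" ($TAG$ placeholders resolve recursively)
.t2nlatlon(21)  # -> c(32, 64): truncation T21 implies a 32x64 Gaussian grid
.nlat2t(32)     # -> 21, the inverse mapping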
-# filename <- head(files, 1) -# filein <- filename -# found_file <- filename -# mask <- work_piece[['mask']] -# -# if (!silent && explore_dims) { -# .message(paste("Exploring dimensions...", filename)) -# ##} else { -# ## cat(paste("* Reading & processing data...", filename, '\n')) -# ##} -# } -# -# # We will fill in 'expected_dims' with the names of the expected dimensions of -# # the data array we'll retrieve from the file. -# expected_dims <- NULL -# remap_needed <- FALSE -# # But first we open the file and work out whether the requested variable is 2d -# fnc <- nc_open(filein) -# if (!(namevar %in% names(fnc$var))) { -# stop("Error: The variable", namevar, "is not defined in the file", filename) -# } -# var_long_name <- fnc$var[[namevar]]$longname -# units <- fnc$var[[namevar]]$units -# file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) -# # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of -# # 'longitude'/'latitude'. -# if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) && -# (work_piece[['dimnames']][['lon']] == 'longitude') && -# ('lon' %in% file_dimnames)) { -# work_piece[['dimnames']][['lon']] <- 'lon' -# } -# if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) && -# (work_piece[['dimnames']][['lat']] == 'latitude') && -# ('lat' %in% file_dimnames)) { -# work_piece[['dimnames']][['lat']] <- 'lat' -# } -# if (is.null(work_piece[['is_2d_var']])) { -# is_2d_var <- all(c(work_piece[['dimnames']][['lon']], -# work_piece[['dimnames']][['lat']]) %in% -# unlist(lapply(fnc$var[[namevar]][['dim']], -# '[[', 'name'))) -# } else { -# is_2d_var <- work_piece[['is_2d_var']] -# } -# if ((is_2d_var || work_piece[['is_file_per_dataset']])) { -# if (Sys.which("cdo")[[1]] == "") { -# stop("Error: CDO libraries not available") -# } -# -# cdo_version <- -# strsplit(suppressWarnings( -# system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] -# -# cdo_version <- -# as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) -# -# } -# # If the variable to load is 2-d, we need to determine whether: -# # - interpolation is needed -# # - subsetting is requested -# if (is_2d_var) { -# ## We read the longitudes and latitudes from the file. -# lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) -# lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) -# first_lon_in_original_file <- lon[1] -# # If a common grid is requested or we are exploring the file dimensions -# # we need to read the grid type and size of the file to finally work out the -# # CDO grid name. 
-# if (!is.null(work_piece[['grid']]) || explore_dims) { -# # Here we read the grid type and its number of longitudes and latitudes -# file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) -# grids_positions <- grep('# gridID', file_info) -# if (length(grids_positions) < 1) { -# stop("The grid should be defined in the files.") -# } -# grids_first_lines <- grids_positions + 2 -# grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) -# grids_info <- as.list(seq_along(grids_positions)) -# grids_info <- lapply(grids_info, -# function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) -# grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) -# grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) -# grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) -# grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) -# grids_matches <- unlist(lapply(grids_info, function (x) { -# nlons <- if (any(grep('xsize', x))) { -# as.numeric(x[grep('xsize', x) + 1]) -# } else { -# NA -# } -# nlats <- if (any(grep('ysize', x))) { -# as.numeric(x[grep('ysize', x) + 1]) -# } else { -# NA -# } -# result <- FALSE -# if (!anyNA(c(nlons, nlats))) { -# if ((nlons == length(lon)) && -# (nlats == length(lat))) { -# result <- TRUE -# } -# } -# result -# })) -# grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] -# grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] -# grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] -# if (length(grids_matches) == 0) { -# stop("Error: Only 'gaussian' and 'lonlat' grids supported. See e.g: cdo sinfo ", filename) -# } -# if (sum(grids_matches) > 1) { -# if ((all(grids_types[which(grids_matches)] == 'gaussian') || -# all(grids_types[which(grids_matches)] == 'lonlat')) && -# all(unlist(lapply(grids_info[which(grids_matches)], identical, -# grids_info[which(grids_matches)][[1]])))) { -# grid_type <- grids_types[which(grids_matches)][1] -# } else { -# stop("Error: Load() can't disambiguate: ", -# "More than one lonlat/gaussian grids with the same size as ", -# "the requested variable defined in ", filename) -# } -# } else if (sum(grids_matches) == 1) { -# grid_type <- grids_types[which(grids_matches)] -# } else { -# stop("Unexpected error.") -# } -# grid_lons <- length(lon) -# grid_lats <- length(lat) -# # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() -# if (grid_type == 'lonlat') { -# grid_name <- paste0('r', grid_lons, 'x', grid_lats) -# } else { -# grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') -# } -# if (is.null(work_piece[['grid']])) { -# .warning(paste0("Detect the grid type to be '", grid_name, "'. ", -# "If it is not expected, assign parameter 'grid' to avoid wrong result.")) -# } -# } -# # If a common grid is requested, we will also calculate its size which we will use -# # later on. 
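# The grid-naming rule applied above, as a hypothetical standalone helper (the
# function name is invented; the rule and .nlat2t() come from the code above):
grid_name_for <- function(grid_type, nlon, nlat) {
  if (grid_type == 'lonlat') {
    paste0('r', nlon, 'x', nlat)        # regular grid: "r<nlon>x<nlat>"
  } else {
    paste0('t', .nlat2t(nlat), 'grid')  # Gaussian grid: "t<truncation>grid"
  }
}
grid_name_for('lonlat', 1440, 721)  # "r1440x721" (e.g. the 0.25-degree ERA5 grid)
grid_name_for('gaussian', 64, 32)   # "t21grid"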
-# if (!is.null(work_piece[['grid']])) { -# # Now we calculate the common grid type and its lons and lats -# if (any(grep('^t\\d{1,+}grid$', work_piece[['grid']]))) { -# common_grid_type <- 'gaussian' -# common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) -# nlonlat <- .t2nlatlon(common_grid_res) -# common_grid_lats <- nlonlat[1] -# common_grid_lons <- nlonlat[2] -# } else if (any(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']]))) { -# common_grid_type <- 'lonlat' -# common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) -# common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) -# } else { -# stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") -# } -# } else { -# ## If no 'grid' is specified, there is no common grid. -# ## But these variables are filled in for consistency in the code. -# common_grid_lons <- length(lon) -# common_grid_lats <- length(lat) -# } -# first_common_grid_lon <- 0 -# last_common_grid_lon <- 360 - 360 / common_grid_lons -# ## This is not true for gaussian grids or for some regular grids, but -# ## is a safe estimation -# first_common_grid_lat <- -90 -# last_common_grid_lat <- 90 -# # And finally determine whether interpolation is needed or not -# remove_shift <- FALSE -# if (!is.null(work_piece[['grid']])) { -# if ((grid_lons != common_grid_lons) || -# (grid_lats != common_grid_lats) || -# (grid_type != common_grid_type) || -# (lon[1] != first_common_grid_lon)) { -# if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && -# grid_type == common_grid_type && lon[1] != first_common_grid_lon) { -# remove_shift <- TRUE -# } -# remap_needed <- TRUE -# common_grid_name <- work_piece[['grid']] -# } -# } else if ((lon[1] != first_common_grid_lon) && explore_dims && -# !work_piece[['single_dataset']]) { -# remap_needed <- TRUE -# common_grid_name <- grid_name -# remove_shift <- TRUE -# } -# if (remap_needed && (work_piece[['remap']] == 'con') && -# (cdo_version >= as.numeric_version('1.7.0'))) { -# work_piece[['remap']] <- 'ycon' -# } -# if (remove_shift && !explore_dims) { -# if (!is.null(work_piece[['progress_amount']])) { -# cat("\n") -# } -# .warning(paste0("The dataset with index ", -# tail(work_piece[['indices']], 1), " in '", -# work_piece[['dataset_type']], -# "' doesn't start at longitude 0 and will be re-interpolated in order ", -# "to align its longitudes with the standard CDO grids definable with ", -# "the names 'tgrid' or 'rx', which are by definition ", -# "starting at the longitude 0.\n")) -# if (!is.null(mask)) { -# .warning(paste0("A mask was provided for the dataset with index ", -# tail(work_piece[['indices']], 1), " in '", -# work_piece[['dataset_type']], -# "'. This dataset has been re-interpolated to align its longitudes to ", -# "start at 0. You must re-interpolate the corresponding mask to align ", -# "its longitudes to start at 0 as well, if you haven't done so yet. ", -# "Running cdo remapcon,", common_grid_name, -# " original_mask_file.nc new_mask_file.nc will fix it.\n")) -# } -# } -# if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { -# if (!is.null(work_piece[['progress_amount']])) { -# cat("\n") -# } -# if (!explore_dims) { -# .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), -# " in '", work_piece[['dataset_type']], "' is originally on ", -# "a grid coarser than the common grid and it has been ", -# "extrapolated. Check the results carefully. 
It is ", -# "recommended to specify as common grid the coarsest grid ", -# "among all requested datasets via the parameter 'grid'.\n")) -# } -# } -# # Now calculate if the user requests for a lonlat subset or for the -# # entire field -# lonmin <- work_piece[['lon_limits']][1] -# lonmax <- work_piece[['lon_limits']][2] -# latmin <- work_piece[['lat_limits']][1] -# latmax <- work_piece[['lat_limits']][2] -# lon_subsetting_requested <- FALSE -# lonlat_subsetting_requested <- FALSE -# if (lonmin <= lonmax) { -# if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { -# lon_subsetting_requested <- TRUE -# } -# } else { -# if ((lonmin - lonmax) > 360 / common_grid_lons) { -# lon_subsetting_requested <- TRUE -# } else { -# gap_width <- floor(lonmin / (360 / common_grid_lons)) - -# floor(lonmax / (360 / common_grid_lons)) -# if (gap_width > 0) { -# if (!(gap_width == 1 && (lonmin %% (360 / common_grid_lons) == 0) && -# (lonmax %% (360 / common_grid_lons) == 0))) { -# lon_subsetting_requested <- TRUE -# } -# } -# } -# } -# if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) -# || (lon_subsetting_requested)) { -# lonlat_subsetting_requested <- TRUE -# } -# # Now that we know if subsetting was requested, we can say if final data -# # will go across greenwich -# if (lonmax < lonmin) { -# data_across_gw <- TRUE -# } else { -# data_across_gw <- !lon_subsetting_requested -# } -# -# # When remap is needed but no subsetting, the file is copied locally -# # so that cdo works faster, and then interpolated. -# # Otherwise the file is kept as is and the subset will have to be -# # interpolated still. -# if (!lonlat_subsetting_requested && remap_needed) { -# nc_close(fnc) -# filecopy <- tempfile(pattern = "load", fileext = ".nc") -# file.copy(filein, filecopy) -# filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") -# # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the -# # underlying hdf5 library. -# system(paste0("cdo -L -s remap", work_piece[['remap']], ",", -# common_grid_name, -# " -selname,", namevar, " ", filecopy, " ", filein, -# " 2>/dev/null")) -# file.remove(filecopy) -# work_piece[['dimnames']][['lon']] <- 'lon' -# work_piece[['dimnames']][['lat']] <- 'lat' -# fnc <- nc_open(filein) -# lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) -# lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) -# } -# -# # Read and check also the mask -# if (!is.null(mask)) { -# ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') -# if (is.list(mask)) { -# if (!file.exists(mask[['path']])) { -# stop("Error: Couldn't find the mask file", mask[['path']]) -# } -# mask_file <- mask[['path']] -# ###file.copy(work_piece[['mask']][['path']], mask_file) -# fnc_mask <- nc_open(mask_file) -# vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') -# if ('nc_var_name' %in% names(mask)) { -# if (!(mask[['nc_var_name']] %in% -# vars_in_mask)) { -# stop("Error: couldn't find variable", mask[['nc_var_name']], -# "in the mask file", mask[['path']]) -# } -# } else { -# if (length(vars_in_mask) != 1) { -# stop("Error: one and only one non-coordinate variable should be ", -# "defined in the mask file", -# mask[['path']], -# "if the component 'nc_var_name' is not specified. 
", -# "Currently found: ", -# toString(vars_in_mask), ".") -# } else { -# mask[['nc_var_name']] <- vars_in_mask -# } -# } -# if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { -# stop("Error: the variable '", -# mask[['nc_var_name']], -# "' must be defined only over the dimensions '", -# work_piece[['dimnames']][['lon']], "' and '", -# work_piece[['dimnames']][['lat']], -# "' in the mask file ", -# mask[['path']]) -# } -# mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) -# nc_close(fnc_mask) -# ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) -# ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) -# ###} else { -# ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) -# ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) -# ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') -# ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) -# ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) -# ### nc_close(fnc_mask) -# ### fnc_mask <- nc_open(mask_file) -# ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') -# ### mask_lon <- lon -# ### mask_lat <- lat -# ###} -# ###} -# ### Now ready to check that the mask is right -# ##if (!(lonlat_subsetting_requested && remap_needed)) { -# ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { -# ### stop(paste("Error: the mask of the dataset with index ", -# ### tail(work_piece[['indices']], 1), " in '", -# ### work_piece[['dataset_type']], "' is wrong. ", -# ### "It must be on the common grid if the selected output type is 'lonlat', ", -# ### "'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on ", -# ### "the grid of the corresponding dataset if the selected output type is ", -# ### "'areave' and no 'grid' has been specified. For more information ", -# ### "check ?Load and see help on parameters 'grid', 'maskmod' and ", -# ### "'maskobs'.", sep = "")) -# ### } -# ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { -# ### stop(paste0("Error: the longitudes and latitudes in the masks must be ", -# ### "identical to the ones in the corresponding data files if output = 'areave' ", -# ### " or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in ", -# ### "the mask file must start by 0 and the latitudes must be ordered from ", -# ### "highest to lowest. See\n ", -# ### work_piece[['mask']][['path']], " and ", filein)) -# ###} -# } -# } -# -# lon_indices <- seq_along(lon) -# if (!(lonlat_subsetting_requested && remap_needed)) { -# lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 -# } -# if (lonmax >= lonmin) { -# lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] -# } else if (!remap_needed) { -# lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] -# } -# lat_indices <- which(lat >= latmin & lat <= latmax) -# ## In most of the cases the latitudes are ordered from -90 to 90. -# ## We will reorder them to be in the order from 90 to -90, so mostly -# ## always the latitudes are reordered. -# ## TODO: This could be avoided in future. 
-# if (lat[1] < lat[length(lat)]) { -# lat_indices <- lat_indices[rev(seq_along(lat_indices))] -# } -# if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { -# if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { -# stop("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), -# " in '", work_piece[['dataset_type']], "' is wrong. It must be on the ", -# "common grid if the selected output type is 'lonlat', 'lon' or 'lat', ", -# "or 'areave' and 'grid' has been specified. It must be on the grid of ", -# "the corresponding dataset if the selected output type is 'areave' and ", -# "no 'grid' has been specified. For more information check ?Load and see ", -# "help on parameters 'grid', 'maskmod' and 'maskobs'.") -# } -# mask <- mask[lon_indices, lat_indices] -# } -# ## If the user requests subsetting, we must extend the lon and lat limits if possible -# ## so that the interpolation after is done properly -# maximum_extra_points <- work_piece[['remapcells']] -# if (lonlat_subsetting_requested && remap_needed) { -# if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || -# (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { -# ## if the requested number of points goes beyond the left or right -# ## sides of the map, we need to take the entire map so that the -# ## interpolation works properly -# lon_indices <- seq_along(lon) -# } else { -# extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) -# if (extra_points > 0) { -# lon_indices <- -# c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) -# } -# extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) -# if (extra_points > 0) { -# lon_indices <- c(lon_indices, -# (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) -# } -# } -# min_lat_ind <- min(lat_indices) -# max_lat_ind <- max(lat_indices) -# extra_points <- min(maximum_extra_points, min_lat_ind - 1) -# if (extra_points > 0) { -# if (lat[1] < tail(lat, 1)) { -# lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) -# } else { -# lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) -# } -# } -# extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) -# if (extra_points > 0) { -# if (lat[1] < tail(lat, 1)) { -# lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) -# } else { -# lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) -# } -# } -# } -# lon <- lon[lon_indices] -# lat <- lat[lat_indices] -# expected_dims <- c(work_piece[['dimnames']][['lon']], -# work_piece[['dimnames']][['lat']]) -# } else { -# lon <- 0 -# lat <- 0 -# } -# # We keep on filling the expected dimensions -# var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) -# nmemb <- nltime <- NULL -# ## Sometimes CDO renames 'members' dimension to 'lev' -# old_members_dimname <- NULL -# if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { -# old_members_dimname <- work_piece[['dimnames']][['member']] -# work_piece[['dimnames']][['member']] <- 'lev' -# } -# if (work_piece[['dimnames']][['member']] %in% var_dimnames) { -# nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], -# var_dimnames)]]$len -# expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) -# } else { -# nmemb <- 1 -# } -# if (length(expected_dims) > 0) { -# dim_matches <- match(expected_dims, 
var_dimnames) -# if (anyNA(dim_matches)) { -# if (!is.null(old_members_dimname)) { -# expected_dims[which(expected_dims == 'lev')] <- old_members_dimname -# } -# stop("Error: the expected dimension(s)", -# toString(expected_dims[which(is.na(dim_matches))]), -# "were not found in", filename) -# } -# time_dimname <- var_dimnames[-dim_matches] -# } else { -# time_dimname <- var_dimnames -# } -# if (length(time_dimname) > 0) { -# if (length(time_dimname) == 1) { -# nltime <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len -# expected_dims <- c(expected_dims, time_dimname) -# dim_matches <- match(expected_dims, var_dimnames) -# } else { -# if (!is.null(old_members_dimname)) { -# expected_dims[which(expected_dims == 'lev')] <- old_members_dimname -# } -# stop("Error: the variable ", namevar, -# " is defined over more dimensions than the expected (", -# toString(c(expected_dims, 'time')), -# "). It could also be that the members, longitude or latitude ", -# "dimensions are named incorrectly. In that case, either rename ", -# "the dimensions in the file or adjust Load() to recognize the actual ", -# "name with the parameter 'dimnames'. See file ", filename) -# } -# } else { -# nltime <- 1 -# } -# -# # Now we must retrieve the data from the file, but only the asked indices. -# # So we build up the indices to retrieve. -# # Longitudes or latitudes have been retrieved already. -# if (explore_dims) { -# # If we're exploring the file we only want one time step from one member, -# # to regrid it and work out the number of longitudes and latitudes. -# # We don't need more. -# members <- 1 -# ltimes_list <- list(1) -# } else { -# # The data is arranged in the array 'tmp' with the dimensions in a -# # common order: -# # 1) Longitudes -# # 2) Latitudes -# # 3) Members (even if is not a file per member experiment) -# # 4) Lead-times -# if (work_piece[['is_file_per_dataset']]) { -# time_indices <- 1:nltime -# mons <- strsplit(system(paste('cdo showmon ', filein, -# ' 2>/dev/null'), intern = TRUE), split = ' ') -# years <- strsplit(system(paste('cdo showyear ', filein, -# ' 2>/dev/null'), intern = TRUE), split = ' ') -# mons <- as.numeric(mons[[1]][which(mons[[1]] != "")]) -# years <- as.numeric(years[[1]][which(years[[1]] != "")]) -# time_indices <- ts(time_indices, start = c(years[1], mons[1]), -# end = c(years[length(years)], mons[length(mons)]), -# frequency = 12) -# ltimes_list <- list() -# for (sdate in work_piece[['startdates']]) { -# selected_time_indices <- window(time_indices, start = c(as.numeric( -# substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))), -# end = c(3000, 12), frequency = 12, extend = TRUE) -# selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]] -# ltimes_list <- c(ltimes_list, list(selected_time_indices)) -# } -# } else { -# ltimes <- work_piece[['leadtimes']] -# #if (work_piece[['dataset_type']] == 'exp') { -# ltimes_list <- list(ltimes[which(ltimes <= nltime)]) -# #} -# } -# ## TODO: Put, when reading matrices, this kind of warnings -# # if (nmember < nmemb) { -# # cat("Warning: -# members <- 1:work_piece[['nmember']] -# members <- members[which(members <= nmemb)] -# } -# -# # Now, for each list of leadtimes to load (usually only one list with all leadtimes), -# # we'll join the indices and retrieve data -# found_disordered_dims <- FALSE -# for (ltimes in ltimes_list) { -# if (is_2d_var) { -# start <- c(min(lon_indices), min(lat_indices)) -# end <- c(max(lon_indices), max(lat_indices)) -# if (lonlat_subsetting_requested && remap_needed) { 
-# subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1, -# lat_indices - min(lat_indices) + 1) -# dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) -# dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) -# ncdf_dims <- list(dim_longitudes, dim_latitudes) -# } else { -# subset_indices <- list(lon_indices - min(lon_indices) + 1, -# lat_indices - min(lat_indices) + 1) -# ncdf_dims <- list() -# } -# final_dims <- c(length(subset_indices[[1]]), length(subset_indices[[2]]), 1, 1) -# } else { -# start <- end <- NULL -# subset_indices <- list() -# ncdf_dims <- list() -# final_dims <- c(1, 1, 1, 1) -# } -# -# if (work_piece[['dimnames']][['member']] %in% expected_dims) { -# start <- c(start, head(members, 1)) -# end <- c(end, tail(members, 1)) -# subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) -# dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) -# ncdf_dims <- c(ncdf_dims, list(dim_members)) -# final_dims[3] <- length(members) -# } -# if (time_dimname %in% expected_dims) { -# if (!all(is.na(ltimes))) { -# start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) -# end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) -# subset_indices <- c(subset_indices, -# list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) -# } else { -# start <- c(start, NA) -# end <- c(end, NA) -# subset_indices <- c(subset_indices, list(ltimes)) -# } -# dim_time <- ncdim_def(time_dimname, "", seq_along(ltimes), unlim = TRUE) -# ncdf_dims <- c(ncdf_dims, list(dim_time)) -# final_dims[4] <- length(ltimes) -# } -# count <- end - start + 1 -# start <- start[dim_matches] -# count <- count[dim_matches] -# subset_indices <- subset_indices[dim_matches] -# # Now that we have the indices to retrieve, we retrieve the data -# if (prod(final_dims) > 0) { -# tmp <- take(ncvar_get(fnc, namevar, start, count, -# collapse_degen = FALSE), -# seq_along(subset_indices), subset_indices) -# # The data is regridded if it corresponds to an atmospheric variable. When -# # the chosen output type is 'areave' the data is not regridded to not -# # waste computing time unless the user specified a common grid. 
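# A toy illustration (invented values) of the start/count bookkeeping above:
# one contiguous hyperslab is read, and the requested indices are then taken
# relative to its origin.
lon_indices <- c(5, 7, 9); lat_indices <- 3:5
start <- c(min(lon_indices), min(lat_indices))              # c(5, 3)
count <- c(max(lon_indices), max(lat_indices)) - start + 1  # c(5, 3)
subset_indices <- list(lon_indices - min(lon_indices) + 1,  # c(1, 3, 5)
                       lat_indices - min(lat_indices) + 1)  # c(1, 2, 3)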
-# if (is_2d_var) { -# ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { -# ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], -# ### start[dim_matches[1:2]], count[dim_matches[1:2]], -# ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) -# ###} -# if (lonlat_subsetting_requested && remap_needed) { -# filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") -# filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") -# ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], -# fnc$var[[namevar]]$missval, -# prec = if (fnc$var[[namevar]]$prec == 'int') { -# 'integer' -# } else { -# fnc$var[[namevar]]$prec -# }) -# scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) -# add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) -# if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { -# tmp <- (tmp - add_offset) / scale_factor -# } -# #nc_close(fnc) -# fnc2 <- nc_create(filein2, list(ncdf_var)) -# ncvar_put(fnc2, ncdf_var, tmp) -# if (add_offset != 0) { -# ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) -# } -# if (scale_factor != 1) { -# ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) -# } -# nc_close(fnc2) -# system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { -# "0,360," -# } else { -# paste0(lonmin, ",", lonmax, ",") -# }, latmin, ",", latmax, -# " -remap", work_piece[['remap']], ",", common_grid_name, -# " ", filein2, " ", filein, " 2>/dev/null")) -# file.remove(filein2) -# fnc2 <- nc_open(filein) -# sub_lon <- ncvar_get(fnc2, 'lon') -# sub_lat <- ncvar_get(fnc2, 'lat') -# ## We read the longitudes and latitudes from the file. -# ## In principle cdo should put in order the longitudes -# ## and slice them properly unless data is across greenwich -# sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 -# sub_lon_indices <- seq_along(sub_lon) -# if (lonmax < lonmin) { -# sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] -# } -# sub_lat_indices <- seq_along(sub_lat) -# ## In principle cdo should put in order the latitudes -# if (sub_lat[1] < sub_lat[length(sub_lat)]) { -# sub_lat_indices <- rev(seq_along(sub_lat)) -# } -# final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) -# subset_indices[[dim_matches[1]]] <- sub_lon_indices -# subset_indices[[dim_matches[2]]] <- sub_lat_indices -# -# tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), -# seq_along(subset_indices), subset_indices) -# -# if (!is.null(mask)) { -# ## We create a very simple 2d netcdf file that is then interpolated to the common -# ## grid to know what are the lons and lats of our slice of data -# mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') -# mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') -# dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], -# "degrees_east", c(0, 360)) -# dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], -# "degrees_north", c(-90, 90)) -# ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') -# fnc_mask <- nc_create(mask_file, list(ncdf_var)) -# ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) -# nc_close(fnc_mask) -# system(paste0("cdo -L -s remap", work_piece[['remap']], ",", -# common_grid_name, -# " ", mask_file, " ", mask_file_remap, " 2>/dev/null")) -# fnc_mask <- nc_open(mask_file_remap) -# mask_lons <- 
ncvar_get(fnc_mask, 'lon') -# mask_lats <- ncvar_get(fnc_mask, 'lat') -# nc_close(fnc_mask) -# file.remove(mask_file, mask_file_remap) -# if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) { -# stop("Error: the mask of the dataset with index ", -# tail(work_piece[['indices']], 1), " in '", -# work_piece[['dataset_type']], -# "' is wrong. It must be on the common grid if the ", -# "selected output type is 'lonlat', 'lon' or 'lat', ", -# "or 'areave' and 'grid' has been specified. It must ", -# "be on the grid of the corresponding dataset if the ", -# "selected output type is 'areave' and no 'grid' has been ", -# "specified. For more information check ?Load and see help ", -# "on parameters 'grid', 'maskmod' and 'maskobs'.") -# } -# mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360 -# if (lonmax >= lonmin) { -# mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax)) -# } else { -# mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax)) -# } -# mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax)) -# if (sub_lat[1] < sub_lat[length(sub_lat)]) { -# mask_lat_indices <- mask_lat_indices[rev(seq_along(mask_lat_indices))] -# } -# mask <- mask[mask_lon_indices, mask_lat_indices] -# } -# sub_lon <- sub_lon[sub_lon_indices] -# sub_lat <- sub_lat[sub_lat_indices] -# ### nc_close(fnc_mask) -# ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) { -# ### "0,360," -# ### } else { -# ### paste0(lonmin, ",", lonmax, ",") -# ### }, latmin, ",", latmax, -# ### " -remap", work_piece[['remap']], ",", common_grid_name, -# ###This is wrong: same files -# ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = "")) -# ### fnc_mask <- nc_open(mask_file) -# ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], -# ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) -# ###} -# } -# } -# if (is.unsorted(dim_matches)) { -# if (!found_disordered_dims && -# rev(work_piece[['indices']])[2] == 1 && -# rev(work_piece[['indices']])[3] == 1) { -# found_disordered_dims <- TRUE -# .warning(paste0("The dimensions for the variable ", namevar, -# " in the files of the experiment with index ", -# tail(work_piece[['indices']], 1), -# " are not in the optimal order for loading with Load(). ", -# "The optimal order would be '", -# toString(expected_dims), -# "'. One of the files of the dataset is stored in ", filename)) -# } -# tmp <- aperm(tmp, dim_matches) -# } -# dim(tmp) <- final_dims -# # If we are exploring the file we don't need to process and arrange -# # the retrieved data. We only need to keep the dimension sizes. -# if (is_2d_var && lonlat_subsetting_requested && remap_needed) { -# final_lons <- sub_lon -# final_lats <- sub_lat -# } else { -# final_lons <- lon -# final_lats <- lat -# } -# if (explore_dims) { -# if (work_piece[['is_file_per_member']]) { -# ## TODO: When the exp_full_path contains asterisks and is file_per_member -# ## members from different datasets may be accounted. -# ## Also if one file member is missing the accounting will be wrong. -# ## Should parse the file name and extract number of members. -# if (is_url) { -# nmemb <- NULL -# } else { -# nmemb <- length(files) -# } -# } -# dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats) -# } else { -# # If we are not exploring, then we have to process the retrieved data -# if (is_2d_var) { -# tmp <- apply(tmp, c(3, 4), function(x) { -# # Disable of large values. 
-# if (!is.na(work_piece[['var_limits']][2])) { -# x[which(x > work_piece[['var_limits']][2])] <- NA -# } -# if (!is.na(work_piece[['var_limits']][1])) { -# x[which(x < work_piece[['var_limits']][1])] <- NA -# } -# if (!is.null(mask)) { -# x[which(mask < 0.5)] <- NA -# } -# -# if (output == 'areave' || output == 'lon') { -# weights <- InsertDim(cos(final_lats * pi / 180), 1, -# length(final_lons), name = 'lon') -# weights[which(is.na(x))] <- NA -# if (output == 'areave') { -# weights <- weights / mean(weights, na.rm = TRUE) -# mean(x * weights, na.rm = TRUE) -# } else { -# weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, -# length(final_lats), name = 'lat') -# MeanDims(x * weights, 2, na.rm = TRUE) -# } -# } else if (output == 'lat') { -# MeanDims(x, 1, na.rm = TRUE) -# } else if (output == 'lonlat') { -# signif(x, 5) -# } -# }) -# if (output == 'areave') { -# dim(tmp) <- c(1, 1, final_dims[3:4]) -# } else if (output == 'lon') { -# dim(tmp) <- c(final_dims[1], 1, final_dims[3:4]) -# } else if (output == 'lat') { -# dim(tmp) <- c(1, final_dims[c(2, 3, 4)]) -# } else if (output == 'lonlat') { -# dim(tmp) <- final_dims -# } -# } -# var_data <- attach.big.matrix(work_piece[['out_pointer']]) -# if (work_piece[['dims']][['member']] > 1 && nmemb > 1 && -# work_piece[['dims']][['ftime']] > 1 && -# nltime < work_piece[['dims']][['ftime']]) { -# work_piece[['indices']][2] <- work_piece[['indices']][2] - 1 -# for (jmemb in members) { -# work_piece[['indices']][2] <- work_piece[['indices']][2] + 1 -# out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) -# out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1) -# var_data[out_indices] <- as.vector(tmp[, , jmemb, ]) -# } -# work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1 -# } else { -# out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) -# out_indices <- out_position:(out_position + length(tmp) - 1) -# a <- aperm(tmp, c(1, 2, 4, 3)) -# as.vector(a) -# var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3))) -# } -# work_piece[['indices']][3] <- work_piece[['indices']][3] + 1 -# } -# } -# } -# nc_close(fnc) -# if (is_2d_var) { -# if (remap_needed) { -# array_across_gw <- FALSE -# file.remove(filein) -# ###if (!is.null(mask) && lonlat_subsetting_requested) { -# ### file.remove(mask_file) -# ###} -# } else { -# if (first_lon_in_original_file < 0) { -# array_across_gw <- data_across_gw -# } else { -# array_across_gw <- FALSE -# } -# } -# } -# } -# if (explore_dims) { -# list(dims = dims, is_2d_var = is_2d_var, grid = grid_name, -# units = units, var_long_name = var_long_name, -# data_across_gw = data_across_gw, array_across_gw = array_across_gw) -# } else { -# ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) { -# ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) -# ###} -# if (!silent && !is.null(work_piece[['progress_amount']])) { -# message(work_piece[['progress_amount']], appendLF = FALSE) -# } -# found_file -# } -# } - -.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates, - nmember = NULL, nmemberobs = NULL, - nleadtime = NULL, leadtimemin = 1, - leadtimemax = NULL, storefreq = 'monthly', - sampleperiod = 1, lonmin = 0, lonmax = 360, - latmin = -90, latmax = 90, output = 'areave', - method = 'conservative', grid = NULL, - maskmod = vector("list", 15), - maskobs = vector("list", 15), - configfile = NULL, suffixexp = NULL, - 
suffixobs = NULL, varmin = NULL, varmax = NULL, - silent = FALSE, nprocs = NULL) { - ## This function loads and selects sample data stored in sampleMap and - ## sampleTimeSeries and is used in the examples instead of Load() so as - ## to avoid nco and cdo system calls and computation time in the stage - ## of running examples in the CHECK process on CRAN. - selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', - '20001101', '20051101')) - start_dates_position <- 3 - lead_times_position <- 4 - - if (output == 'lonlat') { - sampleData <- s2dv::sampleMap - if (is.null(leadtimemax)) { - leadtimemax <- dim(sampleData$mod)[lead_times_position] - } - selected_lead_times <- leadtimemin:leadtimemax - - dataOut <- sampleData - dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] - dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] - } else if (output == 'areave') { - sampleData <- s2dv::sampleTimeSeries - if (is.null(leadtimemax)) { - leadtimemax <- dim(sampleData$mod)[lead_times_position] - } - selected_lead_times <- leadtimemin:leadtimemax - - dataOut <- sampleData - dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] - dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] - } - - dims_out <- dim(sampleData$mod) - dims_out[start_dates_position] <- length(selected_start_dates) - dims_out[lead_times_position] <- length(selected_lead_times) - dim(dataOut$mod) <- dims_out - - dims_out <- dim(sampleData$obs) - dims_out[start_dates_position] <- length(selected_start_dates) - dims_out[lead_times_position] <- length(selected_lead_times) - dim(dataOut$obs) <- dims_out - - invisible(list(mod = dataOut$mod, obs = dataOut$obs, - lat = dataOut$lat, lon = dataOut$lon)) -} - -.ConfigGetDatasetInfo <- function(matching_entries, table_name) { - # This function obtains the information of a dataset and variable pair, - # applying all the entries that match in the configuration file. - if (table_name == 'experiments') { - id <- 'EXP' - } else { - id <- 'OBS' - } - defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), - paste0('$DEFAULT_', id, '_FILE_PATH$'), - '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', - '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') - info <- NULL - - for (entry in matching_entries) { - if (is.null(info)) { - info <- entry[-1:-2] - info[which(info == '*')] <- defaults[which(info == '*')] - } else { - info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] - } - } - - info <- as.list(info) - names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') - info -} - -# .ReplaceGlobExpressions <- function(path_with_globs, actual_path, -# replace_values, tags_to_keep, -# dataset_name, permissive) { -# # The goal of this function is to replace the shell globbing expressions in -# # a path pattern (that may contain shell globbing expressions and Load() -# # tags) by the corresponding part of the real existing path. -# # What is done actually is to replace all the values of the tags in the -# # actual path by the corresponding $TAG$ -# # -# # It takes mainly two inputs. 
The path with expressions and tags, e.g.: -# # /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc -# # and a complete known path to one of the matching files, e.g.: -# # /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc -# # and it returns the path pattern but without shell globbing expressions: -# # /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc -# # -# # To do that, it needs also as inputs the list of replace values (the -# # association of each tag to their value). -# # -# # All the tags not present in the parameter tags_to_keep will be repalced. -# # -# # Not all cases can be resolved with the implemented algorithm. In an -# # unsolvable case a warning is given and one possible guess is returned. -# # -# # In some cases it is interesting to replace only the expressions in the -# # path to the file, but not the ones in the file name itself. To keep the -# # expressions in the file name, the parameter permissive can be set to -# # TRUE. To replace all the expressions it can be set to FALSE. -# clean <- function(x) { -# if (nchar(x) > 0) { -# x <- gsub('\\\\', '', x) -# x <- gsub('\\^', '', x) -# x <- gsub('\\$', '', x) -# x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y))) -# do.call(paste0, as.list(x)) -# } else { -# x -# } -# } -# -# strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "") -# -# if (permissive) { -# actual_path_chunks <- strsplit(actual_path, '/')[[1]] -# actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/') -# file_name <- tail(actual_path_chunks, 1) -# if (length(actual_path_chunks) > 1) { -# file_name <- paste0('/', file_name) -# } -# path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]] -# path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)], -# collapse = '/') -# path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values) -# file_name_with_globs <- tail(path_with_globs_chunks, 1) -# if (length(path_with_globs_chunks) > 1) { -# file_name_with_globs <- paste0('/', file_name_with_globs) -# } -# right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1) -# right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values) -# path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags)) -# match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE), -# paste0(actual_path, file_name)) -# if (match != 1) { -# stop("Incorrect parameters to replace glob expressions. 
", -# "The path with expressions does not match the actual path.") -# } -# if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { -# path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') -# file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) -# } -# } -# path_with_globs_rx <- utils::glob2rx(path_with_globs) -# values_to_replace <- NULL -# tags_to_replace_starts <- NULL -# tags_to_replace_ends <- NULL -# give_warning <- FALSE -# for (tag in tags_to_keep) { -# matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] -# lengths <- attr(matches, 'match.length') -# if (!(length(matches) == 1 && matches[1] == -1)) { -# for (i in seq_along(matches)) { -# left <- NULL -# if (matches[i] > 1) { -# left <- -# .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, -# matches[i] - 1), replace_values) -# left_known <- -# strReverse(head(strsplit(strReverse(left), -# strReverse('.*'), fixed = TRUE)[[1]], 1)) -# } -# right <- NULL -# if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { -# right <- -# .ConfigReplaceVariablesInString(substr(path_with_globs_rx, -# matches[i] + lengths[i], -# nchar(path_with_globs_rx)), -# replace_values) -# right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) -# } -# match_limits <- NULL -# if (!is.null(left)) { -# left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) -# match_len <- attr(left_match, 'match.length') -# left_match_limits <- -# c(left_match + match_len - 1 - nchar(clean(right_known)) - -# nchar(replace_values[[tag]]) + 1, -# left_match + match_len - 1 - nchar(clean(right_known))) -# if (!(left_match < 1)) { -# match_limits <- left_match_limits -# } -# } -# right_match <- NULL -# if (!is.null(right)) { -# right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) -# match_len <- attr(right_match, 'match.length') -# right_match_limits <- -# c(right_match + nchar(clean(left_known)), -# right_match + nchar(clean(left_known)) + -# nchar(replace_values[[tag]]) - 1) -# if (is.null(match_limits) && !(right_match < 1)) { -# match_limits <- right_match_limits -# } -# } -# if (!is.null(right_match) && !is.null(left_match)) { -# if (!identical(right_match_limits, left_match_limits)) { -# give_warning <- TRUE -# } -# } -# if (is.null(match_limits)) { -# stop("Too complex path pattern specified for ", dataset_name, -# ". 
Specify a simpler path pattern for this dataset.") -# } -# values_to_replace <- c(values_to_replace, tag) -# tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) -# tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) -# } -# } -# } -# -# if (length(tags_to_replace_starts) > 0) { -# reorder <- sort(tags_to_replace_starts, index.return = TRUE) -# tags_to_replace_starts <- reorder$x -# values_to_replace <- values_to_replace[reorder$ix] -# tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] -# while (length(values_to_replace) > 0) { -# actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), -# '$', head(values_to_replace, 1), '$', -# substr(actual_path, head(tags_to_replace_ends, 1) + 1, -# nchar(actual_path))) -# extra_chars <- nchar(head(values_to_replace, 1)) + 2 - -# (head(tags_to_replace_ends, 1) - -# head(tags_to_replace_starts, 1) + 1) -# values_to_replace <- values_to_replace[-1] -# tags_to_replace_starts <- tags_to_replace_starts[-1] -# tags_to_replace_ends <- tags_to_replace_ends[-1] -# tags_to_replace_starts <- tags_to_replace_starts + extra_chars -# tags_to_replace_ends <- tags_to_replace_ends + extra_chars -# } -# } -# -# if (give_warning) { -# .warning(paste0("Too complex path pattern specified for ", dataset_name, -# ". Double check carefully the '$Files' fetched for this dataset ", -# "or specify a simpler path pattern.")) -# } -# -# if (permissive) { -# paste0(actual_path, file_name_with_globs) -# } else { -# actual_path -# } -# } - -.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { - tag <- paste0('\\$', tag, '\\$') - path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') - parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] - parts <- as.list(grep(tag, parts, value = TRUE)) - longest_couples <- NULL - pos_longest_couples <- NULL - found_value <- NULL - for (i in seq_along(parts)) { - parts[[i]] <- strsplit(parts[[i]], tag)[[1]] - if (length(parts[[i]]) == 1) { - parts[[i]] <- c(parts[[i]], '') - } - len_parts <- sapply(parts[[i]], nchar) - len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] - pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) - longest_couples <- c(longest_couples, max(len_couples)) - } - chosen_part <- which.max(longest_couples) - parts[[chosen_part]] <- - parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] - if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { - if (nchar(parts[[chosen_part]][1]) > 0) { - matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] - if (length(matches) == 1) { - match_left <- matches - actual_path <- - substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) - } - } - if (nchar(parts[[chosen_part]][2]) > 0) { - matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] - if (length(matches) == 1) { - match_right <- matches - found_value <- substr(actual_path, 0, match_right - 1) - } - } - } else { - if (nchar(parts[[chosen_part]][2]) > 0) { - matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] - if (length(matches) == 1) { - match_right <- matches - actual_path <- substr(actual_path, 0, match_right - 1) - } - } - if (nchar(parts[[chosen_part]][1]) > 0) { - matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] - if (length(matches) == 1) { - match_left <- matches - found_value <- - substr(actual_path, match_left + attr(match_left, 'match.length'), - 
nchar(actual_path)) - } - } - } - found_value -} - -.FilterUserGraphicArgs <- function(excludedArgs, ...) { - # This function filter the extra graphical parameters passed by the user in - # a plot function, excluding the ones that the plot function uses by default. - # Each plot function has a different set of arguments that are not allowed to - # be modified. - args <- list(...) - userArgs <- list() - for (name in names(args)) { - if ((name != "") & !is.element(name, excludedArgs)) { - # If the argument has a name and it is not in the list of excluded - # arguments, then it is added to the list that will be used - userArgs[[name]] <- args[[name]] - } else { - .warning(paste0("the argument '", name, "' can not be - modified and the new value will be ignored")) - } - } - userArgs -} - -.SelectDevice <- function(fileout, width, height, units, res) { - # This function is used in the plot functions to check the extension of the - # files where the graphics will be stored and select the right R device to - # save them. - # If the vector of filenames ('fileout') has files with different - # extensions, then it will only accept the first one, changing all the rest - # of the filenames to use that extension. - - # We extract the extension of the filenames: '.png', '.pdf', ... - ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout)) - - if (length(ext) != 0) { - # If there is an extension specified, select the correct device - ## units of width and height set to accept inches - if (ext[1] == ".png") { - saveToFile <- function(fileout) { - png(filename = fileout, width = width, height = height, res = res, units = units) - } - } else if (ext[1] == ".jpeg") { - saveToFile <- function(fileout) { - jpeg(filename = fileout, width = width, height = height, res = res, units = units) - } - } else if (ext[1] %in% c(".eps", ".ps")) { - saveToFile <- function(fileout) { - postscript(file = fileout, width = width, height = height) - } - } else if (ext[1] == ".pdf") { - saveToFile <- function(fileout) { - pdf(file = fileout, width = width, height = height) - } - } else if (ext[1] == ".svg") { - saveToFile <- function(fileout) { - svg(filename = fileout, width = width, height = height) - } - } else if (ext[1] == ".bmp") { - saveToFile <- function(fileout) { - bmp(filename = fileout, width = width, height = height, res = res, units = units) - } - } else if (ext[1] == ".tiff") { - saveToFile <- function(fileout) { - tiff(filename = fileout, width = width, height = height, res = res, units = units) - } - } else { - .warning("file extension not supported, it will be used '.eps' by default.") - ## In case there is only one filename - fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1]) - ext[1] <- ".eps" - saveToFile <- function(fileout) { - postscript(file = fileout, width = width, height = height) - } - } - # Change filenames when necessary - if (any(ext != ext[1])) { - .warning(paste0("some extensions of the filenames provided in 'fileout' ", - "are not ", ext[1], - ". The extensions are being converted to ", ext[1], ".")) - fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout) - } - } else { - # Default filenames when there is no specification - .warning("there are no extensions specified in the filenames, default to '.eps'") - fileout <- paste0(fileout, ".eps") - saveToFile <- postscript - } - - # return the correct function with the graphical device, and the correct - # filenames - list(fun = saveToFile, files = fileout) -} - -.message <- function(...) 
{ - # Function to use the 'message' R function with our custom settings - # Default: new line at end of message, indent to 0, exdent to 3, - # collapse to \n* - args <- list(...) - - ## In case we need to specify message arguments - if (!is.null(args[["appendLF"]])) { - appendLF <- args[["appendLF"]] - } else { - ## Default value in message function - appendLF <- TRUE - } - if (!is.null(args[["domain"]])) { - domain <- args[["domain"]] - } else { - ## Default value in message function - domain <- NULL - } - args[["appendLF"]] <- NULL - args[["domain"]] <- NULL - - ## To modify strwrap indent and exdent arguments - if (!is.null(args[["indent"]])) { - indent <- args[["indent"]] - } else { - indent <- 0 - } - if (!is.null(args[["exdent"]])) { - exdent <- args[["exdent"]] - } else { - exdent <- 3 - } - args[["indent"]] <- NULL - args[["exdent"]] <- NULL - - ## To modify paste collapse argument - if (!is.null(args[["collapse"]])) { - collapse <- args[["collapse"]] - } else { - collapse <- "\n*" - } - args[["collapse"]] <- NULL - - ## Message tag - if (!is.null(args[["tag"]])) { - tag <- args[["tag"]] - } else { - tag <- "* " - } - args[["tag"]] <- NULL - - tmp <- paste0(tag, - paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) - message(tmp, appendLF = appendLF, domain = domain) -} - -.warning <- function(...) { - # Function to use the 'warning' R function with our custom settings - # Default: no call information, indent to 0, exdent to 3, - # collapse to \n - args <- list(...) - - ## In case we need to specify warning arguments - if (!is.null(args[["call."]])) { - call <- args[["call."]] - } else { - ## Default: don't show info about the call where the warning came up - call <- FALSE - } - if (!is.null(args[["immediate."]])) { - immediate <- args[["immediate."]] - } else { - ## Default value in warning function - immediate <- FALSE - } - if (!is.null(args[["noBreaks."]])) { - noBreaks <- args[["noBreaks."]] - } else { - ## Default value warning function - noBreaks <- FALSE - } - if (!is.null(args[["domain"]])) { - domain <- args[["domain"]] - } else { - ## Default value warning function - domain <- NULL - } - args[["call."]] <- NULL - args[["immediate."]] <- NULL - args[["noBreaks."]] <- NULL - args[["domain"]] <- NULL - - ## To modify strwrap indent and exdent arguments - if (!is.null(args[["indent"]])) { - indent <- args[["indent"]] - } else { - indent <- 0 - } - if (!is.null(args[["exdent"]])) { - exdent <- args[["exdent"]] - } else { - exdent <- 3 - } - args[["indent"]] <- NULL - args[["exdent"]] <- NULL - - ## To modify paste collapse argument - if (!is.null(args[["collapse"]])) { - collapse <- args[["collapse"]] - } else { - collapse <- "\n!" - } - args[["collapse"]] <- NULL - - ## Warning tag - if (!is.null(args[["tag"]])) { - tag <- args[["tag"]] - } else { - tag <- "! Warning: " - } - args[["tag"]] <- NULL - - tmp <- paste0(tag, - paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) - warning(tmp, call. = call, immediate. = immediate, - noBreaks. = noBreaks, domain = domain) -} - -.IsColor <- function(x) { - res <- try(col2rgb(x), silent = TRUE) - return(!is(res, "try-error")) -} - -# This function switches to a specified figure at position (row, col) in a layout. -# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is -# activated, i.e., all drawn elements will be superimposed. 
Additionally, after -# using this function, the automatical pointing to the next figure in the layout -# will be spoiled: once the last figure in the layout is drawn, the pointer won't -# move to the first figure in the layout. -# Only figures with numbers other than 0 (when creating the layout) will be -# accessible. -# Inputs: either row and col, or n and mat -.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) { - if (!is.null(n) && !is.null(mat)) { - if (!is.numeric(n) || length(n) != 1) { - stop("Parameter 'n' must be a single numeric value.") - } - n <- round(n) - if (!is.array(mat)) { - stop("Parameter 'mat' must be an array.") - } - target <- which(mat == n, arr.ind = TRUE)[1, ] - row <- target[1] - col <- target[2] - } else if (!is.null(row) && !is.null(col)) { - if (!is.numeric(row) || length(row) != 1) { - stop("Parameter 'row' must be a single numeric value.") - } - row <- round(row) - if (!is.numeric(col) || length(col) != 1) { - stop("Parameter 'col' must be a single numeric value.") - } - col <- round(col) - } else { - stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.") - } - next_attempt <- c(row, col) - par(mfg = next_attempt) - i <- 1 - layout_size <- par('mfrow') - layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2], - byrow = TRUE) - while (any((par('mfg')[1:2] != c(row, col)))) { - next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ] - par(mfg = next_attempt) - i <- i + 1 - if (i > prod(layout_size)) { - stop("Figure not accessible.") - } - } - plot(0, type = 'n', axes = FALSE, ann = FALSE) - par(mfg = next_attempt) -} - -# Function to permute arrays of non-atomic elements (e.g. POSIXct) -.aperm2 <- function(x, new_order) { - old_dims <- dim(x) - attr_bk <- attributes(x) - if ('dim' %in% names(attr_bk)) { - attr_bk[['dim']] <- NULL - } - if (is.numeric(x)) { - x <- aperm(x, new_order) - } else { - y <- array(seq_along(x), dim = dim(x)) - y <- aperm(y, new_order) - x <- x[as.vector(y)] - } - dim(x) <- old_dims[new_order] - attributes(x) <- c(attributes(x), attr_bk) - x -} - -# This function is a helper for the function .MergeArrays. -# It expects as inputs two named numeric vectors, and it extends them -# with dimensions of length 1 until an ordered common dimension -# format is reached. -# The first output is dims1 extended with 1s. -# The second output is dims2 extended with 1s. -# The third output is a merged dimension vector. If dimensions with -# the same name are found in the two inputs, and they have a different -# length, the maximum is taken. -.MergeArrayDims <- function(dims1, dims2) { - new_dims1 <- NULL - new_dims2 <- NULL - while (length(dims1) > 0) { - if (names(dims1)[1] %in% names(dims2)) { - pos <- which(names(dims2) == names(dims1)[1]) - dims_to_add <- rep(1, pos - 1) - if (length(dims_to_add) > 0) { - names(dims_to_add) <- names(dims2[1:(pos - 1)]) - } - new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) - new_dims2 <- c(new_dims2, dims2[1:pos]) - dims1 <- dims1[-1] - dims2 <- dims2[-(1:pos)] - } else { - new_dims1 <- c(new_dims1, dims1[1]) - new_dims2 <- c(new_dims2, 1) - names(new_dims2)[length(new_dims2)] <- names(dims1)[1] - dims1 <- dims1[-1] - } - } - if (length(dims2) > 0) { - dims_to_add <- rep(1, length(dims2)) - names(dims_to_add) <- names(dims2) - new_dims1 <- c(new_dims1, dims_to_add) - new_dims2 <- c(new_dims2, dims2) - } - list(new_dims1, new_dims2, pmax(new_dims1, new_dims2)) -} - -# This function takes two named arrays and merges them, filling with -# NA where needed. 
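# A minimal runnable sketch of the NA-filling merge described above, using the
# abind package as the function below does (dimension names and sizes here are
# illustrative, not taken from the workflow):
library(abind)
a1 <- array(1, dim = c(sdate = 2, lat = 3))
a2 <- array(2, dim = c(sdate = 1, lat = 3))
merged <- abind(a1, a2, along = 1)      # concatenate along 'sdate'
names(dim(merged)) <- c("sdate", "lat") # abind can drop dim names; restore them
dim(merged)                             # sdate = 3, lat = 3
# When the non-'along' dimensions differ, .MergeArrays first extends the smaller
# array with NA padding (via .MergeArrayDims) so the shapes match, as in the
# dimension example that follows.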
-# dim(array1) -# 'b' 'c' 'e' 'f' -# 1 3 7 9 -# dim(array2) -# 'a' 'b' 'd' 'f' 'g' -# 2 3 5 9 11 -# dim(.MergeArrays(array1, array2, 'b')) -# 'a' 'b' 'c' 'e' 'd' 'f' 'g' -# 2 4 3 7 5 9 11 -.MergeArrays <- function(array1, array2, along) { - if (!(is.null(array1) || is.null(array2))) { - if (!(identical(names(dim(array1)), names(dim(array2))) && - identical(dim(array1)[-which(names(dim(array1)) == along)], - dim(array2)[-which(names(dim(array2)) == along)]))) { - new_dims <- .MergeArrayDims(dim(array1), dim(array2)) - dim(array1) <- new_dims[[1]] - dim(array2) <- new_dims[[2]] - for (j in seq_along(dim(array1))) { - if (names(dim(array1))[j] != along) { - if (dim(array1)[j] != dim(array2)[j]) { - if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { - na_array_dims <- dim(array2) - na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] - na_array <- array(dim = na_array_dims) - array2 <- abind(array2, na_array, along = j) - names(dim(array2)) <- names(na_array_dims) - } else { - na_array_dims <- dim(array1) - na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] - na_array <- array(dim = na_array_dims) - array1 <- abind(array1, na_array, along = j) - names(dim(array1)) <- names(na_array_dims) - } - } - } - } - } - if (!(along %in% names(dim(array2)))) { - stop("The dimension specified in 'along' is not present in the ", - "provided arrays.") - } - array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) - names(dim(array1)) <- names(dim(array2)) - } else if (is.null(array1)) { - array1 <- array2 - } - array1 -} - -# only can be used in Trend(). Needs generalization or be replaced by other function. -.reorder <- function(output, time_dim, dim_names) { - # Add dim name back - if (is.null(dim(output))) { - dim(output) <- c(stats = length(output)) - } else { #is an array - if (length(dim(output)) == 1) { - if (!is.null(names(dim(output)))) { - dim(output) <- c(1, dim(output)) - names(dim(output))[1] <- time_dim - } else { - names(dim(output)) <- time_dim - } - } else { # more than one dim - if (names(dim(output))[1] != "") { - dim(output) <- c(1, dim(output)) - names(dim(output))[1] <- time_dim - } else { #regular case - names(dim(output))[1] <- time_dim - } - } - } - # reorder - pos <- match(dim_names, names(dim(output))) - output <- aperm(output, pos) - names(dim(output)) <- dim_names - names(dim(output))[names(dim(output)) == time_dim] <- 'stats' - return(output) -} - -# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R -.Indices <- function(data, type, monini, indices_for_clim, - fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { - - if (type == 'dcpp') { - - fyear_dim <- 'fyear' - data <- Season(data = data, time_dim = fmonth_dim, - monini = monini, moninf = 1, monsup = 12, - method = mean, na.rm = na.rm) - names(dim(data))[which(names(dim(data)) == fmonth_dim)] <- fyear_dim - - if (identical(indices_for_clim, FALSE)) { ## data is already anomalies - - anom <- data - - } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) - - n_fyears <- as.numeric(dim(data)[fyear_dim]) - n_sdates <- as.numeric(dim(data)[sdate_dim]) - - if (is.null(indices_for_clim)) { ## climatology over the whole (common) period - first_years_for_clim <- n_fyears : 1 - last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) - } else { ## indices_for_clim specified as a numeric vector - first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) - last_years_for_clim <- - seq(from = indices_for_clim[length(indices_for_clim)], - 
by = -1, length.out = n_fyears) - } - - data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) - anom <- array(data = NA, dim = dim(data)) - for (i in 1:n_fyears) { - clim <- mean(data[i, first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) - anom[i, ] <- data[i, ] - clim - } - } - - } else if (type %in% c('obs', 'hist')) { - - data <- multiApply::Apply(data = data, target_dims = month_dim, - fun = mean, na.rm = na.rm)$output1 - - if (identical(indices_for_clim, FALSE)) { ## data is already anomalies - clim <- 0 - } else if (is.null(indices_for_clim)) { - ## climatology over the whole period - clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, - na.rm = na.rm)$output1 - } else { - ## indices_for_clim specified as a numeric vector - clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, - indices = indices_for_clim), - target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 - } - - anom <- data - clim - - } else { - stop('type must be dcpp, hist or obs') - } - - return(anom) -} - -#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. -# The function is temporarily here because PlotLayout() needs to draw the colorbars of -# PlotMostLikelyQuantileMap(). -#Draws Color Bars for Categories -#A wrapper of s2dv::ColorBar to generate multiple color bars for different -#categories, and each category has different color set. -GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, - bar_limits, var_limits = NULL, - triangle_ends = NULL, plot = TRUE, - draw_separators = FALSE, - bar_titles = NULL, title_scale = 1, - label_scale = 1, extra_margin = rep(0, 4), - ...) { - # bar_limits - if (!is.numeric(bar_limits) || length(bar_limits) != 2) { - stop("Parameter 'bar_limits' must be a numeric vector of length 2.") - } - - # Check brks - if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { - num_brks <- 5 - if (is.numeric(brks)) { - num_brks <- brks - } - brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) - } - if (!is.numeric(brks)) { - stop("Parameter 'brks' must be a numeric vector.") - } - # Check cols - col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), - c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), - c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), - c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), - c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) - if (is.null(cols)) { - if (length(col_sets) >= nmap) { - chosen_sets <- 1:nmap - chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) - } else { - chosen_sets <- array(seq_along(col_sets), nmap) - } - cols <- col_sets[chosen_sets] - } else { - if (!is.list(cols)) { - stop("Parameter 'cols' must be a list of character vectors.") - } - if (!all(sapply(cols, is.character))) { - stop("Parameter 'cols' must be a list of character vectors.") - } - if (length(cols) != nmap) { - stop("Parameter 'cols' must be a list of the same length as the number of ", - "maps in 'maps'.") - } - } - for (i in seq_along(cols)) { - if (length(cols[[i]]) != (length(brks) - 1)) { - cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) - } - } - - # Check bar_titles - if (is.null(bar_titles)) { - if (nmap == 3) { - bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") - } else if (nmap == 5) { - bar_titles <- c("Low (%)", "Below normal (%)", - "Normal (%)", "Above normal (%)", "High (%)") - } 
else { - bar_titles <- paste0("Cat. ", 1:nmap, " (%)") - } - } - - if (plot) { - for (k in 1:nmap) { - s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, -# bar_limits = bar_limits, var_limits = var_limits, - triangle_ends = triangle_ends, plot = TRUE, - draw_separators = draw_separators, - title = bar_titles[[k]], title_scale = title_scale, - label_scale = label_scale, extra_margin = extra_margin) - } - } else { - #TODO: col_inf and col_sup - return(list(brks = brks, cols = cols)) - } - -} - diff --git a/modules/Loading/R/load_tas_tos.R b/modules/Loading/R/load_tas_tos.R index acaf04001d12a9627b9e94e21dc0094cacea0502..ab4cc23e26a4b48f681d2408a72fd6758d49fdfd 100644 --- a/modules/Loading/R/load_tas_tos.R +++ b/modules/Loading/R/load_tas_tos.R @@ -19,12 +19,13 @@ load_tas_tos <- function(recipe, retrieve = TRUE) { lons.max <- recipe$Analysis$Region$lonmax ref.name <- recipe$Analysis$Datasets$Reference$name exp.name <- recipe$Analysis$Datasets$System$name + ncores <- recipe$Analysis$ncores variable <- c("tas", "tos", "sic") store.freq <- recipe$Analysis$Variables$freq if(is.null(recipe$Analysis$Variables$sic_threshold)){ - sic.threshold = 0.15 + sic.threshold <- 0.15 } else { sic.threshold <- recipe$Analysis$Variables$sic_threshold } @@ -158,10 +159,15 @@ load_tas_tos <- function(recipe, retrieve = TRUE) { # Combine hcst tas and tos data #------------------------------------------------------------------- - hcst <- mask_tas_tos(input_data = hcst, region = c(lons.min, lons.max,lats.min, lats.max), - mask_path = archive$System[[exp.name]]$land_sea_mask, lsm_var_name = 'lsm', - lon = hcst$coords$longitude, lat = hcst$coords$latitude, - lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) + hcst <- mask_tas_tos(input_data = hcst, + region = c(lons.min, lons.max,lats.min, lats.max), + mask_path = archive$System[[exp.name]]$land_sea_mask, + lsm_var_name = 'lsm', + lon = as.vector(hcst$coords$longitude), + lat = as.vector(hcst$coords$latitude), + lon_dim = 'longitude', + lat_dim = 'latitude', + ncores = ncores) hcst$dims[['var']] <- dim(hcst$data)[['var']] hcst$attrs$Variable$varName <- 'tas-tos' @@ -358,9 +364,12 @@ load_tas_tos <- function(recipe, retrieve = TRUE) { ## TODO: Ask about this list if(!recipe$Analysis$Datasets$Reference$name %in% c('HadCRUT4','HadCRUT5','BEST','GISTEMPv4')){ - obs <- mask_tas_tos(input_data = obs, region = c(lons.min, lons.max,lats.min, lats.max), - mask_path = archive$Reference[[ref.name]]$land_sea_mask, lsm_var_name = 'sftof', - lon = obs$coords$longitude, lat = obs$coords$latitude, + obs <- mask_tas_tos(input_data = obs, + region = c(lons.min, lons.max,lats.min, lats.max), + mask_path = archive$Reference[[ref.name]]$land_sea_mask, + lsm_var_name = 'sftof', + lon = as.vector(obs$coords$longitude), + lat = as.vector(obs$coords$latitude), lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) obs$dims[['var']] <- dim(obs$data)[['var']] diff --git a/modules/Loading/R/mask_tas_tos.R b/modules/Loading/R/mask_tas_tos.R index cc6937957a4aa88993b95b905765622e852f9560..c82bbb425a10b6f7d19d5924dba4592133893bfc 100644 --- a/modules/Loading/R/mask_tas_tos.R +++ b/modules/Loading/R/mask_tas_tos.R @@ -6,7 +6,6 @@ mask_tas_tos <- function(input_data, mask_path, lsm_var_name = lsm_var_name, lon lon_dim = 'lon', lat_dim = 'lat', ncores = NULL){ - mask <- .load_mask(mask_path = mask_path, lsm_var_name = lsm_var_name, lon_dim = lon_dim, lat_dim = lat_dim, sea_value = 1, land_value = 0, region = region) @@ -40,6 +39,8 @@ mask_tas_tos 
<- function(input_data, mask_path, lsm_var_name = lsm_var_name, lon lats.min <- region[3] lats.max <- region[4] + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + data <- startR::Start(dat = mask_path, var = lsm_var_name, lon = values(list(lons.min, lons.max)), @@ -48,7 +49,7 @@ mask_tas_tos <- function(input_data, mask_path, lsm_var_name = lsm_var_name, lon synonims = list(lon = c('lon','longitude'), lat = c('lat','latitude')), lat_reorder = Sort(decreasing = TRUE), - lon_reorder = CircularSort(0,360), + lon_reorder = circularsort, num_procs = 1, retrieve = TRUE) mask <- list(mask = drop(data), diff --git a/modules/Loading/R/orography_correction.R b/modules/Loading/R/orography_correction.R new file mode 100644 index 0000000000000000000000000000000000000000..29ff0a543009964210ec193b3d7676fa572e13d8 --- /dev/null +++ b/modules/Loading/R/orography_correction.R @@ -0,0 +1,76 @@ +## TODO: remove paths to personal scratchs +source("modules/Loading/R/get_regrid_params.R") +source("modules/Loading/R/check_latlon.R") + +orography_correction <- function(recipe, data) { + + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + + archive <- get_archive(recipe) + # archive <- read_yaml("conf/archive.yml")[[recipe$Run$filesystem]] + + # Define regrid parameters: + regrid_params <- get_regrid_params(recipe, archive) + + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + ## Load exp orography + orography_exp <- Start(dat = archive$System[[exp.name]]$orography, + var = 'orography', + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + return_vars = list(latitude = NULL, longitude = NULL), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + num_procs = 1, retrieve = TRUE) + + ## Set negative values to zero + orography_exp <- pmax(orography_exp, 0) + + ## load obs orography + orography_obs <- Start(dat = archive$Reference[[ref.name]]$orography, + var = 'orography', + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + return_vars = list(latitude = NULL, longitude = NULL), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + num_procs = 1, retrieve = TRUE) + + ## Set negative values to zero + orography_obs <- pmax(orography_obs, 0) + + ## Calculate difference between orographies + oro_diff <- orography_exp - orography_obs + + ## Apply lapse rate factor to correct temperature = -0.0065 K/m (-6.5 K/km) + oro_obs_corr <- oro_diff * -0.0065 + oro_obs_corr <- Reorder(data = drop(oro_obs_corr), order = c('latitude', 'longitude')) + + ## Apply correction to obs temperature data + data$obs$data <- Apply(data = list(data$obs$data, oro_obs_corr), + target_dims = c("latitude", 
"longitude"), + fun = "+", + ncores = recipe$Analysis$ncores)$output1 + data$obs$data <- Reorder(data$obs$data, names(data$obs$dims)) + + return(data) +} + diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 85630239ec76222e712eba7609314010ea7643ce..5444a7a55a03fb180011e5e67d79c940432ee963 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -148,7 +148,7 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, system.name1 <- sys_dict$System[[system[sys]]]$name system.name <- c(system.name, system.name1) } - for(ref in 1:length(length)){ + for(ref in 1:length(reference)){ reference.name1 <- sys_dict$Reference[[reference[ref]]]$name reference.name <- c(reference.name, reference.name1) } @@ -207,9 +207,6 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, ## Find position of mean bias metric to calculate breaks if ('mean_bias' %in% metrics) { pos_bias <- which(metrics == 'mean_bias') - if(var == 'psl'){ - data[,,pos_bias,,,] <- data[,,pos_bias,,,]/100 ## temporary - } breaks_bias <- .SCBiasBreaks(Subset(data, along = c('metric','region'), indices = list(pos_bias,reg))) } @@ -244,7 +241,7 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, sign_sc_9 <- NULL } } - + VizScorecard(data = data_sc_9, sign = sign_sc_9, row_dim = model, diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index 783682770911f2f5f056b5e3743f0046c0bfdf85..517985d534a013b33eb8ea77b2c6553d39d3ec70 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -44,7 +44,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, system <- recipe$Analysis$Datasets$System$name reference <- recipe$Analysis$Datasets$Reference$name - var <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + var <- skill_metrics$metadata$attrs$Variable$varName + # var <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) forecast.months <- recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max @@ -58,6 +59,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + ## Define array to filled with data aggr_significance <- array(data = NA, @@ -65,7 +67,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, time = length(forecast.months), region = length(regions), metric = length(metrics))) - + ## For data that is already aggregated by region if ("region" %in% names(dim(skill_metrics[[1]]))) { aggr_metrics <- NULL @@ -111,14 +113,14 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, result[result == -Inf] <- NA } aggr_metrics[,,,which(metrics == met)] <- Reorder(data = result, - order = c('var','time', 'region')) + order = c('var', 'time', 'region')) } ## close on met } ## close if on skill ## Score Aggregation if (metric.aggregation == 'score') { ## Spatially aggregate data for each metric - for (met in metrics) { + for (met in metrics) { if (met == 'rpss') { rps_syear <- sapply(X = 1:length(regions), FUN = function(X) { @@ -214,7 +216,6 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, 
aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_crpss, order = c('var', 'time', 'region')) } else if (met == 'enscorr') { - browser() cov <- sapply(X = 1:length(regions), FUN = function(X) { WeightedMean(data = statistics$cov, @@ -280,10 +281,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) - sign_corr<- array(data = NA, - dim = c(var = length(var), - time = length(forecast.months), - region = length(regions))) + sign_corr <- array(data = NA, + dim = c(var = length(var), + time = length(forecast.months), + region = length(regions))) for (var in 1:dim(sign_corr)[['var']]) { for (time in 1:dim(sign_corr)[['time']]) { @@ -302,49 +303,52 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, aggr_metrics[,,,which(metrics == met)] <- Reorder(data = enscorr, order = c('var', 'time', 'region')) aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_corr, - order = c('var', 'time', 'region')) + order = c('var', 'time', 'region')) } else if (met == 'mean_bias') { + ## Temporarily removing mean_bias significance since degrees of freedom not included + ## and causing issues with SPEI data + # ## Calculate ensemble mean - hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') - obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') - - ## Aggregate data over regions - hcst_data_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = hcst_data_ens, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - obs_data_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = obs_data_ens, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(hcst_data_aggr))[length(dim(hcst_data_aggr))] <- 'region' - names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' - - ## Remove unnecessary dimension - hcst_data_aggr <- Subset(hcst_data_aggr, c('dat', 'sday', 'sweek'), - list(1,1,1) , drop = 'selected') - obs_data_aggr <- Subset(obs_data_aggr, c('dat', 'sday', 'sweek'), - list(1,1,1) , drop = 'selected') - - ## Calculate significance - pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), - target_dims = c('syear'), ncores = ncores, - fun = function(x,y) - {t.test(as.vector(x),as.vector(y))})$p.value - - sign_mean_bias <- pval_mean_bias <= alpha + # hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') + # obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') + # + # ## Aggregate data over regions + # hcst_data_aggr <- sapply(X = 1:length(regions), + # FUN = function(X) { + # WeightedMean(data = hcst_data_ens, + # region = regions[[X]], + # lon = lon, londim = lon_dim, + # lat = lat, latdim = lat_dim, + # na.rm = na.rm) + # }, simplify = 'array') + # + # obs_data_aggr <- sapply(X = 1:length(regions), + # FUN = function(X) { + # WeightedMean(data = obs_data_ens, + # region = regions[[X]], + # lon = lon, londim = lon_dim, + # lat = lat, latdim = lat_dim, + # na.rm = na.rm) + # }, simplify = 'array') + # + # ## Include name of region dimension + # names(dim(hcst_data_aggr))[length(dim(hcst_data_aggr))] <- 'region' + # names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' + # + # ## Remove unnecessary dimension + # hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','sday','sweek'), + # 
list(1,1,1) , drop = 'selected') + # obs_data_aggr <- Subset(obs_data_aggr, c('dat','sday','sweek'), + # list(1,1,1) , drop = 'selected') + # + # ## Calculate significance + # pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), + # target_dims = c('syear'), ncores = ncores, + # fun = function(x,y) + # {t.test(as.vector(x),as.vector(y))})$p.value + # + # sign_mean_bias <- pval_mean_bias <= alpha ## Calculate aggregated mean bias metric mean_bias <- sapply(X = 1:length(regions), @@ -365,8 +369,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Save metric result in array aggr_metrics[,,,which(metrics == met)] <- Reorder(data = mean_bias, order = c('var', 'time', 'region')) - aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_mean_bias, - order = c('var', 'time', 'region')) + # aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_mean_bias, + # order = c('var', 'time', 'region')) } else if (met == 'enssprerr') { ## Calculate metric @@ -447,10 +451,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, metadata = skill_metrics$metadata) ## Save metric data arrays - recipe$Run$output_dir <- file.path(recipe$Run$output_dir, - "outputs", "Scorecards/") save_metrics(recipe = recipe, metrics = aggr_scorecards, - agg = "region", module = "scorecards") + agg = "region", module = "Scorecards") # save_metrics(recipe = recipe, metrics = aggr_scorecards, # data_cube = data$hcst, agg = 'region', # module = "scorecards") diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R index 2fa27549a20f414ae20d3db188b7918bd5ec26b0..2b92ed4b36a57651b9247cf3d41da081255bd200 100644 --- a/modules/Scorecards/execute_scorecards.R +++ b/modules/Scorecards/execute_scorecards.R @@ -33,4 +33,8 @@ for (variable in 1:length(recipe$Analysis$Variables)) { Scorecards_plotting(scorecard_recipe) } +# Add BSC logo to scorecards +source("tools/add_logo.R") +add_logo_scorecards(recipe, "bsc_logo_small.png") + print("##### SCORECARDS SAVED TO THE OUTPUT DIRECTORY #####") diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 19c5fdd83b4c151e44f7961c9adc2dc5a0edd804..a39a1a16eba0cf7f45aec6b85cd9a0693c648d4a 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -357,6 +357,18 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, skill_metrics[[ metric ]] <- skill } } + # NaN values and NAs in logical arrays cause problems when saving + skill_metrics <- lapply(skill_metrics, + function(x) { + if (is.logical(x)) { + # x[] <- as.numeric(x) + x[is.na(x)] <- FALSE + } else { + x[is.nan(x)] <- NA + } + return(x) + }) + ## TODO: Create list of returns with data and metadata info(recipe$Run$logger, retrieve = retrieve, "##### SKILL METRIC COMPUTATION COMPLETE #####") diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index c39f0bd18318813e7fd51270fe5f87225e09949b..4952270cba834d91ecc0ecfa2a145e4fa03f08ea 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -143,7 +143,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o "\n", "Forecast Ensemble Mean / ", "Init.: ", i_syear) } # Plots - output_configuration <- output_conf$Multipanel$forecast_ensemble_mean + output_configuration <- output_conf$multipanel$forecast_ensemble_mean base_args <- list(fun = "PlotEquiMap", plot_dims = c('longitude', 'latitude'), var = i_var_ens_mean, lon = 
longitude, @@ -161,7 +161,6 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o # Define function and parameters depending on projection if (projection == 'cylindrical_equidistant') { fun <- PlotEquiMap - output_configuration <- output_conf$PlotEquiMap$forecast_ensemble_mean base_args <- list(var = NULL, dots = NULL, mask = NULL, lon = longitude, lat = latitude, dot_symbol = 20, title_scale = 0.6, @@ -169,7 +168,6 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o filled.continents = F, brks = brks, cols = cols, bar_label_digits = 4, bar_label_scale = 1.5, axes_label_scale = 1, units = units) - base_args[names(output_configuration)] <- output_configuration } else { fun <- PlotRobinson common_projections <- c("robinson", "stereographic", "lambert_europe") @@ -182,8 +180,11 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o lon = longitude, lat = latitude, lon_dim = 'longitude', lat_dim = 'latitude', target_proj = target_proj, legend = 's2dv', - style = 'point', brks = brks, cols = cols) + style = 'point', brks = brks, cols = cols, + dots_size = 0.2, dots_shape = 47) } + output_configuration <- output_conf[[projection]]$forecast_ensemble_mean + base_args[names(output_configuration)] <- output_configuration # Loop over forecast times for (i in 1:length(time_labels)) { # Get mask subset @@ -207,6 +208,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o " / Start date: ", format(as.Date(i_syear, format = "%Y%m%d"), "%d-%m-%Y")) + forecast_time_caption <- paste0("Forecast month: ", sprintf("%02d", i)) } else if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), @@ -214,6 +216,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o "Issued on ", format(ymd(start_date), "%d-%m-%Y"), "\n", time_labels[i], years[i]) + forecast_time_caption <- paste0("Forecast week: ", sprintf("%02d", i)) } else { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), @@ -221,6 +224,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o time_labels[i], " ", years[i], " / Start date: ", i_syear) + forecast_time_caption <- paste0("Forecast month: ", sprintf("%02d", i)) } # Define caption if (identical(fun, PlotRobinson)) { @@ -228,7 +232,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o base_args[['caption']] <- paste0("Nominal start date: ", format(ymd(start_date), "%d-%m-%Y"), "\n", - "Forecast week: ", sprintf("%02d", i), "\n", + forecast_time_caption, "\n", "Reference: ", recipe$Analysis$Datasets$Reference, "\n", "Units: ", units) } diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 4c5c807b5957958b8dfbbcdb7d9d3ea4fb475e9a..d1d9aed890ac66ddb430d8449a9b9c7874683056 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -1,5 +1,4 @@ -library(stringr) -library(lubridate) +# library(stringr) plot_metrics <- function(recipe, metrics, outdir, significance = F, output_conf) { @@ -7,7 +6,8 @@ plot_metrics <- function(recipe, metrics, # metrics: list of named skill metrics arrays # outdir: output directory # significance: T/F, whether to display the significance dots in the plots - # Abort if frequency is daily + + # Abort if frequency is daily if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { 
error(recipe$Run$logger, "Visualization functions not yet implemented for daily data.") @@ -32,7 +32,6 @@ plot_metrics <- function(recipe, metrics, } hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - start_date <- recipe$Analysis$Time$sdate if (tolower(recipe$Analysis$Horizon) == "seasonal") { init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, start = 1, stop = 2)) @@ -53,10 +52,10 @@ plot_metrics <- function(recipe, metrics, weeks <- paste0(0, ftime_min:ftime_max) # This week_label appears on the name of the file. It's just the start date. week_label <- recipe$Analysis$Time$sdate - } else { # Decadal + } else { # Decadal init_month <- 1 init_week <- 1 - months <- lubridate::month(Subset(data_cube$attrs$Dates, + months <- lubridate::month(Subset(cube_info$attrs$Dates, "syear", indices = 1), label = T, abb = F,locale = "en_GB") } @@ -66,7 +65,7 @@ plot_metrics <- function(recipe, metrics, } else { projection <- "cylindrical_equidistant" } - + # Define color palette and number of breaks according to output format if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { diverging_palette <- "purpleorange" @@ -81,20 +80,21 @@ plot_metrics <- function(recipe, metrics, "enscorr_specs", "rmsss", "msss") scores <- c("rps", "frps", "crps", "frps_specs", "mse") statistics <- c("cov", "std_hcst", "std_obs", "var_hcst", "var_obs", "n_eff") - + # Loop over variables and assign colorbar and plot parameters to each metric for (var in 1:cube_info$dims[['var']]) { var_name <- cube_info$attrs$Variable$varName[[var]] ## For statistics - var_metric <- lapply(metrics, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) + var_metric <- lapply(metrics, + function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) for (name in c(skill_scores, scores, statistics, "mean_bias", "enssprerr")) { if (name %in% names(metrics)) { units <- NULL # Define plot characteristics and metric name to display in plot if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90:_specs", "bss10_specs", + "rpss_specs", "bss90_specs", "bss10_specs", "rmsss", "msss")) { display_name <- toupper(strsplit(name, "_")[[1]][1]) metric <- var_metric[[name]] @@ -191,52 +191,57 @@ plot_metrics <- function(recipe, metrics, # Reorder dimensions metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, - # retrieve it and reorder its dimensions. - significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(metrics))) { - metric_significance <- var_metric[[significance_name]] - metric_significance <- Reorder(metric_significance, c("time", - "longitude", - "latitude")) - # Split significance into list of lists, along the time dimension - # This allows for plotting the significance dots correctly. - metric_significance <- ClimProjDiags::ArrayToList(metric_significance, - dim = "time", - level = "sublist", - names = "dots") + # retrieve it and reorder its dimensions. 
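# How the significance fields are reshaped for plotting: a small sketch of
# ClimProjDiags::ArrayToList with level = "sublist", the same call pattern used
# just below (dimension sizes here are illustrative):
x <- array(1:24, dim = c(time = 2, longitude = 3, latitude = 4))
x_list <- ClimProjDiags::ArrayToList(x, dim = "time",
                                     level = "sublist", names = "dots")
length(x_list)      # 2: one element per forecast time
names(x_list[[1]])  # "dots": a longitude x latitude slice, ready to be passed
                    # as the dots/mask argument of the per-time plot call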
+ if (significance != FALSE) { # Both, dots, mask or TRUE + significance_name <- paste0(name, "_significance") + if ((significance_name %in% names(metrics))) { + metric_significance <- var_metric[[significance_name]] + metric_significance <- Reorder(metric_significance, c("time", + "longitude", + "latitude")) + # Split significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. + metric_significance <- ClimProjDiags::ArrayToList(metric_significance, + dim = "time", + level = "sublist", + names = "dots") + } else { + metric_significance <- NULL + } } else { metric_significance <- NULL } - # Define output file name and titles if (tolower(recipe$Analysis$Horizon) == "seasonal") { outfile <- paste0(outdir[var], name, "-", month_label) - } else if (tolower(recipe$Analysis$Horizon) == "subseasonal") { - outfile <- paste0(outdir[var], name, "-", week_label) } else { outfile <- paste0(outdir[var], name) } - # Get variable name and long name var_name <- cube_info$attrs$Variable$varName[[var]] var_long_name <- cube_info$attrs$Variable$metadata[[var_name]]$long_name + ## Removing "anomaly" from variable name + if (grepl ("anomaly", var_long_name)) { + var_long_name <- str_remove(var_long_name, 'anomaly') + } # Multi-panel or single-panel plots if (recipe$Analysis$Workflow$Visualization$multi_panel) { # Define titles if (tolower(recipe$Analysis$Horizon) == "seasonal") { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), - "\n", display_name, " / ", hcst_period) + "\n", display_name, " / ", hcst_period) ## time_bounds in cube_info to know if Time_aggregation was applied if (!is.null(attributes(cube_info$attrs$time_bounds))) { - info(recipe$Run$logger, "Using plotting attrs from time_bounds.") + info(recipe$Run$logger, retrieve = FALSE, + "Using plotting attrs from time_bounds.") if (length(attributes(cube_info$attrs$time_bounds)$plotting_attr) > 1) { titles <- unlist( - lapply(1:length(attributes(cube_info$attrs$time_bounds)$plotting_attr$ini_ftime), - function(x) { - paste("Forecast time", - attributes(cube_info$attrs$time_bounds)$plotting_attr$ini_ftime[x], - "to", - attributes(cube_info$attrs$time_bounds)$plotting_attr$end_ftime[x])})) + lapply(1:length(attributes(cube_info$attrs$time_bounds)$plotting_attr$ini_ftime), + function(x) { + paste("Forecast time", + attributes(cube_info$attrs$time_bounds)$plotting_attr$ini_ftime[x], + "to", + attributes(cube_info$attrs$time_bounds)$plotting_attr$end_ftime[x])})) } else { titles <- attributes(cube_info$attrs$time_bounds)$plotting_attr[[1]] } @@ -255,7 +260,7 @@ plot_metrics <- function(recipe, metrics, titles <- "Unknown" } ## TODO: Combine PlotLayout with PlotRobinson? 
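# The update idiom used just below (and throughout the Visualization module):
# entries from the output configuration overwrite matching defaults in the
# argument list, leaving everything else untouched. Values here are
# illustrative:
base_args <- list(width = 8, height = 5, title_scale = 0.6)
output_configuration <- list(width = 11, height = 8)
base_args[names(output_configuration)] <- output_configuration
str(base_args)  # width = 11, height = 8; title_scale = 0.6 is kept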
- output_configuration <- output_conf$Multipanel$plot_metrics + output_configuration <- output_conf$multipanel$skill_metrics base_args <- list(fun = "PlotEquiMap", plot_dims = c('longitude', 'latitude'), var = asplit(metric, MARGIN = 1), @@ -276,17 +281,16 @@ plot_metrics <- function(recipe, metrics, # Define function and parameters depending on projection if (projection == 'cylindrical_equidistant') { fun <- PlotEquiMap - output_configuration <- output_conf$PlotEquiMap$skill_metric base_args <- list(var = NULL, dots = NULL, lon = longitude, lat = latitude, dot_symbol = 20, dot_size = 1, - title_scale = 0.6, margin_scale = c(1, 5, 5, 5), + title_scale = 0.6, filled.continents = F, brks = brks, cols = cols, col_inf = col_inf, col_sup = col_sup, units = units, font.main = 2, bar_label_digits = 3, bar_label_scale = 1.5, axes_label_scale = 1, width = 8, height = 5) - base_args[names(output_configuration)] <- output_configuration + base_args[names(output_configuration)] <- output_configuration } else { fun <- PlotRobinson common_projections <- c("robinson", "stereographic", "lambert_europe") @@ -300,10 +304,14 @@ plot_metrics <- function(recipe, metrics, lon = longitude, lat = latitude, lon_dim = 'longitude', lat_dim = 'latitude', target_proj = target_proj, legend = 's2dv', - style = 'point', brks = brks, cols = cols, + style = 'point', dots = NULL, brks = brks, + cols = cols, col_inf = col_inf, col_sup = col_sup, - units = units) + units = units, + dots_size = 0.2, dots_shape = 47) } + output_configuration <- output_conf[[projection]]$skill_metrics + base_args[names(output_configuration)] <- output_configuration # Loop over forecast times for (i in 1:dim(metric)[['time']]) { # Get forecast time label @@ -330,7 +338,7 @@ plot_metrics <- function(recipe, metrics, ## TODO: There's probably a better way to do this, like using ## time_bounds$start and time_bounds$end directly. 
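# The wrap-around month arithmetic applied in the lines below, sketched with
# illustrative values (a November start date, third forecast month):
init_month <- 11
forecast_time_ini <- 3
m <- init_month + forecast_time_ini - 1  # 13
m <- ifelse(m > 12, m - 12, m)           # wraps to 1
month.name[m]                            # "January"
# Note that a single subtraction of 12 is only enough while the forecast lead
# stays within one year, which holds for these seasonal configurations.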
forecast_time_ini <- init_month + forecast_time_ini - 1 - forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) + forecast_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) forecast_time_ini <- month.name[forecast_time_ini] forecast_time_end <- init_month + forecast_time_end - 1 forecast_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) @@ -349,6 +357,8 @@ plot_metrics <- function(recipe, metrics, hcst_period) } } + nominal_startdate_caption <- paste0("1st of ", str_to_title(month_label)) + forecast_time_caption <- paste0("Forecast month: ", forecast_time) } else if (tolower(recipe$Analysis$Horizon == "subseasonal")) { # Get forecast time label forecast_time <- weeks[i] @@ -360,11 +370,12 @@ plot_metrics <- function(recipe, metrics, paste("Valid from", format(week_valid_ini[i], "%d-%m"), "to", format(week_valid_end[i], "%d-%m"), "of", year(ymd(start_date)))) - # "/ valid week", format(weeks[i], - # "%Y-%m-%d"), "/", hcst_period) + + nominal_startdate_caption <- ymd(start_date) + forecast_time_caption <- paste0("Forecast week: ", sprintf("%02d", i)) } else if (recipe$Analysis$Horizon == "decadal") { # Case without time aggregation: - if (is.null(attributes(data_cube$attrs$time_bounds))) { + if (is.null(attributes(cube_info$attrs$time_bounds))) { forecast_time <- match(months[i], month.name) + floor((i - 1)/12)*12 forecast_time <- sprintf("%02d", forecast_time) # Define plot title @@ -373,17 +384,17 @@ plot_metrics <- function(recipe, metrics, "\n", display_name, "/", months[i], "/", hcst_period) } else { - if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) { - forecast_time_ini <- attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i] - forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i] + if (length(attributes(cube_info$attrs$time_bounds)$plotting_attr) > 1) { + forecast_time_ini <- attributes(cube_info$attrs$time_bounds)$plotting_attr$ini[i] + forecast_time_end <- attributes(cube_info$attrs$time_bounds)$plotting_attr$end[i] # labels for file name: forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end) # title names: forecast_time_ini <- init_month + forecast_time_ini - 1 - forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) + forecast_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) forecast_time_ini <- month.name[forecast_time_ini] forecast_time_end <- init_month + forecast_time_end - 1 - forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) + forecast_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) forecast_time_end <- month.name[forecast_time_end] toptitle <- paste(system_name, "/", str_to_title(var_long_name), @@ -391,7 +402,7 @@ plot_metrics <- function(recipe, metrics, forecast_time_end, "/", hcst_period) } else { - forecast_time <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]][i] + forecast_time <- attributes(cube_info$attrs$time_bounds)$plotting_attr[[1]][i] toptitle <- paste(system_name, "/", str_to_title(var_long_name), "\n", display_name, "/", @@ -399,31 +410,74 @@ plot_metrics <- function(recipe, metrics, hcst_period) } } + nominal_startdate_caption <- start_date + forecast_time_caption <- paste0("Forecast month: ", forecast_time) } else { warning("Unknown time horizon?") } # Modify base arguments base_args[[1]] <- metric[i, , ] if 
(!is.null(metric_significance)) { - base_args[[2]] <- metric_significance[[i]][[1]] - significance_caption <- "alpha = 0.05" + sign_file_label <- NULL + if (is.logical(significance)) { + if (significance) { + base_args[[2]] <- metric_significance[[i]][[1]] + sign_file_label <- '_mask' + } + } else { + if (significance == 'dots') { + if (projection != 'cylindrical_equidistant') { + base_args[[10]] <- metric_significance[[i]][[1]] + } else { + # The position of arguments in base_args requires this cond + # so PlotEquiMap plots dots when requested + base_args[[2]] <- metric_significance[[i]][[1]] + } + sign_file_label <- '_dots' + } else if (significance == 'mask') { + base_args[[2]] <- metric_significance[[i]][[1]] + sign_file_label <- '_mask' + } + } + significance_caption <- "Alpha: 0.05" } else { significance_caption <- NULL + sign_file_label <- NULL + } + + if (recipe$Analysis$Workflow$Anomalies$cross_validation == FALSE & + recipe$Analysis$Workflow$Skill$cross_validation == FALSE) { + cross_val <- 'none' + } else if (recipe$Analysis$Workflow$Anomalies$cross_validation == TRUE & + recipe$Analysis$Workflow$Skill$cross_validation == FALSE) { + cross_val <- 'anomalies' + } else if (recipe$Analysis$Workflow$Anomalies$cross_validation == FALSE & + recipe$Analysis$Workflow$Skill$cross_validation == TRUE) { + cross_val <- 'metrics' + } else if (recipe$Analysis$Workflow$Anomalies$cross_validation == TRUE & + recipe$Analysis$Workflow$Skill$cross_validation == TRUE) { + cross_val <- 'full' + } else { + cross_val <- '' } + if (identical(fun, PlotRobinson)) { ## TODO: Customize alpha and other technical details depending on the metric - base_args[['caption']] <- - paste0("Nominal start date: ", ymd(start_date), "\n", - "Forecast week: ", sprintf("%02d", i), "\n", ## This is specific for subseasonal, would need a loop to specify time horizon - "Reference: ", recipe$Analysis$Datasets$Reference, "\n", - "Units: ", cube_info$attrs$Variable$metadata[[var_name]]$units, "\n", - significance_caption) + base_args[['caption']] <- + paste0(" Nominal start date: ", + "1st of ", str_to_title(month_label), "\n", + " ", forecast_time_caption, "\n", + " Reference: ", recipe$Analysis$Datasets$Reference, "\n", + " Interpolation: ", recipe$Analysis$Regrid$type, "\n", + " Cross-validation: ", cross_val, "\n", + paste0(" ", significance_caption)) } - fileout <- paste0(outfile, "_ft", forecast_time, ".pdf") + fileout <- paste0(outfile, "_ft", forecast_time, + sign_file_label, ".pdf") # Plot info(recipe$Run$logger, retrieve = TRUE, paste("Plotting", display_name)) - + do.call(fun, args = c(base_args, list(toptitle = toptitle, @@ -435,4 +489,5 @@ plot_metrics <- function(recipe, metrics, } info(recipe$Run$logger, retrieve = TRUE, "##### METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") + } diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index 55660b218f9cdf9f365bb022f15bb1b30cef1cf1..29cd57056dcdfaae9c5795b8da4762b26ebf6dec 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -171,7 +171,7 @@ plot_most_likely_terciles <- function(recipe, triangle_ends = c(F, F)) # , width = 11, height = 8) ) } else { - output_configuration <- output_conf$PlotEquiMap$most_likely_terciles + output_configuration <- output_conf$cylindrical_equidistant$most_likely_terciles base_args <- list(cat_dim = 'bin', probs = NULL, lon = longitude, lat = latitude, @@ -194,7 +194,8 @@ 
diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R
index 55660b218f9cdf9f365bb022f15bb1b30cef1cf1..29cd57056dcdfaae9c5795b8da4762b26ebf6dec 100644
--- a/modules/Visualization/R/plot_most_likely_terciles_map.R
+++ b/modules/Visualization/R/plot_most_likely_terciles_map.R
@@ -171,7 +171,7 @@ plot_most_likely_terciles <- function(recipe,
                     triangle_ends = c(F, F)) # , width = 11, height = 8)
     )
   } else {
-    output_configuration <- output_conf$PlotEquiMap$most_likely_terciles
+    output_configuration <- output_conf$cylindrical_equidistant$most_likely_terciles
     base_args <- list(cat_dim = 'bin',
                       probs = NULL, lon = longitude, lat = latitude,
@@ -194,7 +194,8 @@ plot_most_likely_terciles <- function(recipe,
                       plot_margin = c(1, 5, 5, 5),
                       # plot_margin = c(5.1, 4.1, 4.1, 2.1),
                       return_leg = T,
-                      triangle_ends = c(F, T), width = 10, height = 8)
+                      triangle_ends = c(F, T), width = 10, height = 8,
+                      dots_size = 0.2, dots_shape = 47)
     base_args[names(output_configuration)] <- output_configuration
     for (i in 1:length(time_labels)) {
       ## variables ending in *_i represent each forecast time
diff --git a/modules/Visualization/R/tmp/PlotRobinson.R b/modules/Visualization/R/tmp/PlotRobinson.R
index bd427448fad9bdc9482c3e13b161f05e2fd6c1a7..1e55ec848e61ab563c7992685899089a18ede4a6 100644
--- a/modules/Visualization/R/tmp/PlotRobinson.R
+++ b/modules/Visualization/R/tmp/PlotRobinson.R
@@ -118,11 +118,11 @@ PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL,
                          target_proj = 54030, legend = 's2dv', style = 'point',
                          dots = NULL, mask = NULL, brks = NULL, cols = NULL,
                          bar_limits = NULL, triangle_ends = NULL, col_inf = NULL, col_sup = NULL, colNA = NULL,
-                         color_fun = clim.palette(), bar_extra_margin = rep(0, 4), vertical = TRUE,
+                         color_fun = clim.palette(), bar_extra_margin = c(3.5, 0, 3.5, 0), vertical = TRUE,
                          toptitle = NULL, caption = NULL, units = NULL, crop_coastlines = NULL,
-                         point_size = "auto", title_size = 16, dots_size = 0.5,
+                         point_size = "auto", title_size = 10, dots_size = 0.2,
                          dots_shape = 47, coastlines_width = 0.3,
-                         fileout = NULL, width = 8, height = 4, size_units = "in",
+                         fileout = NULL, width = 8, height = 5, size_units = "in",
                          res = 300) {
 
   # Sanity check
@@ -234,12 +234,14 @@ PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL,
     if (!identical(dim(mask), dim(data))) {
       stop("Parameter 'mask' must have the same dimensions as 'data'.")
     } else if (is.numeric(mask)) {
+      mask[which(is.na(mask))] <- 0
       if (all(mask %in% c(0, 1))) {
         mask <- array(as.logical(mask), dim = dim(mask))
       } else {
         stop("Parameter 'mask' must have only TRUE/FALSE or 0/1.")
       }
     } else if (is.logical(mask)) {
+      mask[which(is.na(mask))] <- F
       if (!all(mask %in% c(T, F))) {
         stop("Parameter 'mask' must have only TRUE/FALSE or 0/1.")
       }
@@ -279,7 +281,7 @@ PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL,
                              title = units, title_scale = 1, # units_scale
                              label_scale = 1, tick_scale = 1, #bar_tick_scale
                              extra_margin = bar_extra_margin, label_digits = 4)
-  brks <- colorbar$brks
+  brks <- round(colorbar$brks, 2)
   cols <- colorbar$cols
   col_inf <- colorbar$col_inf
   col_sup <- colorbar$col_sup
diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R
index 522ddaf7a043a09ab511540b9f9ce56f924db824..6f22333891fc01f98f0639553049ad818620171d 100644
--- a/modules/Visualization/Visualization.R
+++ b/modules/Visualization/Visualization.R
@@ -77,15 +77,49 @@ Visualization <- function(recipe,
                   "NULL, so there is no data that can be plotted."))
     stop()
   }
+  # Default to single-panel plots if not specified
   if (is.null(recipe$Analysis$Workflow$Visualization$multi_panel)) {
     recipe$Analysis$Workflow$Visualization$multi_panel <- FALSE
   }
+
+  # Warn if the significance given in the function call conflicts with the recipe
+  if (!missing(significance) && !is.null(recipe$Analysis$Workflow$Visualization$significance)) {
+    if (significance != recipe$Analysis$Workflow$Visualization$significance) {
+      warning("The significance value provided in the function call does not match ",
+              "the value in the recipe. The function call value will be used.")
+    }
+  }
+
+  # If not specified in function call, set significance to recipe value
+  if (missing(significance) && !is.null(recipe$Analysis$Workflow$Visualization$significance)) {
+    significance <- recipe$Analysis$Workflow$Visualization$significance
+  }
+
   # Plot skill metrics
   if ("skill_metrics" %in% plots) {
     if (!is.null(skill_metrics)) {
-      plot_metrics(recipe, metrics = skill_metrics, significance = significance,
-                   outdir = outdir, output_conf = output_conf)
+      if (is.logical(significance)) {
+        plot_metrics(recipe = recipe, metrics = skill_metrics, outdir = outdir,
+                     significance = significance, output_conf = output_conf)
+        info(recipe$Run$logger, retrieve = TRUE,
+             paste("##### Skill metrics significance set as",
+                   significance, "#####"))
+      } else {
+        if (significance %in% c('both', 'dots')) {
+          plot_metrics(recipe = recipe, metrics = skill_metrics, outdir = outdir,
+                       significance = 'dots', output_conf = output_conf)
+          info(recipe$Run$logger, retrieve = TRUE,
+               "##### Skill metrics significance as dots #####")
+        }
+        if (significance %in% c('both', 'mask')) {
+          plot_metrics(recipe = recipe, metrics = skill_metrics, outdir = outdir,
+                       significance = 'mask', output_conf = output_conf)
+          info(recipe$Run$logger, retrieve = TRUE,
+               "##### Skill metrics significance as mask #####")
+        }
+      }
     } else {
       error(recipe$Run$logger, retrieve = TRUE,
             paste0("The skill metric plots have been requested, but the ",
@@ -231,20 +265,7 @@ Visualization <- function(recipe,
   }
 
   # Convert plots to user-chosen format
-  if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) &&
-      tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") {
-    extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format)
-    plot_files <- list.files(path = recipe$Run$output_dir, pattern = "\\.pdf$",
-                             recursive = TRUE, full.names = TRUE)
-    for (file in plot_files) {
-      system_command <- paste("convert -density 300", file,
-                              "-resize 40% -alpha remove",
-                              paste0(tools::file_path_sans_ext(file), ".",
-                                     extension))
-      system(system_command)
-    }
-    unlink(plot_files)
-    info(recipe$Run$logger, retrieve = TRUE,
-         paste0("##### PLOT FILES CONVERTED TO ", toupper(extension), " #####"))
-  }
+  ## TODO: Think of a better system for parallel executions
+  source("tools/convert_plot_format.R")
+  convert_plot_format(recipe)
 }
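
Editor's sketch (behavior as coded above, value hypothetical) of how the recipe's `significance` option fans out: a logical value yields a single call, while `'both'` triggers one dotted and one masked set of plots:

```r
significance <- "both"                 # recipe / function-call value
if (is.logical(significance)) {
  styles <- if (significance) "mask" else character(0)
} else if (significance == "both") {
  styles <- c("dots", "mask")
} else {
  styles <- significance
}
styles  # [1] "dots" "mask" -> plot_metrics() runs twice: *_dots.pdf and *_mask.pdf
```
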
diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml
index 4e8e16dc4ac02c8773e8e213a71b9a0ef3f98a14..55cbfc8fe185257d008a696bd3496bfefb268eb0 100644
--- a/modules/Visualization/output_size.yml
+++ b/modules/Visualization/output_size.yml
@@ -1,6 +1,26 @@
+# This file allows you to define custom plotting parameters for any region. To add
+# custom parameters for a region, simply include them following this file structure.
+# If a parameter or region is not defined in this file, the default values will be
+# used.
+# The structure is as follows:
+# Replace elements enclosed in <...> with the appropriate value.
+# region:
+#   <region_name>: (name of the region you will specify in the recipe)
+#     <projection>: (cylindrical_equidistant, robinson, stereographic, lambert_europe)
+#       <plot_type>: (can be skill_metrics, forecast_ensemble_mean or most_likely_terciles)
+#         parameter1: value
+#         parameter2: value
+#         ...
+#
+# * How do I know which parameters are available?
+#   - cylindrical_equidistant: uses s2dv::PlotEquiMap()
+#   - robinson, stereographic, lambert_europe: uses s2dv::PlotRobinson()
+#   - multipanel: uses s2dv::PlotEquiMap() and s2dv::PlotLayout()
+#   - For most_likely_terciles, only 'cylindrical_equidistant' is available
 region: #units inches
-  EU: #latmin: 20, latmax: 80, lonmin: -20, lonmax: 40
-    PlotEquiMap:
+  Europe: #latmin: 20, latmax: 80, lonmin: -20, lonmax: 40
+    cylindrical_equidistant: # Projection
       skill_metrics:
         width: 8.5
         height: 8.5
@@ -11,6 +31,7 @@ region: #units inches
         dot_symbol: 4
         font.main: 1
         colNA: "white"
+        country.borders: TRUE
       forecast_ensemble_mean:
         width: 8.5
         height: 8.5
@@ -21,22 +42,24 @@ region: #units inches
         dot_size: 1.7
         font.main: 1
         colNA: "white"
+        country.borders: TRUE
       most_likely_terciles:
         width: 8.5
         height: 8.5
         dot_size: 2
         plot_margin: !expr c(0, 4.1, 4.1, 2.1)
         colNA: "white"
-    Multipanel:
+        country.borders: TRUE
+    multipanel:
       forecast_ensemble_mean:
         width: 8.5
         height: 8.5
-    Robinson:
+    robinson:
       skill_metrics: {width: 8, height: 5}
       colNA: "white"
   NA-EU: #North Atlantic European region
   Iberia: #latmin: 34, latmax: 46, lonmin: -10, lonmax: 5
-    PlotEquiMap:
+    cylindrical_equidistant:
       skill_metrics:
         width: 8
         height: 7.5
@@ -70,7 +93,7 @@ region: #units inches
         plot_margin: !expr c(2, 5, 7.5, 5)
         bar_extra_margin: !expr c(2.5, 1, 0.5, 1) # colorbar proportions
         colNA: "white"
-    Multipanel:
+    multipanel:
      forecast_ensemble_mean:
        width: 8.5
        height: 9.5
@@ -79,9 +102,46 @@ region: #units inches
         width: 8.5
         height: 9.5
         title_margin_scale: 4
-    Robinson:
+    robinson:
       skill_metrics: {width: 8, height: 5}
       colNA: "white"
   Mediterranean:
   Global:
+  Ethiopia:
+    cylindrical_equidistant:
+      skill_metrics:
+        width: 8.5
+        height: 8
+        axes_label_scale: 0.8
+        bar_label_scale: 1.2
+        bar_extra_margin: !expr c(2, 1, 0.5, 1)
+        dot_size: 1.7
+        dot_symbol: 4
+        font.main: 1
+        colNA: "white"
+        country.borders: TRUE
+        intylat: 5
+        intxlon: 5
+      forecast_ensemble_mean:
+        width: 8.5
+        height: 8
+        axes_label_scale: 0.8
+        bar_label_scale: 1.2
+        bar_extra_margin: !expr c(1.7, 1, 0.5, 1)
+        dot_symbol: 4
+        dot_size: 1.7
+        font.main: 1
+        colNA: "white"
+        country.borders: TRUE
+        intylat: 5
+        intxlon: 5
+      most_likely_terciles:
+        width: 8.5
+        height: 8
+        dot_size: 2
+        plot_margin: !expr c(0, 4.1, 4.1, 2.1)
+        colNA: "white"
+        country.borders: TRUE
+        intylat: 5
+        intxlon: 5
 # Add other regions
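
Editor's example of a complete region entry following the structure documented at the top of output_size.yml; the region name, bounds and parameter values below are purely illustrative:

```yaml
region:
  Caribbean: #latmin: 8, latmax: 28, lonmin: -90, lonmax: -55 (hypothetical)
    cylindrical_equidistant:
      skill_metrics:
        width: 8.5
        height: 7
        colNA: "white"
        country.borders: TRUE
    robinson:
      skill_metrics: {width: 8, height: 5}
```
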
diff --git a/recipe_template.yml b/recipe_template.yml
index 0db9e8fbf84a10bddae0e8a0fe9cf365e30529b0..a9d383ce994fe905e81e639d40d9b188b11e3c5d 100644
--- a/recipe_template.yml
+++ b/recipe_template.yml
@@ -139,7 +139,8 @@ Analysis:
       plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str)
       multi_panel: yes # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool)
       projection: 'cylindrical_equidistant' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str)
-      mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      significance: 'dots' # Type of mask for statistical significance. Options: 'dots', 'mask', 'both' and yes/no. Default is 'dots'. 'mask' and 'both' are not available for projections other than cylindrical_equidistant. (Optional, str)
+      mask_terciles: no # Whether to dot or mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
       dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
       mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
       file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF.
diff --git a/recipes/examples/recipe_tas_seasonal_units.yml b/recipes/examples/recipe_tas_seasonal_units.yml
index d2b25321b8b6c43a2842c577d265469ba5131281..17df1efd1b2503510bea0a8598ec4fd28a55c679 100644
--- a/recipes/examples/recipe_tas_seasonal_units.yml
+++ b/recipes/examples/recipe_tas_seasonal_units.yml
@@ -59,5 +59,5 @@ Analysis:
 Run:
   Loglevel: INFO
   Terminal: TRUE
-  output_dir: /esarchive/scratch/nperez/cs_oper/
-  code_dir: /esarchive/scratch/nperez/git/s2s-suite/
+  output_dir: /esarchive/scratch/nperez/
+  code_dir: /esarchive/scratch/nperez/git6/sunset/
diff --git a/tools/add_logo.R b/tools/add_logo.R
index ad96cf1afd9f81389d0eb851468a27db9389debc..9490ffe147f7a893af27ebc1abb3b547141eda9e 100644
--- a/tools/add_logo.R
+++ b/tools/add_logo.R
@@ -1,16 +1,34 @@
+## TODO: Use get_dir() to unify
 add_logo <- function(recipe, logo) {
   # recipe: SUNSET recipe
   # logo: URL to the logo
-  system <- list.files(paste0(recipe$Run$output_dir, "/plots/"))
-
-  files <- lapply(system, function(x) {
+  system <- gsub('.', '', recipe$Analysis$Datasets$System$name, fixed = T)
+  reference <- gsub('.', '', recipe$Analysis$Datasets$Reference$name, fixed = T)
+  calibration <- recipe$Analysis$Workflow$Calibration$method
+  variable <- recipe$Analysis$Variables$name
+  files <- lapply(variable, function(x) {
     f <- list.files(paste0(recipe$Run$output_dir, "/plots/",
-                           system, "/"),
-                    recursive = TRUE,
-                    full.names = TRUE)
-  })[[1]]
+                           system, "/", reference, "/", calibration, "/", x))
+    full_path <- paste0(recipe$Run$output_dir, "/plots/",
+                        system, "/", reference, "/", calibration, "/", x, "/", f)})[[1]]
   dim(files) <- c(file = length(files))
   Apply(list(files), target_dims = NULL, function(x) {
     system(paste("composite -gravity southeast -geometry +10+10",
                  logo, x, x))}, ncores = recipe$Analysis$ncores)
+
+}
+
+add_logo_scorecards <- function(recipe, logo) {
+  # recipe: SUNSET recipe
+  # logo: URL to the logo
+  scorecards_f <- list.files(paste0(recipe$Run$output_dir, "/plots/Scorecards/"))
+  scorecards <- paste0(recipe$Run$output_dir, "/plots/Scorecards/", scorecards_f)
+  dim(scorecards) <- c(file = length(scorecards))
+  Apply(list(scorecards), target_dims = NULL, function(x) {
+    system(paste('convert ', x, ' -gravity north -background white -splice 0x15 ', x))
+    system(paste("composite -gravity northeast -geometry +15+15",
+                 logo, x, x))}, ncores = recipe$Analysis$ncores)
+}
diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index 1e4040f2ace6d829d0a93be19c18160d642844bf..605c4a9d07915d103b129fa3916a6c136359a9f0 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -677,6 +677,200 @@ check_recipe <- function(recipe) {
     }
   }
 
+  # Indicators
+  if ("Indicators" %in% names(recipe$Analysis$Workflow)) {
+    # list of variables requested to be loaded:
+    var.list <- unlist(strsplit(recipe_variables, ", | |,"))
+    # SPEI/SPI checks
+    # Check that precipitation is a requested variable
+    # when drought indices (SPEI or SPI) are requested
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) {
+      if (!('prlr' %in% var.list)) {
+        error(recipe$Run$logger,
paste0("Precipitation is necessary to calculate ", + "SPEI and it is not a variable in the recipe")) + error_status <- TRUE + } + } + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPI$return_spi)) { + if (!('prlr' %in% var.list)) { + error(recipe$Run$logger, + paste0("Precipitation is necessary to calculate ", + "SPI and it is not a variable in the recipe")) + error_status <- TRUE + } + } + # SPEI: check that necessary variables for the selected PET method are in the recipe + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) { + pet_method <- recipe$Analysis$Workflow$Indicators$SPEI$PET_method + if (!is.null(pet_method)) { + if (pet_method == 'none') { + # check that "pet" is in the variable list + ## NOTE: "pet" is not the correct abbr but no examples exist in esarchive now + if (!('pet' %in% var.list)) { + error(recipe$Run$logger, + paste0("a PET method is necessary to estimate potential ", + "evapotranspiration in the calculation of SPEI")) + error_status <- TRUE + } + } else { + if (pet_method == 'hargreaves') { + var.list.method <- c('tasmax', 'tasmin') + known_pet_method <- TRUE + } else if (pet_method == 'hargreaves_modified') { + var.list.method <- c('tasmax', 'tasmin', 'prlr') + known_pet_method <- TRUE + } else if (pet_method == 'thornthwaite') { + var.list.method <- c('tas') + known_pet_method <- TRUE + } else { + known_pet_method <- FALSE + error(recipe$Run$logger, + paste0("PET method ", pet_method, " unknown")) + error_status <- TRUE + } + if (known_pet_method) { + # check that the necessary variables are requested + missing.vars <- c() + for (var in var.list.method) { + if (identical(which(var.list == var), integer(0))) { + missing.vars <- c(missing.vars, var) + } + } + if (length(missing.vars) > 0) { + error(recipe$Run$logger, + paste0(missing.vars, " are necessary for ", pet_method, + " method and they are NOT selected in the recipe")) + error_status <- TRUE + } + } + } + } else { # same as not NULL but pet_method == 'none' + # check that "pet" is in the variable list + ## NOTE: "pet" is not the correct abbr but no examples exist in esarchive now + if (!('pet' %in% var.list)) { + error(recipe$Run$logger, + paste0("a PET method is necessary to estimate potential ", + "evapotranspiration in the calculation of SPEI")) + error_status <- TRUE + } + } + + } + + # SPEI/SPI check accum number + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) { + accum <- recipe$Analysis$Workflow$Indicators$SPEI$Nmonths_accum + ftime_interval <- recipe$Analysis$Time$ftime_max - recipe$Analysis$Time$ftime_min + 1 + if ((accum > 12 & ftime_interval < 12) || (accum > ftime_interval)) { + error(recipe$Run$logger, + paste0("not possible to accumulate ", accum, " months with the specified ftime ", + "in the calculation of SPEI")) + error_status <- TRUE + } + + } + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPI$return_spi)) { + accum <- recipe$Analysis$Workflow$Indicators$SPI$Nmonths_accum + ftime_interval <- recipe$Analysis$Time$ftime_max - recipe$Analysis$Time$ftime_min + 1 + if ((accum > 12 & ftime_interval < 12) || (accum > ftime_interval)) { + error(recipe$Run$logger, + paste0("not possible to accumulate ", accum, " months with the specified ftime ", + "in the calculation of SPI")) + error_status <- TRUE + } + } + + # Check standardization reference period + # SPEI + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) { + stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPEI$standardization_ref_period + year_start <- 
+
+    # Check standardization reference period
+    # SPEI
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) {
+      stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPEI$standardization_ref_period
+      year_start <- recipe$Analysis$Time$hcst_start
+      year_end <- recipe$Analysis$Time$hcst_end
+      if (!is.null(stand_refperiod)) {
+        if (!(stand_refperiod[1] >= year_start & stand_refperiod[2] <= year_end)) {
+          error(recipe$Run$logger,
+                paste0("the standardization_ref_period needs to be contained ",
+                       "in the hcst_start and hcst_end period for the calculation of SPEI"))
+          error_status <- TRUE
+        }
+      }
+    }
+    # SPI
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SPI$return_spi)) {
+      stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPI$standardization_ref_period
+      year_start <- recipe$Analysis$Time$hcst_start
+      year_end <- recipe$Analysis$Time$hcst_end
+      if (!is.null(stand_refperiod)) {
+        if (!(stand_refperiod[1] >= year_start & stand_refperiod[2] <= year_end)) {
+          error(recipe$Run$logger,
+                paste0("the standardization_ref_period needs to be contained ",
+                       "in the hcst_start and hcst_end period for the calculation of SPI"))
+          error_status <- TRUE
+        }
+      }
+    }
+
+    # Threshold indicator: check that the number of requested thresholds matches the number of variables
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$SelectedThreshold$return_thresholdbased)) {
+      thrs <- recipe$Analysis$Workflow$Indicators$SelectedThreshold$threshold
+      if (is.null(thrs)) {
+        error(recipe$Run$logger,
+              paste0("Threshold-based indicator is requested but no threshold ",
+                     "has been indicated"))
+        error_status <- TRUE
+      } else {
+        if (length(thrs) != length(var.list)) {
+          error(recipe$Run$logger,
+                paste0("Threshold-based indicators are requested but the thresholds ",
+                       "do NOT match the number of requested variables"))
+          error_status <- TRUE
+        }
+      }
+    }
+
+    # Threshold-based predefined indicators (Malaria and Ticks)
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$Malaria$return_climate_suitability)) {
+      # check that necessary variables are requested
+      if ((!all(c("tas", "tdps", "prlr") %in% var.list)) &
+          (!all(c("tas", "hurs", "prlr") %in% var.list))) {
+        error(recipe$Run$logger,
+              paste0("Necessary variables for the Malaria indicator are ",
+                     "tas, tdps or hurs, and prlr; NOT included in requested ",
+                     "variables: ", paste(var.list, collapse = ", ")))
+        error_status <- TRUE
+      }
+      # check that ssp is known
+      for (ssp in recipe$Analysis$Workflow$Indicators$Malaria$ssp) {
+        if (ssp != 'p.falciparum' & ssp != 'p.vivax') {
+          error(recipe$Run$logger,
+                paste0("Unknown requested ssp ", ssp))
+          error_status <- TRUE
+        }
+      }
+    }
+    # Tick-borne disease indicator:
+    if (isTRUE(recipe$Analysis$Workflow$Indicators$Ticks$return_climate_suitability)) {
+      # check that necessary variables are requested
+      if ((!all(c("tas", "tdps") %in% var.list)) &
+          (!all(c("tas", "hurs") %in% var.list))) {
+        error(recipe$Run$logger,
+              paste0("Necessary variables for the Tick indicator are ",
+                     "tas, and tdps or hurs; NOT included in requested ",
+                     "variables: ", paste(var.list, collapse = ", ")))
+        error_status <- TRUE
+      }
+      # check that ssp is known
+      for (ssp in recipe$Analysis$Workflow$Indicators$Ticks$ssp) {
+        if (ssp != 'i.ricinus') {
+          error(recipe$Run$logger,
+                paste0("Unknown requested ssp ", ssp))
+          error_status <- TRUE
+        }
+      }
+    }
+  } # end checks Indicators
+
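
Editor's worked example of the variable requirement above, with a hypothetical `var.list`: the Malaria check passes when either the dew-point (`tdps`) or the relative-humidity (`hurs`) combination is fully requested:

```r
var.list <- c("tas", "hurs", "prlr")
(!all(c("tas", "tdps", "prlr") %in% var.list)) &
  (!all(c("tas", "hurs", "prlr") %in% var.list))
# [1] FALSE -> no error; dropping "prlr" from var.list would flip this to TRUE
```
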
"orography") CHUNK_DIMS <- c("var", "time", "latitude", "longitude") - MODULES_USING_LATLON <- c("downscaling", "indices") + MODULES_USING_LATLON <- c("downscaling", "indices", "orography") MODULES_USING_TIME <- c("indicators", "aggregation") MODULES_USING_VAR <- c("indicators") # Check that all required fields are present diff --git a/tools/convert_plot_format.R b/tools/convert_plot_format.R new file mode 100644 index 0000000000000000000000000000000000000000..5eb8b7ed4d7004ebe8095c113389c005c05cf114 --- /dev/null +++ b/tools/convert_plot_format.R @@ -0,0 +1,20 @@ +convert_plot_format <- function(recipe) { + # Convert plots to user-chosen format + if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && + tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { + extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) + plot_files <- list.files(path = paste0(recipe$Run$output_dir), + pattern = "\\.pdf$", + recursive = TRUE, full.names = TRUE) + for (file in plot_files) { + system_command <- paste("convert -density 300", file, + "-resize 40% -alpha remove", + paste0(tools::file_path_sans_ext(file), ".", + extension)) + system(system_command) + } + unlink(plot_files) + info(recipe$Run$logger, retrieve = TRUE, + paste0("##### PLOT FILES CONVERTED TO ", toupper(extension), " #####")) + } +} diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 2a6b6c0af21973aa1930051be7fd042255029802..3344e79b70ceff2d959f45bdd72278efb549922a 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -2,7 +2,8 @@ divide_recipe <- function(recipe) { ## TODO: Implement dependent vs independent verifications? - info(recipe$Run$logger, "Splitting recipe in single verifications.") + info(recipe$Run$logger, retrieve = TRUE, + "Splitting recipe in single verifications.") beta_recipe <- list(Description = append(recipe$Description, list(Origin = paste("Atomic recipe,", "split from:", @@ -20,7 +21,8 @@ divide_recipe <- function(recipe) { Output_format = recipe$Analysis$Output_format), Run = recipe$Run[c("Loglevel", "output_dir", "Terminal", - "code_dir", "logfile", "filesystem")]) + "code_dir", "logfile", "filesystem", + "startR_workflow")]) # duplicate recipe by independent variables: # If a single variable is not given inside a list, rebuild structure @@ -217,13 +219,15 @@ divide_recipe <- function(recipe) { ## TODO: Document recipe_model <- paste0("sys-", gsub('\\.', '', all_recipes[[reci]]$Analysis$Datasets$System$name)) - # + variable_name <- gsub(pattern = ", |,| ", + replacement = "-", + x = all_recipes[[reci]]$Analysis$Variables$name) recipe_split <- paste0("_ref-", all_recipes[[reci]]$Analysis$Datasets$Reference$name, - "_var-", all_recipes[[reci]]$Analysis$Variables$name, + "_var-", variable_name, "_reg-", all_recipes[[reci]]$Analysis$Region$name, "_sdate-", all_recipes[[reci]]$Analysis$Time$sdate) recipe_name <- paste0(recipe_model, recipe_split) - + all_recipes[[reci]]$Run$atomic_name <- recipe_name if (all_recipes[[reci]]$Analysis$Datasets$System$name == 'Multimodel') { recipe_dir <- paste0(recipe$Run$output_dir, "/logs/recipes/multimodel/") @@ -239,13 +243,13 @@ divide_recipe <- function(recipe) { } # Print information for user - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste("The main recipe has been divided into", length(chunk_to_recipe), "single model atomic recipes, plus", length(split_to_recipe), "multi-model atomic recipes.")) text <- paste0("Check output directory ", recipe$Run$output_dir, "/logs/recipes/ to see 
   text <- paste0("Check output directory ", recipe$Run$output_dir,
                  "/logs/recipes/ to see all the individual atomic recipes.")
-  info(recipe$Run$logger, text)
+  info(recipe$Run$logger, retrieve = TRUE, text)
   ## TODO: Change returns?
   return(list(n_atomic_recipes = length(chunk_to_recipe), # length(all_recipes)
               outdir = recipe$Run$output_dir,
diff --git a/tools/libs.R b/tools/libs.R
index 33f11882c970b7157e67c42fbb7d973ec9896fa8..0a2e06c94d5fd2a0cf217dff322a6c89963981ae 100644
--- a/tools/libs.R
+++ b/tools/libs.R
@@ -24,6 +24,7 @@ library(ncdf4)
 library(formattable) ## to plot horizontal color bars - used ??
 library(kableExtra)
 library(memuse) # To check mem usage.
+library(CSIndicators)
 # Functions
 ## To be removed when new package is done by library(CSOperational)
diff --git a/tools/save_metadata_chunks.R b/tools/save_metadata_chunks.R
index 5edc5c229bdf3d2917e1905ebb2ba31cc85abd10..489549d4f77204d033197bcf549999c366e1d406 100644
--- a/tools/save_metadata_chunks.R
+++ b/tools/save_metadata_chunks.R
@@ -22,7 +22,12 @@ save_metadata_chunks <- function(recipe, metrics, data_cube, module, nchunks) {
   # Retrieve dimname attributes
   metric_names <- dimnames(metrics)[[1]]
   ## TODO: Force module into Camel Case
-  tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", module)
+  if (!is.null(recipe$Run$atomic_name)) {
+    tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp",
+                         recipe$Run$atomic_name, module)
+  } else {
+    tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", module)
+  }
   # Save metric names (only done once)
   if (!dir.exists(tmp_dir)) {
     dir.create(tmp_dir, recursive = TRUE)
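
Editor's aside: with `Run$atomic_name` set in divide_recipe(), each atomic recipe now writes its chunk metadata under its own temporary directory, so parallel executions no longer collide on the shared tmp path. A sketch with hypothetical values:

```r
output_dir <- "/esarchive/scratch/user/run1"   # hypothetical
atomic_name <- "sys-ECMWF-SEAS51_ref-ERA5_var-tas-prlr_reg-Iberia_sdate-0501"
file.path(output_dir, "outputs", "tmp", atomic_name, "Skill")
# [1] "/esarchive/scratch/user/run1/outputs/tmp/sys-ECMWF-SEAS51_ref-ERA5_var-tas-prlr_reg-Iberia_sdate-0501/Skill"
```
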