diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8c72037204e6310847da6daed5bc7fb7b336f59d..997870c821fec43485f1923b6802f853991c5633 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,6 +7,9 @@ unit-test-seasonal: # This job runs in the test stage. - echo "Loading modules..." - module load R/4.1.2-foss-2015a-bare - module load CDO/1.9.8-foss-2015a + - module load GEOS/3.7.2-foss-2015a-Python-3.7.3 + - module load GDAL/2.2.1-foss-2015a + - module load PROJ/4.8.0-foss-2015a - module list - echo "Running seasonal unit tests..." - Rscript ./tests/test_seasonal.R @@ -17,6 +20,9 @@ unit-test-decadal: # This job runs in the test stage. - echo "Loading modules..." - module load R/4.1.2-foss-2015a-bare - module load CDO/1.9.8-foss-2015a + - module load GEOS/3.7.2-foss-2015a-Python-3.7.3 + - module load GDAL/2.2.1-foss-2015a + - module load PROJ/4.8.0-foss-2015a - module list - echo "Running decadal unit tests..." - Rscript ./tests/test_decadal.R diff --git a/.gitlab/issue_templates/default.md b/.gitlab/issue_templates/default.md new file mode 100644 index 0000000000000000000000000000000000000000..dee99dd37bc869fe74b26e45f90733cc131ab848 --- /dev/null +++ b/.gitlab/issue_templates/default.md @@ -0,0 +1,35 @@ +(This is a template to report errors and bugs. Please replace the text in [brackets] with the corresponding information about the bug. If any field is not relevant, briefly state why. ) + +Hi @vagudets, + +#### Expected Behavior +[Briefly explain what you were trying to do and what you wanted/expected to happen.] + +#### Current Behavior +[Explain what actually happened. If you got any error messages, paste them here. Remember to use code blocks to format the messages correctly, so that they are easy to read.] + +``` +Example: +[ERROR!]: Something went really wrong! +This is the error message that showed up on the terminal. +``` + +#### Possible Solutions? +[If you have an idea of what could be causing the bug, how to solve it, or an example with code that gets the expected behavior, write it here.] + +#### Steps To Reproduce +- Recipe: +[The path to the recipe you were using.] + +- Script: +[The script you were using. Paste it in a code block or include a link. If the script is long or has a lot of steps, please reduce it to the minimal code necessary to reproduce the problem, and open a new R session to test that you can run it and the error is the same before you open the issue.] + +- Branch/SUNSET Version: +[The name of the branch you are using. If the branch is only in your local copy of the repository, please make sure that you include the path.] + +- Environment: +[Where and how you were running SUNSET. On your Workstation, Nord3v2, an external HPC machine? Were you using the modules or the conda environment?] + +#### Other Relevant Information +[Feel free to include screenshots (if the bug is related to plots), paths to output files, small examples, or any other context or information that might seem relevant.] 
+ diff --git a/MODULES b/MODULES index 0a01a9796c2753a74a5e946f00024e33777105ac..6654c0920d879c620e6878f08d901170513dae2c 100644 --- a/MODULES +++ b/MODULES @@ -3,28 +3,39 @@ # WARNING: CDO HAS TO BE ON VERSION 1.9.4 # (If not, conflicts with weekly means computation could appear) -if [ $BSC_MACHINE == "power" ]; then - - module unuse /apps/modules/modulefiles/applications - module use /gpfs/projects/bsc32/software/rhel/7.4/ppc64le/POWER9/modules/all/ - - module load CDO/1.9.4-foss-2018b - module load R/3.6.1-foss-2018b - -elif [ $BSC_MACHINE == "nord3v2" ]; then +if [[ $BSC_MACHINE == "nord3v2" ]]; then module use /gpfs/projects/bsc32/software/suselinux/11/modules/all module unuse /apps/modules/modulefiles/applications /apps/modules/modulefiles/compilers /apps/modules/modulefiles/tools /apps/modules/modulefiles/libraries /apps/modules/modulefiles/environment - + module purge module load CDO/1.9.8-foss-2019b module load R/4.1.2-foss-2019b module load OpenMPI/4.0.5-GCC-8.3.0-nord3-v2 + module load GEOS/3.7.2-foss-2019b-Python-3.7.4 + module load GDAL/3.5.0-foss-2019b-Python-3.7.4 + module load PROJ/9.0.0-GCCcore-8.3.0 + module load Phantomjs/2.1.1 + +elif [[ $HOSTNAME == "bsceshub02.bsc.es" ]]; then + + module purge + module load CDO/1.9.8-foss-2021b + module load R/4.2.1-foss-2021b + module load GEOS/3.11.0-GCC-11.2.0 + module load GDAL/3.5.2-foss-2021b-Python-3.9.6 + module load PROJ/9.1.0-foss-2021b + module load Phantomjs/2.1.1 else + module purge module load CDO/1.9.8-foss-2015a module load R/4.1.2-foss-2015a-bare + module load GEOS/3.7.2-foss-2015a-Python-3.7.3 + module load GDAL/2.2.1-foss-2015a + module load PROJ/4.8.0-foss-2015a + module load Phantomjs/2.1.1 fi diff --git a/NEWS.md b/NEWS.md index fe7ea6e3ebcfa81a4dcab6db088cfe35d650c0a4..40f97c3adbb979911614fc9f44bbe7444c84cd17 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,61 @@ +SUNSET v2.0.0 +============= + +Modules: Loading, Units, Anomalies, Calibration, Downscaling, Indices, Skill, Saving, Visualization, Scorecards + +New features: +- New module for unit conversion, available for temperature, precipitation and pressure variables. +- New Indices module with the following indices available: NAO, Niño1+2, Niño3, Niño3.4 and Niño4. +- New Scorecards module to create Scorecard visualizations of the computed skill metrics. +- New Downscaling module making use of the in-house CSDownscale package. +- Recipe splitting feature: A recipe can be split into 'atomic recipes' and the same workflow can be easily parallelized for multiple forecast systems, references, variables, start dates and regions. +- Option to load, process, save and plot multiple variables in one atomic recipe. +- Possibility to use Autosubmit 4 as a workflow manager to run recipes in parallel. +- New SUNSET launcher script to split a recipe and run it in parallel directly in the cluster with SLURM or with Autosubmit. +- Option to load tas-tos blend by requesting the 'tas-tos' variable. +- For each module there is the possibility to choose whether or not to save the forecast, hindcast and/or observations. +- New skill metrics MSSS and MSE available. +- New use cases with hands-on tutorials. +- GRIB file loading. + +Summary of bugfixes/improvements: + +- The names of the module functions have changed to be the same as the name of the module (e.g. load_datasets() has become Loading()). The old names will be deprecated in a future release. +- New datasets and variables added to the seasonal archive. 
+- The launcher script and the prepare_outputs() function have a new "disable_unique_ID" option, which removes the numerical identifier that is added to the name of the output folder when it is generated.
+- The seasonal and decadal loading functions have been renamed and can now be sourced and called from the same Loading module.
+- Bugfix in the recipe checker: checks are now completed even when the time horizon is not correct.
+- The functions have been adapted to the new s2dv_cube structure in CSTools>5.0.0.
+- The metric 'corr' (correlation for each ensemble member) has been renamed to 'corr_individual_members'.
+- Datasets saved under 'daily' folders in esarchive can now be loaded along with 'daily_mean' datasets.
+- Start date information has been added to the plot names to avoid overwriting plots from different start dates.
+- In the Visualization section of the recipe, the user can specify which plots to generate, whether they should be in a single-panel or multi-panel layout, and choose between different projections.
+- General improvements to the plots: color scheme, units, titles and subtitles, layout and colorbars.
+- The Anomalies module can compute anomalies when the hindcast and observations do not share the same grid.
+- Added module configuration to run SUNSET in the BSC hub.
+- Language specification has been added to lubridate functions to ensure they work well in all language locales.
+
+ESS Verification Suite v1.1.0
+=============================
+
+Modules: Loading, Anomalies, Calibration, Skill, Saving, Visualization
+
+New features:
+
+- New module for anomaly computation.
+- New 'Scorecards' output format (different file names and paths from the default format).
+- New 'recipe checker' feature in prepare_outputs(): it runs a series of checks on the recipe to detect potential errors, typos, or missing information.
+
+Summary of fixes/improvements:
+
+- Changed the names of the seasonal systems from the names in /esarchive to the official names in the CDS.
+- Fixed a bug in the conversion of precipitation units.
+- Fixed a bug related to the consistency between experiment and observation dates for some systems.
+- Function parameters have been simplified and uniformized.
+- Improvements in the logging functionality for longer messages.
+- Improvements to the plots generated by the Visualization module.
+- compute_probabilities() now returns the fcst probabilities as well.
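As an aside on the v2.0.0 renaming and the new prepare_outputs() option noted above, here is a minimal sketch of how a call site might look. Only the function names load_datasets()/Loading() and the "disable_unique_ID" option come from the release notes; the recipe path and argument handling are hypothetical.

```r
# Minimal sketch, not part of this diff. Only the renamed functions and the
# "disable_unique_ID" option are taken from the release notes above; paths and
# argument names are placeholders.
library(yaml)
source("tools/libs.R")                              # helper sourcing, as in the removed OperationalCS.R

recipe <- read_yaml("recipes/recipe_example.yml")   # hypothetical recipe file
prepare_outputs(recipe = recipe)                    # the new "disable_unique_ID" option would be set here

# Before v2.0.0:  data <- load_datasets(recipe)
# From v2.0.0 on, each module function carries the module's name:
data <- Loading(recipe)
```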
+
 ESS Verification Suite v1.0.0
 =============================

diff --git a/OperationalCS.R b/OperationalCS.R
deleted file mode 100644
index ec01a30ec482167ee7dbc07d31389086f7548721..0000000000000000000000000000000000000000
--- a/OperationalCS.R
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env Rscript
-args = commandArgs(trailingOnly = TRUE)
-# To test:
-# args <- NULL; args[1] <- "recipes/seasonal_oper.yml"
-
-
-# execution: Rscript OperationalCS.R recipe.yml
-# This code checks the recipe and builds and executes the workflow
-print(args)
-library(yaml)
-
-recipe <- read_yaml(args[1])
-recipe$filename <- args[1]
-
-# Load required libraries
-source("tools/libs.R")
-
-# Create output folder and log:
-logger <- prepare_outputs(recipe = recipe)
-folder <- logger$foldername
-log_file <- logger$logname
-logger <- logger$logger
-
-# Checks:
-verifications <- check_recipe(recipe, file = args[2], conf, logger)
-# Divide recipe into single verifications recipes:
-total_recipes <- divide_recipe(recipe, verifications, folder, logger)
-
-# Divide recipe into single verifications recipes:
-total_recipes <- divide_recipe(recipe, verifications, folder, logger)
-# Go to verification code:
-capture.output(source("modules/verifications.R"),
-               file = log_file, type ='message',
-               append = TRUE)
-
-
diff --git a/README.md b/README.md
index 4df05eccb0fd30e154bc3f723f399045dff37503..2ae44e8e6d6619cbe8a08bff72c644f3c33c9163 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,31 @@
-ESS Verification Suite
+SUNSET: SUbseasoNal to decadal climate forecast post-processing and asSEssmenT suite
 ======================

-This is the Git project for the ESS Verification Suite, which will serve as a tool for research projects and operational workflows involving subseasonal to seasonal to decadal forecast verification.
+This is the Git project for SUNSET, a collaborative in-house tool developed at BSC-ES for research projects and operational workflows involving subseasonal to seasonal to decadal forecast verification.

-The main developers of the tool are Victòria Agudetse (@vagudets), An-Chi Ho (@aho), Lluís Palma (@lpalma) and Núria Pérez-Zanón (@nperez).
+SUNSET is an R-based tool that aims to provide climate services for sub-seasonal, seasonal and decadal climate forecast time scales. The tool post-processes climate forecast outputs by applying state-of-the-art methodologies to tailor climate products for each application and sector (e.g. agriculture, energy, water management, or health).
+
+Its modular design allows flexibility in defining the required post-processing steps, as well as in defining the products themselves, by choosing the forecast system, reference datasets, variables, and forecast horizon, among other options. The tool also allows for the creation and visualization of climate forecast products, such as maps of the most likely terciles, and performs the verification of the products, which can be visualized on maps and scorecards. The inclusion of a launcher script provides a user-friendly way to parallelize the computation on HPC machines.
+
+Victòria Agudetse (@vagudets) is the maintainer of the SUNSET repository.
Resources
---------

-You can access the documentation of the Verification Suite through the wiki:
-[Auto-s2s Wiki](https://earth.bsc.es/gitlab/es/auto-s2s/-/wikis/home?target=_blank)
+You can access the documentation through the wiki:
+[SUNSET Wiki](https://earth.bsc.es/gitlab/es/sunset/-/wikis/home?target=_blank)
+
+Use cases and hands-on tutorials are available in the repository for you to follow:
+
+[SUNSET Use Cases](https://earth.bsc.es/gitlab/es/sunset/-/tree/master/use_cases/)

 You may also find useful information in the slides from past user meetings:

+[R-tools Climate Forecast Analysis Training session 2023](https://earth.bsc.es/wiki/lib/exe/fetch.php?media=tools:day2_04_handson_r_tools.pdf)
+
+[User meeting March 2023](https://docs.google.com/presentation/d/18VoqgJCzcZTmqNyXL3op_KecsPxsWRkf/edit#slide=id.p1?target=_blank)
+
 [User meeting September 2022](https://docs.google.com/presentation/d/14-qq__fblMt7xvJDaqS5UqfQMXWCf3Ju/edit#slide=id.p1?target=_blank)

 [User meeting June 2022](https://docs.google.com/presentation/d/1R8Gcz5R_NTgcBQvXBkCPG3jY31BVPDur/edit#slide=id.p1?target=_blank)

@@ -23,9 +35,9 @@ Branching strategy

 Branches containing developments that are to be merged into the tool must contain "dev-" at the beginning of the name, followed by a short, meaningful description of the development in question. E.g. "dev-loading-subseasonal" for the branch containing developments related to the loading of subseasonal datasets.

-Users that wish to incorporate their own developments into the core of the tool are encouraged to create a personal fork of the Auto-S2S repository to work on their projects. Please contact Victòria Agudetse at victoria.agudetse@bsc.es to discuss the first steps.
+Users who wish to incorporate their own developments into the core of the tool are encouraged to create a local copy of the repository to work on their projects and push their changes to a new branch. Please contact Victòria Agudetse (@vagudets) or Núria Pérez-Zanón (@nperez) to discuss the first steps.

 Mailing list
 ------------

-User meetings, internal releases and news are announced through the mailing list. You can send an email to victoria.agudetse@bsc.es or an.ho@bsc.es to request subscription.
+User meetings, internal releases and news are announced through the mailing list. You can send an email to victoria.agudetse@bsc.es to request subscription.
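To make the modular workflow described in the README above more concrete, here is a minimal single-recipe sketch. The module names come from the release notes; the function signatures, argument order and intermediate objects are assumptions, not the documented API.

```r
# Hypothetical end-to-end workflow for one atomic recipe (signatures assumed).
library(yaml)
source("tools/libs.R")

recipe <- read_yaml("recipes/recipe_example.yml")   # placeholder recipe path
prepare_outputs(recipe = recipe)                    # output folders, logs and recipe checks

data       <- Loading(recipe)                       # hindcast, forecast and observations
data       <- Units(recipe, data)                   # optional unit conversion
calibrated <- Calibration(recipe, data)
skill      <- Skill(recipe, calibrated)
Saving(recipe, calibrated, skill)                   # write the requested outputs
Visualization(recipe, calibrated, skill)            # maps and most-likely-tercile plots
```

In practice, the launcher and the Autosubmit job scripts added below run an analogous user script once per atomic recipe (see auto-verification.sh, which calls `Rscript ${script} ${atomic_recipe}`).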
diff --git a/autosubmit/auto-scorecards.sh b/autosubmit/auto-scorecards.sh new file mode 100644 index 0000000000000000000000000000000000000000..c30f643f3be53f216ead66675a9545a0e159198a --- /dev/null +++ b/autosubmit/auto-scorecards.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +############ AUTOSUBMIT INPUTS ############ +proj_dir=%PROJDIR% +outdir=%common.OUTDIR% +recipe=%common.RECIPE% +############################### + +cd $proj_dir + +recipe=${outdir}/logs/recipes/${recipe} + +source MODULES + +Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${outdir} diff --git a/autosubmit/auto-verification-CERISE.sh b/autosubmit/auto-verification-CERISE.sh new file mode 100644 index 0000000000000000000000000000000000000000..caf2dd0ec8194b2868187eb26697a197003b5d1a --- /dev/null +++ b/autosubmit/auto-verification-CERISE.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +############ AUTOSUBMIT INPUTS ############ +proj_dir=%PROJDIR% +outdir=%common.OUTDIR% +script=%common.SCRIPT% +CHUNK=%CHUNK% +############################### + +cd $proj_dir + +atomic_recipe_number=$(printf "%02d" $CHUNK) +atomic_recipe=${outdir}/logs/recipes/atomic_recipe_${atomic_recipe_number}.yml + +## Workaround to avoid bug in conda activate/source activate when running +## inside bash script +set +eu +source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh +conda activate /esarchive/scratch/vagudets/conda-cerise/conda/envs +set -eu + +Rscript ${script} ${atomic_recipe} diff --git a/autosubmit/auto-verification.sh b/autosubmit/auto-verification.sh new file mode 100644 index 0000000000000000000000000000000000000000..0089e322d63e89a5578c98a6a64a1369b5b9b108 --- /dev/null +++ b/autosubmit/auto-verification.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +############ AUTOSUBMIT INPUTS ############ +proj_dir=%PROJDIR% +outdir=%common.OUTDIR% +script=%common.SCRIPT% +CHUNK=%CHUNK% +############################### + +cd $proj_dir + +atomic_recipe_number=$(printf "%02d" $CHUNK) +atomic_recipe=${outdir}/logs/recipes/atomic_recipe_${atomic_recipe_number}.yml + +source MODULES + +Rscript ${script} ${atomic_recipe} diff --git a/autosubmit/conf_esarchive/autosubmit.yml b/autosubmit/conf_esarchive/autosubmit.yml new file mode 100644 index 0000000000000000000000000000000000000000..0fd5d5c6aaf61945d131da77cda08d8d1fdd86cd --- /dev/null +++ b/autosubmit/conf_esarchive/autosubmit.yml @@ -0,0 +1,22 @@ +config: + EXPID: + AUTOSUBMIT_VERSION: 4.0.0b0 + MAXWAITINGJOBS: 16 + # Default maximum number of jobs to be running at the same time at any platform + # Default: 6 + TOTALJOBS: 16 + SAFETYSLEEPTIME: 10 + RETRIALS: 0 +mail: + NOTIFICATIONS: + TO: +communications: + # Communications library used to connect with platforms: paramiko or saga. + # Default: paramiko + API: paramiko +storage: + # Defines the way of storing the progress of the experiment. The available options are: + # A PICKLE file (pkl) or an SQLite database (db). Default: pkl + TYPE: pkl + # Defines if the remote logs will be copied to the local platform. Default: True. 
+ COPY_REMOTE_LOGS: True diff --git a/autosubmit/conf_esarchive/expdef.yml b/autosubmit/conf_esarchive/expdef.yml new file mode 100644 index 0000000000000000000000000000000000000000..8dc29b27843729afa89be242a0d0de96bad1b3ec --- /dev/null +++ b/autosubmit/conf_esarchive/expdef.yml @@ -0,0 +1,44 @@ +DEFAULT: + EXPID: + HPCARCH: nord3v2 +experiment: + DATELIST: + MEMBERS: fc0 + CHUNKSIZEUNIT: month + CHUNKSIZE: 1 + NUMCHUNKS: + CHUNKINI: 1 + CALENDAR: standard +project: + PROJECT_TYPE: local + # Destination folder name for project. type: STRING, default: leave empty, + PROJECT_DESTINATION: auto-s2s +# If PROJECT_TYPE is not git, no need to change +git: + # Repository URL STRING: 'https://github.com/torvalds/linux.git' + PROJECT_ORIGIN: https://earth.bsc.es/gitlab/es/auto-s2s.git + # Select branch or tag, STRING, default: 'master', help: {'master' (default), 'develop', 'v3.1b', ...} + PROJECT_BRANCH: master + # type: STRING, default: leave empty, help: if model branch is a TAG leave empty + PROJECT_COMMIT: '' +svn: + PROJECT_URL: '' + PROJECT_REVISION: '' +# If PROJECT_TYPE is not local, no need to change +local: + # type: STRING, help: /foo/bar/ecearth + PROJECT_PATH: /esarchive/scratch/vagudets/repos/auto-s2s/ +# If PROJECT_TYPE is none, no need to change +project_files: + # Where is PROJECT CONFIGURATION file location relative to project root path + FILE_PROJECT_CONF: '' + # Where is JOBS CONFIGURATION file location relative to project root path + FILE_JOBS_CONF: '' + # Default job scripts type in the project. type: STRING, default: bash, supported: 'bash', 'python' or 'r' + JOB_SCRIPTS_TYPE: '' +rerun: + # Is a rerun or not? [Default: Do set FALSE]. BOOLEAN: TRUE, FALSE + RERUN: FALSE + # If RERUN: TRUE then supply the list of chunks to rerun + # LIST: [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] + CHUNKLIST: '' diff --git a/autosubmit/conf_esarchive/jobs.yml b/autosubmit/conf_esarchive/jobs.yml new file mode 100644 index 0000000000000000000000000000000000000000..a3c8934bf70f92d15a8e644a6f34947afcc28847 --- /dev/null +++ b/autosubmit/conf_esarchive/jobs.yml @@ -0,0 +1,16 @@ +JOBS: + verification: + FILE: autosubmit/auto-verification.sh + RUNNING: chunk + WALLCLOCK: + NOTIFY_ON: + PLATFORM: nord3v2 + PROCESSORS: + scorecards: + FILE: autosubmit/auto-scorecards.sh + WALLCLOCK: 00:10 + PLATFORM: nord3v2 + NOTIFY_ON: + PROCESSORS: 1 + DEPENDENCIES: verification + diff --git a/autosubmit/conf_esarchive/platforms.yml b/autosubmit/conf_esarchive/platforms.yml new file mode 100644 index 0000000000000000000000000000000000000000..78056d62973552432a9e7b55194b8c5e0ecac09a --- /dev/null +++ b/autosubmit/conf_esarchive/platforms.yml @@ -0,0 +1,11 @@ +## TODO: Change platform +Platforms: + nord3v2: + TYPE: slurm + HOST: nord4.bsc.es + USER: + PROJECT: bsc32 ## TO BE CHANGED + SCRATCH_DIR: /gpfs/scratch/ ## TO BE CHANGED + PROCESSORS_PER_NODE: 16 + SERIAL_QUEUE: debug + QUEUE: bsc_es diff --git a/autosubmit/conf_esarchive/proj.yml b/autosubmit/conf_esarchive/proj.yml new file mode 100644 index 0000000000000000000000000000000000000000..679cf63b1ced38fd833d28ea9acfa145a1e9bc4f --- /dev/null +++ b/autosubmit/conf_esarchive/proj.yml @@ -0,0 +1,4 @@ +common: + MODULES: "MODULES" + OUTDIR: + SCRIPT: diff --git a/autosubmit/conf_mars/autosubmit.yml b/autosubmit/conf_mars/autosubmit.yml new file mode 100644 index 0000000000000000000000000000000000000000..030081165150f1ac62dd897e11437ff893195f74 --- /dev/null +++ b/autosubmit/conf_mars/autosubmit.yml @@ -0,0 +1,22 @@ +config: + EXPID: + 
AUTOSUBMIT_VERSION: 4.0.73 + MAXWAITINGJOBS: 16 + # Default maximum number of jobs to be running at the same time at any platform + # Default: 6 + TOTALJOBS: 16 + SAFETYSLEEPTIME: 10 + RETRIALS: 0 +mail: + NOTIFICATIONS: + TO: +communications: + # Communications library used to connect with platforms: paramiko or saga. + # Default: paramiko + API: paramiko +storage: + # Defines the way of storing the progress of the experiment. The available options are: + # A PICKLE file (pkl) or an SQLite database (db). Default: pkl + TYPE: pkl + # Defines if the remote logs will be copied to the local platform. Default: True. + COPY_REMOTE_LOGS: True diff --git a/autosubmit/conf_mars/expdef.yml b/autosubmit/conf_mars/expdef.yml new file mode 100644 index 0000000000000000000000000000000000000000..b4327f6556ceefc29336db9697b101b2ddc47134 --- /dev/null +++ b/autosubmit/conf_mars/expdef.yml @@ -0,0 +1,44 @@ +DEFAULT: + EXPID: + HPCARCH: NORD3 +experiment: + DATELIST: + MEMBERS: fc0 + CHUNKSIZEUNIT: month + CHUNKSIZE: 1 + NUMCHUNKS: + CHUNKINI: 1 + CALENDAR: standard +project: + PROJECT_TYPE: local + # Destination folder name for project. type: STRING, default: leave empty, + PROJECT_DESTINATION: auto-s2s +# If PROJECT_TYPE is not git, no need to change +git: + # Repository URL STRING: 'https://github.com/torvalds/linux.git' + PROJECT_ORIGIN: https://earth.bsc.es/gitlab/es/auto-s2s.git + # Select branch or tag, STRING, default: 'master', help: {'master' (default), 'develop', 'v3.1b', ...} + PROJECT_BRANCH: master + # type: STRING, default: leave empty, help: if model branch is a TAG leave empty + PROJECT_COMMIT: '' +svn: + PROJECT_URL: '' + PROJECT_REVISION: '' +# If PROJECT_TYPE is not local, no need to change +local: + # type: STRING, help: /foo/bar/ecearth + PROJECT_PATH: /esarchive/scratch/vagudets/repos/auto-s2s/ +# If PROJECT_TYPE is none, no need to change +project_files: + # Where is PROJECT CONFIGURATION file location relative to project root path + FILE_PROJECT_CONF: '' + # Where is JOBS CONFIGURATION file location relative to project root path + FILE_JOBS_CONF: '' + # Default job scripts type in the project. type: STRING, default: bash, supported: 'bash', 'python' or 'r' + JOB_SCRIPTS_TYPE: '' +rerun: + # Is a rerun or not? [Default: Do set FALSE]. 
BOOLEAN: TRUE, FALSE + RERUN: FALSE + # If RERUN: TRUE then supply the list of chunks to rerun + # LIST: [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] + CHUNKLIST: '' diff --git a/autosubmit/conf_mars/jobs.yml b/autosubmit/conf_mars/jobs.yml new file mode 100644 index 0000000000000000000000000000000000000000..273d3d6f45c7811062fb07a4ac5620daefd82f72 --- /dev/null +++ b/autosubmit/conf_mars/jobs.yml @@ -0,0 +1,16 @@ +JOBS: + verification: + FILE: autosubmit/auto-verification-CERISE.sh + RUNNING: chunk + WALLCLOCK: + NOTIFY_ON: + PLATFORM: NORD3 + PROCESSORS: + scorecards: + FILE: autosubmit/auto-scorecards.sh + WALLCLOCK: 00:10 + PLATFORM: NORD3 + NOTIFY_ON: + PROCESSORS: 1 + DEPENDENCIES: verification + diff --git a/autosubmit/conf_mars/platforms.yml b/autosubmit/conf_mars/platforms.yml new file mode 100644 index 0000000000000000000000000000000000000000..5f76557fb9ab7d5a6c2621858d1d5349cce05464 --- /dev/null +++ b/autosubmit/conf_mars/platforms.yml @@ -0,0 +1,12 @@ +## TODO: Change platform +Platforms: + NORD3: + TYPE: slurm + HOST: nord4.bsc.es + USER: + PROJECT: bsc32 ## TO BE CHANGED + SCRATCH_DIR: /gpfs/scratch/ ## TO BE CHANGED + PROCESSORS_PER_NODE: 16 + SERIAL_QUEUE: debug + QUEUE: bsc_es + CUSTOM_DIRECTIVES: ["#SBATCH --exclusive"] diff --git a/autosubmit/conf_mars/proj.yml b/autosubmit/conf_mars/proj.yml new file mode 100644 index 0000000000000000000000000000000000000000..679cf63b1ced38fd833d28ea9acfa145a1e9bc4f --- /dev/null +++ b/autosubmit/conf_mars/proj.yml @@ -0,0 +1,4 @@ +common: + MODULES: "MODULES" + OUTDIR: + SCRIPT: diff --git a/conda_installation/environment-cerise-localgribR-ecmwf.yml b/conda_installation/environment-cerise-localgribR-ecmwf.yml new file mode 100644 index 0000000000000000000000000000000000000000..983347f0d2d91190e5165e86c66c3694cf28fcfb --- /dev/null +++ b/conda_installation/environment-cerise-localgribR-ecmwf.yml @@ -0,0 +1,331 @@ +name: condaCerise +channels: + - r + - conda-forge + - https://nexus.ecmwf.int/repository/conda-ecmwf + - https://nexus.ecmwf.int/repository/conda-forge +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_gnu + - _r-mutex=1.0.1=anacondar_1 + - atk-1.0=2.38.0=hd4edc92_1 + - binutils_impl_linux-64=2.40=hf600244_0 + - blosc=1.21.5=h0f2a231_0 + - boost-cpp=1.78.0=h5adbc97_2 + - bwidget=1.9.14=ha770c72_1 + - bzip2=1.0.8=h7f98852_4 + - c-ares=1.19.1=hd590300_0 + - ca-certificates=2023.7.22=hbcca054_0 + - cairo=1.16.0=ha61ee94_1014 + - cfitsio=4.1.0=hd9d235c_0 + - curl=8.1.2=h409715c_0 + - eccodes=2.30.2=hc4464db_0 + - expat=2.5.0=hcb278e6_1 + - fftw=3.3.10=nompi_hc118613_108 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=hab24e00_0 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - freeglut=3.2.2=h9c3ff4c_1 + - freetype=2.12.1=h267a509_2 + - freexl=1.0.6=h166bdaf_1 + - fribidi=1.0.10=h36c2ea0_0 + - gcc_impl_linux-64=13.2.0=h338b0a0_2 + - gdal=3.5.2=py311hd39052d_7 + - gdk-pixbuf=2.42.8=hff1cb4f_1 + - geos=3.11.0=h27087fc_0 + - geotiff=1.7.1=ha76d385_4 + - gettext=0.21.1=h27087fc_0 + - gfortran_impl_linux-64=13.2.0=h76e1118_2 + - ghostscript=10.02.0=h59595ed_0 + - giflib=5.2.1=h0b41bf4_3 + - gmp=6.2.1=h58526e2_0 + - graphite2=1.3.13=h58526e2_1001 + - graphviz=6.0.2=h99bc08f_0 + - gsl=2.7=he838d99_0 + - gtk2=2.24.33=h90689f9_2 + - gts=0.7.6=h977cf35_4 + - gxx_impl_linux-64=13.2.0=h338b0a0_2 + - harfbuzz=6.0.0=h8e241bc_0 + - 
hdf4=4.2.15=h9772cbc_5 + - hdf5=1.12.2=nompi_h4df4325_101 + - icu=70.1=h27087fc_0 + - imagemagick=7.1.0_55=pl5321h0d24a18_0 + - jasper=2.0.33=h0ff4b12_1 + - jbig=2.1=h7f98852_2003 + - jpeg=9e=h0b41bf4_3 + - json-c=0.16=hc379101_0 + - kealib=1.4.15=ha7026e8_1 + - kernel-headers_linux-64=2.6.32=he073ed8_16 + - keyutils=1.6.1=h166bdaf_0 + - krb5=1.20.1=h81ceb04_0 + - lcms2=2.14=h6ed2654_0 + - ld_impl_linux-64=2.40=h41732ed_0 + - lerc=4.0.0=h27087fc_0 + - libaec=1.0.6=hcb278e6_1 + - libblas=3.9.0=18_linux64_openblas + - libcblas=3.9.0=18_linux64_openblas + - libcurl=8.1.2=h409715c_0 + - libdap4=3.20.6=hd7c4107_2 + - libdeflate=1.14=h166bdaf_0 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=h516909a_1 + - libexpat=2.5.0=hcb278e6_1 + - libffi=3.4.2=h7f98852_5 + - libgcc-devel_linux-64=13.2.0=ha9c7c90_2 + - libgcc-ng=13.2.0=h807b86a_2 + - libgd=2.3.3=h18fbbfe_3 + - libgdal=3.5.2=h27ae5c1_7 + - libgfortran-ng=13.2.0=h69a702a_2 + - libgfortran5=13.2.0=ha4646dd_2 + - libglib=2.78.0=hebfc3b9_0 + - libglu=9.0.0=he1b5a44_1001 + - libgomp=13.2.0=h807b86a_2 + - libiconv=1.17=h166bdaf_0 + - libkml=1.3.0=h37653c0_1015 + - liblapack=3.9.0=18_linux64_openblas + - libnetcdf=4.8.1=nompi_h261ec11_106 + - libnghttp2=1.52.0=h61bc06f_0 + - libnsl=2.0.0=hd590300_1 + - libopenblas=0.3.24=pthreads_h413a1c8_0 + - libpng=1.6.39=h753d276_0 + - libpq=14.5=hb675445_5 + - librsvg=2.54.4=h7abd40a_0 + - librttopo=1.1.0=hf730bdb_11 + - libsanitizer=13.2.0=h7e041cc_2 + - libspatialite=5.0.1=hfbd986c_21 + - libsqlite=3.43.0=h2797004_0 + - libssh2=1.11.0=h0841786_0 + - libstdcxx-devel_linux-64=13.2.0=ha9c7c90_2 + - libstdcxx-ng=13.2.0=h7e041cc_2 + - libtiff=4.4.0=h82bc61c_5 + - libtool=2.4.7=h27087fc_0 + - libuuid=2.38.1=h0b41bf4_0 + - libwebp=1.2.4=h522a892_0 + - libwebp-base=1.2.4=h166bdaf_0 + - libxcb=1.13=h7f98852_1004 + - libxml2=2.10.3=hca2bb57_4 + - libzip=1.10.1=h2629f0a_3 + - libzlib=1.2.13=hd590300_5 + - lz4-c=1.9.4=hcb278e6_0 + - make=4.3=hd18ef5c_1 + - ncurses=6.4=hcb278e6_0 + - nspr=4.35=h27087fc_0 + - nss=3.92=h1d7d5a4_0 + - numpy=1.26.0=py311h64a7726_0 + - openjpeg=2.5.0=h7d73246_1 + - openssl=3.1.3=hd590300_0 + - pandoc=3.1.3=h32600fe_0 + - pango=1.50.14=hd33c08f_0 + - pcre=8.45=h9c3ff4c_0 + - pcre2=10.40=hc3806b6_0 + - perl=5.32.1=4_hd590300_perl5 + - phantomjs=2.1.1=ha770c72_1 + - pip=23.2.1=pyhd8ed1ab_0 + - pixman=0.42.2=h59595ed_0 + - pkg-config=0.29.2=h36c2ea0_1008 + - poppler=22.10.0=h92391eb_0 + - poppler-data=0.4.12=hd8ed1ab_0 + - postgresql=14.5=h3248436_5 + - proj=9.1.0=h93bde94_0 + - pthread-stubs=0.4=h36c2ea0_1001 + - python=3.11.5=hab00c5b_0_cpython + - python_abi=3.11=4_cp311 + - r-abind=1.4_5=r42hc72bb7e_1005 + - r-askpass=1.2.0=r42h57805ef_0 + - r-assertthat=0.2.1=r42hc72bb7e_4 + - r-backports=1.4.1=r42h57805ef_2 + - r-base=4.2.2=h6b4767f_2 + - r-base64enc=0.1_3=r42h57805ef_1006 + - r-bigmemory=4.6.1=r42ha503ecb_2 + - r-bigmemory.sri=0.1.6=r42hc72bb7e_1 + - r-brio=1.1.3=r42h57805ef_2 + - r-bslib=0.5.1=r42hc72bb7e_0 + - r-cachem=1.0.8=r42h57805ef_1 + - r-callr=3.7.3=r42hc72bb7e_1 + - r-class=7.3_22=r42h57805ef_1 + - r-classint=0.4_10=r42h61816a4_0 + - r-cli=3.6.1=r42ha503ecb_1 + - r-climdex.pcic=1.1_11=r42ha503ecb_2 + - r-climprojdiags=0.3.2=r42hc72bb7e_1 + - r-clock=0.7.0=r42ha503ecb_1 + - r-codetools=0.2_19=r42hc72bb7e_1 + - r-colorspace=2.1_0=r42h57805ef_1 + - r-commonmark=1.9.0=r42h57805ef_1 + - r-configr=0.3.5=r42hc72bb7e_1 + - r-cowplot=1.1.1=r42hc72bb7e_2 + - r-cpp11=0.4.6=r42hc72bb7e_0 + - r-crayon=1.5.2=r42hc72bb7e_2 + - r-curl=5.0.1=r42hf9611b0_0 + - r-dbi=1.1.3=r42hc72bb7e_2 + - 
r-desc=1.4.2=r42hc72bb7e_2 + - r-diffobj=0.3.5=r42h57805ef_2 + - r-digest=0.6.33=r42ha503ecb_0 + - r-docopt=0.7.1=r42hc72bb7e_3 + - r-doparallel=1.0.17=r42hc72bb7e_2 + - r-dotcall64=1.0_2=r42h61816a4_2 + - r-e1071=1.7_13=r42ha503ecb_1 + - r-easyncdf=0.1.2=r42hc72bb7e_1 + - r-easyverification=0.4.5=r42ha503ecb_0 + - r-ellipsis=0.3.2=r42h57805ef_2 + - r-evaluate=0.21=r42hc72bb7e_1 + - r-fansi=1.0.4=r42h57805ef_1 + - r-farver=2.1.1=r42ha503ecb_2 + - r-fastmap=1.1.1=r42ha503ecb_1 + - r-fields=15.2=r42h61816a4_0 + - r-fnn=1.1.3.2=r42ha503ecb_1 + - r-fontawesome=0.5.2=r42hc72bb7e_0 + - r-foreach=1.5.2=r42hc72bb7e_2 + - r-formattable=0.2.1=r42ha770c72_2 + - r-fs=1.6.3=r42ha503ecb_0 + - r-future=1.33.0=r42hc72bb7e_0 + - r-generics=0.1.3=r42hc72bb7e_2 + - r-geomap=2.5_0=r42h57805ef_2 + - r-geomapdata=2.0_2=r42hc72bb7e_0 + - r-ggplot2=3.4.3=r42hc72bb7e_0 + - r-globals=0.16.2=r42hc72bb7e_1 + - r-glue=1.6.2=r42h57805ef_2 + - r-gridextra=2.3=r42hc72bb7e_1005 + - r-gtable=0.3.4=r42hc72bb7e_0 + - r-highr=0.10=r42hc72bb7e_1 + - r-htmltools=0.5.6=r42ha503ecb_0 + - r-htmlwidgets=1.6.2=r42hc72bb7e_1 + - r-httpuv=1.6.11=r42ha503ecb_1 + - r-httr=1.4.7=r42hc72bb7e_0 + - r-ini=0.3.1=r42hc72bb7e_1005 + - r-isoband=0.2.7=r42ha503ecb_2 + - r-iterators=1.0.14=r42hc72bb7e_2 + - r-jquerylib=0.1.4=r42hc72bb7e_2 + - r-jsonlite=1.8.7=r42h57805ef_0 + - r-kableextra=1.3.4=r42hc72bb7e_2 + - r-kernsmooth=2.23_22=r42h13b3f57_0 + - r-knitr=1.44=r42hc72bb7e_0 + - r-labeling=0.4.3=r42hc72bb7e_0 + - r-later=1.3.1=r42ha503ecb_1 + - r-lattice=0.21_8=r42h57805ef_1 + - r-lifecycle=1.0.3=r42hc72bb7e_2 + - r-listenv=0.9.0=r42hc72bb7e_1 + - r-log4r=0.4.3=r42h57805ef_1 + - r-lubridate=1.9.2=r42h57805ef_2 + - r-magick=2.7.3=r42h7525677_1 + - r-magrittr=2.0.3=r42h57805ef_2 + - r-mapproj=1.2.11=r42h57805ef_1 + - r-maps=3.4.1=r42h57805ef_2 + - r-mass=7.3_60=r42h57805ef_1 + - r-matrix=1.6_1.1=r42h316c678_0 + - r-mba=0.1_0=r42ha503ecb_1 + - r-memoise=2.0.1=r42hc72bb7e_2 + - r-memuse=4.2_3=r42h57805ef_1 + - r-mgcv=1.9_0=r42h316c678_0 + - r-mime=0.12=r42h57805ef_2 + - r-multiapply=2.1.4=r42hc72bb7e_1 + - r-munsell=0.5.0=r42hc72bb7e_1006 + - r-nbclust=3.0.1=r42hc72bb7e_2 + - r-ncdf4=1.21=r42h15f2bca_0 + - r-nlme=3.1_163=r42h61816a4_0 + - r-nnet=7.3_19=r42h57805ef_1 + - r-openssl=2.1.1=r42hb353fa6_0 + - r-parallelly=1.36.0=r42hc72bb7e_1 + - r-pbapply=1.7_2=r42hc72bb7e_0 + - r-pcict=0.5_4.4=r42h57805ef_1 + - r-pillar=1.9.0=r42hc72bb7e_1 + - r-pkgbuild=1.4.2=r42hc72bb7e_0 + - r-pkgconfig=2.0.3=r42hc72bb7e_3 + - r-pkgload=1.3.3=r42hc72bb7e_0 + - r-plyr=1.8.8=r42ha503ecb_1 + - r-praise=1.0.0=r42hc72bb7e_1007 + - r-prettyunits=1.2.0=r42hc72bb7e_0 + - r-processx=3.8.2=r42h57805ef_0 + - r-proj4=1.0_12=r42h4db2be8_0 + - r-promises=1.2.1=r42ha503ecb_0 + - r-proxy=0.4_27=r42h57805ef_2 + - r-ps=1.7.5=r42h57805ef_1 + - r-r6=2.5.1=r42hc72bb7e_2 + - r-rappdirs=0.3.3=r42h57805ef_2 + - r-rcolorbrewer=1.1_3=r42h6115d3f_0 + - r-rcpp=1.0.11=r42h7df8631_0 + - r-rcpparmadillo=0.12.6.4.0=r42h08d816e_0 + - r-rcpptoml=0.2.2=r42ha503ecb_1 + - r-rematch2=2.1.2=r42hc72bb7e_3 + - r-rlang=1.1.1=r42ha503ecb_1 + - r-rmarkdown=2.25=r42hc72bb7e_0 + - r-rnaturalearth=0.1.0=r42hc72bb7e_1 + - r-rpmg=2.2_7=r42hc72bb7e_0 + - r-rprojroot=2.0.3=r42hc72bb7e_1 + - r-rstudioapi=0.15.0=r42hc72bb7e_0 + - r-rvest=1.0.3=r42hc72bb7e_2 + - r-s2=1.1.4=r42h5eac2b3_1 + - r-s2dv=1.4.1=r42hc72bb7e_1 + - r-s2dverification=2.10.3=r42hc72bb7e_2 + - r-sass=0.4.7=r42ha503ecb_0 + - r-scales=1.2.1=r42hc72bb7e_2 + - r-selectr=0.4_2=r42hc72bb7e_3 + - r-sf=1.0_7=r42h25da31b_5 + - r-shiny=1.7.5=r42h785f33e_0 + 
- r-sourcetools=0.1.7_1=r42ha503ecb_1 + - r-sp=2.0_0=r42h57805ef_0 + - r-spam=2.9_1=r42hd9ac46e_2 + - r-specsverification=0.5_3=r42h7525677_2 + - r-splancs=2.01_44=r42h61816a4_0 + - r-startr=2.3.0=r42hc72bb7e_0 + - r-stringi=1.7.12=r42h1ae9187_0 + - r-stringr=1.5.0=r42h785f33e_1 + - r-svglite=2.1.1=r42h329214f_1 + - r-sys=3.4.2=r42h57805ef_1 + - r-systemfonts=1.0.4=r42haf97adc_2 + - r-testthat=3.1.10=r42ha503ecb_0 + - r-tibble=3.2.1=r42h57805ef_2 + - r-timechange=0.2.0=r42ha503ecb_1 + - r-tinytex=0.46=r42hc72bb7e_0 + - r-tzdb=0.4.0=r42ha503ecb_1 + - r-units=0.8_4=r42ha503ecb_0 + - r-utf8=1.2.3=r42h57805ef_1 + - r-uuid=1.1_1=r42h57805ef_0 + - r-vctrs=0.6.3=r42ha503ecb_0 + - r-viridis=0.6.4=r42hc72bb7e_0 + - r-viridislite=0.4.2=r42hc72bb7e_1 + - r-waldo=0.5.1=r42hc72bb7e_1 + - r-webshot=0.5.5=r42hc72bb7e_0 + - r-withr=2.5.1=r42hc72bb7e_0 + - r-wk=0.8.0=r42ha503ecb_0 + - r-xfun=0.40=r42ha503ecb_0 + - r-xml2=1.3.3=r42h044e5c7_2 + - r-xtable=1.8_4=r42hc72bb7e_5 + - r-yaml=2.3.5=r42h06615bd_1 + - readline=8.2=h8228510_1 + - sed=4.8=he412f7d_0 + - setuptools=68.2.2=pyhd8ed1ab_0 + - snappy=1.1.10=h9fff704_0 + - sqlite=3.43.0=h2c6b66d_0 + - sysroot_linux-64=2.12=he073ed8_16 + - tiledb=2.11.3=h3f4058f_1 + - tk=8.6.13=h2797004_0 + - tktable=2.10=h0c5db8f_5 + - tzcode=2023c=h0b41bf4_0 + - tzdata=2023c=h71feb2d_0 + - udunits2=2.2.28=h40f5838_2 + - wheel=0.41.2=pyhd8ed1ab_0 + - xerces-c=3.2.4=h55805fa_1 + - xorg-fixesproto=5.0=h7f98852_1002 + - xorg-inputproto=2.3.2=h7f98852_1002 + - xorg-kbproto=1.0.7=h7f98852_1002 + - xorg-libice=1.0.10=h7f98852_0 + - xorg-libsm=1.2.3=hd9c2040_1000 + - xorg-libx11=1.8.4=h0b41bf4_0 + - xorg-libxau=1.0.11=hd590300_0 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxfixes=5.0.3=h7f98852_1004 + - xorg-libxi=1.7.10=h7f98852_0 + - xorg-libxrender=0.9.10=h7f98852_1003 + - xorg-libxt=1.3.0=hd590300_0 + - xorg-renderproto=0.11.1=h7f98852_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xproto=7.0.31=h7f98852_1007 + - xz=5.2.6=h166bdaf_0 + - zlib=1.2.13=hd590300_5 + - zstd=1.5.5=hfc55251_0 +prefix: /perm/cyce/conda/envs/condaCerise diff --git a/conda_installation/environment-cerise-localgribr.yml b/conda_installation/environment-cerise-localgribr.yml new file mode 100644 index 0000000000000000000000000000000000000000..08c74f3d06e9e63c1914f21d7fdd53732dc6df01 --- /dev/null +++ b/conda_installation/environment-cerise-localgribr.yml @@ -0,0 +1,346 @@ +name: ceriseNoAS +channels: + - conda-forge + - defaults +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_kmp_llvm + - _r-mutex=1.0.0=anacondar_1 + - _sysroot_linux-64_curr_repodata_hack=3=haa98f57_10 + - atk-1.0=2.38.0=hd4edc92_1 + - aws-c-auth=0.7.0=hf8751d9_2 + - aws-c-cal=0.6.0=h93469e0_0 + - aws-c-common=0.8.23=hd590300_0 + - aws-c-compression=0.2.17=h862ab75_1 + - aws-c-event-stream=0.3.1=h9599702_1 + - aws-c-http=0.7.11=hbe98c3e_0 + - aws-c-io=0.13.28=h3870b5a_0 + - aws-c-mqtt=0.8.14=h2e270ba_2 + - aws-c-s3=0.3.13=heb0bb06_2 + - aws-c-sdkutils=0.1.11=h862ab75_1 + - aws-checksums=0.1.16=h862ab75_1 + - aws-crt-cpp=0.20.3=he9c0e7f_4 + - aws-sdk-cpp=1.10.57=hbc2ea52_17 + - binutils_impl_linux-64=2.40=hf600244_0 + - blas=2.116=openblas + - blas-devel=3.9.0=16_linux64_openblas + - blosc=1.21.4=h0f2a231_0 + - boost-cpp=1.78.0=h6582d0a_3 + - bwidget=1.9.11=1 + - bzip2=1.0.8=h7b6447c_0 + - c-ares=1.19.0=h5eee18b_0 + - ca-certificates=2023.7.22=hbcca054_0 + - cairo=1.16.0=hb05425b_5 + - cfitsio=4.2.0=hd9d235c_0 + - curl=8.2.0=hca28451_0 + - eccodes=2.31.0=h73bf81c_0 + - 
expat=2.5.0=hcb278e6_1 + - fftw=3.3.10=nompi_hf0379b8_106 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=hab24e00_0 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - freeglut=3.2.2=hac7e632_2 + - freetype=2.12.1=h4a9f257_0 + - freexl=1.0.6=h166bdaf_1 + - fribidi=1.0.10=h7b6447c_0 + - gcc_impl_linux-64=13.1.0=hc4be1a9_0 + - gdk-pixbuf=2.42.10=h6b639ba_2 + - geos=3.11.2=hcb278e6_0 + - geotiff=1.7.1=h22adcc9_11 + - gettext=0.21.1=h27087fc_0 + - gfortran_impl_linux-64=13.1.0=hd511a9b_0 + - ghostscript=9.54.0=h27087fc_2 + - giflib=5.2.1=h0b41bf4_3 + - glib=2.76.4=hfc55251_0 + - glib-tools=2.76.4=hfc55251_0 + - graphite2=1.3.14=h295c915_1 + - graphviz=8.1.0=h28d9a01_0 + - gsl=2.7=he838d99_0 + - gtk2=2.24.33=h90689f9_2 + - gts=0.7.6=h977cf35_4 + - gxx_impl_linux-64=13.1.0=hc4be1a9_0 + - harfbuzz=7.3.0=hdb3a94d_0 + - hdf4=4.2.15=h501b40f_6 + - hdf5=1.14.1=nompi_h4f84152_100 + - icu=72.1=hcb278e6_0 + - imagemagick=7.1.1_14=pl5321hf48ede7_0 + - jasper=4.0.0=h32699f2_1 + - jbig=2.1=h7f98852_2003 + - json-c=0.16=hc379101_0 + - kealib=1.5.1=h3e6883b_4 + - kernel-headers_linux-64=2.6.32=he073ed8_16 + - keyutils=1.6.1=h166bdaf_0 + - krb5=1.21.1=h659d440_0 + - lcms2=2.15=haa2dc70_1 + - ld_impl_linux-64=2.40=h41732ed_0 + - lerc=4.0.0=h27087fc_0 + - libaec=1.0.6=hcb278e6_1 + - libblas=3.9.0=16_linux64_openblas + - libcblas=3.9.0=16_linux64_openblas + - libcurl=8.2.0=hca28451_0 + - libdeflate=1.18=h0b41bf4_0 + - libedit=3.1.20221030=h5eee18b_0 + - libev=4.33=h7f8727e_1 + - libexpat=2.5.0=hcb278e6_1 + - libffi=3.4.4=h6a678d5_0 + - libgcc-devel_linux-64=13.1.0=he3cc6c4_0 + - libgcc-ng=13.1.0=he5830b7_0 + - libgd=2.3.3=hfa28ad5_6 + - libgdal=3.6.4=hd54c316_5 + - libgfortran-ng=13.1.0=h69a702a_0 + - libgfortran5=13.1.0=h15d22d2_0 + - libglib=2.76.4=hebfc3b9_0 + - libglu=9.0.0=hac7e632_1002 + - libgomp=13.1.0=he5830b7_0 + - libiconv=1.17=h166bdaf_0 + - libjpeg-turbo=2.1.5.1=h0b41bf4_0 + - libkml=1.3.0=h37653c0_1015 + - liblapack=3.9.0=16_linux64_openblas + - liblapacke=3.9.0=16_linux64_openblas + - libnetcdf=4.9.2=nompi_h7e745eb_109 + - libnghttp2=1.52.0=h2d74bed_1 + - libnsl=2.0.0=h7f98852_0 + - libopenblas=0.3.21=pthreads_h78a6416_3 + - libpng=1.6.39=h5eee18b_0 + - libpq=15.3=hfc447b1_2 + - librsvg=2.56.1=h98fae49_0 + - librttopo=1.1.0=h0d5128d_13 + - libsanitizer=13.1.0=hfd8a6a1_0 + - libspatialite=5.0.1=hca56755_27 + - libsqlite=3.42.0=h2797004_0 + - libssh2=1.11.0=h0841786_0 + - libstdcxx-devel_linux-64=13.1.0=he3cc6c4_0 + - libstdcxx-ng=13.1.0=hfd8a6a1_0 + - libtiff=4.5.1=h8b53f26_0 + - libtool=2.4.7=h27087fc_0 + - libuuid=2.38.1=h0b41bf4_0 + - libwebp=1.3.1=hbf2b3c1_0 + - libwebp-base=1.3.1=hd590300_0 + - libxcb=1.15=h7f8727e_0 + - libxml2=2.11.4=h0d562d8_0 + - libzip=1.9.2=hc929e4a_1 + - libzlib=1.2.13=hd590300_5 + - llvm-openmp=16.0.6=h4dfa4b3_0 + - lz4-c=1.9.4=h6a678d5_0 + - make=4.2.1=h1bed415_1 + - ncurses=6.4=h6a678d5_0 + - nspr=4.35=h27087fc_0 + - nss=3.89=he45b914_0 + - openblas=0.3.21=pthreads_h320a7e8_3 + - openjpeg=2.5.0=hfec8fc6_2 + - openssl=3.1.1=hd590300_1 + - pandoc=2.19.2=h32600fe_2 + - pango=1.50.14=heaa33ce_1 + - pcre=8.45=h295c915_0 + - pcre2=10.40=hc3806b6_0 + - perl=5.32.1=4_hd590300_perl5 + - phantomjs=2.1.1=1 + - pip=23.1.2=py311h06a4308_0 + - pixman=0.40.0=h7f8727e_1 + - pkg-config=0.29.2=h36c2ea0_1008 + - poppler=23.05.0=hd18248d_1 + - poppler-data=0.4.12=hd8ed1ab_0 + - postgresql=15.3=h8972f4a_2 + - proj=9.2.1=ha643af7_0 + - 
python=3.11.4=hab00c5b_0_cpython + - r-abind=1.4_5=r42hc72bb7e_1005 + - r-askpass=1.1=r42h57805ef_4 + - r-assertthat=0.2.1=r42hc72bb7e_4 + - r-backports=1.4.1=r42h57805ef_2 + - r-base=4.2.3=hb1b9b86_6 + - r-base64enc=0.1_3=r42h57805ef_1006 + - r-bigmemory=4.6.1=r42ha503ecb_2 + - r-bigmemory.sri=0.1.6=r42hc72bb7e_1 + - r-brio=1.1.3=r42h57805ef_2 + - r-bslib=0.3.1=r42h6115d3f_0 + - r-cachem=1.0.8=r42h57805ef_1 + - r-callr=3.7.3=r42hc72bb7e_1 + - r-class=7.3_22=r42h57805ef_1 + - r-classint=0.4_9=r42h61816a4_1 + - r-cli=3.6.1=r42ha503ecb_1 + - r-climdex.pcic=1.1_11=r42ha503ecb_2 + - r-climprojdiags=0.3.2=r42hc72bb7e_1 + - r-clock=0.7.0=r42ha503ecb_1 + - r-codetools=0.2_19=r42hc72bb7e_1 + - r-colorspace=2.1_0=r42h57805ef_1 + - r-commonmark=1.9.0=r42h57805ef_1 + - r-configr=0.3.5=r42hc72bb7e_1 + - r-cowplot=1.1.1=r42hc72bb7e_2 + - r-cpp11=0.4.5=r42hc72bb7e_0 + - r-crayon=1.5.2=r42hc72bb7e_2 + - r-curl=5.0.1=r42hf9611b0_0 + - r-dbi=1.1.3=r42hc72bb7e_2 + - r-desc=1.4.2=r42hc72bb7e_2 + - r-diffobj=0.3.5=r42h57805ef_2 + - r-digest=0.6.33=r42ha503ecb_0 + - r-docopt=0.7.1=r42hc72bb7e_3 + - r-doparallel=1.0.17=r42hc72bb7e_2 + - r-dotcall64=1.0_2=r42h61816a4_2 + - r-e1071=1.7_13=r42ha503ecb_1 + - r-easyncdf=0.1.2=r42hc72bb7e_1 + - r-easyverification=0.4.4=r42ha503ecb_3 + - r-ellipsis=0.3.2=r42h57805ef_2 + - r-evaluate=0.21=r42hc72bb7e_1 + - r-fansi=1.0.4=r42h57805ef_1 + - r-farver=2.1.1=r42ha503ecb_2 + - r-fastmap=1.1.1=r42ha503ecb_1 + - r-fields=14.1=r42h61816a4_2 + - r-fnn=1.1.3.2=r42ha503ecb_1 + - r-fontawesome=0.5.1=r42hc72bb7e_1 + - r-foreach=1.5.2=r42hc72bb7e_2 + - r-formattable=0.2.1=r42ha770c72_2 + - r-fs=1.6.3=r42ha503ecb_0 + - r-future=1.33.0=r42hc72bb7e_0 + - r-generics=0.1.3=r42hc72bb7e_2 + - r-geomap=2.5_0=r42h57805ef_2 + - r-geomapdata=2.0_0=r42hc72bb7e_2 + - r-ggplot2=3.4.2=r42hc72bb7e_1 + - r-globals=0.16.2=r42hc72bb7e_1 + - r-glue=1.6.2=r42h57805ef_2 + - r-gribr=1.2.6=r42hd590300_0 + - r-gridextra=2.3=r42hc72bb7e_1005 + - r-gtable=0.3.3=r42hc72bb7e_1 + - r-highr=0.10=r42hc72bb7e_1 + - r-htmltools=0.5.5=r42ha503ecb_1 + - r-htmlwidgets=1.6.2=r42hc72bb7e_1 + - r-httpuv=1.6.11=r42ha503ecb_1 + - r-httr=1.4.6=r42hc72bb7e_1 + - r-ini=0.3.1=r42hc72bb7e_1005 + - r-isoband=0.2.7=r42ha503ecb_2 + - r-iterators=1.0.14=r42hc72bb7e_2 + - r-jquerylib=0.1.4=r42hc72bb7e_2 + - r-jsonlite=1.8.7=r42h57805ef_0 + - r-kableextra=1.3.4=r42hc72bb7e_2 + - r-kernsmooth=2.23_22=r42h13b3f57_0 + - r-knitr=1.43=r42hc72bb7e_1 + - r-labeling=0.4.2=r42hc72bb7e_3 + - r-later=1.3.1=r42ha503ecb_1 + - r-lattice=0.21_8=r42h57805ef_1 + - r-lifecycle=1.0.3=r42hc72bb7e_2 + - r-listenv=0.9.0=r42hc72bb7e_1 + - r-lobstr=1.1.2=r42ha503ecb_3 + - r-log4r=0.4.3=r42h57805ef_1 + - r-lubridate=1.8.0=r42h884c59f_0 + - r-magick=2.7.4=r42ha503ecb_1 + - r-magrittr=2.0.3=r42h57805ef_2 + - r-mapproj=1.2.11=r42h57805ef_1 + - r-maps=3.4.1=r42h57805ef_2 + - r-mass=7.3_60=r42h57805ef_1 + - r-matrix=1.6_0=r42h316c678_0 + - r-mba=0.1_0=r42ha503ecb_1 + - r-memuse=4.2_3=r42h57805ef_1 + - r-mgcv=1.9_0=r42h316c678_0 + - r-mime=0.12=r42h57805ef_2 + - r-multiapply=2.1.4=r42hc72bb7e_1 + - r-munsell=0.5.0=r42hc72bb7e_1006 + - r-nbclust=3.0.1=r42hc72bb7e_2 + - r-ncdf4=1.21=r42hdc3962f_5 + - r-nlme=3.1_162=r42h61816a4_1 + - r-nnet=7.3_19=r42h57805ef_1 + - r-openssl=2.0.6=r42hb353fa6_1 + - r-parallelly=1.36.0=r42hc72bb7e_1 + - r-pbapply=1.7_2=r42hc72bb7e_0 + - r-pcict=0.5_4.4=r42h57805ef_1 + - r-pillar=1.9.0=r42hc72bb7e_1 + - r-pkgconfig=2.0.3=r42hc72bb7e_3 + - r-pkgload=1.3.2.1=r42hc72bb7e_0 + - r-plyr=1.8.8=r42ha503ecb_1 + - r-praise=1.0.0=r42hc72bb7e_1007 + 
- r-prettyunits=1.1.1=r42hc72bb7e_3 + - r-processx=3.8.2=r42h57805ef_0 + - r-proj=0.4.0=r42h57805ef_2 + - r-proj4=1.0_12=r42he1907f6_4 + - r-promises=1.2.0.1=r42ha503ecb_2 + - r-proxy=0.4_27=r42h57805ef_2 + - r-pryr=0.1.6=r42ha503ecb_1 + - r-ps=1.7.5=r42h57805ef_1 + - r-r6=2.5.1=r42hc72bb7e_2 + - r-rappdirs=0.3.3=r42h57805ef_2 + - r-rcolorbrewer=1.1_3=r42h785f33e_2 + - r-rcpp=1.0.11=r42h7df8631_0 + - r-rcpparmadillo=0.12.4.1.0=r42h08d816e_0 + - r-rcpptoml=0.2.2=r42ha503ecb_1 + - r-rematch2=2.1.2=r42hc72bb7e_3 + - r-rlang=1.1.1=r42ha503ecb_1 + - r-rmarkdown=2.14=r42h6115d3f_0 + - r-rnaturalearth=0.3.3=r42hc72bb7e_1 + - r-rpmg=2.2_3=r42hc72bb7e_3 + - r-rprojroot=2.0.3=r42hc72bb7e_1 + - r-rstudioapi=0.15.0=r42hc72bb7e_0 + - r-rvest=1.0.3=r42hc72bb7e_2 + - r-s2=1.1.4=r42h5eac2b3_1 + - r-s2dv=1.4.1=r42hc72bb7e_1 + - r-s2dverification=2.10.3=r42hc72bb7e_2 + - r-sass=0.4.7=r42ha503ecb_0 + - r-scales=1.2.1=r42hc72bb7e_2 + - r-selectr=0.4_2=r42hc72bb7e_3 + - r-sf=1.0_7=r42h6a678d5_2 + - r-shiny=1.7.4.1=r42h785f33e_0 + - r-sourcetools=0.1.7_1=r42ha503ecb_1 + - r-sp=1.6_1=r42h57805ef_1 + - r-spam=2.9_1=r42hd9ac46e_2 + - r-specsverification=0.5_3=r42h7525677_2 + - r-splancs=2.01_43=r42h61816a4_2 + - r-startr=2.2.3=r42hc72bb7e_1 + - r-stringi=1.7.12=r42hc0c3e09_2 + - r-stringr=1.5.0=r42h785f33e_1 + - r-svglite=2.1.1=r42h329214f_1 + - r-sys=3.4.2=r42h57805ef_1 + - r-systemfonts=1.0.4=r42haf97adc_2 + - r-terra=1.5_21=r42h884c59f_2 + - r-testthat=3.1.10=r42ha503ecb_0 + - r-tibble=3.2.1=r42h57805ef_2 + - r-tinytex=0.45=r42hc72bb7e_1 + - r-tzdb=0.4.0=r42ha503ecb_1 + - r-units=0.8_2=r42ha503ecb_1 + - r-utf8=1.2.3=r42h57805ef_1 + - r-uuid=1.1_0=r42h57805ef_2 + - r-vctrs=0.6.3=r42ha503ecb_0 + - r-viridis=0.6.4=r42hc72bb7e_0 + - r-viridislite=0.4.2=r42hc72bb7e_1 + - r-waldo=0.5.1=r42hc72bb7e_1 + - r-webshot=0.5.5=r42hc72bb7e_0 + - r-withr=2.5.0=r42hc72bb7e_2 + - r-wk=0.7.3=r42ha503ecb_1 + - r-xfun=0.39=r42ha503ecb_1 + - r-xml2=1.3.5=r42h1ad5fc0_0 + - r-xtable=1.8_4=r42hc72bb7e_5 + - r-yaml=2.3.5=r42h76d94ec_0 + - readline=8.2=h5eee18b_0 + - s2n=1.3.46=h06160fa_0 + - sed=4.8=he412f7d_0 + - setuptools=67.8.0=py311h06a4308_0 + - snappy=1.1.10=h9fff704_0 + - sqlite=3.41.2=h5eee18b_0 + - sysroot_linux-64=2.12=he073ed8_16 + - tiledb=2.13.2=hd532e3d_0 + - tk=8.6.12=h1ccaba5_0 + - tktable=2.10=h14c3975_0 + - tzcode=2023c=h0b41bf4_0 + - tzdata=2023c=h04d1e81_0 + - udunits2=2.2.28=hc3e0081_0 + - wheel=0.38.4=py311h06a4308_0 + - xerces-c=3.2.4=h8d71039_2 + - xorg-fixesproto=5.0=h7f98852_1002 + - xorg-inputproto=2.3.2=h7f98852_1002 + - xorg-kbproto=1.0.7=h7f98852_1002 + - xorg-libice=1.1.1=hd590300_0 + - xorg-libsm=1.2.4=h7391055_0 + - xorg-libx11=1.8.6=h8ee46fc_0 + - xorg-libxau=1.0.11=hd590300_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxfixes=5.0.3=h7f98852_1004 + - xorg-libxi=1.7.10=h7f98852_0 + - xorg-libxrender=0.9.11=hd590300_0 + - xorg-libxt=1.3.0=hd590300_1 + - xorg-renderproto=0.11.1=h7f98852_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xproto=7.0.31=h7f98852_1007 + - xz=5.4.2=h5eee18b_0 + - zlib=1.2.13=hd590300_5 + - zstd=1.5.5=hc292b87_0 +prefix: /home/Earth/pbretonn/miniconda3/envs/ceriseNoAS diff --git a/conda_installation/environment-cerise.yml b/conda_installation/environment-cerise.yml new file mode 100644 index 0000000000000000000000000000000000000000..9be18a853f2a0d12ed0fa18e6a99118b88086eca --- /dev/null +++ b/conda_installation/environment-cerise.yml @@ -0,0 +1,399 @@ +name: condaCerise +channels: + - r + - conda-forge + - defaults +dependencies: + - _libgcc_mutex=0.1=conda_forge + - 
_openmp_mutex=4.5=2_gnu + - _r-mutex=1.0.0=anacondar_1 + - alsa-lib=1.2.8=h166bdaf_0 + - atk-1.0=2.36.0=ha1a6a79_0 + - attr=2.5.1=h166bdaf_1 + - binutils_impl_linux-64=2.40=hf600244_0 + - brotli=1.0.9=h5eee18b_7 + - brotli-bin=1.0.9=h5eee18b_7 + - brotlipy=0.7.0=py37h27cfd23_1003 + - bwidget=1.9.11=1 + - bzip2=1.0.8=h7b6447c_0 + - c-ares=1.19.0=h5eee18b_0 + - ca-certificates=2023.5.7=hbcca054_0 + - cairo=1.16.0=ha61ee94_1014 + - cdo=2.1.1=h4f55462_2 + - certifi=2023.5.7=pyhd8ed1ab_0 + - cffi=1.15.1=py37h5eee18b_3 + - conda=4.14.0=py37h89c1867_0 + - conda-package-handling=2.0.2=py37h06a4308_0 + - conda-package-streaming=0.7.0=py37h06a4308_0 + - cov-core=1.15.0=py_1 + - curl=7.88.1=hdc1c0ab_1 + - cycler=0.11.0=pyhd3eb1b0_0 + - dbus=1.13.18=hb2f20db_0 + - eccodes=2.29.0=h7986f14_1 + - expat=2.5.0=h27087fc_0 + - fftw=3.3.10=nompi_hf0379b8_106 + - findlibs=0.0.2=pyhd8ed1ab_0 + - flit-core=3.6.0=pyhd3eb1b0_0 + - font-ttf-dejavu-sans-mono=2.37=hd3eb1b0_0 + - font-ttf-inconsolata=2.001=hcb22688_0 + - font-ttf-source-code-pro=2.030=hd3eb1b0_0 + - font-ttf-ubuntu=0.83=h8b1ccd4_0 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-anaconda=1=h8fa9717_0 + - fonts-conda-ecosystem=1=hd3eb1b0_0 + - fonttools=4.25.0=pyhd3eb1b0_0 + - freeglut=3.2.2=h9c3ff4c_1 + - freetype=2.12.1=h4a9f257_0 + - fribidi=1.0.10=h7b6447c_0 + - gcc_impl_linux-64=12.2.0=hcc96c02_19 + - gdk-pixbuf=2.42.10=h5eee18b_0 + - gettext=0.21.1=h27087fc_0 + - gfortran_impl_linux-64=12.2.0=h55be85b_19 + - giflib=5.2.1=h5eee18b_3 + - glib=2.74.1=h6239696_1 + - glib-tools=2.74.1=h6239696_1 + - gobject-introspection=1.72.0=py37hbb6d50b_1 + - graphite2=1.3.14=h295c915_1 + - graphviz=7.1.0=h2e5815a_0 + - gsl=2.7.1=h6e86dc7_1 + - gst-plugins-base=1.22.0=h4243ec0_2 + - gstreamer=1.22.0=h25f0c4b_2 + - gstreamer-orc=0.4.33=h166bdaf_0 + - gtk2=2.24.33=h73c1081_2 + - gts=0.7.6=hb67d8dd_3 + - gxx_impl_linux-64=12.2.0=hcc96c02_19 + - harfbuzz=6.0.0=h8e241bc_0 + - hdf4=4.2.15=h9772cbc_5 + - hdf5=1.12.2=nompi_h4df4325_101 + - icu=70.1=h27087fc_0 + - idna=3.4=py37h06a4308_0 + - jack=1.9.22=h11f4161_0 + - jasper=4.0.0=h0ff4b12_0 + - jpeg=9e=h5eee18b_1 + - kernel-headers_linux-64=2.6.32=he073ed8_15 + - keyutils=1.6.1=h166bdaf_0 + - kiwisolver=1.4.4=py37h6a678d5_0 + - krb5=1.20.1=h81ceb04_0 + - lame=3.100=h7b6447c_0 + - lcms2=2.12=h3be6417_0 + - ld_impl_linux-64=2.40=h41732ed_0 + - lerc=3.0=h295c915_0 + - libaec=1.0.6=hcb278e6_1 + - libblas=3.9.0=16_linux64_openblas + - libbrotlicommon=1.0.9=h5eee18b_7 + - libbrotlidec=1.0.9=h5eee18b_7 + - libbrotlienc=1.0.9=h5eee18b_7 + - libcap=2.66=ha37c62d_0 + - libcblas=3.9.0=16_linux64_openblas + - libclang=15.0.7=default_had23c3d_1 + - libclang13=15.0.7=default_h3e3d535_1 + - libcups=2.3.3=h36d4200_3 + - libcurl=7.88.1=hdc1c0ab_1 + - libdb=6.2.32=h6a678d5_1 + - libdeflate=1.17=h5eee18b_0 + - libedit=3.1.20221030=h5eee18b_0 + - libev=4.33=h7f8727e_1 + - libevent=2.1.10=h28343ad_4 + - libffi=3.4.4=h6a678d5_0 + - libflac=1.4.2=h27087fc_0 + - libgcc-devel_linux-64=12.2.0=h3b97bd3_19 + - libgcc-ng=12.2.0=h65d4601_19 + - libgcrypt=1.10.1=h166bdaf_0 + - libgd=2.3.3=h695aa2c_1 + - libgfortran-ng=12.2.0=h69a702a_19 + - libgfortran5=12.2.0=h337968e_19 + - libglib=2.74.1=h606061b_1 + - libglu=9.0.0=hf484d3e_1 + - libgomp=12.2.0=h65d4601_19 + - libgpg-error=1.46=h620e276_0 + - libiconv=1.17=h166bdaf_0 + - liblapack=3.9.0=16_linux64_openblas + - libllvm15=15.0.7=hadd5161_1 + - libnetcdf=4.9.1=nompi_h34a3ff0_101 + - libnghttp2=1.52.0=h61bc06f_0 + - libnsl=2.0.0=h5eee18b_0 + - libogg=1.3.5=h27cfd23_1 + - libopenblas=0.3.21=h043d6bf_0 + - 
libopus=1.3.1=h7b6447c_0 + - libpng=1.6.39=h5eee18b_0 + - libpq=15.2=hb675445_0 + - librsvg=2.54.4=h7abd40a_0 + - libsanitizer=12.2.0=h46fd767_19 + - libsndfile=1.2.0=hb75c966_0 + - libsodium=1.0.18=h7b6447c_0 + - libsqlite=3.40.0=h753d276_0 + - libssh2=1.10.0=hf14f497_3 + - libstdcxx-devel_linux-64=12.2.0=h3b97bd3_19 + - libstdcxx-ng=12.2.0=h46fd767_19 + - libsystemd0=252=h2a991cd_0 + - libtiff=4.5.0=h6a678d5_2 + - libtool=2.4.7=h27087fc_0 + - libudev1=253=h0b41bf4_0 + - libuuid=2.32.1=h7f98852_1000 + - libvorbis=1.3.7=h7b6447c_0 + - libwebp=1.2.4=h11a3e52_1 + - libwebp-base=1.2.4=h5eee18b_1 + - libxcb=1.13=h1bed415_1 + - libxkbcommon=1.5.0=h79f4944_1 + - libxml2=2.10.3=hca2bb57_4 + - libzip=1.9.2=hc929e4a_1 + - libzlib=1.2.13=h166bdaf_4 + - lz4-c=1.9.4=h6a678d5_0 + - magics=4.13.0=h37abd2f_1 + - magics-python=1.5.8=pyhd8ed1ab_1 + - make=4.2.1=h1bed415_1 + - matplotlib=3.5.3=py37h89c1867_2 + - matplotlib-base=3.5.3=py37hf590b9c_0 + - mock=5.0.1=pyhd8ed1ab_0 + - mpg123=1.31.3=hcb278e6_0 + - munkres=1.1.4=py_0 + - mysql-common=8.0.32=ha901b37_0 + - mysql-libs=8.0.32=hd7da12d_0 + - ncurses=6.4=h6a678d5_0 + - ninja=1.10.2=h06a4308_5 + - ninja-base=1.10.2=hd09550d_5 + - nose=1.3.7=py_1006 + - nose-cov=1.6=py_1 + - nspr=4.35=h6a678d5_0 + - nss=3.89=he45b914_0 + - numpy=1.21.6=py37h976b520_0 + - openssl=3.1.1=hd590300_1 + - packaging=22.0=py37h06a4308_0 + - pandoc=2.12=h06a4308_3 + - pango=1.50.14=hd33c08f_0 + - paramiko=3.1.0=pyhd8ed1ab_0 + - pcre2=10.40=hc3806b6_0 + - phantomjs=2.1.1=1 + - pillow=9.4.0=py37h6a678d5_0 + - pip=23.0.1=pyhd8ed1ab_0 + - pixman=0.40.0=h7f8727e_1 + - ply=3.11=py37_0 + - proj=9.1.1=h8ffa02c_2 + - pulseaudio=16.1=ha8d29e2_1 + - pycosat=0.6.4=py37h5eee18b_0 + - pycparser=2.21=pyhd3eb1b0_0 + - pynacl=1.5.0=py37h5eee18b_0 + - pyopenssl=23.0.0=py37h06a4308_0 + - pyparsing=3.0.9=pyhd8ed1ab_0 + - pyqt=5.15.7=py37hf30b843_1 + - pyqt5-sip=12.11.0=py37hd23a5d3_1 + - pysocks=1.7.1=py37_1 + - python=3.7.12=hf930737_100_cpython + - python-dateutil=2.8.2=pyhd8ed1ab_0 + - python_abi=3.7=3_cp37m + - qt-main=5.15.8=h5d23da1_6 + - r-abind=1.4_5=r42hc72bb7e_1004 + - r-askpass=1.1=r42h06615bd_3 + - r-assertthat=0.2.1=r42hc72bb7e_3 + - r-base=4.2.3=ha7d60f8_0 + - r-base64enc=0.1_3=r42h06615bd_1005 + - r-bigmemory=4.6.1=r42h7525677_1 + - r-bigmemory.sri=0.1.6=r42hc72bb7e_0 + - r-brio=1.1.3=r42h76d94ec_0 + - r-bslib=0.4.2=r42hc72bb7e_0 + - r-cachem=1.0.7=r42h133d619_0 + - r-callr=3.7.0=r42h6115d3f_0 + - r-cli=3.6.0=r42h38f115c_0 + - r-climdex.pcic=1.1_11=r42h7525677_1 + - r-climprojdiags=0.3.0=r42hc72bb7e_0 + - r-clock=0.6.1=r42h7525677_1 + - r-codetools=0.2_19=r42hc72bb7e_0 + - r-colorspace=2.1_0=r42h133d619_0 + - r-commonmark=1.9.0=r42h133d619_0 + - r-configr=0.3.5=r42hc72bb7e_0 + - r-cpp11=0.4.3=r42hc72bb7e_0 + - r-crayon=1.5.2=r42hc72bb7e_1 + - r-curl=4.3.3=r42h06615bd_1 + - r-desc=1.4.1=r42h6115d3f_0 + - r-diffobj=0.3.5=r42h76d94ec_0 + - r-digest=0.6.31=r42h38f115c_0 + - r-doparallel=1.0.17=r42hc72bb7e_1 + - r-dotcall64=1.0_2=r42hac0b197_1 + - r-easyncdf=0.1.1=r42hc72bb7e_1 + - r-easyverification=0.4.4=r42h7525677_2 + - r-ellipsis=0.3.2=r42h06615bd_1 + - r-evaluate=0.15=r42h6115d3f_0 + - r-fansi=1.0.4=r42h133d619_0 + - r-farver=2.1.1=r42h7525677_1 + - r-fastmap=1.1.1=r42h38f115c_0 + - r-fields=14.1=r42hac0b197_1 + - r-fontawesome=0.5.1=r42hc72bb7e_0 + - r-foreach=1.5.2=r42hc72bb7e_1 + - r-formattable=0.2.1=r42ha770c72_1 + - r-fs=1.6.1=r42h38f115c_0 + - r-future=1.32.0=r42hc72bb7e_0 + - r-generics=0.1.2=r42h142f84f_0 + - r-geomap=2.5_0=r42h06615bd_1 + - r-geomapdata=2.0_0=r42hc72bb7e_1 
+ - r-ggplot2=3.4.1=r42hc72bb7e_0 + - r-globals=0.16.2=r42hc72bb7e_0 + - r-glue=1.6.2=r42h06615bd_1 + - r-gribr=1.2.5=r42hd590300_3 + - r-gridextra=2.3=r42hc72bb7e_1004 + - r-gtable=0.3.3=r42hc72bb7e_0 + - r-highr=0.10=r42hc72bb7e_0 + - r-htmltools=0.5.5=r42h38f115c_0 + - r-htmlwidgets=1.6.2=r42hc72bb7e_0 + - r-httpuv=1.6.9=r42h38f115c_0 + - r-httr=1.4.6=r42hc72bb7e_0 + - r-ini=0.3.1=r42hc72bb7e_1004 + - r-isoband=0.2.7=r42h38f115c_1 + - r-iterators=1.0.14=r42hc72bb7e_1 + - r-jquerylib=0.1.4=r42hc72bb7e_1 + - r-jsonlite=1.8.0=r42h76d94ec_0 + - r-kableextra=1.3.4=r42hc72bb7e_1 + - r-knitr=1.43=r42hc72bb7e_0 + - r-labeling=0.4.2=r42hc72bb7e_2 + - r-later=1.3.0=r42h7525677_1 + - r-lattice=0.20_45=r42h06615bd_1 + - r-lifecycle=1.0.3=r42hc72bb7e_1 + - r-listenv=0.9.0=r42hc72bb7e_0 + - r-lobstr=1.1.2=r42h38f115c_2 + - r-log4r=0.4.3=r42h06615bd_0 + - r-lubridate=1.8.0=r42h884c59f_0 + - r-magrittr=2.0.3=r42h06615bd_1 + - r-mapproj=1.2.11=r42h133d619_0 + - r-maps=3.4.1=r42h06615bd_1 + - r-mass=7.3_58.3=r42h133d619_0 + - r-matrix=1.5_3=r42h5f7b363_0 + - r-mba=0.1_0=r42h7525677_0 + - r-memoise=2.0.1=r42hc72bb7e_1 + - r-mgcv=1.8_42=r42he1ae0d6_0 + - r-mime=0.12=r42h06615bd_1 + - r-multiapply=2.1.3=r42hc72bb7e_1 + - r-munsell=0.5.0=r42hc72bb7e_1005 + - r-nbclust=3.0.1=r42hc72bb7e_1 + - r-ncdf4=1.21=r42h93dc0a4_1 + - r-nlme=3.1_162=r42hac0b197_0 + - r-openssl=2.0.6=r42habfbb5e_0 + - r-parallelly=1.34.0=r42hc72bb7e_0 + - r-pbapply=1.7_0=r42hc72bb7e_0 + - r-pcict=0.5_4.4=r42h133d619_0 + - r-pillar=1.8.1=r42hc72bb7e_1 + - r-pkgconfig=2.0.3=r42hc72bb7e_2 + - r-pkgload=1.2.4=r42h142f84f_0 + - r-plyr=1.8.8=r42h7525677_0 + - r-praise=1.0.0=r42h6115d3f_4 + - r-prettyunits=1.1.1=r42hc72bb7e_2 + - r-processx=3.5.3=r42h76d94ec_0 + - r-proj4=1.0_12=r42h66e2efa_1 + - r-promises=1.2.0.1=r42h7525677_1 + - r-pryr=0.1.6=r42h38f115c_0 + - r-ps=1.7.0=r42h76d94ec_0 + - r-r6=2.5.1=r42hc72bb7e_1 + - r-rappdirs=0.3.3=r42h06615bd_1 + - r-rcolorbrewer=1.1_3=r42h785f33e_1 + - r-rcpp=1.0.10=r42h38f115c_0 + - r-rcpparmadillo=0.11.4.4.0=r42h358215d_0 + - r-rcpptoml=0.1.7=r42h7525677_2 + - r-rematch2=2.1.2=r42h142f84f_0 + - r-rlang=1.1.0=r42h38f115c_0 + - r-rmarkdown=2.21=r42hc72bb7e_0 + - r-rpmg=2.2_3=r42hc72bb7e_2 + - r-rprojroot=2.0.3=r42h6115d3f_0 + - r-rstudioapi=0.13=r42h6115d3f_0 + - r-rvest=1.0.3=r42hc72bb7e_1 + - r-s2dv=1.4.0=r42hc72bb7e_0 + - r-s2dverification=2.10.3=r42hc72bb7e_1 + - r-sass=0.4.5=r42h38f115c_0 + - r-scales=1.2.1=r42hc72bb7e_1 + - r-selectr=0.4_2=r42hc72bb7e_2 + - r-shiny=1.7.4=r42h785f33e_0 + - r-sourcetools=0.1.7_1=r42h38f115c_0 + - r-sp=1.6_0=r42h133d619_0 + - r-spam=2.9_1=r42hb20cf53_1 + - r-specsverification=0.5_3=r42h7525677_2 + - r-splancs=2.01_43=r42h8da6f51_1 + - r-startr=2.2.1=r42hc72bb7e_0 + - r-stringi=1.7.12=r42h1ae9187_0 + - r-stringr=1.5.0=r42h785f33e_0 + - r-svglite=2.1.1=r42he8f5e61_0 + - r-sys=3.4.2=r42h57805ef_0 + - r-systemfonts=1.0.4=r42h0ff29ef_1 + - r-testthat=3.1.4=r42h884c59f_0 + - r-tibble=3.2.1=r42h133d619_1 + - r-tinytex=0.45=r42hc72bb7e_0 + - r-tzdb=0.3.0=r42h7525677_1 + - r-utf8=1.2.3=r42h133d619_0 + - r-uuid=1.1_0=r42h06615bd_1 + - r-vctrs=0.6.0=r42h38f115c_0 + - r-viridis=0.6.2=r42hc72bb7e_1 + - r-viridislite=0.4.1=r42hc72bb7e_1 + - r-waldo=0.4.0=r42h6115d3f_0 + - r-webshot=0.5.4=r42hc72bb7e_1 + - r-withr=2.5.0=r42hc72bb7e_1 + - r-xfun=0.39=r42ha503ecb_0 + - r-xml2=1.3.3=r42h884c59f_0 + - r-xtable=1.8_4=r42hc72bb7e_4 + - r-yaml=2.3.7=r42h133d619_0 + - readline=8.2=h5eee18b_0 + - requests=2.28.1=py37h06a4308_0 + - ruamel_yaml=0.15.100=py37h27cfd23_0 + - sed=4.8=h7b6447c_0 + - 
simplejson=3.17.6=py37h7f8727e_0 + - sip=6.7.2=py37hd23a5d3_0 + - six=1.16.0=pyhd3eb1b0_1 + - sqlite=3.40.0=h4ff8645_0 + - sysroot_linux-64=2.12=he073ed8_15 + - tk=8.6.12=h1ccaba5_0 + - tktable=2.10=h14c3975_0 + - toml=0.10.2=pyhd3eb1b0_0 + - toolz=0.12.0=py37h06a4308_0 + - tornado=6.2=py37h5eee18b_0 + - typing_extensions=4.4.0=py37h06a4308_0 + - udunits2=2.2.25=hd30922c_1 + - urllib3=1.26.14=py37h06a4308_0 + - wheel=0.38.4=py37h06a4308_0 + - xcb-util=0.4.0=h166bdaf_0 + - xcb-util-image=0.4.0=h166bdaf_0 + - xcb-util-keysyms=0.4.0=h166bdaf_0 + - xcb-util-renderutil=0.3.9=h166bdaf_0 + - xcb-util-wm=0.4.1=h166bdaf_0 + - xkeyboard-config=2.38=h0b41bf4_0 + - xorg-fixesproto=5.0=h7f98852_1002 + - xorg-inputproto=2.3.2=h7f98852_1002 + - xorg-kbproto=1.0.7=h7f98852_1002 + - xorg-libice=1.0.10=h7f98852_0 + - xorg-libsm=1.2.3=hd9c2040_1000 + - xorg-libx11=1.8.4=h0b41bf4_0 + - xorg-libxau=1.0.9=h7f98852_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxfixes=5.0.3=h7f98852_1004 + - xorg-libxi=1.7.10=h7f98852_0 + - xorg-libxrender=0.9.10=h7f98852_1003 + - xorg-libxt=1.2.1=h7f98852_2 + - xorg-renderproto=0.11.1=h7f98852_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xproto=7.0.31=h27cfd23_1007 + - xz=5.2.10=h5eee18b_1 + - yaml=0.2.5=h7b6447c_0 + - zlib=1.2.13=h166bdaf_4 + - zstandard=0.19.0=py37h5eee18b_0 + - zstd=1.5.5=hc292b87_0 + - pip: + - argparse==1.4.0 + - attrs==22.2.0 + - autosubmit==4.0.73 + - autosubmitconfigparser==1.0.21 + - bcrypt==4.0.1 + - bscearth-utils==0.5.2 + - charset-normalizer==3.1.0 + - configobj==5.0.8 + - coverage==7.2.2 + - cryptography==39.0.2 + - exceptiongroup==1.1.1 + - importlib-metadata==6.1.0 + - iniconfig==2.0.0 + - networkx==2.6.3 + - pluggy==1.0.0 + - portalocker==2.7.0 + - py3dotplus==1.1.0 + - pygments==2.14.0 + - pytest==7.2.2 + - pythondialog==3.5.3 + - ruamel-yaml==0.17.21 + - ruamel-yaml-clib==0.2.7 + - setuptools==67.6.0 + - xlib==0.21 + - zipp==3.15.0 +prefix: /esarchive/scratch/pbretonn/conda-cerise/conda/envs/condaCerise diff --git a/conda_installation/environment-sunset.yml b/conda_installation/environment-sunset.yml new file mode 100644 index 0000000000000000000000000000000000000000..27dbd8b96bfb39955ba302c6712bc3a8cca0ef36 --- /dev/null +++ b/conda_installation/environment-sunset.yml @@ -0,0 +1,375 @@ +name: condaSUNSET +channels: + - r + - conda-forge + - defaults +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_gnu + - _r-mutex=1.0.1=anacondar_1 + - atk-1.0=2.38.0=hd4edc92_1 + - binutils_impl_linux-64=2.40=hf600244_0 + - blosc=1.21.5=h0f2a231_0 + - boost-cpp=1.78.0=h5adbc97_2 + - bwidget=1.9.14=ha770c72_1 + - bzip2=1.0.8=h7f98852_4 + - c-ares=1.19.1=hd590300_0 + - ca-certificates=2023.7.22=hbcca054_0 + - cairo=1.16.0=ha61ee94_1014 + - cfitsio=4.1.0=hd9d235c_0 + - curl=8.1.2=h409715c_0 + - expat=2.5.0=hcb278e6_1 + - fftw=3.3.10=nompi_hc118613_108 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=hab24e00_0 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - freeglut=3.2.2=h9c3ff4c_1 + - freetype=2.12.1=h267a509_2 + - freexl=1.0.6=h166bdaf_1 + - fribidi=1.0.10=h36c2ea0_0 + - gcc_impl_linux-64=13.2.0=h338b0a0_2 + - gdal=3.5.2=py311hd39052d_7 + - gdk-pixbuf=2.42.8=hff1cb4f_1 + - geos=3.11.0=h27087fc_0 + - geotiff=1.7.1=ha76d385_4 + - gettext=0.21.1=h27087fc_0 + - gfortran_impl_linux-64=13.2.0=h76e1118_2 + - ghostscript=10.02.0=h59595ed_0 + - giflib=5.2.1=h0b41bf4_3 + - 
gmp=6.2.1=h58526e2_0 + - graphite2=1.3.13=h58526e2_1001 + - graphviz=6.0.2=h99bc08f_0 + - gsl=2.7=he838d99_0 + - gtk2=2.24.33=h90689f9_2 + - gts=0.7.6=h977cf35_4 + - gxx_impl_linux-64=13.2.0=h338b0a0_2 + - harfbuzz=6.0.0=h8e241bc_0 + - hdf4=4.2.15=h9772cbc_5 + - hdf5=1.12.2=nompi_h4df4325_101 + - icu=70.1=h27087fc_0 + - imagemagick=7.1.0_55=pl5321h0d24a18_0 + - jasper=2.0.33=h0ff4b12_1 + - jbig=2.1=h7f98852_2003 + - jpeg=9e=h0b41bf4_3 + - json-c=0.16=hc379101_0 + - kealib=1.4.15=ha7026e8_1 + - kernel-headers_linux-64=2.6.32=he073ed8_16 + - keyutils=1.6.1=h166bdaf_0 + - krb5=1.20.1=h81ceb04_0 + - lcms2=2.14=h6ed2654_0 + - ld_impl_linux-64=2.40=h41732ed_0 + - lerc=4.0.0=h27087fc_0 + - libaec=1.0.6=hcb278e6_1 + - libblas=3.9.0=18_linux64_openblas + - libcblas=3.9.0=18_linux64_openblas + - libcurl=8.1.2=h409715c_0 + - libdap4=3.20.6=hd7c4107_2 + - libdeflate=1.14=h166bdaf_0 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=h516909a_1 + - libexpat=2.5.0=hcb278e6_1 + - libffi=3.4.2=h7f98852_5 + - libgcc-devel_linux-64=13.2.0=ha9c7c90_2 + - libgcc-ng=13.2.0=h807b86a_2 + - libgd=2.3.3=h18fbbfe_3 + - libgdal=3.5.2=h27ae5c1_7 + - libgfortran-ng=13.2.0=h69a702a_2 + - libgfortran5=13.2.0=ha4646dd_2 + - libglib=2.78.0=hebfc3b9_0 + - libglu=9.0.0=he1b5a44_1001 + - libgomp=13.2.0=h807b86a_2 + - libiconv=1.17=h166bdaf_0 + - libkml=1.3.0=h37653c0_1015 + - liblapack=3.9.0=18_linux64_openblas + - libnetcdf=4.8.1=nompi_h261ec11_106 + - libnghttp2=1.52.0=h61bc06f_0 + - libnsl=2.0.0=hd590300_1 + - libopenblas=0.3.24=pthreads_h413a1c8_0 + - libpng=1.6.39=h753d276_0 + - libpq=14.5=hb675445_5 + - librsvg=2.54.4=h7abd40a_0 + - librttopo=1.1.0=hf730bdb_11 + - libsanitizer=13.2.0=h7e041cc_2 + - libspatialite=5.0.1=hfbd986c_21 + - libsqlite=3.43.0=h2797004_0 + - libssh2=1.11.0=h0841786_0 + - libstdcxx-devel_linux-64=13.2.0=ha9c7c90_2 + - libstdcxx-ng=13.2.0=h7e041cc_2 + - libtiff=4.4.0=h82bc61c_5 + - libtool=2.4.7=h27087fc_0 + - libuuid=2.38.1=h0b41bf4_0 + - libwebp=1.2.4=h522a892_0 + - libwebp-base=1.2.4=h166bdaf_0 + - libxcb=1.13=h7f98852_1004 + - libxml2=2.10.3=hca2bb57_4 + - libzip=1.10.1=h2629f0a_3 + - libzlib=1.2.13=hd590300_5 + - lz4-c=1.9.4=hcb278e6_0 + - make=4.3=hd18ef5c_1 + - ncurses=6.4=hcb278e6_0 + - nspr=4.35=h27087fc_0 + - nss=3.92=h1d7d5a4_0 + - numpy=1.26.0=py311h64a7726_0 + - openjpeg=2.5.0=h7d73246_1 + - openssl=3.1.3=hd590300_0 + - pandoc=3.1.3=h32600fe_0 + - pango=1.50.14=hd33c08f_0 + - pcre=8.45=h9c3ff4c_0 + - pcre2=10.40=hc3806b6_0 + - perl=5.32.1=4_hd590300_perl5 + - phantomjs=2.1.1=ha770c72_1 + - pip=23.2.1=pyhd8ed1ab_0 + - pixman=0.42.2=h59595ed_0 + - pkg-config=0.29.2=h36c2ea0_1008 + - poppler=22.10.0=h92391eb_0 + - poppler-data=0.4.12=hd8ed1ab_0 + - postgresql=14.5=h3248436_5 + - proj=9.1.0=h93bde94_0 + - pthread-stubs=0.4=h36c2ea0_1001 + - python=3.11.5=hab00c5b_0_cpython + - python_abi=3.11=4_cp311 + - r-abind=1.4_5=r42hc72bb7e_1005 + - r-askpass=1.2.0=r42h57805ef_0 + - r-assertthat=0.2.1=r42hc72bb7e_4 + - r-backports=1.4.1=r42h57805ef_2 + - r-base=4.2.2=h6b4767f_2 + - r-base64enc=0.1_3=r42h57805ef_1006 + - r-bigmemory=4.6.1=r42ha503ecb_2 + - r-bigmemory.sri=0.1.6=r42hc72bb7e_1 + - r-brio=1.1.3=r42h57805ef_2 + - r-bslib=0.5.1=r42hc72bb7e_0 + - r-cachem=1.0.8=r42h57805ef_1 + - r-callr=3.7.3=r42hc72bb7e_1 + - r-class=7.3_22=r42h57805ef_1 + - r-classint=0.4_10=r42h61816a4_0 + - r-cli=3.6.1=r42ha503ecb_1 + - r-climdex.pcic=1.1_11=r42ha503ecb_2 + - r-climprojdiags=0.3.2=r42hc72bb7e_1 + - r-clock=0.7.0=r42ha503ecb_1 + - r-codetools=0.2_19=r42hc72bb7e_1 + - r-colorspace=2.1_0=r42h57805ef_1 + - 
r-commonmark=1.9.0=r42h57805ef_1 + - r-configr=0.3.5=r42hc72bb7e_1 + - r-cowplot=1.1.1=r42hc72bb7e_2 + - r-cpp11=0.4.6=r42hc72bb7e_0 + - r-crayon=1.5.2=r42hc72bb7e_2 + - r-curl=5.0.1=r42hf9611b0_0 + - r-dbi=1.1.3=r42hc72bb7e_2 + - r-desc=1.4.2=r42hc72bb7e_2 + - r-diffobj=0.3.5=r42h57805ef_2 + - r-digest=0.6.33=r42ha503ecb_0 + - r-docopt=0.7.1=r42hc72bb7e_3 + - r-doparallel=1.0.17=r42hc72bb7e_2 + - r-dotcall64=1.0_2=r42h61816a4_2 + - r-dplyr=1.1.2=r42ha503ecb_1 + - r-e1071=1.7_13=r42ha503ecb_1 + - r-easyncdf=0.1.2=r42hc72bb7e_1 + - r-easyverification=0.4.5=r42ha503ecb_0 + - r-ellipsis=0.3.2=r42h57805ef_2 + - r-evaluate=0.21=r42hc72bb7e_1 + - r-fansi=1.0.4=r42h57805ef_1 + - r-farver=2.1.1=r42ha503ecb_2 + - r-fastmap=1.1.1=r42ha503ecb_1 + - r-fields=15.2=r42h61816a4_0 + - r-fnn=1.1.3.2=r42ha503ecb_1 + - r-fontawesome=0.5.2=r42hc72bb7e_0 + - r-foreach=1.5.2=r42hc72bb7e_2 + - r-formattable=0.2.1=r42ha770c72_2 + - r-fs=1.6.3=r42ha503ecb_0 + - r-future=1.33.0=r42hc72bb7e_0 + - r-generics=0.1.3=r42hc72bb7e_2 + - r-geomap=2.5_0=r42h57805ef_2 + - r-geomapdata=2.0_2=r42hc72bb7e_0 + - r-ggplot2=3.4.3=r42hc72bb7e_0 + - r-globals=0.16.2=r42hc72bb7e_1 + - r-glue=1.6.2=r42h57805ef_2 + - r-gridextra=2.3=r42hc72bb7e_1005 + - r-gridgraphics=0.5_1=r42hc72bb7e_2 + - r-gtable=0.3.4=r42hc72bb7e_0 + - r-highr=0.10=r42hc72bb7e_1 + - r-htmltools=0.5.6=r42ha503ecb_0 + - r-htmlwidgets=1.6.2=r42hc72bb7e_1 + - r-httpuv=1.6.11=r42ha503ecb_1 + - r-httr=1.4.7=r42hc72bb7e_0 + - r-ini=0.3.1=r42hc72bb7e_1005 + - r-isoband=0.2.7=r42ha503ecb_2 + - r-iterators=1.0.14=r42hc72bb7e_2 + - r-jquerylib=0.1.4=r42hc72bb7e_2 + - r-jsonlite=1.8.7=r42h57805ef_0 + - r-kableextra=1.3.4=r42hc72bb7e_2 + - r-kernsmooth=2.23_22=r42h13b3f57_0 + - r-knitr=1.44=r42hc72bb7e_0 + - r-labeling=0.4.3=r42hc72bb7e_0 + - r-later=1.3.1=r42ha503ecb_1 + - r-lattice=0.21_8=r42h57805ef_1 + - r-lifecycle=1.0.3=r42hc72bb7e_2 + - r-listenv=0.9.0=r42hc72bb7e_1 + - r-lobstr=1.1.2=r42ha503ecb_3 + - r-log4r=0.4.3=r42h57805ef_1 + - r-lubridate=1.9.2=r42h57805ef_2 + - r-magick=2.7.3=r42h7525677_1 + - r-magrittr=2.0.3=r42h57805ef_2 + - r-mapproj=1.2.11=r42h57805ef_1 + - r-maps=3.4.1=r42h57805ef_2 + - r-mass=7.3_60=r42h57805ef_1 + - r-matrix=1.6_1.1=r42h316c678_0 + - r-mba=0.1_0=r42ha503ecb_1 + - r-memoise=2.0.1=r42hc72bb7e_2 + - r-memuse=4.2_3=r42h57805ef_1 + - r-mgcv=1.9_0=r42h316c678_0 + - r-mime=0.12=r42h57805ef_2 + - r-multiapply=2.1.4=r42hc72bb7e_1 + - r-munsell=0.5.0=r42hc72bb7e_1006 + - r-nbclust=3.0.1=r42hc72bb7e_2 + - r-ncdf4=1.21=r42h15f2bca_0 + - r-nlme=3.1_163=r42h61816a4_0 + - r-nnet=7.3_19=r42h57805ef_1 + - r-openssl=2.1.1=r42hb353fa6_0 + - r-parallelly=1.36.0=r42hc72bb7e_1 + - r-pbapply=1.7_2=r42hc72bb7e_0 + - r-pcict=0.5_4.4=r42h57805ef_1 + - r-pillar=1.9.0=r42hc72bb7e_1 + - r-pkgbuild=1.4.2=r42hc72bb7e_0 + - r-pkgconfig=2.0.3=r42hc72bb7e_3 + - r-pkgload=1.3.3=r42hc72bb7e_0 + - r-plyr=1.8.8=r42ha503ecb_1 + - r-praise=1.0.0=r42hc72bb7e_1007 + - r-prettyunits=1.2.0=r42hc72bb7e_0 + - r-processx=3.8.2=r42h57805ef_0 + - r-proj4=1.0_12=r42h4db2be8_0 + - r-promises=1.2.1=r42ha503ecb_0 + - r-proxy=0.4_27=r42h57805ef_2 + - r-pryr=0.1.6=r42ha503ecb_1 + - r-ps=1.7.5=r42h57805ef_1 + - r-r6=2.5.1=r42hc72bb7e_2 + - r-rappdirs=0.3.3=r42h57805ef_2 + - r-rcolorbrewer=1.1_3=r42h6115d3f_0 + - r-rcpp=1.0.11=r42h7df8631_0 + - r-rcpparmadillo=0.12.6.4.0=r42h08d816e_0 + - r-rcpptoml=0.2.2=r42ha503ecb_1 + - r-rematch2=2.1.2=r42hc72bb7e_3 + - r-rlang=1.1.1=r42ha503ecb_1 + - r-rmarkdown=2.25=r42hc72bb7e_0 + - r-rnaturalearth=0.1.0=r42hc72bb7e_1 + - r-rpmg=2.2_7=r42hc72bb7e_0 + - 
r-rprojroot=2.0.3=r42hc72bb7e_1 + - r-rstudioapi=0.15.0=r42hc72bb7e_0 + - r-rvest=1.0.3=r42hc72bb7e_2 + - r-s2=1.1.4=r42h5eac2b3_1 + - r-s2dv=1.4.1=r42hc72bb7e_1 + - r-s2dverification=2.10.3=r42hc72bb7e_2 + - r-sass=0.4.7=r42ha503ecb_0 + - r-scales=1.2.1=r42hc72bb7e_2 + - r-selectr=0.4_2=r42hc72bb7e_3 + - r-sf=1.0_7=r42h25da31b_5 + - r-shiny=1.7.5=r42h785f33e_0 + - r-sourcetools=0.1.7_1=r42ha503ecb_1 + - r-sp=2.0_0=r42h57805ef_0 + - r-spam=2.9_1=r42hd9ac46e_2 + - r-specsverification=0.5_3=r42h7525677_2 + - r-splancs=2.01_44=r42h61816a4_0 + - r-startr=2.3.0=r42hc72bb7e_0 + - r-stringi=1.7.12=r42h1ae9187_0 + - r-stringr=1.5.0=r42h785f33e_1 + - r-svglite=2.1.1=r42h329214f_1 + - r-sys=3.4.2=r42h57805ef_1 + - r-systemfonts=1.0.4=r42haf97adc_2 + - r-testthat=3.1.10=r42ha503ecb_0 + - r-tibble=3.2.1=r42h57805ef_2 + - r-timechange=0.2.0=r42ha503ecb_1 + - r-tinytex=0.46=r42hc72bb7e_0 + - r-tzdb=0.4.0=r42ha503ecb_1 + - r-units=0.8_4=r42ha503ecb_0 + - r-utf8=1.2.3=r42h57805ef_1 + - r-uuid=1.1_1=r42h57805ef_0 + - r-vctrs=0.6.3=r42ha503ecb_0 + - r-viridis=0.6.4=r42hc72bb7e_0 + - r-viridislite=0.4.2=r42hc72bb7e_1 + - r-waldo=0.5.1=r42hc72bb7e_1 + - r-webshot=0.5.5=r42hc72bb7e_0 + - r-withr=2.5.1=r42hc72bb7e_0 + - r-wk=0.8.0=r42ha503ecb_0 + - r-xfun=0.40=r42ha503ecb_0 + - r-xml2=1.3.3=r42h044e5c7_2 + - r-xtable=1.8_4=r42hc72bb7e_5 + - r-yaml=2.3.5=r42h06615bd_1 + - readline=8.2=h8228510_1 + - sed=4.8=he412f7d_0 + - setuptools=68.2.2=pyhd8ed1ab_0 + - snappy=1.1.10=h9fff704_0 + - sqlite=3.43.0=h2c6b66d_0 + - sysroot_linux-64=2.12=he073ed8_16 + - tiledb=2.11.3=h3f4058f_1 + - tk=8.6.13=h2797004_0 + - tktable=2.10=h0c5db8f_5 + - tzcode=2023c=h0b41bf4_0 + - tzdata=2023c=h71feb2d_0 + - udunits2=2.2.28=h40f5838_2 + - wheel=0.41.2=pyhd8ed1ab_0 + - xerces-c=3.2.4=h55805fa_1 + - xorg-fixesproto=5.0=h7f98852_1002 + - xorg-inputproto=2.3.2=h7f98852_1002 + - xorg-kbproto=1.0.7=h7f98852_1002 + - xorg-libice=1.0.10=h7f98852_0 + - xorg-libsm=1.2.3=hd9c2040_1000 + - xorg-libx11=1.8.4=h0b41bf4_0 + - xorg-libxau=1.0.11=hd590300_0 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxfixes=5.0.3=h7f98852_1004 + - xorg-libxi=1.7.10=h7f98852_0 + - xorg-libxrender=0.9.10=h7f98852_1003 + - xorg-libxt=1.3.0=hd590300_0 + - xorg-renderproto=0.11.1=h7f98852_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xproto=7.0.31=h7f98852_1007 + - xz=5.2.6=h166bdaf_0 + - zlib=1.2.13=hd590300_5 + - zstd=1.5.5=hfc55251_0 + - pip: + - argparse==1.4.0 + - autosubmit==4.0.98 + - autosubmitconfigparser==1.0.49 + - bcrypt==4.0.1 + - bscearth-utils==0.5.2 + - cdo==1.6.0 + - certifi==2023.7.22 + - cffi==1.16.0 + - charset-normalizer==3.3.1 + - configobj==5.0.8 + - coverage==7.3.2 + - cryptography==41.0.5 + - cycler==0.12.1 + - cython==3.0.4 + - fonttools==4.43.1 + - idna==3.4 + - iniconfig==2.0.0 + - kiwisolver==1.4.5 + - matplotlib==3.5.3 + - mock==5.1.0 + - networkx==2.6.3 + - nose==1.3.7 + - packaging==23.2 + - paramiko==3.3.1 + - pillow==10.1.0 + - pluggy==1.3.0 + - portalocker==2.7.0 + - psutil==5.9.6 + - py3dotplus==1.1.0 + - pycparser==2.21 + - pygments==2.16.1 + - pynacl==1.5.0 + - pyparsing==3.1.1 + - pytest==7.4.3 + - python-dateutil==2.8.2 + - pythondialog==3.5.3 + - requests==2.31.0 + - ruamel-yaml==0.17.21 + - six==1.16.0 + - urllib3==2.0.7 + - xlib==0.21 +# prefix: /perm/cyce/conda/envs/condaCerise diff --git a/conda_installation/gribr_1.2.5.tar.gz b/conda_installation/gribr_1.2.5.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..2da172102e503c5e62b0da499626f2bc628aed4c Binary files 
/dev/null and b/conda_installation/gribr_1.2.5.tar.gz differ diff --git a/conda_installation/load_cerise.bash b/conda_installation/load_cerise.bash new file mode 100755 index 0000000000000000000000000000000000000000..3a10f3f6143f5afb73e65fd2b778941ac937df4b --- /dev/null +++ b/conda_installation/load_cerise.bash @@ -0,0 +1,17 @@ +#!/bin/bash + +prefix=$1 +local_gribr=$2 + + +if [[ -z $prefix ]]; then + conda env create --file environment-cerise${local_gribr}.yml +else + conda env create --file environment-cerise${local_gribr}.yml --prefix $prefix +fi +conda activate condaCerise +[[ -n ${local_gribr} ]] && module load ecCodes + +R -e "options(timeout = 600) ; install.packages('CSTools', repos='https://ftp.cixug.es/CRAN/')" +[[ -n ${local_gribr} ]] && R -e "options(timeout = 600) ; install.packages('conda_installation/gribr_1.2.5.tar.gz', repos = NULL)" +#conda env export > environment-cerise${local_gribr}.yml diff --git a/conda_installation/load_sunset.bash b/conda_installation/load_sunset.bash new file mode 100755 index 0000000000000000000000000000000000000000..add332b4a99210f3a890b14da77ee466f8b2ba8d --- /dev/null +++ b/conda_installation/load_sunset.bash @@ -0,0 +1,9 @@ +#!/bin/bash + +prefix=$1 + +conda env create --file environment-sunset.yml --prefix $prefix + +conda activate $prefix + +R -e "options(timeout = 600) ; install.packages('CSTools', repos='https://ftp.cixug.es/CRAN/')" diff --git a/conf/archive.yml b/conf/archive.yml index 1f226a0745bf9f4f49edd560f5d68a7c70489dce..61f62be230b4ff05021a60a17e38e5e4d446cff9 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -1,39 +1,47 @@ -archive: +esarchive: src: "/esarchive/" System: ECMWF-SEAS5: name: "ECMWF SEAS5" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ecmwf/system5c3s/" - daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", - "prlr":"_s0-24h/", "sfcWind":"_f6h/", - "tasmin":"_f24h/", "tasmax":"_f24h/", - "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", - "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/", - "tdps":"_f6h/", "hurs":"_f6h/"} - monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", - "prlr":"_s0-24h/", "sfcWind":"_f6h/", - "tasmin":"_f24h/", "tasmax":"_f24h/", - "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", - "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/", - "tdps":"_f6h/"} + daily_mean: {"tas":"daily_mean/tas_f6h/", "rsds":"daily/rsds_s0-24h/", + "prlr":"daily/prlr_s0-24h/", "tasmin":"daily/tasmin/", + "tasmax":"daily/tasmax/", "sfcWind":"daily_mean/sfcWind_f6h/", + "ta300":"daily_mean/ta300_f12h/", "ta500":"daily_mean/ta500_f12h/", + "ta850":"daily_mean/ta850_f12h/", "g300":"daily_mean/g300_f12h/", + "g500":"daily_mean/g500_f12h/", "g850":"daily_mean/g850_f12h/", + "tdps":"daily_mean/tdps_f6h/", "hurs":"daily_mean/hurs_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "rsds":"monthly_mean/rsds_s0-24h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", + "ta300":"monthly_mean/ta300_f12h/", "ta500":"monthly_mean/ta500_f12h/", + "ta850":"monthly_mean/ta850_f12h/", "g300":"monthly_mean/g300_f12h/", + "g500":"monthly_mean/g500_f12h/", "g850":"monthly_mean/g500_f12h/", + "tdps":"monthly_mean/tdps_f6h/", "psl":"monthly_mean/psl_f6h/", + "tos":"monthly_mean/tos_f6h/", "sic":"monthly_mean/sic_f24h/"} nmember: fcst: 51 hcst: 25 calendar: "proleptic_gregorian" time_stamp_lag: "0" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + 
land_sea_mask: "/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc" ECMWF-SEAS5.1: name: "ECMWF SEAS5 (v5.1)" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ecmwf/system51c3s/" - daily_mean: {"tas":"_f6h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/", - "uas":"_f6h/", "vas":"_f6h/", "psl":"_f6h/", - "tdps":"_f6h/"} - monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", - "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/", - "uas":"_f6h/", "vas":"_f6h/", "psl":"_f6h/", - "tdps":"_f6h/"} + daily_mean: {"tas":"daily_mean/tas_f6h/", "prlr":"daily/prlr_s0-24h/", + "sfcWind":"daily_mean/sfcWind_f6h/", + "uas":"daily_mean/uas_f6h/", "vas":"daily_mean/vas_f6h/", + "psl":"daily_mean/psl_f6h/", "tdps":"daily_mean/tdps_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "rsds":"monthly_mean/rsds_s0-24h/", + "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", + "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", + "uas":"monthly_mean/uas_f6h/", "vas":"monthly_mean/vas_f6h/", + "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} nmember: fcst: 51 hcst: 25 @@ -44,9 +52,10 @@ archive: name: "Meteo-France System 7" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/meteofrance/system7c3s/" - monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", - "prlr":"_f24h/", "sfcWind": "_f6h/", - "tasmax":"_f6h/", "tasmin": "_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "g500":"monthly_mean/g500_f12h/", + "prlr":"monthly_mean/prlr_f24h/", "sfcWind": "monthly_mean/sfcWind_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", + "tos":"monthly_mean/tos_f6h/"} nmember: fcst: 51 hcst: 25 @@ -57,9 +66,9 @@ archive: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/dwd/system21_m1/" - monthly_mean: {"tas":"_f6h/", "prlr":"_f24h/", - "g500":"_f12h/", "sfcWind":"_f6h/", - "tasmin":"_f24h/", "tasmax":"_f24h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", + "g500":"monthly_mean/g500_f12h/", "sfcWind":"monthly_mean/sfcWind_f6h/", + "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/"} nmember: fcst: 50 hcst: 30 @@ -70,9 +79,9 @@ archive: name: "CMCC-SPS3.5" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/cmcc/system35c3s/" - monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", - "prlr":"_f24h/", "sfcWind": "_f6h/", - "tasmax":"_f24h/", "tasmin":"_f24h"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", + "g500":"monthly_mean/g500_f12h/", "sfcWind":"monthly_mean/sfcWind_f6h/", + "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/"} nmember: fcst: 50 hcst: 40 @@ -83,8 +92,8 @@ archive: name: "JMA System 2" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/jma/system2c3s/" - monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", - "tasmax":"_f6h/", "tasmin":"_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} nmember: fcst: 10 hcst: 10 @@ -95,8 +104,8 @@ archive: name: "ECCC CanCM4i" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/eccc/eccc1/" - monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", - "tasmax":"_f6h/", "tasmin":"_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", + 
"tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} nmember: fcst: 10 hcst: 10 @@ -107,8 +116,8 @@ archive: name: "UK MetOffice GloSea 6 (v6.0)" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ukmo/glosea6_system600-c3s/" - monthly_mean: {"tas":"_f6h/", "tasmin":"_f24h/", - "tasmax":"_f24h/", "prlr":"_f24h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f24h/"} nmember: fcst: 62 hcst: 28 @@ -119,8 +128,8 @@ archive: name: "NCEP CFSv2" institution: "NOAA NCEP" #? src: "exp/ncep/cfs-v2/" - monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", - "tasmax":"_f6h/", "tasmin":"_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} nmember: fcst: 20 hcst: 20 @@ -132,52 +141,139 @@ archive: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" - daily_mean: {"tas":"_f1h-r1440x721cds/", - "rsds":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/", - "g300":"_f1h-r1440x721cds/", - "g500":"_f1h-r1440x721cds/", - "g850":"_f1h-r1440x721cds/", - "sfcWind":"_f1h-r1440x721cds/", - "tasmax":"_f1h-r1440x721cds/", - "tasmin":"_f1h-r1440x721cds/", - "ta300":"_f1h-r1440x721cds/", - "ta500":"_f1h-r1440x721cds/", - "ta850":"_f1h-r1440x721cds/", - "hurs":"_f1h-r1440x721cds/"} - monthly_mean: {"tas":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/", - "rsds":"_f1h-r1440x721cds/", - "g300":"_f1h-r1440x721cds/", - "g500":"_f1h-r1440x721cds/", - "g850":"_f1h-r1440x721cds/", - "sfcWind":"_f1h-r1440x721cds/", - "tasmax":"_f1h-r1440x721cds/", - "tasmin":"_f1h-r1440x721cds/", - "ta300":"_f1h-r1440x721cds/", - "ta500":"_f1h-r1440x721cds/", - "ta850":"_f1h-r1440x721cds/"} + daily_mean: {"tas":"daily_mean/tas_f1h-r1440x721cds/", + "rsds":"daily_mean/rsds_f1h-r1440x721cds/", + "prlr":"daily_mean/prlr_f1h-r1440x721cds/", + "g300":"daily_mean/g300_f1h-r1440x721cds/", + "g500":"daily_mean/g500_f1h-r1440x721cds/", + "g850":"daily_mean/g850_f1h-r1440x721cds/", + "sfcWind":"daily_mean/sfcWind_f1h-r1440x721cds/", + "tasmax":"daily/tasmax_f1h-r1440x721cds/", + "tasmin":"daily/tasmin_f1h-r1440x721cds/", + "ta300":"daily_mean/ta300_f1h-r1440x721cds/", + "ta500":"daily_mean/ta500_f1h-r1440x721cds/", + "ta850":"daily_mean/ta850_f1h-r1440x721cds/", + "hurs":"daily_mean/hurs_f1h-r1440x721cds/"} + monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + "rsds":"monthly_mean/rsds_f1h-r1440x721cds/", + "g300":"monthly_mean/g300_f1h-r1440x721cds/", + "g500":"monthly_mean/g500_f1h-r1440x721cds/", + "g850":"monthly_mean/g850_f1h-r1440x721cds/", + "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/", + "tasmax":"monthly_mean/tasmax_f1h-r1440x721cds/", + "tasmin":"monthly_mean/tasmin_f1h-r1440x721cds/", + "ta300":"montly_mean/ta300_f1h-r1440x721cds/", + "ta500":"monthly_mean/ta500_f1h-r1440x721cds/", + "ta850":"monthly_mean/ta850_f1h-r1440x721cds/", + "tos":"monthly_mean/tos_f1h-r1440x721cds/", + "sic":"monthly_mean/sic_f1h-r1440x721cds/"} calendar: "standard" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" ERA5-Land: name: "ERA5-Land" institution: "European Centre for Medium-Range Weather Forecasts" src: 
"recon/ecmwf/era5land/" - daily_mean: {"tas":"_f1h/", "rsds":"_f1h/", - "prlr":"_f1h/", "sfcWind":"_f1h/"} - monthly_mean: {"tas":"_f1h/","tasmin":"_f24h/", - "tasmax":"_f24h/", "prlr":"_f1h/", - "sfcWind":"_f1h/", "rsds":"_f1h/", - "tdps":"_f1h/"} + daily_mean: {"tas":"daily_mean/tas_f1h/", "rsds":"daily_mean/rsds_f1h/", + "prlr":"daily_mean/prlr_f1h/", "sfcWind":"daily_mean/sfcWind_f1h/", + "tasmin":"daily/tasmin/", "tasmax":"daily/tasmax/"} + monthly_mean: {"tas":"monthly_mean/tas_f1h/","tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f1h/", + "sfcWind":"monthly_mean/sfcWind_f1h/", "rsds":"monthly_mean/rsds_f1h/", + "tdps":"monthly_mean/tdps_f1h/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" UERRA: name: "ECMWF UERRA" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/uerra_mescan/" - daily_mean: {"tas":"_f6h/"} - monthly_mean: {"tas":"_f6h/"} + daily_mean: {"tas":"daily_mean/tas_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/uerra_mescan/daily_mean/tas_f6h/tas_201805.nc" + CERRA: + name: "ECMWF CERRA" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/cerra/" + daily_mean: {"hurs":"daily_mean/hurs_f3h-r2631x1113/", "ps":"daily_mean/ps_f3h-r2631x1113/", + "sfcWind":"daily_mean/sfcWind_f3h-r2631x1113/", + "tas":"daily_mean/tas_f3h-r2631x1113/", "winddir":"daily_mean/tas_f3h-r2631x1113/"} + monthly_mean: {"hurs":"monthly_mean/hurs_f3h-r2631x1113/", "ps":"monthly_mean/ps_f3h-r2631x1113/", + "sfcWind":"monthly_mean/sfcWind_f3h-r2631x1113/", + "tas":"monthly_mean/tas_f3h-r2631x1113/", + "winddir":"monthly_mean/winddir_f3h-r2631x1113/", + "tasmin":"monthly_mean/tasmin_f24h-r2631x1113/", + "tasmax":"monthly_mean/tasmax_f24h-r2631x1113/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/cerra/monthly_mean/tas_f3h-r2631x1113/tas_200506.nc" + CERRA-Land: + name: "ECMWF CERRA-Land" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/cerraland/" + daily_mean: {"prlr":"daily_mean/prlr_f6h-r2631x1113/"} + monthly_mean: {"prlr":"monthly_mean/prlr_f6h-r2631x1113/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/cerraland/monthly_mean/prlr_f6h-r2631x1113/prlr_200412.nc" + HadCRUT5: + name: "HadCRUT5" + institution: "Met Office" + src: "obs/ukmo/hadcrut_v5.0_analysis/" + monthly_mean: {"tasanomaly":"monthly_mean/tasanomaly/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/obs/ukmo/hadcrut_v5.0_analysis/monthly_mean/tasanomaly/tasanomaly_202001.nc" + BEST: + name: "BEST" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "obs/berkeleyearth/berkeleyearth/" + daily_mean: {"tas":"daily_mean/tas/"} + monthly_mean: {"tas":"monthly_mean/tas/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/obs/berkeleyearth/berkeleyearth/monthly_mean/tas/tas_201805.nc" +mars: + src: "/esarchive/scratch/aho/tmp/GRIB/" #"/mars/" + System: + ECMWF-SEAS5: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "GRIB_system5_tas_CORRECTED/" + monthly_mean: {"tas":""} + nmember: + fcst: 51 + hcst: 51 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" + Reference: + ERA5: + name: "ERA5" + institution: 
"European Centre for Medium-Range Weather Forecasts" + src: "GRIB_era5_tas/" + monthly_mean: {"tas":""} + calendar: "standard" + reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" +sample: + src: + System: + ECMWF-SEAS5.1: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: + monthly_mean: {"tas":"", "prlr":""} + nmember: + fcst: 15 + hcst: 15 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_GRIB_system51_m1.txt" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "GRIB_era5_tas/" + monthly_mean: {"tas":"", "prlr":""} + calendar: "standard" + reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 2b74bff89ab2c27db06ba8e46a55b125fee21151..c25d8d3a86fb08670e1556db0b9281adf5e43d4c 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -1,4 +1,4 @@ -archive: +esarchive: src: "/esarchive/" System: # ---- @@ -91,12 +91,12 @@ archive: first_dcppB_syear: 2019 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"} - grid: {"tas":"gr", "psl":"gr", "pr":"gr", "ts":"gr", "tos":"gr"} + grid: {"tas":"gn", "psl":"gr", "pr":"gr", "ts":"gr", "tos":"gr"} #version depends on member and variable - version: {"tas":"v20200316", "psl":"v20200316", "pr":"v20200316", "ts":"v20200316", "tos":"v20200417"} + version: {"tas":"v20200417", "psl":"v20200316", "pr":"v20200316", "ts":"v20200316", "tos":"v20200417"} daily_mean: - grid: {"tas":"gn"} - version: {"tasmin":"v20200101", "tasmax":"v20200101", "pr":"v20200417"} + grid: {"tasmin":"gn", "tasmax":"gn", "pr":"gn"} + version: {"tasmin":"v20200417", "tasmax":"v20200417", "pr":"v20200417"} calendar: "360-day" member: r1i1p1f2,r2i1p1f2,r3i1p1f2,r4i1p1f2,r5i1p1f2,r6i1p1f2,r7i1p1f2,r8i1p1f2,r9i1p1f2,r10i1p1f2 initial_month: 11 @@ -132,10 +132,10 @@ archive: fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/" first_dcppB_syear: 2020 monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon", "tos":"Omon"} - grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} - version: {"tas":"v20190429", "pr":"v20190429", "psl":"v20190429", "tasmin":"v20190429", "tasmax":"v20190429"} + grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn", "tos":"gr"} + version: {"tas":"v20190429", "pr":"v20190429", "psl":"v20190429", "tasmin":"v20190429", "tasmax":"v20190429", "tos":"v20190429"} daily_mean: grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} version: {"pr":"v20190429", "tas":"v20190429", "tasmax":"v20190429", "tasmin":"v20190429"} diff --git a/conf/autosubmit.yml b/conf/autosubmit.yml new file mode 100644 index 0000000000000000000000000000000000000000..4ff15ffd24e2bac63543cc792e7004f22953a6ab --- /dev/null +++ b/conf/autosubmit.yml @@ -0,0 +1,14 @@ +esarchive: + platform: nord3v2 + module_version: autosubmit/4.0.0b-foss-2015a-Python-3.7.3 + auto_version: 4.0.0 + conf_format: yaml + experiment_dir: /esarchive/autosubmit/ + userID: bsc32 +mars: + platform: NORD3 ## TO BE CHANGED + module_version: autosubmit/4.0.0b-foss-2015a-Python-3.7.3 ## TO BE CHANGED + auto_version: 4.0.0 + conf_format: yaml + experiment_dir: /esarchive/autosubmit/ ## TO BE CHANGED + userID: 
bsc32 ## TO BE CHANGED diff --git a/conf/grid_description/griddes_GRIB_system5_m1.txt b/conf/grid_description/griddes_GRIB_system5_m1.txt new file mode 100644 index 0000000000000000000000000000000000000000..7e38b73a3cc887d01693e7e966b05eb2b65fc6a1 --- /dev/null +++ b/conf/grid_description/griddes_GRIB_system5_m1.txt @@ -0,0 +1,246 @@ +# +# gridID system5_m1 +# +gridtype = lonlat +gridsize = 819200 +xname = longitude +xlongname = longitude +xunits = degrees_east +yname = latitude +ylongname = latitude +yunits = degrees_north +xsize = 1280 +ysize = 640 +xvals = 0 0.2812502 0.5625004 0.8437506 1.125001 1.406251 1.687501 1.968751 2.250002 + 2.531252 2.812502 3.093752 3.375002 3.656253 3.937503 4.218753 4.500003 +4.781253 5.062504 5.343754 5.625004 5.906254 6.187504 6.468754 6.750005 +7.031255 7.312505 7.593755 7.875005 8.156256 8.437506 8.718756 9.000006 +9.281256 9.562507 9.843757 10.12501 10.40626 10.68751 10.96876 11.25001 +11.53126 11.81251 12.09376 12.37501 12.65626 12.93751 13.21876 13.50001 +13.78126 14.06251 14.34376 14.62501 14.90626 15.18751 15.46876 15.75001 +16.03126 16.31251 16.59376 16.87501 17.15626 17.43751 17.71876 18.00001 +18.28126 18.56251 18.84376 19.12501 19.40626 19.68751 19.96876 20.25001 +20.53126 20.81251 21.09376 21.37501 21.65627 21.93752 22.21877 22.50002 +22.78127 23.06252 23.34377 23.62502 23.90627 24.18752 24.46877 24.75002 +25.03127 25.31252 25.59377 25.87502 26.15627 26.43752 26.71877 27.00002 +27.28127 27.56252 27.84377 28.12502 28.40627 28.68752 28.96877 29.25002 +29.53127 29.81252 30.09377 30.37502 30.65627 30.93752 31.21877 31.50002 +31.78127 32.06252 32.34377 32.62502 32.90627 33.18752 33.46877 33.75002 +34.03127 34.31252 34.59377 34.87502 35.15627 35.43752 35.71877 36.00003 +36.28128 36.56253 36.84378 37.12503 37.40628 37.68753 37.96878 38.25003 +38.53128 38.81253 39.09378 39.37503 39.65628 39.93753 40.21878 40.50003 +40.78128 41.06253 41.34378 41.62503 41.90628 42.18753 42.46878 42.75003 +43.03128 43.31253 43.59378 43.87503 44.15628 44.43753 44.71878 45.00003 +45.28128 45.56253 45.84378 46.12503 46.40628 46.68753 46.96878 47.25003 +47.53128 47.81253 48.09378 48.37503 48.65628 48.93753 49.21878 49.50003 +49.78128 50.06253 50.34378 50.62504 50.90629 51.18754 51.46879 51.75004 +52.03129 52.31254 52.59379 52.87504 53.15629 53.43754 53.71879 54.00004 +54.28129 54.56254 54.84379 55.12504 55.40629 55.68754 55.96879 56.25004 +56.53129 56.81254 57.09379 57.37504 57.65629 57.93754 58.21879 58.50004 +58.78129 59.06254 59.34379 59.62504 59.90629 60.18754 60.46879 60.75004 +61.03129 61.31254 61.59379 61.87504 62.15629 62.43754 62.71879 63.00004 +63.28129 63.56254 63.84379 64.12504 64.40629 64.68754 64.9688 65.25005 65.5313 +65.81255 66.0938 66.37505 66.6563 66.93755 67.2188 67.50005 67.7813 68.06255 +68.3438 68.62505 68.9063 69.18755 69.4688 69.75005 70.0313 70.31255 70.5938 +70.87505 71.1563 71.43755 71.7188 72.00005 72.2813 72.56255 72.8438 73.12505 +73.4063 73.68755 73.9688 74.25005 74.5313 74.81255 75.0938 75.37505 75.6563 +75.93755 76.2188 76.50005 76.7813 77.06255 77.3438 77.62505 77.9063 78.18755 +78.4688 78.75005 79.0313 79.31256 79.59381 79.87506 80.15631 80.43756 80.71881 +81.00006 81.28131 81.56256 81.84381 82.12506 82.40631 82.68756 82.96881 +83.25006 83.53131 83.81256 84.09381 84.37506 84.65631 84.93756 85.21881 +85.50006 85.78131 86.06256 86.34381 86.62506 86.90631 87.18756 87.46881 +87.75006 88.03131 88.31256 88.59381 88.87506 89.15631 89.43756 89.71881 +90.00006 90.28131 90.56256 90.84381 91.12506 91.40631 91.68756 91.96881 +92.25006 92.53131 
92.81256 93.09381 93.37506 93.65632 93.93757 94.21882 +94.50007 94.78132 95.06257 95.34382 95.62507 95.90632 96.18757 96.46882 +96.75007 97.03132 97.31257 97.59382 97.87507 98.15632 98.43757 98.71882 +99.00007 99.28132 99.56257 99.84382 100.1251 100.4063 100.6876 100.9688 +101.2501 101.5313 101.8126 102.0938 102.3751 102.6563 102.9376 103.2188 +103.5001 103.7813 104.0626 104.3438 104.6251 104.9063 105.1876 105.4688 +105.7501 106.0313 106.3126 106.5938 106.8751 107.1563 107.4376 107.7188 +108.0001 108.2813 108.5626 108.8438 109.1251 109.4063 109.6876 109.9688 +110.2501 110.5313 110.8126 111.0938 111.3751 111.6563 111.9376 112.2188 +112.5001 112.7813 113.0626 113.3438 113.6251 113.9063 114.1876 114.4688 +114.7501 115.0313 115.3126 115.5938 115.8751 116.1563 116.4376 116.7188 +117.0001 117.2813 117.5626 117.8438 118.1251 118.4063 118.6876 118.9688 +119.2501 119.5313 119.8126 120.0938 120.3751 120.6563 120.9376 121.2188 +121.5001 121.7813 122.0626 122.3438 122.6251 122.9063 123.1876 123.4688 +123.7501 124.0313 124.3126 124.5938 124.8751 125.1563 125.4376 125.7188 +126.0001 126.2813 126.5626 126.8438 127.1251 127.4063 127.6876 127.9688 +128.2501 128.5313 128.8126 129.0938 129.3751 129.6563 129.9376 130.2188 +130.5001 130.7813 131.0626 131.3438 131.6251 131.9063 132.1876 132.4688 +132.7501 133.0313 133.3126 133.5938 133.8751 134.1563 134.4376 134.7188 +135.0001 135.2813 135.5626 135.8438 136.1251 136.4063 136.6876 136.9688 +137.2501 137.5313 137.8126 138.0938 138.3751 138.6563 138.9376 139.2188 +139.5001 139.7813 140.0626 140.3438 140.6251 140.9063 141.1876 141.4688 +141.7501 142.0313 142.3126 142.5938 142.8751 143.1563 143.4376 143.7188 +144.0001 144.2814 144.5626 144.8439 145.1251 145.4064 145.6876 145.9689 +146.2501 146.5314 146.8126 147.0939 147.3751 147.6564 147.9376 148.2189 +148.5001 148.7814 149.0626 149.3439 149.6251 149.9064 150.1876 150.4689 +150.7501 151.0314 151.3126 151.5939 151.8751 152.1564 152.4376 152.7189 +153.0001 153.2814 153.5626 153.8439 154.1251 154.4064 154.6876 154.9689 +155.2501 155.5314 155.8126 156.0939 156.3751 156.6564 156.9376 157.2189 +157.5001 157.7814 158.0626 158.3439 158.6251 158.9064 159.1876 159.4689 +159.7501 160.0314 160.3126 160.5939 160.8751 161.1564 161.4376 161.7189 +162.0001 162.2814 162.5626 162.8439 163.1251 163.4064 163.6876 163.9689 +164.2501 164.5314 164.8126 165.0939 165.3751 165.6564 165.9376 166.2189 +166.5001 166.7814 167.0626 167.3439 167.6251 167.9064 168.1876 168.4689 +168.7501 169.0314 169.3126 169.5939 169.8751 170.1564 170.4376 170.7189 +171.0001 171.2814 171.5626 171.8439 172.1251 172.4064 172.6876 172.9689 +173.2501 173.5314 173.8126 174.0939 174.3751 174.6564 174.9376 175.2189 +175.5001 175.7814 176.0626 176.3439 176.6251 176.9064 177.1876 177.4689 +177.7501 178.0314 178.3126 178.5939 178.8751 179.1564 179.4376 179.7189 +180.0001 180.2814 180.5626 180.8439 181.1251 181.4064 181.6876 181.9689 +182.2501 182.5314 182.8126 183.0939 183.3751 183.6564 183.9376 184.2189 +184.5001 184.7814 185.0626 185.3439 185.6251 185.9064 186.1876 186.4689 +186.7501 187.0314 187.3126 187.5939 187.8751 188.1564 188.4376 188.7189 +189.0001 189.2814 189.5626 189.8439 190.1251 190.4064 190.6876 190.9689 +191.2501 191.5314 191.8126 192.0939 192.3751 192.6564 192.9376 193.2189 +193.5001 193.7814 194.0626 194.3439 194.6251 194.9064 195.1876 195.4689 +195.7501 196.0314 196.3126 196.5939 196.8751 197.1564 197.4376 197.7189 +198.0001 198.2814 198.5626 198.8439 199.1251 199.4064 199.6876 199.9689 +200.2501 200.5314 200.8126 201.0939 201.3751 201.6564 201.9376 
202.2189 +202.5001 202.7814 203.0626 203.3439 203.6251 203.9064 204.1876 204.4689 +204.7501 205.0314 205.3126 205.5939 205.8751 206.1564 206.4376 206.7189 +207.0001 207.2814 207.5626 207.8439 208.1251 208.4064 208.6876 208.9689 +209.2501 209.5314 209.8126 210.0939 210.3751 210.6564 210.9376 211.2189 +211.5001 211.7814 212.0626 212.3439 212.6251 212.9064 213.1876 213.4689 +213.7501 214.0314 214.3126 214.5939 214.8751 215.1564 215.4376 215.7189 +216.0002 216.2814 216.5627 216.8439 217.1252 217.4064 217.6877 217.9689 +218.2502 218.5314 218.8127 219.0939 219.3752 219.6564 219.9377 220.2189 +220.5002 220.7814 221.0627 221.3439 221.6252 221.9064 222.1877 222.4689 +222.7502 223.0314 223.3127 223.5939 223.8752 224.1564 224.4377 224.7189 +225.0002 225.2814 225.5627 225.8439 226.1252 226.4064 226.6877 226.9689 +227.2502 227.5314 227.8127 228.0939 228.3752 228.6564 228.9377 229.2189 +229.5002 229.7814 230.0627 230.3439 230.6252 230.9064 231.1877 231.4689 +231.7502 232.0314 232.3127 232.5939 232.8752 233.1564 233.4377 233.7189 +234.0002 234.2814 234.5627 234.8439 235.1252 235.4064 235.6877 235.9689 +236.2502 236.5314 236.8127 237.0939 237.3752 237.6564 237.9377 238.2189 +238.5002 238.7814 239.0627 239.3439 239.6252 239.9064 240.1877 240.4689 +240.7502 241.0314 241.3127 241.5939 241.8752 242.1564 242.4377 242.7189 +243.0002 243.2814 243.5627 243.8439 244.1252 244.4064 244.6877 244.9689 +245.2502 245.5314 245.8127 246.0939 246.3752 246.6564 246.9377 247.2189 +247.5002 247.7814 248.0627 248.3439 248.6252 248.9064 249.1877 249.4689 +249.7502 250.0314 250.3127 250.5939 250.8752 251.1564 251.4377 251.7189 +252.0002 252.2814 252.5627 252.8439 253.1252 253.4064 253.6877 253.9689 +254.2502 254.5314 254.8127 255.0939 255.3752 255.6564 255.9377 256.2189 +256.5002 256.7814 257.0627 257.3439 257.6252 257.9064 258.1877 258.4689 +258.7502 259.0314 259.3127 259.5939 259.8752 260.1564 260.4377 260.7189 +261.0002 261.2814 261.5627 261.8439 262.1252 262.4064 262.6877 262.9689 +263.2502 263.5314 263.8127 264.0939 264.3752 264.6564 264.9377 265.2189 +265.5002 265.7814 266.0627 266.3439 266.6252 266.9064 267.1877 267.4689 +267.7502 268.0314 268.3127 268.5939 268.8752 269.1564 269.4377 269.7189 +270.0002 270.2814 270.5627 270.8439 271.1252 271.4064 271.6877 271.9689 +272.2502 272.5314 272.8127 273.0939 273.3752 273.6564 273.9377 274.2189 +274.5002 274.7814 275.0627 275.3439 275.6252 275.9064 276.1877 276.4689 +276.7502 277.0314 277.3127 277.5939 277.8752 278.1564 278.4377 278.7189 +279.0002 279.2814 279.5627 279.8439 280.1252 280.4064 280.6877 280.9689 +281.2502 281.5314 281.8127 282.0939 282.3752 282.6564 282.9377 283.2189 +283.5002 283.7814 284.0627 284.3439 284.6252 284.9064 285.1877 285.4689 +285.7502 286.0314 286.3127 286.5939 286.8752 287.1564 287.4377 287.7189 +288.0002 288.2815 288.5627 288.844 289.1252 289.4065 289.6877 289.969 290.2502 +290.5315 290.8127 291.094 291.3752 291.6565 291.9377 292.219 292.5002 292.7815 +293.0627 293.344 293.6252 293.9065 294.1877 294.469 294.7502 295.0315 295.3127 +295.594 295.8752 296.1565 296.4377 296.719 297.0002 297.2815 297.5627 297.844 +298.1252 298.4065 298.6877 298.969 299.2502 299.5315 299.8127 300.094 300.3752 +300.6565 300.9377 301.219 301.5002 301.7815 302.0627 302.344 302.6252 302.9065 +303.1877 303.469 303.7502 304.0315 304.3127 304.594 304.8752 305.1565 305.4377 +305.719 306.0002 306.2815 306.5627 306.844 307.1252 307.4065 307.6877 307.969 +308.2502 308.5315 308.8127 309.094 309.3752 309.6565 309.9377 310.219 310.5002 +310.7815 311.0627 311.344 311.6252 311.9065 312.1877 
312.469 312.7502 313.0315 +313.3127 313.594 313.8752 314.1565 314.4377 314.719 315.0002 315.2815 315.5627 +315.844 316.1252 316.4065 316.6877 316.969 317.2502 317.5315 317.8127 318.094 +318.3752 318.6565 318.9377 319.219 319.5002 319.7815 320.0627 320.344 320.6252 +320.9065 321.1877 321.469 321.7502 322.0315 322.3127 322.594 322.8752 323.1565 +323.4377 323.719 324.0002 324.2815 324.5627 324.844 325.1252 325.4065 325.6877 +325.969 326.2502 326.5315 326.8127 327.094 327.3752 327.6565 327.9377 328.219 +328.5002 328.7815 329.0627 329.344 329.6252 329.9065 330.1877 330.469 330.7502 +331.0315 331.3127 331.594 331.8752 332.1565 332.4377 332.719 333.0002 333.2815 +333.5627 333.844 334.1252 334.4065 334.6877 334.969 335.2502 335.5315 335.8127 +336.094 336.3752 336.6565 336.9377 337.219 337.5002 337.7815 338.0627 338.344 +338.6252 338.9065 339.1877 339.469 339.7502 340.0315 340.3127 340.594 340.8752 +341.1565 341.4377 341.719 342.0002 342.2815 342.5627 342.844 343.1252 343.4065 +343.6877 343.969 344.2502 344.5315 344.8127 345.094 345.3752 345.6565 345.9377 +346.219 346.5002 346.7815 347.0627 347.344 347.6252 347.9065 348.1877 348.469 +348.7502 349.0315 349.3127 349.594 349.8752 350.1565 350.4377 350.719 351.0002 +351.2815 351.5627 351.844 352.1252 352.4065 352.6877 352.969 353.2502 353.5315 +353.8127 354.094 354.3752 354.6565 354.9377 355.219 355.5002 355.7815 356.0627 +356.344 356.6252 356.9065 357.1877 357.469 357.7502 358.0315 358.3127 358.594 +358.8752 359.1565 359.4377 359.719 +yvals = 89.78488 89.5062 89.22588 88.94519 88.66436 88.38346 88.10252 87.82156 87.54058 +87.25959 86.97859 86.69759 86.41658 86.13557 85.85456 85.57355 85.29253 +85.01151 84.73049 84.44947 84.16845 83.88742 83.6064 83.32538 83.04435 82.76333 +82.4823 82.20128 81.92025 81.63923 81.3582 81.07717 80.79615 80.51512 80.23409 +79.95306 79.67204 79.39101 79.10998 78.82895 78.54792 78.26689 77.98587 +77.70484 77.42381 77.14278 76.86175 76.58072 76.29969 76.01867 75.73764 +75.45661 75.17558 74.89455 74.61352 74.33249 74.05146 73.77043 73.4894 73.20837 +72.92734 72.64631 72.36528 72.08426 71.80323 71.5222 71.24117 70.96014 70.67911 +70.39808 70.11705 69.83602 69.55499 69.27396 68.99293 68.7119 68.43087 68.14984 +67.86881 67.58778 67.30675 67.02572 66.74469 66.46366 66.18263 65.9016 65.62057 +65.33954 65.05851 64.77748 64.49645 64.21542 63.93439 63.65336 63.37233 63.0913 +62.81027 62.52924 62.24821 61.96718 61.68615 61.40512 61.12409 60.84306 +60.56203 60.281 59.99997 59.71894 59.43791 59.15688 58.87585 58.59482 58.31379 +58.03276 57.75173 57.4707 57.18967 56.90864 56.62761 56.34658 56.06555 55.78452 +55.50349 55.22246 54.94143 54.6604 54.37937 54.09834 53.81731 53.53628 53.25525 +52.97422 52.69319 52.41216 52.13113 51.85009 51.56906 51.28803 51.007 50.72597 +50.44494 50.16391 49.88288 49.60185 49.32082 49.03979 48.75876 48.47773 48.1967 +47.91567 47.63464 47.35361 47.07258 46.79155 46.51052 46.22949 45.94846 +45.66743 45.3864 45.10537 44.82434 44.54331 44.26228 43.98125 43.70022 43.41919 +43.13816 42.85713 42.5761 42.29507 42.01404 41.73301 41.45198 41.17094 40.88991 +40.60888 40.32785 40.04682 39.76579 39.48476 39.20373 38.9227 38.64167 38.36064 +38.07961 37.79858 37.51755 37.23652 36.95549 36.67446 36.39343 36.1124 35.83137 +35.55034 35.26931 34.98828 34.70725 34.42622 34.14519 33.86416 33.58313 33.3021 +33.02107 32.74004 32.45901 32.17797 31.89694 31.61591 31.33488 31.05385 +30.77282 30.49179 30.21076 29.92973 29.6487 29.36767 29.08664 28.80561 28.52458 +28.24355 27.96252 27.68149 27.40046 27.11943 26.8384 26.55737 26.27634 
25.99531 +25.71428 25.43325 25.15222 24.87119 24.59016 24.30913 24.0281 23.74706 23.46603 +23.185 22.90397 22.62294 22.34191 22.06088 21.77985 21.49882 21.21779 20.93676 +20.65573 20.3747 20.09367 19.81264 19.53161 19.25058 18.96955 18.68852 18.40749 +18.12646 17.84543 17.5644 17.28337 17.00234 16.72131 16.44028 16.15925 15.87822 +15.59718 15.31615 15.03512 14.75409 14.47306 14.19203 13.911 13.62997 13.34894 +13.06791 12.78688 12.50585 12.22482 11.94379 11.66276 11.38173 11.1007 10.81967 +10.53864 10.25761 9.976578 9.695547 9.414517 9.133487 8.852456 8.571426 +8.290396 8.009365 7.728335 7.447305 7.166274 6.885244 6.604213 6.323183 +6.042153 5.761122 5.480092 5.199062 4.918031 4.637001 4.355971 4.07494 3.79391 +3.512879 3.231849 2.950819 2.669788 2.388758 2.107728 1.826697 1.545667 +1.264637 0.9836063 0.7025759 0.4215455 0.1405152 -0.1405152 -0.4215455 +-0.7025759 -0.9836063 -1.264637 -1.545667 -1.826697 -2.107728 -2.388758 +-2.669788 -2.950819 -3.231849 -3.512879 -3.79391 -4.07494 -4.355971 -4.637001 +-4.918031 -5.199062 -5.480092 -5.761122 -6.042153 -6.323183 -6.604213 -6.885244 +-7.166274 -7.447305 -7.728335 -8.009365 -8.290396 -8.571426 -8.852456 -9.133487 +-9.414517 -9.695547 -9.976578 -10.25761 -10.53864 -10.81967 -11.1007 -11.38173 +-11.66276 -11.94379 -12.22482 -12.50585 -12.78688 -13.06791 -13.34894 -13.62997 +-13.911 -14.19203 -14.47306 -14.75409 -15.03512 -15.31615 -15.59718 -15.87822 +-16.15925 -16.44028 -16.72131 -17.00234 -17.28337 -17.5644 -17.84543 -18.12646 +-18.40749 -18.68852 -18.96955 -19.25058 -19.53161 -19.81264 -20.09367 -20.3747 +-20.65573 -20.93676 -21.21779 -21.49882 -21.77985 -22.06088 -22.34191 -22.62294 +-22.90397 -23.185 -23.46603 -23.74706 -24.0281 -24.30913 -24.59016 -24.87119 +-25.15222 -25.43325 -25.71428 -25.99531 -26.27634 -26.55737 -26.8384 -27.11943 +-27.40046 -27.68149 -27.96252 -28.24355 -28.52458 -28.80561 -29.08664 -29.36767 +-29.6487 -29.92973 -30.21076 -30.49179 -30.77282 -31.05385 -31.33488 -31.61591 +-31.89694 -32.17797 -32.45901 -32.74004 -33.02107 -33.3021 -33.58313 -33.86416 +-34.14519 -34.42622 -34.70725 -34.98828 -35.26931 -35.55034 -35.83137 -36.1124 +-36.39343 -36.67446 -36.95549 -37.23652 -37.51755 -37.79858 -38.07961 -38.36064 +-38.64167 -38.9227 -39.20373 -39.48476 -39.76579 -40.04682 -40.32785 -40.60888 +-40.88991 -41.17094 -41.45198 -41.73301 -42.01404 -42.29507 -42.5761 -42.85713 +-43.13816 -43.41919 -43.70022 -43.98125 -44.26228 -44.54331 -44.82434 -45.10537 +-45.3864 -45.66743 -45.94846 -46.22949 -46.51052 -46.79155 -47.07258 -47.35361 +-47.63464 -47.91567 -48.1967 -48.47773 -48.75876 -49.03979 -49.32082 -49.60185 +-49.88288 -50.16391 -50.44494 -50.72597 -51.007 -51.28803 -51.56906 -51.85009 +-52.13113 -52.41216 -52.69319 -52.97422 -53.25525 -53.53628 -53.81731 -54.09834 +-54.37937 -54.6604 -54.94143 -55.22246 -55.50349 -55.78452 -56.06555 -56.34658 +-56.62761 -56.90864 -57.18967 -57.4707 -57.75173 -58.03276 -58.31379 -58.59482 +-58.87585 -59.15688 -59.43791 -59.71894 -59.99997 -60.281 -60.56203 -60.84306 +-61.12409 -61.40512 -61.68615 -61.96718 -62.24821 -62.52924 -62.81027 -63.0913 +-63.37233 -63.65336 -63.93439 -64.21542 -64.49645 -64.77748 -65.05851 -65.33954 +-65.62057 -65.9016 -66.18263 -66.46366 -66.74469 -67.02572 -67.30675 -67.58778 +-67.86881 -68.14984 -68.43087 -68.7119 -68.99293 -69.27396 -69.55499 -69.83602 +-70.11705 -70.39808 -70.67911 -70.96014 -71.24117 -71.5222 -71.80323 -72.08426 +-72.36528 -72.64631 -72.92734 -73.20837 -73.4894 -73.77043 -74.05146 -74.33249 +-74.61352 -74.89455 -75.17558 -75.45661 -75.73764 
-76.01867 -76.29969 -76.58072 +-76.86175 -77.14278 -77.42381 -77.70484 -77.98587 -78.26689 -78.54792 -78.82895 +-79.10998 -79.39101 -79.67204 -79.95306 -80.23409 -80.51512 -80.79615 -81.07717 +-81.3582 -81.63923 -81.92025 -82.20128 -82.4823 -82.76333 -83.04435 -83.32538 +-83.6064 -83.88742 -84.16845 -84.44947 -84.73049 -85.01151 -85.29253 -85.57355 +-85.85456 -86.13557 -86.41658 -86.69759 -86.97859 -87.25959 -87.54058 -87.82156 +-88.10252 -88.38346 -88.66436 -88.94519 -89.22588 -89.5062 -89.78488 diff --git a/conf/slurm_templates/run_parallel_workflow.sh b/conf/slurm_templates/run_parallel_workflow.sh new file mode 100644 index 0000000000000000000000000000000000000000..461ee7e2335f4e830e4e72d95319d88415d3d98c --- /dev/null +++ b/conf/slurm_templates/run_parallel_workflow.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +#SBATCH -J SUNSET_verification + +# Slurm directive description: +# -J: job name + +set -vx + +script=$1 +atomic_recipe=$2 + +source MODULES + +Rscript ${script} ${atomic_recipe} diff --git a/conf/slurm_templates/run_scorecards.sh b/conf/slurm_templates/run_scorecards.sh new file mode 100644 index 0000000000000000000000000000000000000000..5ebf65281985d4ddac408609497eb76408f2eb32 --- /dev/null +++ b/conf/slurm_templates/run_scorecards.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +#SBATCH -J SUNSET_scorecards +#SBATCH --kill-on-invalid-dep=yes + +# Slurm directive description: +# -J: job name +# --kill-on-invalid-dep: Whether to kill the job if the dependencies are +# not satisfied. If the verification jobs fail, the scorecards job +# will be canceled. + +set -vx + +recipe=$1 +outdir=$2 + +source MODULES +# Execute scorecards +Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${outdir} + diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index 18e694bac67ac23832535ce5f4379a5ef6c6a61e..c440eac1baa910973cf45e6918802135d2310bd4 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -60,6 +60,11 @@ vars: long_name: "Daily Maximum Near-Surface Wind Speed" standard_name: "wind_speed" accum: no + winddir: + units: "degrees" + long_name: "10 metre wind direction" + standard_name: + accum: no # outname: "wind" rsds: units: "W m-2" @@ -89,6 +94,11 @@ vars: long_name: "Geopotential" standard_name: "geopotential" accum: no + ps: + units: "Pa" + long_name: "Surface pressure" + standard_name: "surface_air_pressure" + accum: no pr: units: "kg m-2 s-1" long_name: "Precipitation" @@ -109,6 +119,11 @@ vars: long_name: "Total Cloud Cover Percentage" standard_name: "cloud_area_fraction" accum: no + hur: + units: "%" + long_name: "2 metre relative humidity" + standard_name: "relative_humidity" + accum: no hurs: units: "%" long_name: "Near-Surface Relative Humidity" @@ -189,6 +204,17 @@ vars: long_name: "Surface Upward Sensible Heat Flux" standard_name: "surface_upward_sensible_heat_flux" accum: no + tas-tos: + units: "K" + long_name: "Blended air - sea temperature" + standard_name: "air_sea_temperature" + accum: no + sic: + units: "1" + long_name: "Sea Ice Concentration" + standard_name: "sea_ice_concentration" + accum: no + # Coordinates coords: diff --git a/conf/vars-dict.yml-OLD b/conf/vars-dict.yml-OLD deleted file mode 100644 index 04549d36001c848521f53fd704b752878b2eb862..0000000000000000000000000000000000000000 --- a/conf/vars-dict.yml-OLD +++ /dev/null @@ -1,114 +0,0 @@ - -vars: -# ECVs - tas: - units: "°C" - longname: "Daily mean temperature at surface" - outname: ~ - tasmin: - units: "°C" - longname: "Minimum daily temperature at surface" - outname: ~ - 
tasmax: - units: "°C" - longname: "Maximum daily temperature at surface" - outname: ~ - sfcwind: - units: "m/s" - longname: "Surface wind speed module" - outname: ~ - rsds: - units: "W/m2" - longname: "Surface solar radiation downwards" - outname: ~ - psl: - units: "hPa" - longname: "Mean sea level pressure" - outname: ~ - prlr: - units: "mm" - longname: "Total precipitation" - outname: ~ -# CFs - cfwnd1: - units: "%" - longname: "Wind Capacity factor IEC1" - outname: ~ - cfwnd2: - units: "%" - longname: "Wind Capacity factor IEC2" - outname: ~ - cfwnd3: - units: "%" - longname: "Wind Capacity factor IEC3" - outname: ~ - cfslr: - units: "%" - longname: "Solar Capacity factor" - outname: ~ -# Energy - edmnd: - units: "GW" - longname: "Electricity Demmand" - outname: ~ - wndpwo: - units: "GW" - longname: "Wind Power" - outname: ~ - dmndnetwnd: - units: "GW" - longname: "Demmand-net-Wind" - outname: ~ -# Indices - Spr32: - units: "days" - longname: > - Total count of days when daily maximum temp exceeded 32°C - from April 21st to June 21st - outname: ~ - SU35: - units: "days" - longname: > - Total count of days when daily maximum temp exceeded 35°C - from June 21st to September 21st - outname: ~ - SU36: - units: "days" - longname: > - Total count of days when daily maximum temp exceeded 36°C - from June 21st to September 21st - outname: ~ - SU40: - units: "days" - longname: > - Total count of days when daily maximum temp exceeded 40°C - from June 21st to September 21st - outname: ~ - GDD: - units: "days" - longname: > - The sum of the daily differences between daily mean - temperature and 10°C from April 1st to October 31st - outname: ~ - GST: - units: "°C" - longname: "The average temperature from April 1st to October 31st" - outname: ~ - SprTX: - units: "°C" - longname: "The average daily maximum temperature from April 1st to October 31st" - outname: ~ - WSDI: - units: "" - longname: > - The total count of days with at least 6 consecutives days - when the daily temperature maximum exceeds its 90th percentile - outname: ~ - SprR: - units: "mm" - longname: 'Total precipitation from April 21st to June 21st' - outname: ~ - HarR: - units: "mm" - longname: 'Total precipitation from August 21st to September 21st' - outname: ~ diff --git a/example_scripts/example_downscaling.R b/example_scripts/example_downscaling.R new file mode 100644 index 0000000000000000000000000000000000000000..1e9f1d6ba13ae328eae149624fb70df9c75bbb4e --- /dev/null +++ b/example_scripts/example_downscaling.R @@ -0,0 +1,32 @@ +############################################################################### +## Author: V. Agudetse +## Description: Example script for seasonal downscaling. 
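+## Workflow: Loading -> Units -> Anomalies -> Downscaling -> Skill -> Probabilities -> Visualization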
+############################################################################### + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Downscaling/Downscaling.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +# Read recipe +recipe_file <- "recipes/atomic_recipes/recipe_seasonal_downscaling.yml" +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- Loading(recipe) +# Change units +data <- Units(recipe, data) +# Compute anomalies +data <- Anomalies(recipe, data) +# Downscale datasets +data <- Downscaling(recipe, data) +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Compute percentiles and probability bins +probabilities <- Probabilities(recipe, data) +# Plot data +Visualization(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/example_scripts/exec_ecvs_seasonal_oper.R b/example_scripts/exec_ecvs_seasonal_oper.R new file mode 100644 index 0000000000000000000000000000000000000000..cdb48d099b040e5500f9566ba1c87449be0c450c --- /dev/null +++ b/example_scripts/exec_ecvs_seasonal_oper.R @@ -0,0 +1,50 @@ +rm(list=ls()) +gc() +setwd("/esarchive/scratch/nperez/git/auto-s2s") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("tools/prepare_outputs.R") +source("modules/Units/Units.R") + +# Read recipe +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) +## to test a single recipe: +#recipe_file <- "recipe_ecvs_seasonal_oper.yml" +#recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- load_datasets(recipe) +# Change units +data <- Units(recipe, data) +# Calibrate datasets +data <- calibrate_datasets(recipe, data) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(recipe, data) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(recipe, data) +# Export all data to netCDF +# save_data(recipe, data, skill_metrics, probabilities) +# Plot data +plot_data(recipe, data, skill_metrics, probabilities, significance = T) + +## Add BSC logo +logo <- "tools/BSC_logo_95.jpg" +system <- list.files(paste0(recipe$Run$output_dir, "/plots")) +variable <- strsplit(recipe$Analysis$Variable$name, ", | |,")[[1]] +files <- lapply(variable, function(x) { + f <- list.files(paste0(recipe$Run$output_dir, "/plots/", + system, "/", x)) + full_path <- paste0(recipe$Run$output_dir, "/plots/", + system, "/", x,"/", f)})[[1]] +dim(files) <- c(file = length(files)) +Apply(list(files), target_dims = NULL, function(x) { + system(paste("composite -gravity southeast -geometry +10+10", + logo, x, x))}, ncores = recipe$Analysis$ncores) + diff --git a/example_scripts/exec_units.R b/example_scripts/exec_units.R new file mode 100644 index 0000000000000000000000000000000000000000..819121c9103e8da50ef49665431e33960d0bb5c8 --- /dev/null +++ b/example_scripts/exec_units.R @@ -0,0 +1,38 @@ +rm(list=ls()) +gc() +setwd("/esarchive/scratch/nperez/git/auto-s2s") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") 
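+# tools/prepare_outputs.R provides prepare_outputs(), used below to read and check the recipe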
+source("tools/prepare_outputs.R") + +# Read recipe +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +#recipe <- read_atomic_recipe(recipe_file) +## to test a single recipe: + # recipe_file <- "recipes/examples/recipe_tas_seasonal_units.yml" + # recipe_file <- "recipes/examples/recipe_prlr_seasonal_units.yml" + +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- load_datasets(recipe) +# Units transformation +source("modules/Units/Units.R") +test <- Units(recipe, data) +# Calibrate datasets +data <- calibrate_datasets(recipe, test) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(recipe, data) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(recipe, data) +# Export all data to netCDF +## TODO: Fix plotting +# save_data(recipe, data, skill_metrics, probabilities) +# Plot data +plot_data(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/example_scripts/execute_NAO.R b/example_scripts/execute_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..3ec3586fdd5fdb9f45e79ae7231f35e54a7702de --- /dev/null +++ b/example_scripts/execute_NAO.R @@ -0,0 +1,30 @@ +############################################################################### +## Author: Núria Pérez-Zanón +## Description: Computes the NAO index and some skill metrics for it, for a +## seasonal prediction system. +## Instructions: To run it, modify the recipe to set your own output directory. +############################################################################### + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("tools/prepare_outputs.R") +source("modules/Anomalies/Anomalies.R") + +recipe_file <- "recipes/examples/NAO_recipe.yml" +recipe <- prepare_outputs(recipe_file) + +#for (smonth in 1:12) { + data <- load_datasets(recipe) + gc() + data <- compute_anomalies(recipe, data) +# data <- readRDS("../Test_NAOmodule.RDS") +source("modules/Indices/Indices.R") + nao_s2dv <- Indices(data = data, recipe = recipe) +source("modules/Skill/Skill.R") + # todo parameter agg to get it from the nao_s2dv? + skill_metrics <- compute_skill_metrics(recipe = recipe, data = nao_s2dv, + agg = 'region') + + diff --git a/example_scripts/execute_NAO_decadal.R b/example_scripts/execute_NAO_decadal.R new file mode 100644 index 0000000000000000000000000000000000000000..233e796f562e640ffc2ee1a128d6a26e31d870b8 --- /dev/null +++ b/example_scripts/execute_NAO_decadal.R @@ -0,0 +1,30 @@ +############################################################################### +## Author: Núria Pérez-Zanón +## Description: Computes the NAO index and some skill metrics for it, for a +## decadal prediction system. +## Instructions: To run it, modify the recipe to set your own output directory. 
+############################################################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("tools/prepare_outputs.R") +source("modules/Anomalies/Anomalies.R") + +recipe_file <- "recipes/examples/recipe_model_decadal_NAO.yml" +recipe <- prepare_outputs(recipe_file) + +#for (smonth in 1:12) { + data <- load_datasets(recipe) + gc() + data <- compute_anomalies(recipe, data) +# data <- readRDS("../data_decadal_nao.RDS") +source("modules/Indices/Indices.R") + nao_s2dv <- Indices(data = data, recipe = recipe) +source("modules/Skill/Skill.R") + # todo parameter agg to get it from the nao_s2dv? + skill_metrics <- compute_skill_metrics(recipe = recipe, data = nao_s2dv, + agg = 'region') + + diff --git a/example_scripts/execute_Nino.R b/example_scripts/execute_Nino.R new file mode 100644 index 0000000000000000000000000000000000000000..4bd4a72c2f44954d7284baad7468d925ced192ac --- /dev/null +++ b/example_scripts/execute_Nino.R @@ -0,0 +1,42 @@ +############################################################################### +## Author: Núria Pérez-Zanón +## Description: Computes the Niño1+2, Niño3, Niño3.4 and Niño4 indices and some +## some skill metrics for each index. +## Instructions: To run it, modify the recipe to set your own output directory. +############################################################################### + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("tools/prepare_outputs.R") +source("modules/Anomalies/Anomalies.R") + +# Read recipe +recipe_file <- "recipes/examples/Nino_recipe.yml" +recipe <- prepare_outputs(recipe_file) + +#for (smonth in 1:12) { + data <- load_datasets(recipe) + gc() + data <- compute_anomalies(recipe, data) +# saveRDS(data, file = "Test_Ninomodule.RDS") +# data <- readRDS("Test_Ninomodule.RDS") +source("modules/Indices/Indices.R") + nino_s2dv <- Indices(data = data, recipe = recipe) +source("modules/Skill/Skill.R") + # todo parameter agg to get it from the nao_s2dv? + skill_metrics <- compute_skill_metrics(recipe = recipe, data = nino_s2dv$Nino1, + agg = 'region') + skill_metrics <- compute_skill_metrics(recipe = recipe, data = nino_s2dv$Nino3, + agg = 'region') + + skill_metrics <- compute_skill_metrics(recipe = recipe, data = nino_s2dv$Nino4, + agg = 'region') + + skill_metrics <- compute_skill_metrics(recipe = recipe, data = nino_s2dv$Nino3.4, + agg = 'region') + + + diff --git a/example_scripts/execute_Nino_decadal.R b/example_scripts/execute_Nino_decadal.R new file mode 100644 index 0000000000000000000000000000000000000000..cc5341bc0fe98c16ecf03fcd93a5dfb0f214da6f --- /dev/null +++ b/example_scripts/execute_Nino_decadal.R @@ -0,0 +1,30 @@ +############################################################################### +## Author: Núria Pérez-Zanón +## Description: Computes the Niño1+2, Niño3, Niño3.4 and Niño4 indices and some +## some skill metrics for each index. +## Instructions: To run it, modify the recipe to set your own output directory. 
+############################################################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("tools/prepare_outputs.R") +source("modules/Anomalies/Anomalies.R") + +recipe_file <- "recipes/examples/recipe_Nino_decadal.yml" +recipe <- prepare_outputs(recipe_file) + +#for (smonth in 1:12) { + data <- load_datasets(recipe) + gc() + data <- compute_anomalies(recipe, data) +# data <- readRDS("../data_decadal_nao.RDS") +source("modules/Indices/Indices.R") + nino_s2dv <- Indices(data = data, recipe = recipe) +source("modules/Skill/Skill.R") + # todo parameter agg to get it from the nao_s2dv? + skill_metrics <- compute_skill_metrics(recipe = recipe, data = nino_s2dv[[1]], + agg = 'region') + + diff --git a/example_scripts/tas-tos_scorecards_data_loading.R b/example_scripts/tas-tos_scorecards_data_loading.R new file mode 100644 index 0000000000000000000000000000000000000000..da124cd4af12fab20f7e4cde01def646310bea84 --- /dev/null +++ b/example_scripts/tas-tos_scorecards_data_loading.R @@ -0,0 +1,63 @@ + +rm(list = ls()); gc() + +args <- commandArgs(trailingOnly = TRUE) + +setwd("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/") + +#source("modules/Loading/Loading.R") +source("modules/Loading/Dev_Loading.R") +source("modules/Anomalies/Anomalies.R") +#source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +#source("modules/Visualization/Visualization.R") +source("tools/prepare_outputs.R") + +recipe_file <- "recipes/atomic_recipes/recipe_test_multivar_nadia.yml" +recipe <- prepare_outputs(recipe_file) + +## Run job for each start month +recipe$Analysis$Time$sdate <- paste0(sprintf("%02d", as.numeric(args)), '01') + +## Load datasets +data <- load_datasets(recipe) + + +################################################################################ + +### For Testing ### +# +# lon <- attributes(data$hcst$coords$longitude) +# lat <- attributes(data$hcst$coords$latitude) +# +# +# source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') +# tas_tos_hcst <- mask_tas_tos(input_data = data$hcst, region = c(20, 40, 30, 50), +# grid = 'r360x181', lon = lon, lat = lat, +# lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) +# +# tas_tos_obs <- mask_tas_tos(input_data = data$obs, region = c(0.1, 359.95, -90, 90), +# grid = 'r360x181', lon = lon, lat = lat, +# lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) +# +# +# source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') +# mask <- .load_mask(grid = 'r360x181', mask_path = NULL, +# land_value = 0, sea_value = 1, +# lon_dim = 'lon', lat_dim = 'lat', region = NULL) + +################################################################################ + +## compute anomalies +anomalies <- compute_anomalies(recipe, data) + +## Compute skill metrics of data +skill_metrics <- compute_skill_metrics(recipe, anomalies) + +## save data +save_data(recipe, data, skill_metrics = skill_metrics) +gc() + +## plot metrics maps +## plot_data(recipe, anomalies, skill_metrics, significance = T) diff --git a/example_scripts/test_GRIB.R b/example_scripts/test_GRIB.R new file mode 100644 index 0000000000000000000000000000000000000000..27cf3c9aa08df54e3c44872518bd7142e376c9f5 --- /dev/null +++ b/example_scripts/test_GRIB.R @@ -0,0 +1,52 @@ 
+source('modules/Loading/GRIB/GrbLoad.R') +source('modules/Loading//Loading_GRIB.R') +source("tools/libs.R") # for prepare_outputs.R + +recipe <- "modules/Loading/testing_recipes/recipe_GRIB_system5_era5.yml" +recipe <- prepare_outputs(recipe) + +# Load datasets +data <- load_datasets(recipe) + +str(data) + +#============================ +# Test the remaining modules +#============================ +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +# Calibrate data +suppressWarnings({invisible(capture.output( +calibrated_data <- calibrate_datasets(recipe, data) +))}) +#pryr::mem_used() +#975 MB + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +))}) + +suppressWarnings({invisible(capture.output( +probs <- compute_probabilities(recipe, calibrated_data) +))}) + +# Saving +suppressWarnings({invisible(capture.output( +save_data(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + archive = read_yaml("conf/archive_GRIB.yml")$archive) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +plot_data(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T, + archive = read_yaml("conf/archive_GRIB.yml")$archive) +))}) +outdir <- get_dir(recipe) + diff --git a/example_scripts/test_decadal.R b/example_scripts/test_decadal.R new file mode 100644 index 0000000000000000000000000000000000000000..12daa540012d05d876c1da2fe1f142c11b58d66a --- /dev/null +++ b/example_scripts/test_decadal.R @@ -0,0 +1,32 @@ +############################################################################### +## Author: An-Chi Ho +## Description: Script to test developments for decadal workflows. Please, +## do not modify. 
+############################################################################### + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" +recipe <- prepare_outputs(recipe_file) +# archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive + +# Load datasets +data <- Loading(recipe) + +# Calibrate datasets +calibrated_data <- Calibration(recipe, data) + +# Compute skill metrics +skill_metrics <- Skill(recipe, calibrated_data) + +# Compute percentiles and probability bins +probabilities <- Probabilities(recipe, calibrated_data) + +# Plot data +Visualization(recipe, calibrated_data, skill_metrics, probabilities, + significance = T) + diff --git a/example_scripts/test_parallel_GRIB.R b/example_scripts/test_parallel_GRIB.R new file mode 100644 index 0000000000000000000000000000000000000000..a13f9e5911bb2bebd40c19cbb9c79b8619cd8a96 --- /dev/null +++ b/example_scripts/test_parallel_GRIB.R @@ -0,0 +1,27 @@ +source("modules/Loading/GRIB/GrbLoad.R") +source("modules/Loading/Loading_GRIB.R") +source('modules/Loading/GRIB/s2dv_cube.R') +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) +# Load datasets +data <- load_datasets(recipe) +# Calibrate datasets +data <- calibrate_datasets(recipe, data) +# Compute anomalies +data <- compute_anomalies(recipe, data) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(recipe, data) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(recipe, data) +# Export all data to netCDF +save_data(recipe, data, skill_metrics, probabilities) +# Plot data +plot_data(recipe, data, skill_metrics, probabilities, + significance = T) diff --git a/example_scripts/test_parallel_workflow.R b/example_scripts/test_parallel_workflow.R new file mode 100644 index 0000000000000000000000000000000000000000..5f0265135cde51a0cdcc41bb2133e4d684da1ef3 --- /dev/null +++ b/example_scripts/test_parallel_workflow.R @@ -0,0 +1,23 @@ +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) +# Load datasets +data <- Loading(recipe) +# Calibrate datasets +data <- Calibration(recipe, data) +# Compute anomalies +data <- Anomalies(recipe, data) +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Compute percentiles and probability bins +probabilities <- Probabilities(recipe, data) +# Plot data +Visualization(recipe, data, skill_metrics, probabilities, + significance = T) diff --git a/example_scripts/test_scorecards_workflow.R b/example_scripts/test_scorecards_workflow.R new file mode 100644 index 0000000000000000000000000000000000000000..1287c8e713fecc0e28b136c224e5a7e6a0a2845b --- /dev/null +++ b/example_scripts/test_scorecards_workflow.R @@ -0,0 +1,15 @@ +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") 
+source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") + +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) +# Load datasets +data <- load_datasets(recipe) +# Compute anomalies +data <- compute_anomalies(recipe, data) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(recipe, data) diff --git a/example_scripts/test_seasonal.R b/example_scripts/test_seasonal.R new file mode 100644 index 0000000000000000000000000000000000000000..2c7d673a68ebbb6d7116e5845da78a955b28dda4 --- /dev/null +++ b/example_scripts/test_seasonal.R @@ -0,0 +1,33 @@ +############################################################################### +## Author: V. Agudetse +## Description: Script to test developments for seasonal workflows. Please, +## do not modify. +############################################################################### + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +# Read recipe +recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml" +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- Loading(recipe) +# Change units +data <- Units(recipe, data) +# Calibrate datasets +data <- Calibration(recipe, data) +# Compute anomalies +data <- Anomalies(recipe, data) +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Compute percentiles and probability bins +probabilities <- Probabilities(recipe, data) +# Plot data +Visualization(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh new file mode 100644 index 0000000000000000000000000000000000000000..f859a8b283203cbc2c3a75885cfa5d95988d8bc3 --- /dev/null +++ b/launch_SUNSET.sh @@ -0,0 +1,157 @@ +#!/bin/bash + +################################################################################ +## Launcher script for SUNSET +################################################################################ +## +## It reads a SUNSET recipe and splits it into single ('atomic') verifications. +## Then each atomic verification is launched as an independent job with SLURM. +## Each job can be further parallelized using the --cpus parameter and the +## 'ncores' field in the recipe. +## If requested in the recipe, the Scorecards for the results are also created +## and saved once the verifications are finished. +## +################################################################################ + +# Usage statement + +function usage +{ + echo "Usage: $0 --wallclock= --custom_directives= --disable_unique_ID" + echo " " + echo " : Path to the SUNSET recipe." + echo " " + echo " : Path to the user-defined script." + echo " " + echo " --wallclock=: Maximum execution time for the jobs." + echo " Default is 02:00:00." + echo " " + echo " --cpus=: CPUs (cores) to be requested for each job." + echo " Corresponds to the --cpus-per-task parameter" + echo " in sbatch. The default number is 1." + echo " " + echo " --custom_directives=: Custom directives for sbatch." + echo " E.g. '--constraint=medmem'." + echo " " + echo " --disable_unique_ID: Do not add a unique ID to the output folder name." 
+} + +if [[ ( $@ == "--help") || $@ == "-h" ]]; then + usage + exit 0 +fi + +# Assign arguments +recipe=$1 +script=$2 + +for i in "$@"; do + case $i in + --wallclock*) + export wallclock=`echo $1 | sed -e 's/^[^=]*=//g'` + shift + ;; + --cpus*) + export cpus=`echo $1 | sed -e 's/^[^=]*=//g'` + shift + ;; + --custom_directives*) + export custom_directives=`echo $1 | sed -e 's/^[^=]*=//g'` + shift + ;; + --disable_unique_ID) + export disable_unique_ID="--disable_unique_ID" + shift + ;; + -h|--help) + usage + exit 0 + shift + ;; + *) + shift + ;; + esac +done + +# Check recipe +if [ ! -f "$recipe" ]; then + echo "Could not find the recipe file: $recipe" + usage + exit 1 +fi +# Check script +if [ ! -f "$script" ]; then + echo "Could not find the script file: $script" + usage + exit 1 +fi +# Assign default wallclock time if not specified +if [ -z "$wallclock" ]; then + wallclock="02:00:00" +fi +# Assign default number of cores if not specified +if [ -z "$cpus" ]; then + cpus=1 +fi +# Assign empty custom directives if not specified +if [ -z "$custom_directives" ]; then + custom_directives="" +fi + +# Define tmp file to store necessary information +tmpfile=$(mktemp ${TMPDIR-/tmp}/SUNSET.XXXXXX) + +# Create outdir and split recipes +source MODULES +Rscript split.R ${recipe} $disable_unique_ID --tmpfile $tmpfile + +# Run with Autosubmit or directly with Slurm's sbatch? +run_method=$( head -1 $tmpfile | tail -1 ) +# If run method is 'sbatch', launch jobs with dependencies +if [ $run_method == "sbatch" ]; then + # Retrieve working directory + codedir=$( head -2 $tmpfile | tail -1 ) + # Retrieve output directory + outdir=$( head -3 $tmpfile | tail -1 ) + # Scorecards TRUE/FALSE + scorecards=$( head -4 $tmpfile | tail -1) + + # Create directory for slurm output + logdir=${outdir}/logs/slurm/ + mkdir -p $logdir + echo "Slurm job logs will be stored in $logdir" + + # Launch one job per atomic recipe + cd $codedir + job_number=0 + # Create empty array to store all the job IDs for the verification jobs + verification_job_list=() + echo "Submitting verification jobs..." + # Loop over atomic recipes + for atomic_recipe in ${outdir}/logs/recipes/atomic_recipe_*.yml; do + job_number=$(($job_number + 1)) + job_name=$(basename $outdir)_$(printf %02d $job_number) + outfile=${logdir}/run-${job_name}.out + errfile=${logdir}/run-${job_name}.err + # Send batch job and capture job ID + job_ID=$(sbatch --parsable --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) + # Add job ID to array + verification_job_list+=($job_ID) + echo "Submitted batch job $job_ID" + done + + # Submit scorecards job with dependency on verification jobs, passed as a + # comma-separated string. The scorecards job will not run until all the + # verification jobs have finished successfully. + # If any of the jobs fail, it will be canceled. + if [[ $scorecards == "TRUE" ]]; then + echo "Submitting scorecards jobs..." 
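+    # Note (added for clarity): if, for example, verification_job_list=(101 102 103),
+    # the IFS expansion below joins the IDs into "101,102,103", so sbatch receives
+    # --dependency=afterok:101,102,103 and waits for all three verification jobs.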
+ outfile=${logdir}/run-scorecards.out + errfile=${logdir}/run-scorecards.err + sbatch --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]}") --output=$outfile --error=$errfile --time=01:00:00 conf/slurm_templates/run_scorecards.sh ${recipe} ${outdir} + fi +fi + +# Clean temporary file +rm $tmpfile diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 859e97bbd042be63af43c08358700b663ebe9139..2d54365a6e611291fc355a918de30c0e3e24a1b9 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -1,17 +1,17 @@ -source("modules/Anomalies/tmp/CST_Anomaly.R") +## TODO: Remove in the next release +source("modules/Anomalies/compute_anomalies.R") # Compute the hcst, obs and fcst anomalies with or without cross-validation # and return them, along with the hcst and obs climatologies. -compute_anomalies <- function(recipe, data) { +Anomalies <- function(recipe, data) { if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, - paste("The anomaly module has been called, but the element", - "'Workflow:Anomalies:compute' is missing from the recipe.")) + paste("The anomaly module has been called, but the element", + "'Workflow:Anomalies:compute' is missing from the recipe.")) stop() } - if (recipe$Analysis$Workflow$Anomalies$compute) { if (recipe$Analysis$Workflow$Anomalies$cross_validation) { cross <- TRUE @@ -20,81 +20,142 @@ compute_anomalies <- function(recipe, data) { cross <- FALSE cross_msg <- "without" } - original_dims <- dim(data$hcst$data) - - # Compute anomalies - anom <- CST_Anomaly(data$hcst, data$obs, - cross = cross, - memb = TRUE, - memb_dim = 'ensemble', - dim_anom = 'syear', - dat_dim = c('dat', 'ensemble'), - ftime_dim = 'time', - ncores = recipe$Analysis$ncores) - # Reorder dims - anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) - anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm <- FALSE + } else { + na.rm <- recipe$Analysis$remove_NAs + } + original_dims <- data$hcst$dim # Save full fields hcst_fullvalue <- data$hcst obs_fullvalue <- data$obs - - # Hindcast climatology - - data$hcst <- anom$exp - data$obs <- anom$obs - remove(anom) + # Compute anomalies + if (isTRUE(all.equal(as.vector(data$hcst$coords$latitude), + as.vector(data$obs$coords$latitude))) && + isTRUE(all.equal(as.vector(data$hcst$coords$longitude), + as.vector(data$obs$coord$longitude)))) { + anom <- CST_Anomaly(data$hcst, data$obs, + cross = cross, + memb = TRUE, + memb_dim = 'ensemble', + dim_anom = 'syear', + dat_dim = c('dat', 'ensemble'), + ftime_dim = 'time', + ncores = recipe$Analysis$ncores) + # Reorder dims + anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) + anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) + # Hindcast climatology + data$hcst <- anom$exp + data$obs <- anom$obs + remove(anom) + } else { + ## TODO: Remove when cross-validation is implemented for this use case + if (cross) { + warn(recipe$Run$logger, + paste("Anomaly computation in cross-validation has not been", + "implemented yet for the case where hcst and obs have", + "different grids. 
The climatology will be computed as", + "a simple mean.")) + } + clim_hcst <- Apply(data$hcst$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = recipe$Analysis$ncores)$output1 + clim_obs <- Apply(data$obs$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = recipe$Anaysis$ncores)$output1 + data$hcst$data <- Ano(data = data$hcst$data, clim = clim_hcst) + data$obs$data <- Ano(data = data$obs$data, clim = clim_obs) + } # Change variable metadata - # data$hcst$Variable$varName <- paste0(data$hcst$Variable$varName, "anomaly") - attr(data$hcst$Variable, "variable")$long_name <- - paste(attr(data$hcst$Variable, "variable")$long_name, "anomaly") - # data$obs$Variable$varName <- paste0(data$obs$Variable$varName, "anomaly") - attr(data$obs$Variable, "variable")$long_name <- - paste(attr(data$obs$Variable, "variable")$long_name, "anomaly") - + for (var in data$hcst$attrs$Variable$varName) { + # Change hcst longname + data$hcst$attrs$Variable$metadata[[var]]$long_name <- + paste(data$hcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") + # Change obs longname + data$obs$attrs$Variable$metadata[[var]]$long_name <- + paste(data$obs$attrs$Variable$metadata[[var]]$long_name, "anomaly") + } # Compute forecast anomaly field if (!is.null(data$fcst)) { # Compute hindcast climatology ensemble mean - clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, - time_dim = "syear", - dat_dim = c("dat", "ensemble"), - memb = FALSE, - memb_dim = "ensemble", - ftime_dim = "time", - ncores = recipe$Analysis$ncores) - clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, - name = "syear") + if (isTRUE(all.equal(as.vector(data$hcst$coords$latitude), + as.vector(data$obs$coords$latitude))) && + isTRUE(all.equal(as.vector(data$hcst$coords$longitude), + as.vector(data$obs$coord$longitude)))) { + clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, + time_dim = "syear", + dat_dim = c("dat", "ensemble"), + memb = FALSE, + memb_dim = "ensemble", + ftime_dim = "time", + ncores = recipe$Analysis$ncores) + clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, + name = "syear") + } + # Store original dimensions dims <- dim(clim_hcst) - clim_hcst <- rep(clim_hcst, dim(data$fcst$data)[['ensemble']]) - dim(clim_hcst) <- c(dims, ensemble = dim(data$fcst$data)[['ensemble']]) - clim_hcst <- Reorder(clim_hcst, order = names(dim(data$fcst$data))) + # Repeat the array as many times as ensemble members + clim_hcst <- rep(clim_hcst, data$fcst$dim[['ensemble']]) + # Rename and reorder dimensions + dim(clim_hcst) <- c(dims, ensemble = data$fcst$dim[['ensemble']]) + clim_hcst <- Reorder(clim_hcst, order = names(data$fcst$dim)) # Get fcst anomalies data$fcst$data <- data$fcst$data - clim_hcst # Change metadata - # data$fcst$Variable$varName <- paste0(data$fcst$Variable$varName, "anomaly") - attr(data$fcst$Variable, "variable")$long_name <- - paste(attr(data$fcst$Variable, "variable")$long_name, "anomaly") + for (var in data$fcst$attrs$Variable$varName) { + data$fcst$attrs$Variable$metadata[[var]]$long_name <- + paste(data$fcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") + } } info(recipe$Run$logger, - paste("The anomalies have been computed,", cross_msg, - "cross-validation. The original full fields are returned as", - "$hcst.full_val and $obs.full_val.")) + paste("The anomalies have been computed,", cross_msg, + "cross-validation. 
The original full fields are returned as", + "$hcst.full_val and $obs.full_val.")) info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") + # Save outputs + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + + info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = data$hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = data$obs) + } + } + } else { warn(recipe$Run$logger, paste("The Anomalies module has been called, but", - "recipe parameter Analysis:Variables:anomaly is set to FALSE.", - "The full fields will be returned.")) + "recipe parameter Workflow:anomalies:compute is set to FALSE.", + "The full fields will be returned.")) hcst_fullvalue <- NULL obs_fullvalue <- NULL info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") } ## TODO: Return fcst full value? + .log_memory_usage(recipe$Run$logger, "After computing anomalies") return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, - hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) + hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) } diff --git a/modules/Anomalies/compute_anomalies.R b/modules/Anomalies/compute_anomalies.R new file mode 100644 index 0000000000000000000000000000000000000000..2ef36a34086a9bc270f09740a337438370a43950 --- /dev/null +++ b/modules/Anomalies/compute_anomalies.R @@ -0,0 +1,7 @@ +compute_anomalies <- function(recipe, data) { + warning(paste0("The function compute_anomalies() has been renamed to: ", + "'Anomalies()'. The name 'compute_anomalies()' will be ", + "deprecated in the next release. Please change your scripts ", + "accordingly.")) + return(Anomalies(recipe, data)) +} diff --git a/modules/Anomalies/tmp/CST_Anomaly.R b/modules/Anomalies/tmp/CST_Anomaly.R deleted file mode 100644 index f38e39b050f7c46be452ac6e6571542c465264b9..0000000000000000000000000000000000000000 --- a/modules/Anomalies/tmp/CST_Anomaly.R +++ /dev/null @@ -1,246 +0,0 @@ -#'Anomalies relative to a climatology along selected dimension with or without cross-validation -#' -#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -#'@author Pena Jesus, \email{jesus.pena@bsc.es} -#'@description This function computes the anomalies relative to a climatology -#'computed along the selected dimension (usually starting dates or forecast -#'time) allowing the application or not of crossvalidated climatologies. The -#'computation is carried out independently for experimental and observational -#'data products. -#' -#'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the seasonal forecast experiment data in the element -#' named \code{$data}. -#'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the observed data in the element named \code{$data}. -#'@param dim_anom A character string indicating the name of the dimension -#' along which the climatology will be computed. The default value is 'sdate'. 
-#'@param cross A logical value indicating whether cross-validation should be -#' applied or not. Default = FALSE. -#'@param memb_dim A character string indicating the name of the member -#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no -#' member dimension, set NULL. The default value is 'member'. -#'@param memb A logical value indicating whether to subtract the climatology -#' based on the individual members (TRUE) or the ensemble mean over all -#' members (FALSE) when calculating the anomalies. The default value is TRUE. -#'@param dat_dim A character vector indicating the name of the dataset and -#' member dimensions. If there is no dataset dimension, it can be NULL. -#' The default value is "c('dataset', 'member')". -#'@param filter_span A numeric value indicating the degree of smoothing. This -#' option is only available if parameter \code{cross} is set to FALSE. -#'@param ftime_dim A character string indicating the name of the temporal -#' dimension where the smoothing with 'filter_span' will be applied. It cannot -#' be NULL if 'filter_span' is provided. The default value is 'ftime'. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. It will be used only when -#' 'filter_span' is not NULL. -#' -#'@return A list with two S3 objects, 'exp' and 'obs', of the class -#''s2dv_cube', containing experimental and date-corresponding observational -#'anomalies, respectively. These 's2dv_cube's can be ingested by other functions -#'in CSTools. -#' -#'@examples -#'# Example 1: -#'mod <- 1 : (2 * 3 * 4 * 5 * 6 * 7) -#'dim(mod) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'obs <- 1 : (1 * 1 * 4 * 5 * 6 * 7) -#'dim(obs) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'lon <- seq(0, 30, 5) -#'lat <- seq(0, 25, 5) -#'exp <- list(data = mod, lat = lat, lon = lon) -#'obs <- list(data = obs, lat = lat, lon = lon) -#'attr(exp, 'class') <- 's2dv_cube' -#'attr(obs, 'class') <- 's2dv_cube' -#' -#'anom <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = TRUE) -#' -#'@seealso \code{\link[s2dv]{Ano_CrossValid}}, \code{\link[s2dv]{Clim}} and \code{\link{CST_Load}} -#' -#'@import multiApply -#'@importFrom s2dv InsertDim Clim Ano_CrossValid Reorder -#'@export -CST_Anomaly <- function(exp = NULL, obs = NULL, dim_anom = 'sdate', cross = FALSE, - memb_dim = 'member', memb = TRUE, dat_dim = c('dataset', 'member'), - filter_span = NULL, ftime_dim = 'ftime', ncores = NULL) { - # s2dv_cube - if (!inherits(exp, 's2dv_cube') & !is.null(exp) || - !inherits(obs, 's2dv_cube') & !is.null(obs)) { - stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - # exp and obs - if (is.null(exp$data) & is.null(obs$data)) { - stop("One of the parameter 'exp' or 'obs' cannot be NULL.") - } - case_exp = case_obs = 0 - if (is.null(exp)) { - exp <- obs - case_obs = 1 - warning("Parameter 'exp' is not provided and 'obs' will be used instead.") - } - if (is.null(obs)) { - obs <- exp - case_exp = 1 - warning("Parameter 'obs' is not provided and 'exp' will be used instead.") - } - if(any(is.null(names(dim(exp$data))))| any(nchar(names(dim(exp$data))) == 0) | - any(is.null(names(dim(obs$data))))| any(nchar(names(dim(obs$data))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names in element 'data'.") - } - if(!all(names(dim(exp$data)) %in% names(dim(obs$data))) | - !all(names(dim(obs$data)) %in% names(dim(exp$data)))) { - 
stop("Parameter 'exp' and 'obs' must have same dimension names in element 'data'.") - } - dim_exp <- dim(exp$data) - dim_obs <- dim(obs$data) - dimnames_data <- names(dim_exp) - # dim_anom - if (is.numeric(dim_anom) & length(dim_anom) == 1) { - warning("Parameter 'dim_anom' must be a character string and a numeric value will not be ", - "accepted in the next release. The corresponding dimension name is assigned.") - dim_anom <- dimnames_data[dim_anom] - } - if (!is.character(dim_anom)) { - stop("Parameter 'dim_anom' must be a character string.") - } - if (!dim_anom %in% names(dim_exp) | !dim_anom %in% names(dim_obs)) { - stop("Parameter 'dim_anom' is not found in 'exp' or in 'obs' dimension in element 'data'.") - } - if (dim_exp[dim_anom] <= 1 | dim_obs[dim_anom] <= 1) { - stop("The length of dimension 'dim_anom' in label 'data' of the parameter ", - "'exp' and 'obs' must be greater than 1.") - } - # cross - if (!is.logical(cross) | !is.logical(memb) ) { - stop("Parameters 'cross' and 'memb' must be logical.") - } - if (length(cross) > 1 | length(memb) > 1 ) { - cross <- cross[1] - warning("Parameter 'cross' has length greater than 1 and only the first element", - "will be used.") - } - # memb - if (length(memb) > 1) { - memb <- memb[1] - warning("Parameter 'memb' has length greater than 1 and only the first element", - "will be used.") - } - # memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim_exp) | !memb_dim %in% names(dim_obs)) { - stop("Parameter 'memb_dim' is not found in 'exp' or in 'obs' dimension.") - } - } - # dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim)) { - stop("Parameter 'dat_dim' must be a character vector.") - } - if (!all(dat_dim %in% names(dim_exp)) | !all(dat_dim %in% names(dim_obs))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension in element 'data'.", - " Set it as NULL if there is no dataset dimension.") - } - } - # filter_span - if (!is.null(filter_span)) { - if (!is.numeric(filter_span)) { - warning("Paramater 'filter_span' is not numeric and any filter", - " is being applied.") - filter_span <- NULL - } - # ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - # ftime_dim - if (!is.character(ftime_dim)) { - stop("Parameter 'ftime_dim' must be a character string.") - } - if (!ftime_dim %in% names(dim_exp) | !memb_dim %in% names(dim_obs)) { - stop("Parameter 'ftime_dim' is not found in 'exp' or in 'obs' dimension in element 'data'.") - } - } - - # Computating anomalies - #---------------------- - - # With cross-validation - if (cross) { - ano <- Ano_CrossValid(exp = exp$data, obs = obs$data, - time_dim = dim_anom, - memb_dim = memb_dim, - memb = memb, - dat_dim = dat_dim, - ncores = ncores) - - # Without cross-validation - } else { - tmp <- Clim(exp = exp$data, obs = obs$data, - time_dim = dim_anom, - memb_dim = memb_dim, - memb = memb, - dat_dim = dat_dim, - ncores = ncores) - if (!is.null(filter_span)) { - tmp$clim_exp <- Apply(tmp$clim_exp, - target_dims = c(ftime_dim), - output_dims = c(ftime_dim), - fun = .Loess, - loess_span = filter_span, - ncores = ncores)$output1 - tmp$clim_obs <- Apply(tmp$clim_obs, - target_dims = c(ftime_dim), - output_dims = c(ftime_dim), - fun = .Loess, - loess_span = filter_span, - ncores = ncores)$output1 - } - if (memb) { - 
clim_exp <- tmp$clim_exp - clim_obs <- tmp$clim_obs - } else { - clim_exp <- InsertDim(tmp$clim_exp, 1, dim_exp[memb_dim]) - clim_obs <- InsertDim(tmp$clim_obs, 1, dim_obs[memb_dim]) - } - clim_exp <- InsertDim(clim_exp, 1, dim_exp[dim_anom]) - clim_obs <- InsertDim(clim_obs, 1, dim_obs[dim_anom]) - ano <- NULL - - # Permuting back dimensions to original order - clim_exp <- Reorder(clim_exp, dimnames_data) - clim_obs <- Reorder(clim_obs, dimnames_data) - - ano$exp <- exp$data - clim_exp - ano$obs <- obs$data - clim_obs - } - - exp$data <- ano$exp - obs$data <- ano$obs - - # Outputs - # ~~~~~~~~~ - if (case_obs == 1) { - return(obs) - } - else if (case_exp == 1) { - return(exp) - } - else { - return(list(exp = exp, obs = obs)) - } -} - -.Loess <- function(clim, loess_span) { - data <- data.frame(ensmean = clim, day = 1 : length(clim)) - loess_filt <- loess(ensmean ~ day, data, span = loess_span) - output <- predict(loess_filt) - return(output) -} - diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 899b12913bfade3e1b3955a8236b22fe387e33f1..989d9b94519ee298dc6d76a3ecaf254f71927aaa 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -1,5 +1,7 @@ +## TODO: Remove in the next release +source("modules/Calibration/calibrate_datasets.R") -calibrate_datasets <- function(recipe, data) { +Calibration <- function(recipe, data) { # Function that calibrates the hindcast using the method stated in the # recipe. If the forecast is not null, it calibrates it as well. # @@ -11,9 +13,9 @@ calibrate_datasets <- function(recipe, data) { if (method == "raw") { warn(recipe$Run$logger, - paste("The Calibration module has been called, but the calibration", - "method in the recipe is 'raw'. The hcst and fcst will not be", - "calibrated.")) + paste("The Calibration module has been called, but the calibration", + "method in the recipe is 'raw'. 
The hcst and fcst will not be", + "calibrated.")) fcst_calibrated <- data$fcst hcst_calibrated <- data$hcst if (!is.null(data$hcst.full_val)) { @@ -42,12 +44,12 @@ calibrate_datasets <- function(recipe, data) { # Replicate observation array for the multi-model case ## TODO: Implement for obs.full_val if (mm) { - obs.mm <- obs$data + obs.mm <- data$obs$data for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { obs.mm <- abind(obs.mm, data$obs$data, - along=which(names(dim(data$obs$data)) == 'dat')) + along=which(names(dim(data$obs$data)) == 'dat')) } - names(dim(obs.mm)) <- names(dim(obs$data)) + names(dim(obs.mm)) <- names(data$obs$dims) data$obs$data <- obs.mm remove(obs.mm) } @@ -57,9 +59,9 @@ calibrate_datasets <- function(recipe, data) { CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") ## TODO: implement other calibration methods if (!(method %in% CST_CALIB_METHODS)) { - error(recipe$Run$logger, - paste("Calibration method in the recipe is not available for", - "monthly data.")) + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "monthly data.")) stop() } else { # Calibrate the hindcast @@ -74,24 +76,24 @@ calibrate_datasets <- function(recipe, data) { memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) - # In the case where anomalies have been computed, calibrate full values - if (!is.null(data$hcst.full_val)) { - hcst_full_calibrated <- CST_Calibration(data$hcst.full_val, - data$obs.full_val, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - } else { - hcst_full_calibrated <- NULL - } + # In the case where anomalies have been computed, calibrate full values + if (!is.null(data$hcst.full_val)) { + hcst_full_calibrated <- CST_Calibration(data$hcst.full_val, + data$obs.full_val, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } else { + hcst_full_calibrated <- NULL + } - # Calibrate the forecast + # Calibrate the forecast if (!is.null(data$fcst)) { fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, cal.method = method, @@ -108,69 +110,90 @@ calibrate_datasets <- function(recipe, data) { fcst_calibrated <- NULL } } - } else if (recipe$Analysis$Variables$freq == "daily_mean") { + } else if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { - error(recipe$Run$logger, - paste("Calibration method in the recipe is not available for", - "daily data. Only quantile mapping 'qmap is implemented.")) + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "daily data. 
Only quantile mapping 'qmap is implemented.")) stop() } # Calibrate the hindcast dim_order <- names(dim(data$hcst$data)) hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, - exp_cor = NULL, - sdate_dim = "syear", - memb_dim = "ensemble", - # window_dim = "time", - method = "QUANT", - ncores = ncores, - na.rm = na.rm, - wet.day = F) + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) # Restore dimension order hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) # In the case where anomalies have been computed, calibrate full values if (!is.null(data$hcst.full_val)) { - hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val, - data$obs.full_val, - exp_cor = NULL, - sdate_dim = "syear", - memb_dim = "ensemble", - method = "QUANT", - ncores = ncores, - na.rm = na.rm, - wet.day = F) + hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val, + data$obs.full_val, + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) } else { - hcst_full_calibrated <- NULL + hcst_full_calibrated <- NULL } if (!is.null(data$fcst)) { # Calibrate the forecast fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, - exp_cor = data$fcst, - sdate_dim = "syear", - memb_dim = "ensemble", - # window_dim = "time", - method = "QUANT", - ncores = ncores, - na.rm = na.rm, - wet.day = F) + exp_cor = data$fcst, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) # Restore dimension order - fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) + fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) } else { fcst_calibrated <- NULL } } } info(recipe$Run$logger, CALIB_MSG) + .log_memory_usage(recipe$Run$logger, "After calibration") + # Saving + if (recipe$Analysis$Workflow$Calibration$save != 'none') { + info(recipe$Run$logger, "##### START SAVING CALIBRATED DATA #####") + + ## TODO: What do we do with the full values? 
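+  # Illustrative sketch of the recipe field checked below (only the values
+  # handled by this block are listed; the rest of the recipe is omitted):
+  #   Workflow:
+  #     Calibration:
+  #       save: 'all'   # accepted: 'all', 'none', 'exp_only', 'fcst_only'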
+ recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Calibration/") + if ((recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { + save_forecast(recipe = recipe, data_cube = fcst_calibrated, type = 'fcst') + } + if (recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = hcst_calibrated, type = 'hcst') + } + if (recipe$Analysis$Workflow$Calibration$save == 'all') { + save_observations(recipe = recipe, data_cube = data$obs) + } + } + ## TODO: Sort out returns return_list <- list(hcst = hcst_calibrated, - obs = data$obs, - fcst = fcst_calibrated) + obs = data$obs, + fcst = fcst_calibrated) if (!is.null(hcst_full_calibrated)) { return_list <- append(return_list, - list(hcst.full_val = hcst_full_calibrated, - obs.full_val = data$obs.full_val)) + list(hcst.full_val = hcst_full_calibrated, + obs.full_val = data$obs.full_val)) } return(return_list) } diff --git a/modules/Calibration/calibrate_datasets.R b/modules/Calibration/calibrate_datasets.R new file mode 100644 index 0000000000000000000000000000000000000000..8264992f949788b40e6bd6d5bfd5471b1536f1e5 --- /dev/null +++ b/modules/Calibration/calibrate_datasets.R @@ -0,0 +1,7 @@ +calibrate_datasets <- function(recipe, data) { + warning(paste0("The function calibrate_datasets() has been renamed to: ", + "'Calibration()'. The name 'calibrate_datasets' will be ", + "deprecated in the next release. Please change your scripts ", + "accordingly.")) + return(Calibration(recipe, data)) +} diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R new file mode 100644 index 0000000000000000000000000000000000000000..59233dc2ac02749e91b597e832c721a412915f55 --- /dev/null +++ b/modules/Downscaling/Downscaling.R @@ -0,0 +1,295 @@ +### Downscaling Module +source('modules/Downscaling/tmp/Interpolation.R') +source('modules/Downscaling/tmp/Intbc.R') +source('modules/Downscaling/tmp/Intlr.R') +source('modules/Downscaling/tmp/Analogs.R') +source('modules/Downscaling/tmp/LogisticReg.R') +source('modules/Downscaling/tmp/Utils.R') + +Downscaling <- function(recipe, data) { + # Function that downscale the hindcast using the method stated in the + # recipe. For the moment, forecast must be null. + # + # data: list of s2dv_cube objects containing the hcst, obs and fcst. 
+ # recipe: object obtained when passing the .yml recipe file to read_yaml() + + type <- tolower(recipe$Analysis$Workflow$Downscaling$type) + + if (type == "none") { + hcst_downscal <- data$hcst + DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####" + + } else { + + if (!is.null(data$fcst)) { + warn(recipe$Run$logger, + "The downscaling will be only performed to the hindcast data") + data$fcst <- NULL + } + # Downscaling function params + int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) + bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) + lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) + log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) + target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) + nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) + size <- recipe$Analysis$Workflow$Downscaling$size + + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + + #TO DO: add the parametre loocv where it corresponds + if (is.null(recipe$Analysis$loocv)) { + loocv <- TRUE + } else { + loocv <- recipe$Analysis$loocv + } + + DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") + BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", "rpc-based", "qm") + LR_METHODS <- c("basic", "large-scale", "4nn") + LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + + if (!(type %in% DOWNSCAL_TYPES)) { + stop("Downscaling type in the recipe is not available. Accepted types ", + "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.") + } + + if (type == "int") { + if (is.null(int_method)) { + stop("Please provide one interpolation method in the recipe.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + # Ensure that observations are in the same grid as experiments + # Only needed for this method because the others already return the + # observations + latmin <- data$hcst$coords$latitude[1] + lonmin <- data$hcst$coords$longitude[1] + latmax <- data$hcst$coords$latitude[length(data$hcst$coords$latitude)] + lonmax <- data$hcst$coords$longitude[length(data$hcst$coords$longitude)] + hcst_downscal <- CST_Interpolation(data$hcst, + points = NULL, + method_remap = int_method, + target_grid = target_grid, + lat_dim = "latitude", + lon_dim = "longitude", + region = c(lonmin, lonmax, latmin, latmax), + method_point_interp = NULL) + + obs_downscal <- CST_Interpolation(data$obs, + points = NULL, + method_remap = int_method, + target_grid = target_grid, + lat_dim = "latitude", + lon_dim = "longitude", + region = c(lonmin, lonmax, latmin, latmax), + method_point_interp = NULL) + + hcst_downscal$obs <- obs_downscal$exp + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "intbc") { + if (length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (is.null(bc_method)) { + stop("Please provide one bias-correction method in the recipe. Accepted ", + "methods are 'quantile_mapping', 'bias', 'evmos', 'mse_min', 'crps_min' ", + "'rpc-based', 'qm'. ") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + if (!(bc_method %in% BC_METHODS)) { + stop(paste0(bc_method, " method in the recipe is not available. 
Accepted methods ", + "are 'quantile_mapping', 'bias', 'evmos', 'mse_min', 'crps_min' ", + "'rpc-based', 'qm'.")) + } + + hcst_downscal <- CST_Intbc(data$hcst, data$obs, + target_grid = target_grid, + bc_method = bc_method, + int_method = int_method, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + ncores = ncores) + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "intlr") { + if (length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (is.null(lr_method)) { + stop("Please provide one linear regression method in the recipe. Accepted ", + "methods are 'basic', 'large-scale', '4nn'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + if (!(lr_method %in% LR_METHODS)) { + stop(paste0(lr_method, " method in the recipe is not available. Accepted methods ", + "are 'basic', 'large-scale', '4nn'.")) + } + + # TO DO: add the possibility to have the element 'pred' in 'data' + if (lr_method == "large-scale") { + if (is.null(data$pred$data)) { + stop("Please provide the large scale predictors in the element 'data$pred$data'.") + } + } else { + data$pred$data <- NULL + } + + hcst_downscal <- CST_Intlr(data$hcst, data$obs, + lr_method = lr_method, + target_grid = target_grid, + points = NULL, + int_method = int_method, + method_point_interp = NULL, + predictors = data$pred$data, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + time_dim = "time", + member_dim = "ensemble", + large_scale_predictor_dimname = 'vars', + loocv = loocv, + region = NULL, + ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "analogs") { + + if (is.null(nanalogs)) { + warning("The number of analogs for searching has not been provided in the ", + "recipe. Setting it to 3.") + nanalogs <- 3 + } + + if (!is.null(size) & recipe$Analysis$Variables$freq == "monthly_mean") { + size <- NULL + warning("Size is set to NULL. ", + "It must be NULL for the monthly input data.") + } + + if (!is.null(size)) { + dum <- data$obs$data ## keep obs data before the window process to provide it later as the output + data$obs$data <- .generate_window(data$obs$data, + sdate_dim = 'syear', + time_dim = 'time', + loocv = TRUE, + size = size) + data$obs$data <- Apply(data$obs$data, + target_dims="window", + fun=function (x) x[!is.na(x)])$output1 + } + + hcst_downscal <- CST_Analogs(data$hcst, data$obs, + grid_exp = data$hcst$attrs$source_files[ + which(!is.na(data$hcst$attrs$source_files))[1]], + nanalogs = nanalogs, + fun_analog = "wmean", + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + time_dim = "time", + member_dim = "ensemble", + region = NULL, + return_indices = FALSE, + loocv_window = loocv, + ncores = ncores) + + if (!is.null(size)) { + hcst_downscal$obs$data <- Apply(dum, target_dims=c("time", "smonth"), + function (x) {x[1:(dim(data$hcst$data)["time"]), 2]}, + ncores = ncores, + output_dims = "time")$output1 ## 2nd month is the target month + } + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "logreg") { + + if (length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (is.null(log_reg_method)) { + stop("Please provide one logistic regression method in the recipe. 
Accepted ", + "methods are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + # Since we are forcing to create three categories, and applying cross-validation, + # we need at least six years of data for the logistic regression function to not + # crash + if (dim(data$hcst$data)[names(dim(data$hcst$data)) == "syear"] <= 5) { + stop("The number of start dates is insufficient for the logisitic regression method. ", + "Please provide six or more.") + } + + if (!(log_reg_method %in% LOG_REG_METHODS)) { + stop(paste0(log_reg_method, " method in the recipe is not available. Accepted methods ", + "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")) + } + + hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, + target_grid = target_grid, + int_method = int_method, + log_reg_method = log_reg_method, + probs_cat = c(1/3,2/3), + return_most_likely_cat = FALSE, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + loocv = loocv, + ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } + } + print(DOWNSCAL_MSG) + + # Saving + if (recipe$Analysis$Workflow$Downscaling$save != 'none') { + info(recipe$Run$logger, "##### START SAVING DOWNSCALED DATA #####") + } + ## TODO: What do we do with the full values? + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Downscaling/") + # if ((recipe$Analysis$Workflow$Downscaling$save %in% + # c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { + # save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + # } + if (recipe$Analysis$Workflow$Downscaling$save %in% c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = hcst_downscal$exp, type = 'hcst') + } + if (recipe$Analysis$Workflow$Downscaling$save == 'all') { + save_observations(recipe = recipe, data_cube = hcst_downscal$obs) + } + + return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) +} diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/Downscaling/tmp/Analogs.R new file mode 100644 index 0000000000000000000000000000000000000000..73b76ae56cd114536a07ec410e172b09e33b3067 --- /dev/null +++ b/modules/Downscaling/tmp/Analogs.R @@ -0,0 +1,681 @@ +#'@rdname CST_Analogs +#'@title Downscaling using Analogs based on coarse scale fields. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using Analogs. To compute +#'the analogs given a coarse-scale field, the function looks for days with similar conditions +#'in the historical observations. The analogs function determines the N best analogs based +#'on Euclidian distance, distance correlation, or Spearman's correlation metrics. To downscale +#'a local-scale variable, either the variable itself or another large-scale variable +#'can be utilized as the predictor. In the first scenario, analogs are examined between +#'the observation and model data of the same local-scale variable. In the latter scenario, +#'the function identifies the day in the observation data that closely resembles +#'the large-scale pattern of interest in the model. When it identifies the date of +#'the best analog, the function extracts the corresponding local-scale variable for that day +#'from the observation of the local scale variable. The used local-scale and large-scale +#'variables can be retrieved from independent regions. 
The input data for the first case must +#'include 'exp' and 'obs,' while in the second case, 'obs,' 'obsL,' and 'exp' are the +#'required input fields. Users can perform the downscaling process over the subregions +#'that can be identified through the 'region' argument, instead of focusing +#'on the entire area of the loaded data. +#' +#'The search of analogs must be done in the longest dataset posible, but might +#'require high-memory computational resources. This is important since it is +#'necessary to have a good representation of the possible states of the field in +#'the past, and therefore, to get better analogs. The function can also look for +#'analogs within a window of D days, but is the user who has to define that window. +#'Otherwise, the function will look for analogs in the whole dataset. This function +#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal +#'and decadal predictions) but can admit climate projections or reanalyses. It does +#'not have constrains of specific region or variables to downscale. +#'@param exp an 's2dv_cube' object with named dimensions containing the experimental field +#'on the coarse scale for the variable targeted for downscaling (in case obsL is not provided) +#'or for the large-scale variable used as the predictor (if obsL is provided). +#'The object must have, at least, the dimensions latitude, longitude, start date and time. +#'The object is expected to be already subset for the desired region. Data can be in one +#'or two integrated regions, e.g., crossing the Greenwich meridian. To get the correct +#'results in the latter case, the borders of the region should be specified in the parameter +#''region'. See parameter 'region'. +#'@param obs an 's2dv_cube' object with named dimensions containing the observational field +#'for the variable targeted for downscaling. The object must have, at least, the dimensions +#'latitude, longitude and start date. The object is expected to be already subset for the +#'desired region. +#'@param obsL an 's2dv_cube' object with named dimensions containing the observational +#'field of the large-scale variable. The object must have, at least, the dimensions latitude, +#'longitude and start date. The object is expected to be already subset for the desired region. +#'@param grid_exp a character vector with a path to an example file of the exp (if the +#'predictor is the local scale variable) or expL (if the predictor is a large scale variable) +#'data. It can be either a path to another NetCDF file which to read the target grid from +#'(a single grid must be defined in such file) or a character vector indicating the +#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. +#'@param nanalogs an integer indicating the number of analogs to be searched +#'@param fun_analog a function to be applied over the found analogs. Only these options +#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), +#'the function returns the found analogs. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''data' in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''data' in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the +#'element 'data' in exp and obs. Default set to "sdate". 
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param member_dim a character vector indicating the member dimension name in the element
+#''data' in exp and obs. Default set to "member".
+#'@param metric a character vector to select the analog specification method. Only these
+#'options are valid: "dist" (i.e., Euclidean distance), "dcor" (i.e., distance correlation)
+#'or "cor" (i.e., Spearman's correlation). The default metric is "dist".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param return_indices a logical vector indicating whether to return the indices of the
+#'analogs together with the downscaled fields. Default to FALSE.
+#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension
+#''window'. It indicates whether to apply leave-one-out cross-validation in the creation
+#'of the window. It is recommended to be set to TRUE. Default to TRUE.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'The default value is NULL.
+#'@return An 's2dv_cube' object. The element 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL
+#'(default), the output array in 'data' also contains the dimension 'analog' with the best
+#'analog days. 
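+#'@details
+#'A minimal, illustrative sketch of the second use case described above (downscaling a
+#'local-scale variable through a large-scale predictor). The objects 'exp', 'obs' and
+#''obsL' are placeholders assumed to be 's2dv_cube' objects already subset to the
+#'desired regions:
+#'\preformatted{
+#'  # 'exp': large-scale predictor from the model; 'obsL': its observational counterpart
+#'  # 'obs': high-resolution observations of the local-scale target variable
+#'  down <- CST_Analogs(exp = exp, obs = obs, obsL = obsL, grid_exp = 'r360x180',
+#'                      nanalogs = 5, fun_analog = "wmean", metric = "dist")
+#'  # down$exp holds the downscaled field, down$obs the observational reference
+#'}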
+#'@examples +#'exp <- rnorm(15000) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(27000) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30) +#'obs_lons <- seq(0,6, 6/14) +#'obs_lats <- seq(0,6, 6/11) +#'exp <- s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons)) +#'obs <- s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons)) +#'downscaled_field <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180') +#'@export +CST_Analogs <- function(exp, obs, obsL = NULL, grid_exp, nanalogs = 3, + fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + time_dim = "time", member_dim = "member", metric = "dist", region = NULL, + return_indices = FALSE, loocv_window = TRUE, ncores = NULL) { + + # input exp and obs must be s2dv_cube objects + if (!inherits(obs,'s2dv_cube')) { + stop("Parameter 'obs' must be of the class 's2dv_cube'") + } + + if (!is.null(obsL)) { + # input obs must be s2dv_cube objects + if (!inherits(obsL,'s2dv_cube')) { + stop("Parameter 'obsL' must be of the class 's2dv_cube'") + } + } + # input exp and obs must be s2dv_cube objects + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + res <- Analogs(exp = exp$data, obs = obs$data, obsL = obsL$data, + exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], + obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], + obsL_lats = obsL$coords[[lat_dim]], obsL_lons = obsL$coords[[lon_dim]], + grid_exp = grid_exp, nanalogs = nanalogs, fun_analog = fun_analog, + lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, + time_dim = time_dim, member_dim = member_dim, metric = metric, + region = region, return_indices = return_indices, + loocv_window = loocv_window, ncores = ncores) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat + + obs$data <- res$obs + obs$dims <- dim(obs$data) + obs$coords[[lon_dim]] <- res$lon + obs$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp, obs = obs) + return(res_s2dv) +} + +#'@rdname Analogs +#'@title Downscaling using Analogs based on large scale fields. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int} +#' +#'@description This function performs a downscaling using Analogs. To compute +#'the analogs given a coarse-scale field, the function looks for days with similar conditions +#'in the historical observations. The analogs function determines the N best analogs based +#'on RMSE, distance correlation, or Spearman's correlation metrics. To downscale +#'a local-scale variable, either the variable itself or another large-scale variable +#'can be utilized as the predictor. In the first scenario, analogs are examined between +#'the observation and model data of the same local-scale variable. In the latter scenario, +#'the function identifies the day in the observation data that closely resembles +#'the large-scale pattern of interest in the model. When it identifies the date of +#'the best analog, the function extracts the corresponding local-scale variable for that day +#'from the observation of the local scale variable. The used local-scale and large-scale +#'variables can be retrieved from independent regions. 
The input data for the first case must +#'include 'exp' and 'obs,' while in the second case, 'obs,' 'obsL,' and 'expL' are the +#'required input fields. Users can perform the downscaling process over the subregions +#'that can be identified through the 'region' and 'regionL' arguments, instead of focusing +#'on the entire area of the loaded data. +#' +#'The search of analogs must be done in the longest dataset posible, but might +#'require high-memory computational resources. This is important since it is +#'necessary to have a good representation of the possible states of the field in +#'the past, and therefore, to get better analogs. The function can also look for +#'analogs within a window of D days, but is the user who has to define that window. +#'Otherwise, the function will look for analogs in the whole dataset. This function +#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal +#'and decadal predictions) but can admit climate projections or reanalyses. It does +#'not have constrains of specific region or variables to downscale. +#'@param exp an array with named dimensions containing the experimental field +#'on the coarse scale for the variable targeted for downscaling (in case obsL is not provided) +#'or for the large-scale variable used as the predictor (if obsL is provided). +#'The object must have, at least, the dimensions latitude, longitude, start date and time. +#'The object is expected to be already subset for the desired region. Data can be in one +#'or two integrated regions, e.g., crossing the Greenwich meridian. To get the correct +#'results in the latter case, the borders of the region should be specified in the parameter +#''region'. See parameter 'region'. +#'@param obs an array with named dimensions containing the observational field for the variable +#'targeted for downscaling. The object must have, at least, the dimensions latitude, longitude, +#'start date and time. The object is expected to be already subset for the desired region. +#'Optionally, 'obs' can have the dimension 'window', containing the sampled fields into which +#'the function will look for the analogs. See function 'generate_window()'. Otherwise, +#'the function will look for analogs using all the possible fields contained in obs. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. +#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obsL an 's2dv_cube' object with named dimensions containing the observational +#'field of the large-scale variable.The object must have, at least, the dimensions latitude, +#'longitude and start date. The object is expected to be already subset for the desired region. +#'Optionally, 'obsL' can have the dimension 'window', containing the sampled fields into which +#'the function will look for the analogs. See function 'generate_window()'. Otherwise, +#'the function will look for analogs using all the possible fields contained in obs. +#'@param obsL_lats a numeric vector containing the latitude values in 'obsL'. Latitudes must +#'range from -90 to 90. +#'@param obsL_lons a numeric vector containing the longitude values in 'obsL'. 
Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param grid_exp a character vector with a path to an example file of the exp (if the +#'predictor is the local scale variable) or expL (if the predictor is a large scale variable) data. +#'It can be either a path to another NetCDF file which to read the target grid from +#'(a single grid must be defined in such file) or a character vector indicating the +#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. +#'@param nanalogs an integer indicating the number of analogs to be searched. +#'@param fun_analog a function to be applied over the found analogs. Only these options +#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), +#'the function returns the found analogs. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''data' in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''data' in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the +#'element 'data' in exp and obs. Default set to "sdate". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param metric a character vector to select the analog specification method. Only these +#'options are valid: "dist" (i.e., Euclidian distance), "dcor" (i.e., distance correlation) +#'or "cor" (i.e., Spearman's .correlation). The default metric is "dist". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param return_indices a logical vector indicating whether to return the indices of the +#'analogs together with the downscaled fields. The indices refer to the position of the +#'element in the vector time * start_date. If 'obs' contain the dimension 'window', it will +#'refer to the position of the element in the dimension 'window'. Default to FALSE. +#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension +#''window'. It indicates whether to apply leave-one-out cross-validation in the creation +#'of the window. It is recommended to be set to TRUE. Default to TRUE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. +#'@import multiApply +#'@import CSTools +#'@importFrom s2dv InsertDim CDORemap +#'@importFrom energy dcor +#' +#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@return A list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL +#'(default), the output array in 'data' also contains the dimension 'analog' with the best +#'analog days. 
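+#'@details
+#'A minimal sketch (array inputs with illustrative dimension sizes, all object names are
+#'placeholders) showing how the indices of the selected analog days can be requested
+#'alongside the downscaled field:
+#'\preformatted{
+#'  res <- Analogs(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons,
+#'                 obs_lats = obs_lats, obs_lons = obs_lons, grid_exp = 'r360x180',
+#'                 nanalogs = 5, metric = "cor", return_indices = TRUE)
+#'  # res$data: downscaled field; res$ind: positions of the best analog days in the
+#'  # observational record (see 'return_indices' above)
+#'}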
+#'@examples +#'exp <- rnorm(15000) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(27000) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30) +#'obs_lons <- seq(0,6, 6/14) +#'obs_lats <- seq(0,6, 6/11) +#'downscaled_field <- Analogs(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, +#'obs_lats = obs_lats, obs_lons = obs_lons, grid_exp = 'r360x180') +#'@export +Analogs <- function(exp, obs, exp_lats = NULL, exp_lons = NULL, obs_lats, obs_lons, + grid_exp, obsL = NULL, obsL_lats = NULL, obsL_lons = NULL, nanalogs = 3, + fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + time_dim = "time", member_dim = "member", metric = "dist", region = NULL, + return_indices = FALSE, loocv_window = TRUE, ncores = NULL) { + #----------------------------------- + # Checkings + #----------------------------------- + if (!inherits(grid_exp, 'character')) { + stop("Parameter 'grid_exp' must be of class 'character'. It can be either a path ", + "to another NetCDF file which to read the target grid from (a single grid must be ", + "defined in such file) or a character vector indicating the coarse grid to ", + "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.") + } + + if (!inherits(nanalogs, 'numeric')) { + stop("Parameter 'nanalogs' must be of the class 'numeric'") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(time_dim, 'character')) { + stop("Parameter 'time_dim' must be of the class 'character'") + } + + # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(time_dim, names(dim(exp)))) | is.na(match(time_dim, names(dim(obs))))) { + stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'time_dim'") + } + + # Ensure we have enough data to interpolate from high-res to coarse grid + #if ((obs_lats[1] > exp_lats[1]) | (obs_lats[length(obs_lats)] < exp_lats[length(exp_lats)]) | + # (obs_lons[1] > exp_lons[1]) | (obs_lons[length(obs_lons)] < exp_lons[length(exp_lons)])) { + + # stop("There are not enough data in 'obs'. 
Please to add more latitudes or ", + # "longitudes.") + #} + + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + restore_ens <- TRUE + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + if (!is.null(obsL) ) { + + # the code is not yet prepared to handle members in the observations + if (member_dim %in% names(dim(obsL))) { + if (identical(as.numeric(dim(obsL)[member_dim]), 1)) { + obsL <- ClimProjDiags::Subset(x = obsL, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obsL' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + if (is.null(obsL_lats) | is.null(obsL_lons)) { + stop("Missing latitudes and/or longitudes for the provided training observations. Please ", + "provide them with the parametres 'obsL_lats' and 'obsL_lons'") + } + + if (is.na(match(lon_dim, names(dim(obsL))))) { + stop("Missing longitude dimension in 'obsL', or does not match the parameter 'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(obsL))))) { + stop("Missing latitude dimension in 'obsL', or does not match the parameter 'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(obsL))))) { + stop("Missing start date dimension in 'obsL', or does not match the parameter 'sdate_dim'") + } + + if (is.na(match(time_dim, names(dim(obsL))))) { + stop("Missing time dimension in 'obsL', or does not match the parameter 'time_dim'") + } + + } + + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Select a function to apply to the analogs selected for a given observation + if (!is.null(fun_analog)) { + stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median")) + } + + # metric method to be used to specify the analogs + stopifnot(metric %in% c("cor", "dcor", "dist")) + + + + if (!is.null(obsL)) { + obs_train <- obsL + obs_train_lats <- obsL_lats + obs_train_lons <- obsL_lons + } else { + obs_train <- obs + obs_train_lats <- obs_lats + obs_train_lons <- obs_lons + } + + # Correct indices later if cross-validation + loocv_correction <- FALSE + if ( !("window" %in% names(dim(obs_train))) & loocv_window) { + loocv_correction <- TRUE + } + + # crop downscaling region, if the argument region is provided. + if (!is.null(region) & is.null(obsL)) { + # if a border is equally distant from two different grids, the map will be cropped from the grid having smaller coordinate + + a <- which.min(abs((region[1]-obs_lons))) + b <- which.min(abs((region[2]-obs_lons))) + c <- which.min(abs((region[3]-obs_lats))) + d <- which.min(abs((region[4]-obs_lats))) + obs <- ClimProjDiags::Subset(x = obs, along = list(lon_dim,lat_dim), + indices = list(a:b,c:d), drop = 'selected') + } + + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
", + "Assuming the four borders of the downscaling region are defined by the ", + "first and last elements of the parameters 'exp_lats' and 'exp_lons'.") + region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], + exp_lats[length(exp_lats)]) + } + + obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons, + target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = "conservative", region = region, + ncores = ncores) + + # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to + # the same grid to force the matching + if (!.check_coords(lat1 = as.numeric(obs_interpolated$lat), + lat2 = exp_lats, + lon1 = as.numeric(obs_interpolated$lon), + lon2 = exp_lons)) { + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, + target_grid = grid_exp, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = "conservative", + region = region, ncores = ncores)$data + } else { + exp_interpolated <- exp + } + + # Create window if user does not have it in the training observations + if ( !("window" %in% names(dim(obs_interpolated$data))) ) { + obs_train_interpolated <- .generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim, + time_dim = time_dim, loocv = loocv_window, + ncores = ncores) + if (!is.null(obsL)) { + if ( ("window" %in% names(dim(obs))) ) { + stop("Either both obs and obsL should include 'window' dimension or none.") + } + } + obs_hres <- .generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, + loocv = loocv_window, ncores = ncores) + + } else { + obs_train_interpolated <- obs_interpolated$data + dim(obs_train_interpolated) <- dim(ClimProjDiags::Subset(x = obs_train_interpolated, + along = time_dim, indices = 1, drop = 'selected')) + + if (!is.null(obsL)) { + if ( !("window" %in% names(dim(obs))) ) { + stop("Either both obs and obsL should include 'window' dimension or none.") + } + } + obs_hres <- obs + dim(obs_hres) <- dim(ClimProjDiags::Subset(x = obs_hres, + along = time_dim, indices = 1, drop = 'selected')) + + } + + #----------------------------------- + # Reshape train and test + #----------------------------------- + + RES <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), + target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), + c("window", lat_dim, lon_dim)), + fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, + k = nanalogs, metric = metric, fun_analog = fun_analog), + ncores = ncores) ## output1 -> data, output2 -> index, output3 -> metric + + res.data <- RES$output1 + + # Return the indices of the best analogs + if (return_indices) { + res.ind <- RES$output2 + + # If cross-validation has been applied, correct the indices + if (loocv_correction) { + nsdates <- dim(res.ind)[names(dim(res.ind)) == sdate_dim] + ntimes <- dim(res.ind)[names(dim(res.ind)) == time_dim] + res.ind <- Apply(res.ind, target_dims = c("index", sdate_dim), function(x) + sapply(1:nsdates, function(s) seq(ntimes * nsdates)[ - (ntimes * (s - 1) + 1:ntimes)][x[, s]]), + output_dims = c("index", sdate_dim), ncores = ncores)$output1 + } + + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) + } + + res <- list(data = res.data, ind = res.ind, obs = obs, lon = obs_lons, lat = obs_lats) + } else { + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs 
<- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) + } + res <- list(data = res.data, obs = obs, lon = obs_lons, lat = obs_lats) + } + + return(res) +} + +# For each element in test, find the indices of the k nearest neigbhors in train +.analogs <- function(train, test, obs_hres, k, fun_analog, metric = NULL, return_indices = FALSE) { + + # train and obs_rs dim: 3 dimensions window, lat and lon (in this order) + # test dim: 2 dimensions lat and lon (in this order) + # Number of lats/lons of the high-resolution data + space_dims_hres <- dim(obs_hres)[c(2,3)] + + # Reformat train and test as an array with (time, points) + train <- apply(train, 1, as.vector); names(dim(train))[1] <- "space" + test <- as.vector(test) + obs_hres <- apply(obs_hres, 1, as.vector); names(dim(obs_hres))[1] <- "space" + + # Identify and remove NA's + dum<-which(!apply(train,2,function (x) all(is.na(x))))[1] ## the column in which NA in space will be investigated. it shouldn't be all-NA time-step + idx_na_tr <- is.na(train[ , dum]) # NA in space + idy_na_tr <- is.na(train[1, ]) # NA in time + idx_na_te <- is.na(test) + idx_na <- idx_na_tr | idx_na_te + tr_wo_na <- t(train[!idx_na , !idy_na_tr ]) + te_wo_na <- test[!idx_na] + te_wo_na <- InsertDim(data = te_wo_na, posdim = 1, lendim = 1, name = "time") + + if (all(is.na(test))) { + res <- array(NA, space_dims_hres) + res_ind <- array(NA, k) + names(dim(res_ind)) <- c("index") + res_metric <- array(NA, k) + names(dim(res_metric)) <- c("metric") + } else { + if (metric == "dist") { + dist_all <- sqrt(rowSums((sweep(tr_wo_na, 2, te_wo_na[1,]))^2)) # euc dist + best_vals <- head(sort(dist_all), k) + idx <- match(best_vals, dist_all) + } else if (metric == "cor") { + cor_all <- apply(tr_wo_na, 1, function (x) cor(x,te_wo_na[1, ], method = "spearman")) # cor + best_vals <- head(sort(cor_all, decreasing = TRUE), k) + idx <- match(best_vals, cor_all) + } else if (metric == "dcor") { +# require(energy) + dcor_all <- apply(tr_wo_na, 1, function (x) .dcor(x,te_wo_na[1, ])) # dcor + best_vals <- head(sort(dcor_all, decreasing = TRUE), k,) + idx <- match(best_vals, dcor_all) + } + if (isTRUE(any(idy_na_tr))) { + dum <-(1:length(idy_na_tr))[!idy_na_tr] + idx <- dum[idx] + } + res_ind <- array(idx, k) + names(dim(res_ind)) <- c("index") + res_metric <- array(best_vals, c(k)) + names(dim(res_metric)) <- c("metric") + res <- obs_hres[ , idx] + dim(res) <- c(space_dims_hres, analogs = k) + + if (!is.null(fun_analog)) { + if (fun_analog == "wmean") { + if (metric == "dist") { + weight <- 1 / best_vals + } else { + weight <- best_vals + } + res <- apply(res, c(1,2), function(x) weighted.mean(x, weight)) + } else if (fun_analog == "min") { + res <- res[, , which.min(best_vals)] + } else if (fun_analog == "max") { + res <- res[, , which.max(best_vals)] + } else { + res <- apply(res, c(1,2), fun_analog) + } + } + } + return(list(res, res_ind, res_metric)) +} + +# Add the dimension window to an array that contains, at least, the start date and time +# dimensions +# object has at least dimensions sdate and time +.generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL, ncores = NULL) { + + rsdates <- 1:dim(obj)[names(dim(obj)) == sdate_dim] + ntimes <- dim(obj)[names(dim(obj)) == time_dim] + rtimes <- 1:dim(obj)[names(dim(obj)) == time_dim] + + # Generate a window containing all the data + if (is.null(size)) { + + # Generate window removing one start date each time (leave-one-out cross-validation) + if (loocv) { + obj_window <- Apply(obj, target_dims = 
c(time_dim, sdate_dim), + fun = function(x) sapply(rsdates, function(s) as.vector(x[ rtimes, -s])), + output_dims = c('window', sdate_dim), ncores = ncores)$output1 + # Generate window without cross-validation + } else { + obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), + fun = as.vector, output_dims = 'window', ncores = ncores)$output1 + } + } + # Generate window of the size specified by the user. Only applied with CV + else { + # For an accurate generation of the window, it is mandatory to add some "extra" data. + if (!("smonth" %in% names(dim(obj)))) { + stop("Missing 'smonth' dimension") + } + + # Concatenate data from previous, target and posterior months + obj_new <- Apply(obj, target_dims = list(c("time", "smonth")), + fun = as.vector, output_dims = "time", ncores = ncores )$output1 + + if (loocv) { + obj_window <- Apply(list(obj_new, rsdates), target_dims = list(c(time_dim, sdate_dim), NULL), + fun = function(x, s) as.vector(x[(ntimes + min(rtimes) - size):(ntimes + max(rtimes) + size), -s]), + output_dims = 'window', ncores = ncores)$output1 + names(dim(obj_window))[(length(names(dim(obj_window))) - 1):length(names(dim(obj_window)))] <- c(time_dim, sdate_dim) + } else { + obj_window <- Apply(obj_new, target_dims = c(time_dim, sdate_dim), + fun = function(x) sapply(rtimes, function(t) as.vector(x[(ntimes + min(rtimes) - size):(ntimes + max(rtimes) + size), ])), + output_dims = c('window', time_dim), ncores = ncores)$output1 + + } + } + + return(obj_window) +} + +# Distance correlation function +.dcor <- function(x, y) { + n <- length(x) + + # Calculate Euclidean distances for x + dist_x <- as.matrix(dist(matrix(x))) + # Calculate Euclidean distances for y + dist_y <- as.matrix(dist(matrix(y))) + + # Centering matrices + H <- diag(n) - 1/n + + # Centered distance matrices + dist_centered_x <- H %*% dist_x %*% H + dist_centered_y <- H %*% dist_y %*% H + + # Calculate the product of mean-centered distance matrices + B <- dist_centered_x %*% t(dist_centered_y) + C <- dist_centered_x %*% t(dist_centered_x) + D <- dist_centered_y %*% t(dist_centered_y) + + # Calculate the distance covariance + dcov_xy <- sqrt(sum(diag(B))) + + # Calculate the variances + cov_xx <- sqrt(sum(diag(C))) + cov_yy <- sqrt(sum(diag(D))) + + # Calculate the distance correlation + dcor_xy <- dcov_xy / sqrt(cov_xx * cov_yy) + + return(dcor_xy) +} + diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R new file mode 100644 index 0000000000000000000000000000000000000000..4b9527323aa58ecd67daf71870c5755e7b31492b --- /dev/null +++ b/modules/Downscaling/tmp/Intbc.R @@ -0,0 +1,339 @@ +#'@rdname CST_Intbc +#'@title Downscaling using interpolation and bias adjustment. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a later bias +#'adjustment. It is recommended that the observations are passed already in the target grid. +#'Otherwise, the function will also perform an interpolation of the observed field into the +#'target grid. The coarse scale and observation data can be either global or regional. In the +#'latter case, the region is defined by the user. In principle, the coarse and observation data +#'are intended to be of the same variable, although different variables can also be admitted. +#' +#'@param exp an 's2dv object' containing the experimental field on the +#'coarse scale for which the downscaling is aimed. 
The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an 's2dv object' containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param bc_method a character vector indicating the bias adjustment method to be applied after +#'the interpolation. Accepted methods are 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', +#''mse_min', 'crps_min', 'rpc-based'. The abbreviations 'dbc','qm' can also be used. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. +#'@return An 's2dv' object. The element 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. 
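+#'@details
+#'A minimal usage sketch (object names are placeholders) combining a bilinear
+#'interpolation with quantile mapping requested through the 'qm' abbreviation:
+#'\preformatted{
+#'  down <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640',
+#'                    int_method = 'bil', bc_method = 'qm')
+#'  # down$exp holds the interpolated and bias-adjusted field, down$obs the reference
+#'  # used for the adjustment
+#'}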
+#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) +#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') +#'@export + +CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, + method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", + sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = NULL, ...) +{ + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + if (!inherits(obs,'s2dv_cube')) { + stop("Parameter 'obs' must be of the class 's2dv_cube'") + } + + res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], + obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], target_grid = target_grid, + int_method = int_method, bc_method = bc_method, points = points, + source_file_exp = exp$coords$source_files[1], source_file_obs = obs$coords$source_files[1], + method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, + sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores, ...) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat + + obs$data <- res$obs + obs$dims <- dim(obs$data) + obs$coords[[lon_dim]] <- res$lon + obs$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp, obs = obs) + return(res_s2dv) + +} + +#'@rdname Intbc +#'@title Downscaling using interpolation and bias adjustment. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a later bias +#'adjustment. It is recommended that the observations are passed already in the target grid. +#'Otherwise, the function will also perform an interpolation of the observed field into the +#'target grid. The coarse scale and observation data can be either global or regional. In the +#'latter case, the region is defined by the user. In principle, the coarse and observation data +#'are intended to be of the same variable, although different variables can also be admitted. +#' +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. 
+#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param bc_method a character vector indicating the bias adjustment method to be applied after +#'the interpolation. Accepted methods are 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', +#''mse_min', 'crps_min', 'rpc-based'. The abbreviations 'dbc','qm' can also be used. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param source_file_exp a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param source_file_obs a character vector with a path to an example file of the obs data. +#'Only needed if the downscaling is to a point location. +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. +#'@import CSTools +#' +#'@seealso \code{\link[CSTools]{BiasCorrection}} +#'@seealso \code{\link[CSTools]{Calibration}} +#'@seealso \code{\link[CSTools]{QuantileMapping}} +#' +#'@return An list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. 
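+#'@details
+#'A minimal sketch of downscaling to point locations; the coordinates and file paths are
+#'illustrative placeholders and must be replaced by real station locations and example
+#'NetCDF files:
+#'\preformatted{
+#'  points <- list(lat = c(40.4, 41.4), lon = c(-3.7, 2.2))
+#'  res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons,
+#'               obs_lats = obs_lats, obs_lons = obs_lons, target_grid = 'r1280x640',
+#'               bc_method = 'evmos', points = points, method_point_interp = 'bilinear',
+#'               source_file_exp = 'exp_example.nc', source_file_obs = 'obs_example.nc')
+#'}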
+#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, +#'obs_lons = obs_lons, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') +#'@export +Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL, + points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + time_dim = "time", member_dim = "member", source_file_exp = NULL, source_file_obs = NULL, + region = NULL, ncores = NULL, ...) { + + if (!inherits(bc_method, 'character')) { + stop("Parameter 'bc_method' must be of the class 'character'") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(member_dim, 'character')) { + stop("Parameter 'member_dim' must be of the class 'character'") + } + + if (is.na(match(lon_dim, names(dim(exp))))) { + stop("Missing longitude dimension in 'exp', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp))))) { + stop("Missing latitude dimension in 'exp', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(member_dim, names(dim(exp))))) { + stop("Missing member dimension in 'exp', or does not match the parameter 'member_dim'") + } + + if (!(bc_method %in% c('qm', 'dbc', 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', 'mse_min', + 'crps_min', 'rpc-based'))) { + stop("Parameter 'bc_method' must be a character vector indicating the bias adjustment method. ", + "Accepted methods are 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', 'mse_min', ", + "'crps_min', 'rpc-based'. The abbreviations 'dbc','qm' can also be used.") + } + + # When observations are pointwise + if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { + point_obs <- T + # dimension aux in obs is needed + if (is.na(match("aux", names(dim(obs))))) { + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = "aux") + } + } else { + point_obs <- F + } + + if (!is.null(points) & (is.null(source_file_exp))) { + stop("No source file found. Source file must be provided in the parameter 'source_file_exp'.") + } + + if (!is.null(points) & is.null(method_point_interp)) { + stop("Please provide the interpolation method to interpolate gridded data to point locations ", + "through the parameter 'method_point_interp'.") + } + + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
Assuming the four borders ", + "of the downscaling region are defined by the first and last elements of the parametres ", + "'obs_lats' and 'obs_lons'.") + region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file_exp, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + region = region, ncores = ncores) + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file_obs, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + region = region, ncores = ncores) + obs_ref <- obs_interpolated$data + } else { + obs_ref <- obs + } + + # Some functions only accept the dimension names "member" and "sdate" for exp and + # "sdate" for obs + #if (member_dim != 'member') { + # names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), + # which(names(dim(exp_interpolated$data)) == member_dim), 'member') + #} + + #if (sdate_dim != 'sdate') { + # names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), + # which(names(dim(exp_interpolated$data)) == sdate_dim), 'sdate') + # names(dim(obs_ref)) <- replace(names(dim(obs_ref)), + # which(names(dim(obs_ref)) == sdate_dim), 'sdate') + #} + + if (bc_method == 'qm' | bc_method == 'quantile_mapping') { + + res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE, + memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, ...) + } + else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { + # the temporal dimension must be only one dimension called "time" + if (all(c(time_dim, sdate_dim) %in% names(dim(exp_interpolated$data)))) { + exp_interpolated$data <- Apply(exp_interpolated$data, target_dims = c(time_dim, sdate_dim), + fun = as.vector, output_dims = "time", ncores = ncores)$output1 + } + if (all(c(time_dim, sdate_dim) %in% names(dim(obs_ref)))) { + obs_ref <- Apply(obs_ref, target_dims = c(time_dim, sdate_dim), fun = as.vector, + output_dims = "time", ncores = ncores)$output1 + } + # REMEMBER to add na.rm = T in colMeans in .proxiesattractor + res <- DynBiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ncores = ncores, ...) + } else { + if (dim(exp_interpolated$data)[member_dim] == 1) { + stop('Calibration must not be used with only one ensemble member.') + } + if (dim(obs_ref)[sdate_dim] == 1) { + warning('Simple Bias Correction should not be used with only one observation. 
Returning NA.') + } + res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, + sdate_dim = sdate_dim, ncores = ncores, cal.method = bc_method) + } + + # Return a list of three elements + res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat) + + return(res) +} diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R new file mode 100644 index 0000000000000000000000000000000000000000..5d5f70b8d074f50c4ef391c78bb32f51c75ed191 --- /dev/null +++ b/modules/Downscaling/tmp/Interpolation.R @@ -0,0 +1,767 @@ +#'@rdname CST_Interpolation +#'@title Regrid or interpolate gridded data to a point location. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function interpolates gridded model data from one grid to +#'another (regrid) or interpolates gridded model data to a set of point locations. +#'The gridded model data can be either global or regional. In the latter case, the +#'region is defined by the user. It does not have constrains of specific region or +#'variables to downscale. +#'@param exp s2dv object containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude and longitude. The field data is expected to be already subset +#'for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_remap a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''exp' and/or 'points'. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''exp' and/or 'points'. Default set to "lon". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. 
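+#'@details
+#'When 'points' is provided, the function interpolates to those locations instead of
+#'regridding to a target grid. A minimal sketch, assuming 'exp' is an 's2dv_cube' whose
+#''exp$coords$source_files' points to a readable NetCDF file (required for point
+#'interpolation) and using illustrative coordinates:
+#'\preformatted{
+#'  points <- list(lat = c(40.4, 41.4), lon = c(-3.7, 2.2))
+#'  res <- CST_Interpolation(exp = exp, points = points,
+#'                           method_point_interp = 'bilinear')
+#'}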
+#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@return An s2dv object containing the dowscaled field. +#' +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) +#'lons <- 1:5 +#'lats <- 1:4 +#'exp <- s2dv_cube(data = exp, lat = lats, lon = lons) +#'res <- CST_Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640') +#'@export +CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL, + lat_dim = "lat", lon_dim = "lon", region = NULL, + method_point_interp = NULL, ncores = NULL) +{ + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + #if (is.null(exp[[lat_dim]]) | is.null(exp[[lon_dim]])) { + # stop("The name of the latitude/longitude elements in 'exp' must match the parametres ", + # "'lat_dim' and 'lon_dim'") + #} + + if ((length(which(names(dim(exp$data)) == lat_dim)) == 0) | (length(which(names(dim(exp$data)) == lon_dim)) == 0)) { + stop("The name of the latitude/longitude dimensions in 'exp$data' must match the parametres 'lat_dim' and 'lon_dim'") + } + + res <- Interpolation(exp = exp$data, lats = exp$coords[[lat_dim]], lons = exp$coords[[lon_dim]], + source_file = exp$coords$source_files[1], points = points, + method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim, + lon_dim = lon_dim, region = region, method_point_interp = method_point_interp, ncores = ncores) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp, obs = NULL) + return(res_s2dv) +} + +#'@rdname Interpolation +#'@title Regrid or interpolate gridded data to a point location. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int} +#' +#'@description This function interpolates gridded model data from one grid to +#'another (regrid) or interpolates gridded model data to a set of point locations. +#'The gridded model data can be either global or regional. In the latter case, the +#'region is defined by the user. It does not have constrains of specific region or +#'variables to downscale. +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude and longitude. The object is expected to be already subset +#'for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param lats a numeric vector containing the latitude values. Latitudes must range from +#'-90 to 90. +#'@param lons a numeric vector containing the longitude values. Longitudes can range from +#'-180 to 180 or from 0 to 360. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param source_file a character vector with a path to an example file of the exp data. 
+#'Only needed if the downscaling is to a point location. +#'@param method_remap a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''exp' and/or 'points'. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''exp' and/or 'points'. Default set to "lon". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling +#'is to a point location. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. +#'@import multiApply +#'@import plyr +#'@importFrom s2dv CDORemap +#' +#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@return An list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. +#' +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) +#'lons <- 1:5 +#'lats <- 1:4 +#'res <- Interpolation(exp = exp, lats = lats, lons = lons, method_remap = 'conservative', target_grid = 'r1280x640') +#'@export +Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, method_remap = NULL, + target_grid = NULL, lat_dim = "lat", lon_dim = "lon", region = NULL, + method_point_interp = NULL, ncores = NULL) +{ + if (!is.null(method_remap)) { + if (!inherits(method_remap, 'character')) { + stop("Parameter 'method_remap' must be of the class 'character'") + } + } + + if (!is.null(method_point_interp)) { + if (!inherits(method_point_interp, 'character')) { + stop("Parameter 'method_point_interp' must be of the class 'character'") + } + } + + if (is.na(match(lon_dim, names(dim(exp))))) { + stop("Missing longitude dimension in 'exp', or does not match the parameter 'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp))))) { + stop("Missing latitude dimension in 'exp', or does not match the parameter 'lat_dim'") + } + + # Check for negative latitudes in the exp data + if (any(lats < -90 | lats > 90) ) { + stop("Out-of-range latitudes have been found. Latitudes must range from -90 to 90") + } + + # checkings for the case of interpolation to point locations + if (!is.null(points)) { + if (!inherits(points, 'list')) { + stop("Parameter 'points' must be a list of two elements containing the point ", + "latitudes and longitudes.") + } + + if (is.null(method_point_interp)) { + stop("Parameter 'method_point_interp' must be a character vector indicating the ", + "interpolation method. 
Accepted methods are nearest, bilinear, 9point, ", + "invdist4nn, NE, NW, SE, SW") + } + + if (!(method_point_interp %in% c('nearest', 'bilinear', '9point', 'invdist4nn', 'NE', 'NW', 'SE', 'SW'))) { + stop("Parameter 'method_point_interp' must be a character vector indicating the ", + "interpolation method. Accepted methods are nearest, bilinear, 9point, ", + "invdist4nn, NE, NW, SE, SW") + } + + # Points must be a list of two elements + if (length(points) != 2) { + stop("'points' must be a lis of two elements containing the point ", + "latitudes and longitudes in the form 'points$lat', 'points$lon'") + } + + # The names of the two elements must be 'lat' and 'lon' + if (any(!(c(lat_dim, lon_dim) %in% names(points)))) { + stop("The names of the elements in the list 'points' must coincide with the parametres ", + "'lat_dim' and 'lon_dim'") + } + + # Check that the number of latitudes and longitudes match + if (length(unique(lengths(points))) != 1L) { + stop("The number of latitudes and longitudes must match") + } + + # Check for negative latitudes in the point coordinates + if (any(points[[lat_dim]] < -90 | points[[lat_dim]] > 90) ) { + stop("Out-of-range latitudes have been found in 'points'. Latitudes must range from ", + "-90 to 90") + } + + if (is.null(source_file)) { + stop("No source file found. Source file must be provided in the parameter 'source_file'.") + } + } else { + if (is.null(method_remap)) { + stop("Parameter 'method_remap' must be a character vector indicating the ", + "interpolation method. Accepted methods are con, bil, bic, nn, con2") + } + + if (is.null(target_grid)) { + stop("Parameter 'target_grid' can be either a path ", + "to another NetCDF file which to read the target grid from (a single grid must be ", + "defined in such file) or a character vector indicating the coarse grid to ", + "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + #---------------------------------- + # Limits of the region defined by the model data + #---------------------------------- + # for the case when region limits are not passed by the user + # regions contains the following elements in order: lonmin, lonmax, latmin, latmax + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
Assuming the four borders of the ", + "downscaling region are defined by the first and last elements of the parametres 'lats' and 'lons'.") + region <- c(lons[1], lons[length(lons)], lats[1], lats[length(lats)]) + } + + # Ensure points to be within the region limits + if (!is.null(points)) { + if (any(points[[lat_dim]] > region[4]) | any(points[[lat_dim]] < region[3]) | + any(points[[lon_dim]] > region[2]) | any(points[[lon_dim]] < region[1])) { + stop("At least one of the points lies outside the model region") + } + } + + #---------------------------------- + # Map regrid with CDO + #---------------------------------- + if (is.null(points)) { + + .KnownLonNames <- s2dv:::.KnownLonNames + .KnownLatNames <- s2dv:::.KnownLatNames + .warning <- s2dv:::.warning + + res <- CDORemap(data_array = exp, + lats = lats, + lons = lons, + grid = target_grid, + method = method_remap, + crop = region) + + # Return a list + res <- list(data = res$data_array, obs = NULL, lon = res$lons, lat = res$lats) + + #---------------------------------- + # Interpolate to point locations + #---------------------------------- + } else { + # First create interpolation weights, depending on the chosen method + weights <- .create_interp_weights(ncfile = source_file, locids = 1:unique(lengths(points)), + lats = points[[lat_dim]], lons = points[[lon_dim]], + method = method_point_interp, region = list(lat_min = region[3], + lat_max = region[4], lon_min = region[1], lon_max = region[2])) + + # Select coarse-scale data to be interpolated + model_data_gridpoints <- .get_model_data(weights.df = weights, mdata = exp, ncores = ncores) + + # Interpolate model data to point locations + res <- .interpolate_data(model_data_gridpoints, weights, ncores = ncores) + + # Return a list + res <- list(data = res, obs = NULL, lon = points[[lon_dim]], lat = points[[lat_dim]]) + } + + return(res) +} + +#====================== +# Compute weights for interpolation at several (lat,lon) positions +# We assume that grid boxes are centered in the grid point. +#====================== +.create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, + method = c("nearest", "bilinear", "9point", "invdist4nn", "NE", + "NW", "SE", "SW")) +{ + # crop the region to get the correct weights - save temporary file + nc_cropped1 <- paste0('tmp_cropped_', format(Sys.time(), "%Y%m%d%H%M"),'.nc') + nc_cropped2 <- paste0('tmp_cropped2_', format(Sys.time(), "%Y%m%d%H%M"),'.nc') + + system(paste0('cdo sellonlatbox,', region$lon_min, ',', region$lon_max, ',', region$lat_min, + ',', region$lat_max, ' ', ncfile, ' ', nc_cropped1)) + + #---------------- + # Read grid description and compute (i,j) of requested locations (including decimals) + #---------------- + griddes <- .get_griddes(nc_cropped1) + + if (is.null(griddes$yinc)) { + system(paste0('rm ', nc_cropped1)) + stop("'griddes$yinc' not found in NetCDF file. 
Remember that only regular grids are accepted when ", + "downscaling to point locations.") + } + + # If latitudes are decreasingly ordered, revert them + if (griddes$yinc < 0) { + system(paste0('cdo invertlat ', nc_cropped1, ' ', nc_cropped2)) + griddes <- .get_griddes(nc_cropped2) + } + # remove temporary files + system(paste0('rm ', nc_cropped1)) + system(paste0('rm ', nc_cropped2)) + + if (is.null(griddes)) { + stop("'griddes' not found in the NetCDF source files") + } + + gridpoints <- .latlon2ij(griddes, lats, lons) + + #---------------- + # Compute the weights according to the selected method + #---------------- + if(method == "nearest") { + #---------------- + # Round i and j to closest integer. Weight is always 1. + #---------------- + + # | | | + # -+-----+-----+- + # | x| | + # | a | | + # | | | + # -+-----+-----+- + # | | | + + centeri <- round(gridpoints$i,0) + centeri[centeri == griddes$xsize+1] <- 1 # close longitudes + + weights.df <- data.frame(locid = locids, + lat = lats, + lon = lons, + rawi = gridpoints$i, + rawj = gridpoints$j, + i = centeri, + j = round(gridpoints$j, 0), + weight = 1) + } else if (method %in% c("bilinear","invdist4nn")) { + #---------------- + # Get the (i,j) coordinates of the 4 points (a,b,c,d) around x. + # This plot shows i increasing to the right and + # j increasing to the top, but the computations are generic. + #---------------- + # | | | + #- +-----+-----+- + # | | | + # | b | c | + # | | | + #- +-----+-----+- + # | x| | + # | a | d | + # | | | + #- +-----+-----+- + # | | | + + lowi <- floor(gridpoints$i) + highi <- ceiling(gridpoints$i) + highi[highi == griddes$xsize+1] <- 1 # close the longitudes + lowj <- floor(gridpoints$j) + highj <- ceiling(gridpoints$j) + # Note: highi and lowi are the same if i is integer + # Note: highj and lowj are the same if j is integer + + #---------------- + # Get x position wrt ad and ab axes (from 0 to 1) + #---------------- + pcti <- gridpoints$i - lowi + pctj <- gridpoints$j - lowj + + #---------------- + # Compute weights for a,b,c,d grid points + #---------------- + if(method == "bilinear") { + wa = (1 - pcti) * (1 - pctj) + wb = (1 - pcti) * pctj + wc = pcti * pctj + wd = pcti * (1 - pctj) + } else if(method == "invdist4nn") { + #---------------- + # Note: the distance is computed in the (i,j) space. + # Note2: this method does not guarantees a continuous interpolation. + # Use bilinear if that's desirable. + # When x is on the ab line, c and d would be used. In the limit of x + # being just left of ab other points would be used. + # Here we just dropped c and d coeffs when over ab. Same for ad line, + # b and c coeffs dropped. This prevents repeated nodes. 
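+      # Worked example (illustrative): for a point with pcti = 0.25 and pctj = 0
+      # (i.e. lying on the ad segment, closer to a), ida = 4 and idd = 4/3; idb and
+      # idc are zeroed, so after normalization wa = 0.75 and wd = 0.25.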
+ #---------------- + ida = 1 / sqrt(pcti^2 + pctj^2) + idb = 1 / sqrt(pcti^2 + (1 - pctj)^2) + idc = 1 / sqrt((1-pcti)^2 + (1-pctj)^2) + idd = 1 / sqrt((1-pcti)^2 + pctj^2) + idb[pctj == 0] <- 0; + idc[pctj == 0] <- 0; + idc[pcti == 0] <- 0; + idd[pcti == 0] <- 0; + + #---------------- + # Normalize vector of inverse distances + #---------------- + invdist <- cbind(ida, idb, idc, idd) + print(invdist) + w <- t(apply(invdist, 1, function(x) { print(x); if(any(is.infinite(x))) { + x <- is.infinite(x) * 1 } ; x <- x/sum(x) })) + print(w) + + wa = w[ , 1] + wb = w[ , 2] + wc = w[ , 3] + wd = w[ , 4] + } + + #---------------- + # Put info in dataframes and rbind them + #---------------- + weightsa.df <- data.frame(locid = locids, lat = lats,lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = lowj, weight = wa) + weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = highj, weight = wb) + weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = highj, weight = wc) + weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = lowj, weight = wd) + weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df) + } else if(method == "9point") { + #---------------- + # Get the (i,j) coordinates of the 9 points (a,b,...,i) around x + # This plot shows i increasing to the right and + # j increasing to the top, but the computations are generic. + #---------------- + # | | | | + #-+-----+-----+-----+- + # | | | | + # | c | f | i | + # | | | | + #-+-----+-----+-----+- + # | | x| | + # | b | e | h | + # | | | | + #-+-----+-----+-----+- + # | | | | + # | a | d | g | + # | | | | + #-+-----+-----+-----+- + # | | | | + + centeri <- round(gridpoints$i, 0) + centeri[centeri == griddes$xsize + 1] <- 1 + centerj <- round(gridpoints$j, 0) + lowi <- centeri - 1 + highi <- centeri + 1 + lowi[lowi == 0] <- griddes$xsize # close the longitudes + highi[highi == griddes$xsize+1] <- 1 # close the longitudes + lowj <- centerj - 1 + highj <- centerj + 1 + + #---------------- + # For the north and south pole do a 6-point average + #---------------- + w_highj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,0 ,1/9)) + w_centerj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,1/6,1/9)) + w_lowj <- ifelse(centerj == 1,0 ,ifelse(centerj == griddes$ysize,1/6,1/9)) + + #---------------- + # Put info in dataframes and rbind them + #---------------- + weightsa.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = lowj, weight = w_lowj) + weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = centerj, weight = w_centerj) + weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = highj, weight = w_highj) + weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = lowj, weight = w_lowj) + weightse.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = centerj, weight = w_centerj) + weightsf.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = highj, weight = w_highj) + weightsg.df <- data.frame(locid = 
locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = lowj, weight = w_lowj) + weightsh.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = centerj, weight = w_centerj) + weightsi.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = highj, weight = w_highj) + weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df, weightse.df, + weightsf.df, weightsg.df, weightsh.df, weightsi.df) + } else if(method %in% c("NE", "NW", "SW", "SE")) { + #---------------- + # Find if increasing i and j increases or decreases lat and lon + #---------------- + westtoeast <- (griddes$xinc > 0) + southtonorth <- T + if(griddes$gridtype == "gaussian") { + # We assume gaussian grid latitudes are ordered north to south + southtonorth <- F + } else { #lonlat + if(griddes$yinc < 0) {southtonorth <- F} + } + + #---------------- + # Get the (i,j) coordinates of the desired point (a,b,c or d) around x + #---------------- + # | | | + #- +-----+-----+- + # | | | + # | b | c | + # | | | + #- +-----+-----+- + # | x| | + # | a | d | + # | | | + #- +-----+-----+- + # | | | + + if(substr(method,1,1) == "N" & southtonorth == T) { selj <- ceiling(gridpoints$j) } + if(substr(method,1,1) == "S" & southtonorth == T) { selj <- floor(gridpoints$j) } + if(substr(method,1,1) == "N" & southtonorth == F) { selj <- floor(gridpoints$j) } + if(substr(method,1,1) == "S" & southtonorth == F) { selj <- ceiling(gridpoints$j) } + + if(substr(method,2,2) == "E" & westtoeast == T) {seli <- ceiling(gridpoints$i) } + if(substr(method,2,2) == "W" & westtoeast == T) {seli <- floor(gridpoints$i) } + if(substr(method,2,2) == "E" & westtoeast == F) {seli <- floor(gridpoints$i) } + if(substr(method,2,2) == "W" & westtoeast == F) {seli <- ceiling(gridpoints$i) } + + seli[seli == griddes$xsize + 1] <- 1 # close the longitudes + + weights.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = seli, j = selj, weight = 1) + } else { + stop(paste0("Method " ,method, " not implemented")) + } + + #---------------- + # Order by locid and remove lines with 0 weight + # This also removes some duplicates in the bilinear/invdist4nn methods when i + # or j is a whole number, or in the 9-point method when at the poles. + #---------------- + weights.df <- weights.df[order(weights.df$locid), ] + weights.df <- weights.df[weights.df$weight != 0, ] + + #---------------- + # Add as attributes the method and the nc file used to compute the weights + #---------------- + attributes(weights.df)$nc_file <- normalizePath(ncfile) + attributes(weights.df)$method <- method + + return(weights.df) +} + +#====================== +# Compute (i,j) from (lat,lon). +# Works only for 'lonlat' and 'gaussian' grids. +# Grids are supposed to cover whole globe. 
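+# Illustrative example (assuming a global lonlat grid with xfirst = 0, xinc = 1,
+# xsize = 360, yfirst = -90 and yinc = 1): lon = 2.5 gives i = 1 + (2.5 %% 360) = 3.5,
+# and lat = 0.25 gives j = 1 + (0.25 - (-90)) / 1 = 91.25.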
+#====================== +.latlon2ij <- function(griddes, lats, lons) { + #------------ + # Check input params + #------------ + if(length(lons) != length(lats)) {stop("Input lat and lon have different lengths.")} + if(any(lats < -90) | any(lats > 90)) {stop("Latitude out of valid range")} + if((griddes$xfirst > 180) & (any(lons < 0))) { + stop("Please use the same convention for the longitudes in the source file and the ", + "longitude values in 'points'.") + } + #if(round(griddes$xinc*griddes$xsize) != 360) {stop("Grid is not global")} + # no need to resize lons to [0,360) + + #------------ + # Compute i (with decimals) + # i lies in [1,xsize+1) + # %% gives the remainder + #------------ + gridpoints <- list() + gridpoints$i <- 1 + (((lons - griddes$xfirst) / griddes$xinc) %% griddes$xsize) + + #------------ + # Compute j (with decimals) + #------------ + if(griddes$gridtype=='lonlat') { + gridpoints$j <- 1 + (lats - griddes$yfirst) / griddes$yinc + } else if(griddes$gridtype == 'gaussian') { + # We assume gaussian grid latitudes are ordered north to south + # findInterval can only work with monotonic ascending values so we revert twice + northj <- griddes$ysize-findInterval(lats, -griddes$yvals) + southj <- northj + 1 + + # Special case: We are north of the first lat + gridpoints$j[northj == 0] <- 1 + + # Special case: We are south of the last lat + gridpoints$j[southj == griddes$ysize + 1] <- griddes$ysize + + # Generic case + ok_idx <- !(northj == 0 | southj == griddes$ysize+1) + gridpoints$j[ok_idx] <- northj[ok_idx] + (griddes$yvals[northj[ok_idx]] - + lats[ok_idx])/(griddes$yvals[northj[ok_idx]] - griddes$yvals[southj[ok_idx]]) + } else { stop("Unsupported grid") } + + return(gridpoints) +} + +#====================== +# Use cdo griddes to obtain grid information +#====================== +.get_griddes <- function(ncfile) { + tmp <- system(paste0("cdo griddes ", ncfile, + " 2>/dev/null | egrep 'gridtype|xsize|ysize|xfirst|xinc|yfirst|yinc'"), intern = T) + arr <- do.call(rbind, strsplit(tmp,"\\s+= ", perl = T)) + griddes <- as.list(arr[,2]) + names(griddes) <- arr[,1] + + if(griddes$gridtype == "gaussian") { + griddes$yvals <- .get_lats(ncfile) + } + + # Convert some fields to numeric. Ensures all fields are present. + for(nm in c("xsize", "ysize", "xfirst", "yfirst", "xinc", "yinc")) { + griddes[[nm]] <- ifelse(is.null(griddes[[nm]]), NA, as.numeric(griddes[[nm]])) + } + + return(griddes) +} + +#====================== +# Use nco to obtain latitudes. Latitudes shall be named "lat" or "latitude". +#====================== +.get_lats <- function(ncfile) { + + tmp <- system(paste0('ncks -H -s "%f " -v latitude ',ncfile),intern=T) + + if(!is.null(attributes(tmp)$status)) { + tmp <- system(paste0('ncks -H -s "%f " -v lat ',ncfile),intern=T) + } + + lats <- as.numeric(strsplit(tmp[1],"\\s+",perl=T)[[1]]) + + return(lats) +} + +#====================== +# Load model data at all (i,j) pairs listed in the weights dataframe. +# Uses StartR. All ... parameters go to Start (i.e. specify dat, var, +# sdate, time, ensemble, num_procs, etc) +#====================== +.get_model_data <- function(weights.df, mdata, ncores = NULL) { + + #----------------- + # Get data for all combinations of i and j. + # (inefficient, getting many unneded pairs). 
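+  # For example (illustrative), two locations interpolated with the bilinear method
+  # contribute up to four (i,j) corners each, so 'weights.df' holds up to 8 rows and
+  # 'is'/'js' list the matching longitude/latitude indices, duplicates included.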
+ # Avoid retrieving duplicates with unique() + # These are the indices of the global grid + #----------------- + is <- weights.df$i + js <- weights.df$j + + #----------------- + # If any of the indices happens to be 0, + # change it by 1 but give a warning + #----------------- + if (any(is == 0) | any(js == 0)) { + warning("Is the point location in the border of the region? The code can run but ", + "results will be less accurate than those obtained with a larger region." ) + is[is == 0] <- 1 + js[js == 0] <- 1 + } + + #----------------- + # Get indices of original is and js in unique(is),unique(js) that were requested + #----------------- + idxi <- match(is, unique(is)) + idxj <- match(js, unique(js)) + + #----------------- + # Subsample mdata to keep only the needed (i,j) pairs. + #----------------- + if (is.na(match("longitude", names(dim(mdata))))) { + londim <- match("lon", names(dim(mdata))) + } else { + londim <- match("longitude", names(dim(mdata))) + } + if (is.na(match("latitude", names(dim(mdata))))) { + latdim <- match("lat", names(dim(mdata))) + } else { + latdim <- match("latitude", names(dim(mdata))) + } + + # trick: exchange idxi and idxj + #if(londim > latdim) { idx.tmp <- idxi; idxi <- idxj; idxj <- idx.tmp } + #keepdims <- (1:length(dim(mdata)))[-c(londim,latdim)] + + #sub_mdata <- apply(mdata, keepdims, function(x) { + # laply(1:length(is),function(k) { x[idxi[k],idxj[k]] }) }) + #names(dim(sub_mdata))[1] <- "gridpoint" + + #----------------- + # Retrieve with multiApply + #----------------- + sub_mdata <- Apply(mdata, target_dims = list(c(latdim, londim)), + fun = function(x) {laply(1:length(is),function(k) { x[js[k],is[k]] }) }, + ncores = ncores)$output1 + names(dim(sub_mdata))[1] <- "gridpoint" + + #----------------- + # Return an array that contains as many gridpoints as (i,j) pairs were requested + #----------------- + return(sub_mdata) +} + +#====================== +# Multiply the grid-point series by the weights, +# to obtain the desired interpolations +#====================== +.interpolate_data <- function(model_data, weights.df, ncores) { + #----------------- + # Multiply each gridpoint matrix by its corresponding weight + #----------------- + gpdim <- match("gridpoint", names(dim(model_data))) + weighted_data <- sweep(model_data, gpdim, weights.df$weight, "*") + + #----------------- + # Sum all series that belong to same interpolation point + # Return an array that contains the requested locations and interpolation type + #----------------- + #interp_data <- apply(weighted_data, -gpdim, function(x) { rowsum(x, weights.df$locid) }) + #names(dim(interp_data))[1] <- "location" + interp_data <- Apply(weighted_data, target_dims = gpdim, fun = function(x) { + rowsum(x, weights.df$locid)}, output_dims = c("location", "aux"), + ncores = ncores)$output1 + + return(interp_data) +} diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R new file mode 100644 index 0000000000000000000000000000000000000000..f108877b7d55a6016b437a17a8144d6c68b28198 --- /dev/null +++ b/modules/Downscaling/tmp/Intlr.R @@ -0,0 +1,559 @@ +#'@rdname CST_Intlr +#'@title Downscaling using interpolation and linear regression. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a linear +#'regression. Different methodologies that employ linear regressions are available. See +#'parameter 'lr_method' for more information. 
It is recommended that the observations +#'are passed already in the target grid. Otherwise, the function will also perform an +#'interpolation of the observed field into the target grid. The coarse scale and +#'observation data can be either global or regional. In the latter case, the region is +#'defined by the user. In principle, the coarse and observation data are intended to +#'be of the same variable, although different variables can also be admitted. +#' +#'@param exp an 's2dv object' containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an 's2dv object' containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param lr_method a character vector indicating the linear regression method to be applied +#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' +#'method fits a linear regression using high resolution observations as predictands and the +#'interpolated model data as predictor. Then, the regression equation is to the interpolated +#'model data to correct the interpolated values. The 'large-scale' method fits a linear +#'regression with large-scale predictors from the same model (e.g. teleconnection indices) +#'as predictors and high-resolution observations as predictands. This equation is then +#'applied to the interpolated model values. Finally, the '4nn' method uses a linear +#'regression with the four nearest neighbours as predictors and high-resolution observations +#'as predictands. It is then applied to model data to correct the interpolated values. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param int_method a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". +#'@param predictors an array with large-scale data to be used in the 'large-scale' method. +#'Only needed if the linear regression method is set to 'large-scale'. It must have, at +#'least the dimension start date and another dimension whose name has to be specified in +#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the +#'number of large-scale predictors. 
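+#'For illustration, an array with dimensions c(sdate = 5, vars = 2) would provide two
+#'large-scale predictors for five start dates (assuming 'exp' also has five start dates).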
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param member_dim a character vector indicating the member dimension name in the element
+#''data' in exp and obs. Default set to "member".
+#'@param large_scale_predictor_dimname a character vector indicating the name of the
+#'dimension in 'predictors' that contains the predictor variables. See parameter 'predictors'.
+#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when
+#'generating the linear regressions. Default to TRUE.
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'The default value is NULL.
+#'@import multiApply
+#'
+#'@return A list of two 's2dv_cube' objects: 'exp', containing the downscaled experimental
+#'data, and 'obs', containing the corresponding observations.
+#'@examples
+#'exp <- rnorm(500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(900)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5)
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+#'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative')
+#'@export
+CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL,
+                      method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon",
+                      sdate_dim = "sdate", time_dim = "time", member_dim = "member",
+                      large_scale_predictor_dimname = 'vars', loocv = TRUE, region = NULL, ncores = NULL) {
+
+  if (!inherits(exp,'s2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  if (!inherits(obs,'s2dv_cube')) {
+    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  }
+
+  res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]],
+               obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], points = points,
+               source_file_exp = exp$coords$source_files[1], source_file_obs = obs$coords$source_files[1],
+               target_grid = target_grid, lr_method = lr_method, int_method = int_method,
+               method_point_interp = method_point_interp, predictors = predictors,
+               lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim,
+               member_dim = member_dim, large_scale_predictor_dimname = large_scale_predictor_dimname,
+               loocv = loocv, region = region, ncores = ncores)
+
+  # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data
+  exp$data <- res$data
+  exp$dims <- dim(exp$data)
+  exp$coords[[lon_dim]] <- res$lon
+  exp$coords[[lat_dim]] <- res$lat
+
+  obs$data <- 
res$obs + obs$dims <- dim(obs$data) + obs$coords[[lon_dim]] <- res$lon + obs$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp, obs = obs) + return(res_s2dv) +} + +#'@rdname Intlr +#'@title Downscaling using interpolation and linear regression. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a linear +#'regression. Different methodologies that employ linear regressions are available. See +#'parameter 'lr_method' for more information. It is recommended that the observations +#'are passed already in the target grid. Otherwise, the function will also perform an +#'interpolation of the observed field into the target grid. The coarse scale and +#'observation data can be either global or regional. In the latter case, the region is +#'defined by the user. In principle, the coarse and observation data are intended to +#'be of the same variable, although different variables can also be admitted. +#' +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude and start date. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. +#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param lr_method a character vector indicating the linear regression method to be applied +#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' +#'method fits a linear regression using high resolution observations as predictands and the +#'interpolated model data as predictor. Then, the regression equation is to the interpolated +#'model data to correct the interpolated values. The 'large-scale' method fits a linear +#'regression with large-scale predictors from the same model (e.g. teleconnection indices) +#'as predictors and high-resolution observations as predictands. This equation is then +#'applied to the interpolated model values. Finally, the '4nn' method uses a linear +#'regression with the four nearest neighbours as predictors and high-resolution observations +#'as predictands. It is then applied to model data to correct the interpolated values. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. 
The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param int_method a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". +#'@param source_file_exp a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param source_file_obs a character vector with a path to an example file of the obs data. +#'Only needed if the downscaling is to a point location. +#'@param predictors an array with large-scale data to be used in the 'large-scale' method. +#'Only needed if the linear regression method is set to 'large-scale'. It must have, at +#'least the dimension start date and another dimension whose name has to be specified in +#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the +#'number of large-scale predictors. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param large_scale_predictor_dimname a character vector indicating the name of the +#'dimension in 'predictors' that contain the predictor variables. See parameter 'predictors'. +#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when +#'generating the linear regressions. Default to TRUE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. +#'@import multiApply +#' +#'@return A list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. 
+#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, +#'obs_lons = obs_lons, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') +#'@export +Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL, + int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, + predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = TRUE, + ncores = NULL) { + + #----------------------------------- + # Checkings + #----------------------------------- + if (!inherits(lr_method, 'character')) { + stop("Parameter 'lr_method' must be of the class 'character'") + } + + if (!inherits(large_scale_predictor_dimname, 'character')) { + stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") + } + + if (!inherits(loocv, 'logical')) { + stop("Parameter 'loocv' must be set to TRUE or FALSE") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(large_scale_predictor_dimname, 'character')) { + stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") + } + + if (is.na(match(lon_dim, names(dim(exp))))) { + stop("Missing longitude dimension in 'exp', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp))))) { + stop("Missing latitude dimension in 'exp', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + # When observations are pointwise + if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { + point_obs <- T + # dimension aux in obs is needed + if (is.na(match("aux", names(dim(obs))))) { + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = "aux") + } + } else { + point_obs <- F + } + + if (!is.null(points) & is.null(source_file_exp)) { + stop("No source file found. 
Source file for exp must be provided in the parameter ", + "'source_file_exp'.") + } + + if (!is.null(points) & is.null(method_point_interp)) { + stop("Please provide the interpolation method to interpolate gridded data to point locations ", + "through the parameter 'method_point_interp'.") + } + + # sdate must be the time dimension in the input data + stopifnot(sdate_dim %in% names(dim(exp))) + stopifnot(sdate_dim %in% names(dim(obs))) + + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + restore_ens <- TRUE + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + # checkings for the parametre 'predictors' + if (!is.null(predictors)) { + if (!is.array(predictors)) { + stop("Parameter 'predictors' must be of the class 'array'") + } else { + # ensure the predictor variable name matches the parametre large_scale_predictor_dimname + stopifnot(large_scale_predictor_dimname %in% names(dim(predictors))) + stopifnot(sdate_dim %in% names(dim(predictors))) + stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim]) + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + #----------------------------------- + # Interpolation + #----------------------------------- + if (lr_method != '4nn') { + + if (is.null(int_method)) { + stop("Parameter 'int_method' must be a character vector indicating the interpolation method. ", + "Accepted methods are con, bil, bic, nn, con2") + } + + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
Assuming the ", + "four borders of the downscaling region are defined by the first and last ", + "elements of the parametres 'obs_lats' and 'obs_lons'.") + region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + } + + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + points = points, method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = int_method, region = region, ncores = ncores) + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = int_method, region = region, ncores = ncores) + + lats <- obs_interpolated$lat + lons <- obs_interpolated$lon + obs_interpolated <- obs_interpolated$data + } else { + obs_interpolated <- obs + lats <- obs_lats + lons <- obs_lons + } + } + + #----------------------------------- + # Linear regressions + #----------------------------------- + # Pointwise linear regression + # Predictor: model data + # Predictand: observations + if (lr_method == 'basic') { + predictor <- exp_interpolated$data + predictand <- obs_interpolated + + target_dims_predictor <- sdate_dim + target_dims_predictand <- sdate_dim + } + + # (Multi) linear regression with large-scale predictors + # Predictor: passed through the parameter 'predictors' by the user. 
Can be model or observations + # Predictand: model data + else if (lr_method == 'large-scale') { + if (is.null(predictors)) { + stop("The large-scale predictors must be passed through the parametre 'predictors'") + } + + predictand <- obs_interpolated + predictor <- predictors + + target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname) + target_dims_predictand <- sdate_dim + } + + # Multi-linear regression with the four nearest neighbours + # Predictors: model data + # Predictand: observations + else if (lr_method == '4nn') { + + predictor <- .find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats, + lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4, ncores = ncores) + + if (is.null(points) | ("location" %in% names(dim(obs)))) { + if (!is.null(target_grid)) { + warning("Interpolating to the 'obs' grid") + } + predictand <- obs + + lats <- obs_lats + lons <- obs_lons + } + # If the downscaling is to point locations: Once the 4 nearest neighbours have been found, + # interpolate to point locations + else { + predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = NULL, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, method_remap = NULL, region = region, ncores = ncores) + + predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = NULL, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, method_remap = NULL, region = region, ncores = ncores) + + lats <- predictor$lat + lons <- predictor$lon + predictor <- predictor$data + predictand <- predictand$data + } + + target_dims_predictor <- c(sdate_dim,'nn') + target_dims_predictand <- sdate_dim + } + + else { + stop(paste0(lr_method, " method is not implemented yet")) + } + + print(paste0('dim predictor',dim(predictor))) + print(paste0('dim predictand',dim(predictand))) + print(dim(list(predictor[1]))) + # Apply the linear regressions + + + + res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand), + fun = .intlr, loocv = loocv, ncores = ncores)$output1 + + names(dim(res))[1] <- sdate_dim + # names(dim(res))[which(names(dim(res)) == '')] + + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + predictand <- s2dv::InsertDim(predictand, posdim = 1, lendim = 1, name = member_dim) + } + + # Return a list of three elements + res <- list(data = res, obs = predictand, lon = lons, lat = lats) + + return(res) +} + +#----------------------------------- +# Atomic function to generate and apply the linear regressions +#----------------------------------- +.intlr <- function(x, y, loocv) { + + tmp_df <- data.frame(x = x, y = y) + # if the data is all NA, force return return NA + if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { + + n <- nrow(tmp_df) + res <- rep(NA, n) + + } else { + # training + lm1 <- .train_lm(df = tmp_df, loocv = loocv) + + # prediction + res <- .pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv) + } + + return(res) +} + +#----------------------------------- +# Function to generate the linear regressions. 
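+# Illustrative sketch of the assumed internal usage (comments only, not executed):
+#   df   <- data.frame(x = rnorm(5), y = rnorm(5))
+#   fits <- .train_lm(df = df, loocv = TRUE)             # one lm fit per left-out start date
+#   pred <- .pred_lm(lm1 = fits, df = df, loocv = TRUE)  # out-of-sample prediction for each row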
+# Returns a list +#----------------------------------- +.train_lm <- function(df, loocv) { + + # Remove predictor columns containing only NA's + df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, function(x) !all(is.na(x)))] + + if (loocv) { + + lm1 <- lapply(1:nrow(df), function(j) { + if (all(is.na(df[-j,]$y))) { + return(NA) + } else { + return(lm(df[-j,], formula = y ~ .)) + }}) + } else { + + lm1 <- ifelse(all(is.na(df$y)), NA, list(lm(data = df, formula = y ~ .))) + } + + return(lm1) +} + +#----------------------------------- +# Function to apply the linear regressions. +#----------------------------------- +.pred_lm <- function(df, lm1, loocv) { + + if (loocv) { + pred_vals <- sapply(1:nrow(df), function(j) { + if (all(is.na(lm1[[j]]))) { + return(NA) + } else { + return(predict(lm1[[j]], df[j,])) + }}) + } else { + if (!is.na(lm1)) { + pred_vals_ls <- lapply(lm1, predict, data = df) + pred_vals <- unlist(pred_vals_ls) + } else { + pred_vals <- rep(NA, nrow(df)) + } + } + return(pred_vals) +} + +#----------------------------------- +# Function to find N nearest neighbours. +# 'coar' is an array with named dimensions +#----------------------------------- +.find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4, ncores = NULL) { + + # Sort the distances from closest to furthest + idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn])) + idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn])) + + names(dim(idx_lat)) <- c('nn', lat_dim) + names(dim(idx_lon)) <- c('nn', lon_dim) + + # obtain the values of the nearest neighbours + nearest <- Apply(list(coar, idx_lat, idx_lon), + target_dims = list(c(lat_dim, lon_dim), lat_dim, lon_dim), + fun = function(x, y, z) x[y, z], ncores = ncores)$output1 + + return(nearest) +} diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R new file mode 100644 index 0000000000000000000000000000000000000000..67fa1b28c9d8bf508d3f2b0d39c6a4940ade29c1 --- /dev/null +++ b/modules/Downscaling/tmp/LogisticReg.R @@ -0,0 +1,555 @@ +#'@rdname CST_LogisticReg +#'@title Downscaling using interpolation and logistic regression. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a logistic +#'regression. See \code{\link[nnet]{multinom}} for further details. It is recommended that +#'the observations are passed already in the target grid. Otherwise, the function will also +#'perform an interpolation of the observed field into the target grid. The coarse scale and +#'observation data can be either global or regional. In the latter case, the region is +#'defined by the user. In principle, the coarse and observation data are intended to be of +#'the same variable, although different variables can also be admitted. +#' +#'@param exp an 's2dv object' with named dimensions containing the experimental field on +#'the coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. 
+#'@param obs an 's2dv object' with named dimensions containing the observational field. +#'The object must have, at least, the dimensions latitude, longitude and start date. The +#'object is expected to be already subset for the desired region. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param log_reg_method a character vector indicating the logistic regression method to be used. +#'Accepted methods are "ens_mean", "ens_mean_sd", "sorted_members". "ens_mean" uses the ensemble +#'mean anomalies as predictors in the logistic regression, "ens_mean_sd" uses the ensemble +#'mean anomalies and the ensemble spread (computed as the standard deviation of all the members) +#'as predictors in the logistic regression, and "sorted_members" considers all the members +#'ordered decreasingly as predictors in the logistic regression. Default method is "ens_mean". +#'@param probs_cat a numeric vector indicating the percentile thresholds separating the +#'climatological distribution into different classes (categories). Default to c(1/3, 2/3). See +#'\code{\link[easyVerification]{convert2prob}}. +#'@param return_most_likely_cat if TRUE, the function returns the most likely category. If +#'FALSE, the function returns the probabilities for each category. Default to FALSE. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation +#'in the fitting of the logistic regression. Default to TRUE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. 
+#'The default value is NULL. +#'@import multiApply +#'@import nnet +#'@importFrom laply plyr +#' +#'@seealso \code{\link[nnet]{multinom}} +#' +#'@return An list of three elements. 'data' contains the dowscaled data, that could be either +#'in the form of probabilities for each category or the most likely category. 'lat' contains the +#'downscaled latitudes, and 'lon' the downscaled longitudes. +#' +#'@examples +#'exp <- rnorm(1500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 15) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(2700) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 15) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) +#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'res <- CST_LogisticReg(exp = exp, obs = obs, int_method = 'bil', target_grid = 'r1280x640', +#'probs_cat = c(1/3, 2/3)) +#'@export + +CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_method = "ens_mean", + probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, + method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + member_dim = "member", region = NULL, loocv = TRUE, ncores = NULL) { + + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + if (!inherits(obs,'s2dv_cube')) { + stop("Parameter 'obs' must be of the class 's2dv_cube'") + } + + res <- LogisticReg(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], + exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]], + obs_lons = obs$coords[[lon_dim]], target_grid = target_grid, + probs_cat = probs_cat, return_most_likely_cat = return_most_likely_cat, + int_method = int_method, log_reg_method = log_reg_method, points = points, + method_point_interp = method_point_interp, lat_dim = lat_dim, + lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, + source_file_exp = exp$coords$source_files[1], source_file_obs = obs$coords$source_files[1], + region = region, loocv = loocv, ncores = ncores) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat + + obs$data <- res$obs + obs$dims <- dim(obs$data) + obs$coords[[lon_dim]] <- res$lon + obs$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp, obs = obs) + return(res_s2dv) +} + +#'@rdname LogisticReg +#'@title Downscaling using interpolation and logistic regression. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a logistic +#'regression. See \code{\link[nnet]{multinom}} for further details. It is recommended that +#'the observations are passed already in the target grid. Otherwise, the function will also +#'perform an interpolation of the observed field into the target grid. The coarse scale and +#'observation data can be either global or regional. In the latter case, the region is +#'defined by the user. In principle, the coarse and observation data are intended to be of +#'the same variable, although different variables can also be admitted. +#' +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. 
The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. +#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param log_reg_method a character vector indicating the logistic regression method to be used. +#'Accepted methods are "ens_mean", "ens_mean_sd", "sorted_members". "ens_mean" uses the ensemble +#'mean anomalies as predictors in the logistic regression, "ens_mean_sd" uses the ensemble +#'mean anomalies and the ensemble spread (computed as the standard deviation of all the members) +#'as predictors in the logistic regression, and "sorted_members" considers all the members +#'ordered decreasingly as predictors in the logistic regression. Default method is "ens_mean". +#'@param probs_cat a numeric vector indicating the percentile thresholds separating the +#'climatological distribution into different classes (categories). Default to c(1/3, 2/3). See +#'\code{\link[easyVerification]{convert2prob}}. +#'@param return_most_likely_cat if TRUE, the function returns the most likely category. If +#'FALSE, the function returns the probabilities for each category. Default to FALSE. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". 
+#'@param member_dim a character vector indicating the member dimension name in the element
+#''data' in exp and obs. Default set to "member".
+#'@param source_file_exp a character vector with a path to an example file of the exp data.
+#'Only needed if the downscaling is to a point location.
+#'@param source_file_obs a character vector with a path to an example file of the obs data.
+#'Only needed if the downscaling is to a point location.
+#'@param region a numeric vector indicating the borders of the region defined in obs.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation
+#'in the fitting of the logistic regression. Default to TRUE.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'The default value is NULL.
+#'@import multiApply
+#'@import nnet
+#'@importFrom plyr laply
+#'
+#'@seealso \code{\link[nnet]{multinom}}
+#'
+#'@return A list of three elements. 'data' contains the downscaled data, which can be either
+#'in the form of probabilities for each category or the most likely category. 'lat' contains the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
+#'
+#'@examples
+#'exp <- rnorm(1500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 15)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(2700)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 15)
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'res <- LogisticReg(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons,
+#'obs_lats = obs_lats, obs_lons = obs_lons, int_method = 'bil', target_grid = 'r1280x640',
+#'probs_cat = c(1/3, 2/3))
+#'@export
+LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid,
+                        int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3),
+                        return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL,
+                        lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member",
+                        source_file_exp = NULL, source_file_obs = NULL, region = NULL, loocv = TRUE, ncores = NULL) {
+
+  #-----------------------------------
+  # Checks
+  #-----------------------------------
+  if (!inherits(target_grid, 'character')) {
+    stop("Parameter 'target_grid' must be of the class 'character'")
+  }
+
+  if (!is.null(int_method) & !inherits(int_method, 'character')) {
+    stop("Parameter 'int_method' must be of the class 'character'")
+  }
+
+  if (!is.null(method_point_interp) & !inherits(method_point_interp, 'character')) {
+    stop("Parameter 'method_point_interp' must be of the class 'character'")
+  }
+
+  if (!inherits(lat_dim, 'character')) {
+    stop("Parameter 'lat_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(lon_dim, 'character')) {
+    stop("Parameter 'lon_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(sdate_dim, 'character')) {
+    stop("Parameter 'sdate_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(member_dim, 'character')) {
+    stop("Parameter 'member_dim' must be of the class 'character'")
+  }
+
+  if (!is.null(source_file_exp) & !inherits(source_file_exp, 'character')) {
+    stop("Parameter 'source_file_exp' must be of the class 'character'")
+  }
+
+  if 
(!is.null(source_file_obs) & !inherits(source_file_obs, 'character')) { + stop("Parameter 'source_file_obs' must be of the class 'character'") + } + + if (!inherits(loocv, 'logical')) { + stop("Parameter 'loocv' must be set to TRUE or FALSE") + } + + if (is.na(match(lon_dim, names(dim(exp))))) { + stop("Missing longitude dimension in 'exp', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp))))) { + stop("Missing latitude dimension in 'exp', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(member_dim, names(dim(exp))))) { + stop("Missing member dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'member_dim'") + } + + # When observations are pointwise + if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { + point_obs <- T + # dimension aux in obs is needed + if (is.na(match("aux", names(dim(obs))))) { + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = "aux") + } + } else { + point_obs <- F + } + + if (!is.null(points) & (is.null(source_file_exp))) { + stop("No source file found. Source file must be provided in the parameter 'source_file_exp'.") + } + + if (!is.null(points) & is.null(method_point_interp)) { + stop("Please provide the interpolation method to interpolate gridded data to point locations ", + "through the parameter 'method_point_interp'.") + } + + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. Assuming the four borders ", + "of the downscaling region are defined by the first and last elements of the parametres ", + "'obs_lats' and 'obs_lons'.") + region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + restore_ens <- TRUE + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file_exp, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + region = region, ncores = ncores) + + # compute ensemble mean anomalies + if (log_reg_method == "ens_mean") { + predictor <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim, + ncores = ncores) + + target_dims_predictor <- sdate_dim + } + else if (log_reg_method == "ens_mean_sd") { + + require(abind) + + ens_mean_anom <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, + sdate_dim = sdate_dim, ncores = ncores) + ens_sd <- .get_ens_sd(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores) + + #merge two arrays into one array of predictors + predictor <- abind(ens_mean_anom, 
ens_sd, along = 1/2) + names(dim(predictor)) <- c("pred", names(dim(ens_mean_anom))) + + target_dims_predictor <- c(sdate_dim, "pred") + } else if (log_reg_method == "sorted_members") { + predictor <- .sort_members(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores) + + target_dims_predictor <- c(sdate_dim, member_dim) + } else { + stop(paste0(log_reg_method, " not recognised or not implemented.")) + } + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = as.numeric(exp_interpolated$lat), lat2 = obs_lats, + lon1 = as.numeric(exp_interpolated$lon), lon2 = obs_lons)) | !(point_obs)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file_obs, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + region = region, ncores = ncores) + obs_ref <- obs_interpolated$data + } else { + obs_ref <- obs + } + + # convert observations to categorical predictands + +obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { + if (!any(!is.na(x))) { + rep(NA,length(x)) + } else { + terc <- convert2prob(as.vector(x), prob = probs_cat) + as.integer(apply(terc, 1, function(r) which (r == 1)))}}, + output_dims = sdate_dim, ncores = ncores)$output1 + + + res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), + fun = function(x, y) + .log_reg(x = x, y = y, loocv = loocv,probs_cat=probs_cat), + output_dims = c(sdate_dim, "category"), ncores = ncores)$output1 + + if (return_most_likely_cat) { + res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category, + output_dims = sdate_dim, ncores = ncores)$output1 + } + + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim, ncores = ncores) + } + + res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat) + + return(res) +} + +.most_likely_category <- function(data) { +# data, expected dims: start date, category (in this order) + + if (all(is.na(data))) { + mlc <- rep(NA, nrow(data)) + } else { + mlc <- apply(data, 1, which.max) + } + return(mlc) +} + +.sort_members <- function(obj_ens, member_dim, ncores = NULL) { + + sorted <- Apply(obj_ens, target_dims = member_dim, sort, decreasing = TRUE, na.last = TRUE, ncores = ncores)$output1 + + return(sorted) +} + +.get_ens_sd <- function(obj_ens, member_dim, ncores = NULL) { + + # compute ensemble spread + ens_sd <- Apply(obj_ens, target_dims = member_dim, sd, na.rm = TRUE, ncores = ncores)$output1 + + return(ens_sd) +} + +.get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim, ncores = NULL) { + + require(s2dv) + + # compute climatology + clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean, ncores = ncores, na.rm = TRUE)$output1 + + # compute ensemble mean + ens_mean <- Apply(obj_ens, target_dims = member_dim, mean, na.rm = TRUE, ncores = ncores)$output1 + + # compute ensemble mean anomalies + anom <- Ano(ens_mean, clim, ncores = ncores) + + return(anom) +} + +# atomic functions for logistic regressions +.log_reg <- function(x, y, loocv,probs_cat) { + + tmp_df <- data.frame(x = x, y = y) + + # if the data is all NA, force return return NA + if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) 
!all(is.na(x)))) == 1) | all(is.na(tmp_df$y))) { + + n1 <- nrow(tmp_df) + n2<- length(probs_cat)+1 + res <- matrix(NA, nrow = n1, ncol = n2) + + } else { + # training + lm1 <- .train_lr(df = tmp_df, loocv = loocv) + + # prediction + res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv, probs_cat=probs_cat) + } + return(res) +} + +#----------------------------------- +# Function to train the logistic regressions. +#----------------------------------- +.train_lr <- function(df, loocv) { + + require(nnet) + + # Remove columns containing only NA's + df <- df[ , apply(df, 2, function(x) !all(is.na(x)))] + + if (loocv) { + + lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ], trace = FALSE)) + + } else { + + lm1 <- list(multinom(y ~ ., data = df, trace = FALSE)) + + } + + return(lm1) +} + +#----------------------------------- +# Function to apply the logistic regressions. +#----------------------------------- +pred_lr <- function(df, lm1, loocv, probs_cat) { + + require(plyr) + + if (loocv) { + + # The error: "Error: Results must have the same dimensions." can + # appear when the number of sdates is insufficient + + pred_vals_ls <-list() + for (j in 1:nrow(df)) { + if (all(is.na(df[j,]))) { + pred_vals_ls[[j]] <- rep(NA, length(probs_cat) + 1) + } else { + pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") + } + } + + pred_vals <- laply(pred_vals_ls, .fun = as.array) + + if( length(probs_cat)+1==2) { + pred_vals_dum<-array(NA,dim=c(nrow(df),2)) + pred_vals_dum[,2]<-pred_vals + pred_vals_dum[,1]<-1-pred_vals + pred_vals<-pred_vals_dum + colnames(pred_vals)<-c(1,2) + } + + } else { + + # type = class, probs + #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs") + #pred_vals <- unlist(pred_vals_ls) + pred_vals <- predict(lm1[[1]], df, type = "probs") + + if( length(probs_cat)+1==2) { + pred_vals_dum<-array(NA,dim=c(nrow(df),2)) + pred_vals_dum[,2]<-pred_vals + pred_vals_dum[,1]<-1-pred_vals + pred_vals<-pred_vals_dum + colnames(pred_vals)<-c(1,2) + } + + } + + return(pred_vals) +} diff --git a/modules/Downscaling/tmp/Utils.R b/modules/Downscaling/tmp/Utils.R new file mode 100644 index 0000000000000000000000000000000000000000..3cd658526ef9d56a367504c4348e9ca09cd75de4 --- /dev/null +++ b/modules/Downscaling/tmp/Utils.R @@ -0,0 +1,39 @@ +.check_coords <- function(lat1, lon1, lat2, lon2) { + if (all(as.numeric(lat1) == as.numeric(lat2)) & all(as.numeric(lon1) == as.numeric(lon2))) { + match <- TRUE + } else { + match <- FALSE + } + return(match) +} + +# reorder dims to a reference array. 
If they do not exist, they are created +# example +#arr_ref <- array(NA, c(dataset = 1, sdate = 8, member = 3, ftime = 1, lon = 269, lat = 181)) +#arr_to_reorder <- array(NA, c(dataset = 1, member = 3, sdate = 8, lat = 181, lon = 269, pp = 1)) + +.reorder_dims <- function(arr_ref, arr_to_reorder) { + + miss_dims <- names(dim(arr_ref))[!(names(dim(arr_ref)) %in% names(dim(arr_to_reorder)))] + + if (length(miss_dims) != 0) { + for (m in seq(miss_dims)) { + arr_to_reorder <- InsertDim(data = arr_to_reorder, posdim = length(dim(arr_to_reorder)) + 1, lendim = 1, + name = miss_dims[m]) + } + } + + # TODO: add code to reorder dimensions and put the non-common dimensions at the end + + orddim <- match(names(dim(arr_ref)),names(dim(arr_to_reorder))) + return(Reorder(data = arr_to_reorder, order = orddim)) +} + +#.check_coords <- function(lat1, lon1, lat2, lon2) { +# match <- TRUE +# if (!((length(lat1) == length(lat2)) & (length(lon1) == length(lon2)))) { +# match <- FALSE +# } +# return(match) +#} + diff --git a/modules/Indices/Indices.R b/modules/Indices/Indices.R new file mode 100644 index 0000000000000000000000000000000000000000..fb9a7277587f5fb63f4f8151a090a6245f571a5b --- /dev/null +++ b/modules/Indices/Indices.R @@ -0,0 +1,111 @@ +source("modules/Indices/R/compute_nao.R") +source("modules/Indices/R/compute_nino.R") +source("modules/Indices/R/drop_indices_dims.R") +source("modules/Saving/Saving.R") +Indices <- function(recipe, data) { + # Define parameters + nao <- NULL + if ("nao" %in% tolower(names(recipe$Analysis$Workflow$Indices))) { + if (!is.null(recipe$Analysis$Workflow$Indices$NAO$obsproj)) { + obsproj <- recipe$Analysis$Workflow$Indices$NAO$obsproj + } else { + obsproj <- TRUE + } + if (!is.null(recipe$Analysis$Workflow$Indices$NAO$plot_ts)) { + plot_ts <- recipe$Analysis$Workflow$Indices$NAO$plot_ts + } else { + plot_ts <- TRUE + } + if (!is.null(recipe$Analysis$Workflow$Indices$NAO$plot_sp)) { + plot_sp <- recipe$Analysis$Workflow$Indices$NAO$plot_sp + } else { + plot_sp <- TRUE + } + if (!is.null(recipe$Analysis$Workflow$Indices$NAO$alpha)) { + alpha <- recipe$Analysis$Workflow$Indices$NAO$alpha + } else { + alpha <- 0.05 + } + nao <- compute_nao(data = data, recipe = recipe, + obsproj = obsproj, + plot_ts = plot_ts, plot_sp = plot_sp, + alpha = alpha) + } + ninos <- list() + num_ninos <- sum(tolower(substr(names(recipe$Analysis$Workflow$Indices), + 1, 4)) == "nino") + if (num_ninos > 0) { + for (nins in 1:num_ninos) { + if ("nino1+2" %in% + tolower(names(recipe$Analysis$Workflow$Indices)[nins])) { + region <- c(lonmin = -90, lonmax = -80, latmin = -10, latmax = 0) + } else if ("nino3" %in% + tolower(names(recipe$Analysis$Workflow$Indices)[nins])) { + region <- c(lonmin = -150, lonmax = -90, latmin = -5, latmax = 5) + } else if ("nino3.4" %in% + tolower(names(recipe$Analysis$Workflow$Indices)[nins])) { + region <- c(lonmin = -170, lonmax = -120, latmin = -5, latmax = 5) + } else if ("nino4" %in% + tolower(names(recipe$Analysis$Workflow$Indices)[nins])) { + region <- c(lonmin = 160, lonmax = -150, latmin = -5, latmax = 5) + } + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm <- TRUE + } else { + na.rm <- recipe$Analysis$remove_NAs + } + if (is.null(recipe$Analysis$Workflow$Indices[nins]$standardised)) { + standardised <- TRUE + } else { + standardised <-recipe$Analysis$Workflow$Indices[nins]$standardised + } + if (is.null(recipe$Analysis$Workflow$Indices[nins]$running_mean)) 
{
+        running_mean <- NULL
+      } else {
+        running_mean <- recipe$Analysis$Workflow$Indices[nins]$running_mean
+      }
+      if (!is.null(recipe$Analysis$Workflow$Indices[nins]$plot_ts)) {
+        plot_ts <- recipe$Analysis$Workflow$Indices[nins]$plot_ts
+      } else {
+        plot_ts <- TRUE
+      }
+      if (!is.null(recipe$Analysis$Workflow$Indices[nins]$plot_sp)) {
+        plot_sp <- recipe$Analysis$Workflow$Indices[nins]$plot_sp
+      } else {
+        plot_sp <- TRUE
+      }
+      if (!is.null(recipe$Analysis$Workflow$Indices[nins]$alpha)) {
+        alpha <- recipe$Analysis$Workflow$Indices[nins]$alpha
+      } else {
+        alpha <- 0.05
+      }
+      if (!is.null(recipe$Analysis$Workflow$Indices[nins]$save)) {
+        save <- recipe$Analysis$Workflow$Indices[nins]$save
+      } else {
+        save <- 'all'
+      }
+      nino <- compute_nino(data = data, recipe = recipe,
+                           standardised = standardised,
+                           region = region, save = save, plot_ts = plot_ts,
+                           plot_sp = plot_sp, alpha = alpha,
+                           running_mean = running_mean, ncores = ncores,
+                           na.rm = na.rm)
+      ninos[[nins]] <- nino
+      names(ninos)[nins] <- names(recipe$Analysis$Workflow$Indices)[nins]
+    }
+  }
+  if (is.null(nao)) {
+    info(recipe$Run$logger,
+         "##### EL NINO INDICES COMPUTED SUCCESSFULLY #####")
+    return(ninos)
+  } else {
+    info(recipe$Run$logger,
+         "##### NAO INDEX COMPUTED SUCCESSFULLY #####")
+    return(nao)
+  }
+}
diff --git a/modules/Indices/R/compute_nao.R b/modules/Indices/R/compute_nao.R
new file mode 100644
index 0000000000000000000000000000000000000000..5bb76e4edd2a6fa10434e3872ca1bf7da2bb63c1
--- /dev/null
+++ b/modules/Indices/R/compute_nao.R
@@ -0,0 +1,440 @@
+
+compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp,
+                        alpha, logo = NULL) {
+  ## TODO: if fcst object in data compute the nao too
+  if (!is.null(data$fcst)) {
+    warning("NAO computed only for hindcast data.")
+  }
+  # Check if subsetting the data for the region is required
+  lons <- data$hcst$coords$longitude
+  lats <- data$hcst$coords$latitude
+  subset_needed <- FALSE
+  nao_region <- c(lonmin = -80, lonmax = 40,
+                  latmin = 20, latmax = 80)
+  if (any(lons < 0)) { #[-180, 180]
+    if (!(min(lons) > -90 & min(lons) < -70 &
+          max(lons) < 50 & max(lons) > 30)) {
+      subset_needed <- TRUE
+    }
+  } else { #[0, 360]
+    if (any(lons >= 50 & lons <= 270)) {
+      subset_needed <- TRUE
+    } else {
+      lon_E <- lons[which(lons < 50)]
+      lon_W <- lons[-which(lons < 50)]
+      if (max(lon_E) < 30 | min(lon_W) > 290) {
+        subset_needed <- TRUE
+      }
+    }
+  }
+  if (any(max(lats) > 80 & min(lats) < 20)) {
+    subset_needed <- TRUE
+  }
+  if (subset_needed) {
+    warning("The data is being subsetted for 20N-80N and 80W-40E region.")
+
+    hcst1 <- ClimProjDiags::SelBox(data$hcst$data,
+                                   lon = as.vector(lons),
+                                   lat = as.vector(lats),
+                                   region = nao_region,
+                                   londim = "longitude",
+                                   latdim = "latitude")
+    obs1 <- ClimProjDiags::SelBox(data$obs$data,
+                                  lon = as.vector(lons),
+                                  lat = as.vector(lats),
+                                  region = nao_region,
+                                  londim = "longitude",
+                                  latdim = "latitude")
+    hcst <- s2dv_cube(data = hcst1$data, lat = hcst1$lat, lon = hcst1$lon,
+                      Variable = c(data$hcst$Variable[1], level = 'surface'),
+                      data$hcst$Va, Datasets = data$hcst$Datasets,
+                      time_dims = c('syear', 'time'),
+                      Dates = data$hcst$Dates)
+    obs <- s2dv_cube(data = obs1$data, lat = obs1$lat, lon = obs1$lon,
+                     Variable = c(data$obs$Variable[1], level = 'surface'),
+                     Datasets = data$obs$Datasets, time_dims = c('syear', 'time'))
+# TODO check and create data object for the next steps
+    data <- list(hcst = hcst, obs = obs)
+    lons <- data$hcst$coords$longitude
+    lats <- data$hcst$coords$latitude
+    obs1 <- hcst1 <- NULL
+    gc()
+  }
+  nao <- 
NAO(exp = data$hcst$data, obs = data$obs$data, + lat = data$hcst$coords$latitude, + lon = data$hcst$coords$longitude, + time_dim = 'syear', + memb_dim = 'ensemble', space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', ftime_avg = NULL, + obsproj = obsproj, ncores = recipe$Analysis$ncores) + ## Standardisation: + nao$exp <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(x) {(x-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao$obs <- Apply(list(nao$obs), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(x) {(x-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao$exp <- InsertDim(nao$exp, posdim = 1, lendim = 1, name = 'region') + nao$obs <- InsertDim(nao$obs, posdim = 1, lendim = 1, name = 'region') + hcst_dates <- data$hcst$attrs$Dates + hcst_dates <- drop(data$hcst$attrs$Dates) + + if (!("time" %in% names(dim(hcst_dates)))) { + if (is.null(dim(hcst_dates))) { + hcst_dates <- array(hcst_dates, c(syear = length(hcst_dates))) + } + hcst_dates <- InsertDim(hcst_dates, pos = 1, len = 1, name = 'time') + hcst_dates <- as.POSIXct(hcst_dates, origin = '1970-01-01', tz = 'UTC') + } + nao <- list(hcst = s2dv_cube( + data = nao$exp, + varName = "nao", + metadata = list( + region = list(name = "NAO region", + lats_range = paste0(range(lats)), + lons_range = paste0(range(lons))), + time = data$hcst$attrs$Variable$metadata$time, + nao = list(units = 'adim', + longname = 'North Atlantic Oscillation')), + Dates = hcst_dates, + Dataset = recipe$Analysis$Datasets$System$name), + obs = s2dv_cube( + data = nao$obs, + varName = "nao", + metadata = list( + region = list(name = "NAO region", + lats_range = paste0(range(lats)), + lons_range = paste0(range(lons))), + time = data$obs$attrs$Variable$metadata$time, + nao = list(units = 'adim', + longname = 'North Atlantic Oscillation')), + Dates = data$obs$attrs$Dates, + Dataset = recipe$Analysis$Datasets$Reference$name)) + if (recipe$Analysis$Workflow$Indices$NAO$save == 'all') { + file_dest <- paste0(recipe$Run$output_dir, "/outputs/Indices/") + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + # Use startdates param from SaveExp to correctly name the files: + if (length(data$hcst$attrs$source_files) == dim(data$hcst$data)['syear']) { + file_dates <- Apply(data$hcst$attrs$source_files, target_dim = NULL, + fun = function(x) { + pos <- which(strsplit(x, "")[[1]] == ".") + res <- substr(x, pos-8, pos-1) + })$output1 + } + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + file_dates <- paste0('s', + recipe$Analysis$Time$hcst_start : recipe$Analysis$Time$hcst_end) + } + # need to recover original dimensions after saving to make Skill module work + nao_original_dims_hcst <- nao$hcst$data + nao$hcst$data <- .drop_indices_dims(nao$hcst$data) + CST_SaveExp(data = nao$hcst, + destination = file_dest, + startdates = as.vector(file_dates), + dat_dim = NULL, sdate_dim = 'syear', + ftime_dim = 'time', var_dim = NULL, + memb_dim = 'ensemble') + nao_original_dims_obs <- nao$obs$data + nao$obs$data <- .drop_indices_dims(nao$obs$data) + CST_SaveExp(data = nao$obs, #res, + destination = file_dest, + startdates = as.vector(file_dates), + dat_dim = NULL, sdate_dim = 'syear', + ftime_dim = 'time', var_dim = NULL, + memb_dim = NULL) + nao$hcst$data 
<- nao_original_dims_hcst + nao$obs$data <- nao_original_dims_obs + nao_original_dims_hcst <- nao_original_dims_obs <- NULL + gc() + } + # Read variable long_name to plot it + conf <- yaml::read_yaml("conf/variable-dictionary.yml") + var_name <- conf$vars[[which(names(conf$vars) == + recipe$Analysis$Variables$name)]]$long + + if (plot_ts) { + dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + showWarnings = F, recursive = T) + source("modules/Indices/R/plot_deterministic_forecast.R") + for (tstep in 1:dim(nao$hcst$data)['time']) { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + obs <- Subset(nao$obs$data, along = 'time', ind = tstep) + exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) + if (gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { + system <- recipe$Analysis$Datasets$System$name + } else { + system <-gsub(".", "", recipe$Analysis$Datasets$System$name) + } + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste("NAO Index\n", + month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".png") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n") + xlabs <- as.numeric(substr(file_dates, 1, 4)) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste("NAO Index\n", + "Lead time", fmonth, + " / Start dates", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".png") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Lead time: ", fmonth, "\n") + xlabs <- file_dates + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + plot_deterministic_forecast(obs, exp, + time_dim = 'syear', + member_dim = 'ensemble', style = 'boxplot', + xlabs = xlabs, + title = toptitle, fileout = plotfile, + caption = caption, caption_line = 6.5, + legend_text = c( + recipe$Analysis$Datasets$Reference$name, + recipe$Analysis$Datasets$System$name)) + } + } + if (plot_sp) { + ## TODO: To be removed when s2dv is released: + source("modules/Visualization/R/tmp/PlotRobinson.R") + source("modules/Indices/R/correlation_eno.R") + source("modules/Visualization/R/get_proj_code.R") + dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + showWarnings = F, recursive = T) + # Get correct code for stereographic projection + projection_code <- get_proj_code(proj_name = "stereographic") + correl_obs <- Apply(list(data$obs$data, nao$obs$data), + target_dims = 'syear', fun = .correlation_eno, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE, + ncores = recipe$Analysis$ncores) + correl_hcst <- Apply(list(data$hcst$data, nao$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + x <- apply(x, 1, mean, na.rm = TRUE) + y <- apply(y, 1, mean, na.rm = TRUE) + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE)}, + ncores = recipe$Analysis$ncores) + correl_hcst_full <- Apply(list(data$hcst$data, nao$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x,y) { + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE)}, + ncores = recipe$Analysis$ncores) + + for (tstep in 1:dim(nao$obs$data)['time']) { + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + ## Observations + map <- drop(Subset(correl_obs$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_obs$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) + toptitle <- paste(recipe$Analysis$Datasets$Reference$name, "\n", + "NAO Index -",var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".png") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + toptitle <- paste(recipe$Analysis$Datasets$Reference$name, "\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".png") + caption <- 
paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Start date: month ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + if (gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { + system <- recipe$Analysis$Datasets$System$name + } else { + system <- gsub(".", "", recipe$Analysis$Datasets$System$name) + } + + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', style = 'polygon', + toptitle = toptitle, crop_coastlines = nao_region, + caption = caption, mask = sig, bar_extra_margin = c(4,0,4,0), + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + ## Ensemble-mean + map <- drop(Subset(correl_hcst$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", + "NAO Index -", var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".png") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation ensemble mean\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".png") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation ensemble mean\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', bar_extra_margin = c(4,0,4,0), + toptitle = toptitle, style = 'polygon', + caption = caption, mask = sig, crop_coastline = nao_region, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + + # Full hcst corr + map <- drop(Subset(correl_hcst_full$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst_full$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".png") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation all members\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".png") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation all members\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', bar_extra_margin = c(4,0,4,0), + toptitle = toptitle, style = 'polygon', + caption = caption, mask = sig, crop_coastline = nao_region, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + } # end tstep loop + } + return(nao) +} diff --git a/modules/Indices/R/compute_nino.R b/modules/Indices/R/compute_nino.R new file mode 100644 index 0000000000000000000000000000000000000000..915dc9cedd826e9733f0b8495dbb7f72ee8edcbb --- /dev/null +++ b/modules/Indices/R/compute_nino.R @@ -0,0 +1,430 @@ +compute_nino <- function(data, recipe, region, standardised = TRUE, + running_mean = NULL, plot_ts = TRUE, plot_sp = TRUE, + alpha = 0.5, save = 'all', na.rm = TRUE, logo = NULL, + ncores = NULL) { + + if (!is.null(data$fcst)) { + warn(recipe$Run$logger, "Nino computed only for hindcast data.") + } + var <- recipe$Analysis$Variables$name + if (!(var %in% c('tos', 'sst'))) { + warn(recipe$Run$logger, "Variable name is not one of the expected sst or tos") + } + var_units <- data$hcst$attrs$Variable$metadata[[var]]$units + nino_hcst <- WeightedMean(data = data$hcst$data, + lon = as.vector(data$hcst$coords$longitude), + lat = as.vector(data$hcst$coords$latitude), + region = region, + londim = 'longitude', + latdim = 'latitude', + na.rm = na.rm, + ncores = ncores) + nino_obs <- WeightedMean(data = data$obs$data, + lon = as.vector(data$hcst$coords$longitude), + lat = as.vector(data$hcst$coords$latitude), + region = region, + londim = 'longitude', + latdim = 'latitude', + na.rm = na.rm, + ncores = ncores) + if (standardised) { + nino_hcst <- Apply(list(nino_hcst), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(x) {(x-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nino_obs <- Apply(list(nino_obs), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(x) {(x-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + var_units <- 'adim' + } + if (!is.null(running_mean)) { + nino_hcst <- Smoothing(nino_hcst, + runmeanlen = running_mean, + time_dim = 'time', + ncores = ncores) + nino_obs <- Smoothing(nino_obs, + runmeanlen = running_mean, + time_dim = 'time', + ncores = ncores) + } + if (all(region == c(-90, -80, -10, 0))) { + region_name <- "1+2" + nino_name <- "nino12" + } else if (all(region == c(-150, -90, -5, 5))) { + region_name <- "3" + nino_name <- "nino3" + } else if (all(region == c(-170, -120, -5, 5))) { + region_name <- "3.4" + nino_name <- "nino34" + } else if (all(region == c(160, -150, -5, 5))) { + region_name <- "4" + nino_name <- "nino4" + } else { + stop("Unknown nino region") + } + nino_hcst <- InsertDim(nino_hcst, posdim = 1, lendim = 1, name = 'region') + nino_obs <- InsertDim(nino_obs, posdim = 1, lendim = 1, name = 'region') + dims_dates_not_null <- dim(data$hcst$attrs$Dates)[which(dim(data$hcst$attrs$Dates) > 1)] + hcst_dates <- Subset(data$hcst$attrs$Dates, along = names(dims_dates_not_null), + indices = lapply(dims_dates_not_null, function(x){1:x}), + drop = "non-selected") + if (!("time" %in% names(dim(hcst_dates)))) { + hcst_dates <- InsertDim(hcst_dates, pos = 1, len = 1, name = 
'time') + ## TODO: recover dates format + hcst_dates <- as.POSIXct(hcst_dates, origin = '1970-01-01', tz = 'UTC') + } + nino <- list(hcst = s2dv_cube( + data = nino_hcst, + varName = nino_name, + metadata = list( + region = list(name = paste("Nino", region_name, "region"), + lats_range = paste(region[3:4]), + lons_range = paste(region[1:2])), + time = data$hcst$attrs$Variable$metadata$time, + nino = list(units = var_units, + longname = paste("El Niño", region_name, "Index"))), + Dates = hcst_dates, + Dataset = recipe$Analysis$Datasets$System$name), + obs = s2dv_cube( + data = nino_obs, + varName = nino_name, + metadata = list( + region = list(name = paste("Nino", region_name, "region"), + lats_range = paste(region[3:4]), + lons_range = paste(region[1:2])), + time = data$obs$attrs$Variable$metadata$time, + nino = list(units = var_units, + longname = paste("El Niño", region_name, "Index"))), + Dates = data$obs$attrs$Dates, + Dataset = recipe$Analysis$Datasets$Reference$name)) + + if (save == 'all') { + file_dest <- paste0(recipe$Run$output_dir, "/outputs/Indices/") + # Use startdates param from SaveExp to correctly name the files: + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + if (length(data$hcst$attrs$source_files) == dim(data$hcst$data)['syear']) { + file_dates <- Apply(data$hcst$attrs$source_files, target_dim = NULL, + fun = function(x) { + pos <- which(strsplit(x, "")[[1]] == ".") + res <- substr(x, pos-8, pos-1) + })$output1 + } + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + file_dates <- paste0('s',recipe$Analysis$Time$hcst_start : recipe$Analysis$Time$hcst_end) + } + nino$hcst$data <- .drop_indices_dims(nino_hcst) + CST_SaveExp(data = nino$hcst, + destination = file_dest, + startdates = as.vector(file_dates), + dat_dim = NULL, sdate_dim = 'syear', + ftime_dim = 'time', var_dim = NULL, + memb_dim = 'ensemble') + res <- .drop_indices_dims(nino_obs) + if (!("time" %in% names(dim(res)))) { + res <- InsertDim(res, pos = 1, len = 1, name = 'time') + } + nino$obs$data <- res + CST_SaveExp(data = nino$obs, + destination = file_dest, + startdates = as.vector(file_dates), + dat_dim = NULL, sdate_dim = 'syear', + ftime_dim = 'time', var_dim = NULL, + memb_dim = NULL) + nino$hcst$data <- nino_hcst + nino$obs$data <- nino_obs + res <- NULL + gc() + } + # Read variable long_name to plot it + conf <- yaml::read_yaml("conf/variable-dictionary.yml") + var_name <- conf$vars[[which(names(conf$vars) == + recipe$Analysis$Variables$name)]]$long + if (plot_ts) { + dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + showWarnings = F, recursive = T) + source("modules/Indices/R/plot_deterministic_forecast.R") + for (tstep in 1:dim(nino$hcst$data)['time']) { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + obs <- Subset(nino$obs$data, along = 'time', ind = tstep) + exp <- Subset(nino$hcst$data, along = 'time', ind = tstep) + if (gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { + system <- recipe$Analysis$Datasets$System$name + } else { + system <- gsub(".", "", recipe$Analysis$Datasets$System$name) + } + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste("Ni\u00F1o", region_name, "SST Index\n", + month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", + 
nino_name, "_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), + ".png") + caption <- paste0("Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth) + xlabs = as.numeric(substr(file_dates, 1, 4)) + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + toptitle <- paste("Ni\u00F1o", region_name, "SST Index\n", + "Lead time", fmonth, + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", nino_name, "_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".png") + caption <- paste0("Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Lead time: ", fmonth, "\n") + xlabs <- substr(file_dates, 2,5) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + plot_deterministic_forecast(obs, exp, + time_dim = 'syear', + member_dim = 'ensemble', style = 'boxplot', + xlabs = xlabs, + ylab = var_units, + title = toptitle, fileout = plotfile, + caption = caption, + legend_text = c( + recipe$Analysis$Datasets$Reference$name, + recipe$Analysis$Datasets$System$name)) + } + } + if (plot_sp) { + ## TODO: Remove sourcing of plot robinson and viz module code + source("modules/Visualization/R/tmp/PlotRobinson.R") + source("modules/Indices/R/correlation_eno.R") + source("modules/Visualization/R/get_proj_code.R") + lons <- data$hcst$coords$longitude + lats <- data$hcst$coords$latitude + # Get code for Robinson projection depending on GEOS/GDAL/PROJ version + projection_code <- get_proj_code("robinson") + # Avoid rewriten longitudes a shift is neeced: + dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + showWarnings = F, recursive = T) + correl_obs <- Apply(list(data$obs$data, nino$obs$data), + target_dims = 'syear', + fun = .correlation_eno, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE, + ncores = recipe$Analysis$ncores) + correl_hcst <- Apply(list(data$hcst$data, nino$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + x <- apply(x, 1, mean, na.rm = TRUE) + y <- apply(y, 1, mean, na.rm = TRUE) + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, time_dim = 'syear', + method = 'pearson', + alpha = alpha, + test.type = 'two-sided', + pval = FALSE)}, + ncores = recipe$Analysis$ncores) + correl_hcst_full <- Apply(list(data$hcst$data, nino$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x,y) { + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, + time_dim = 'syear', + method = 'pearson', + alpha = alpha, + test.type = 'two-sided', + pval = FALSE)}, + ncores = recipe$Analysis$ncores) + months <- lubridate::month(Subset(data$hcst$attrs$Dates, "syear", indices = 1), + label = T, abb = F, locale = "en_GB") + + for (tstep in 1:dim(nino$obs$data)['time']) { + map <- Subset(correl_obs$r, along = 'time', ind = tstep, drop = T) + sig <- Subset(correl_obs$sig, along = 'time', ind = tstep, drop = T) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 
1, 2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + toptitle <- paste(recipe$Analysis$Datasets$Reference$name, "\n", + "Ni\u00F1o", region_name, "SST Index -",var_name, "\n", + " Correlation /", + month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", nino_name, + "_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".png") + caption <- paste0("Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation ; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + toptitle <- paste(recipe$Analysis$Datasets$Reference$name, "\n", + "Ni\u00F1o", region_name, "SST Index -",var_name, "\n", + "Correlation / Start dates", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", + nino_name, "_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".png") + caption <- paste0("Start date: month ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + + PlotRobinson(map, lon = lons, lat = lats, + target_proj = projection_code, #"ESRI:54030", + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', style = 'point', + toptitle = toptitle, bar_extra_margin = c(4,0,4,0), + caption = caption, mask = sig, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + + ## Ensemble-mean + map <- Subset(correl_hcst$r, along = 'time', ind = tstep) + sig <- Subset(correl_hcst$sig, along = 'time', ind = tstep) + if (gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { + system <- recipe$Analysis$Datasets$System$name + } else { + system <-gsub(".", "", recipe$Analysis$Datasets$System$name) + } + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", + "Ni\u00F1o", region_name, "SST Index -",var_name, "\n", + "Correlation /", + month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", + nino_name, "_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".png") + caption <- paste0("Ensemble mean\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation ; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", + "Ni\u00F1o", region_name, "SST Index -",var_name, "\n", + "Correlation / Start dates", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, 
"/plots/Indices/", + nino_name, "_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".png") + caption <- paste0("Correlation ensemble mean\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + PlotRobinson(map, lon = lons, lat = lats, + target_proj = projection_code, #"ESRI:54030", + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', style = 'point', + toptitle = toptitle, bar_extra_margin = c(4,0,4,0), + caption = caption, mask = sig, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + + # Full hcst corr + map <- Subset(correl_hcst_full$r, along = 'time', ind = tstep) + sig <- Subset(correl_hcst_full$sig, along = 'time', ind = tstep) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", + "Ni\u00F1o", region_name, "SST Index -",var_name, "\n", + " Correlation /", + month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", + nino_name, "_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".png") + caption <- paste0("Individual members\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation ; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", + "Ni\u00F1o", region_name, "SST Index -",var_name, "\n", + "Correlation / Start dates", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", + nino_name, "_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".png") + caption <- paste0("Correlation ensemble mean\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + PlotRobinson(map, lon = lons, lat = lats, + target_proj = projection_code, #"ESRI:54030", + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', style = 'point', + toptitle = toptitle, bar_extra_margin = c(4,0,4,0), + caption = caption, mask = sig, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + + + } + } + return(nino) +} diff --git a/modules/Indices/R/correlation_eno.R b/modules/Indices/R/correlation_eno.R new file mode 100644 index 0000000000000000000000000000000000000000..62a3d92b9b275decd4589474c5bf67a3a5b8ccc7 --- /dev/null +++ b/modules/Indices/R/correlation_eno.R @@ -0,0 +1,45 @@ +.correlation_eno <- function(exp, obs, time_dim, method, alpha, test.type, pval){ + + cor <- NULL + cor$r = cor(x = exp, y = obs, method = method) # Correlation coefficient + + n_eff = s2dv::Eno(data = obs, time_dim = time_dim, na.action = na.pass, ncores = 1) + + if (test.type == 'one-sided'){ + + t_alpha_n2 = qt(p=alpha, df = n_eff-2, lower.tail = FALSE) + t = cor$r * sqrt(n_eff-2) / sqrt(1-cor$r^2) + + if (anyNA(c(t,t_alpha_n2)) == FALSE & t >= t_alpha_n2 & cor$r > 0){ + cor$sign = TRUE + } else { + cor$sign = FALSE + } + + # cor$n_eff <- n_eff + + if (isTRUE(pval)){ + cor$pval <- pt(q = t, df = n_eff-2, lower.tail = FALSE) + } + + } else if (test.type == 'two-sided'){ + + t_alpha2_n2 = qt(p=alpha/2, df = n_eff-2, lower.tail = FALSE) + t = abs(cor$r) * sqrt(n_eff-2) / sqrt(1-cor$r^2) + + if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){ + cor$sign = TRUE + } else { + cor$sign = FALSE + } + + cor$n_eff <- n_eff + + if (isTRUE(pval)){ + cor$pval <- 2 * pt(q = t, df = n_eff-2, lower.tail = FALSE) + } + + } else {stop('test.type not supported')} + + return(cor) +} diff --git a/modules/Indices/R/drop_indices_dims.R b/modules/Indices/R/drop_indices_dims.R new file mode 100644 index 0000000000000000000000000000000000000000..aab97a170b7ebc96221bda9d3f80d3b3d5682f79 --- /dev/null +++ b/modules/Indices/R/drop_indices_dims.R @@ -0,0 +1,87 @@ +# version victoria https://earth.bsc.es/gitlab/es/auto-s2s/-/blob/dev-Loading-multivar/modules/Skill/Skill.R +# metric_array is in this case the index +.drop_indices_dims <- function(metric_array) { + # Define dimensions that are not essential for saving + droppable_dims <- c("var", "dat", "sday", "sweek", "ensemble", "nobs", + "nexp", "exp_memb", "obs_memb", "bin") + # Select non-essential dimensions of length 1 + dims_to_drop <- intersect(names(which(dim(metric_array) == 1)), + droppable_dims) + drop_indices <- grep(paste(dims_to_drop, collapse = "|"), + names(dim(metric_array))) + # Drop selected dimensions + metric_array <- abind::adrop(metric_array, drop = drop_indices) + # If array has memb dim (Corr case), change name to 'ensemble' + if ("exp_memb" %in% names(dim(metric_array))) { + names(dim(metric_array))[which(names(dim(metric_array)) == + "exp_memb")] <- "ensemble" + } + return(metric_array) +} + + + + + +## TODO: Replace with ClimProjDiags::Subset and add var and dat dimensions +#.drop_dims <- function(metric_array) { +# # Drop all singleton dimensions +# dims <- dim(metric_array) +# metric_array <- drop(metric_array) +# if ("region" %in% names(dims)) { +# if (!is.array(metric_array)) { +# dim(metric_array) <- length(metric_array) +# if (!all(dims > 1)) { +# if ("syear" %in% names(dims)) { +# names(dim(metric_array)) <- "syear" +# } else { +# names(dim(metric_array)) <- "time" +# } +# } else { +# names(dim(metric_array)) <- 
names(dims[which(dims > 1)]) +# } +# } +# if (!("time" %in% names(dim(metric_array)))) { +# dim(metric_array) <- c("time" = 1, dim(metric_array)) +# } +# # If latitude was singleton, add it back +# if (!("region" %in% names(dim(metric_array)))) { +# dim(metric_array) <- c(dim(metric_array), "region" = 1) +# } +# # If array has memb dim (Corr case), change name to 'ensemble' +# if ("exp_memb" %in% names(dim(metric_array))) { +# names(dim(metric_array))[which(names(dim(metric_array)) == +# "exp_memb")] <- "ensemble" +# } +# } else if (all(c("latitude", "longiguted") %in% names(dims))) { +# # If the array becomes a vector as a result, restore dimensions. +# ## This applies to the case of 'corr' with one lon and one lat +# if (!is.array(metric_array)) { +# dim(metric_array) <- length(metric_array) +# names(dim(metric_array)) <- names(dims[which(dims > 1)]) +# } +# # If the array becomes a vector as a result, restore dimensions. +# ## This applies to the case of 'corr' with one lon and one lat +# # If time happened to be a singleton dimension, add it back in the array +# if (!("time" %in% names(dim(metric_array)))) { +# dim(metric_array) <- c("time" = 1, dim(metric_array)) +# } +# # If latitude was singleton, add it back +# if (!("latitude" %in% names(dim(metric_array)))) { +# dim(metric_array) <- c(dim(metric_array), "latitude" = 1) +# } +# # If longitude was singleton, add it back +# if (!("longitude" %in% names(dim(metric_array)))) { +# dim(metric_array) <- c(dim(metric_array), "longitude" = 1) +# } +# # If array has memb dim (Corr case), change name to 'ensemble' +# if ("exp_memb" %in% names(dim(metric_array))) { +# names(dim(metric_array))[which(names(dim(metric_array)) == +# "exp_memb")] <- "ensemble" +# } else { +# stop("what dimensions") +# } +# } +# return(metric_array) +#} + diff --git a/modules/Indices/R/plot_deterministic_forecast.R b/modules/Indices/R/plot_deterministic_forecast.R new file mode 100644 index 0000000000000000000000000000000000000000..30f13b78db1654bcb35fa7a86b6f4ad93d7c6393 --- /dev/null +++ b/modules/Indices/R/plot_deterministic_forecast.R @@ -0,0 +1,181 @@ + +plot_deterministic_forecast <- function(obs, fcst, title = NULL, + xlabs = NULL, ylab = '', ylims = NULL, + xlabs_pos = NULL, ylabs_pos = NULL, + style = 'shading', col_obs = 'black', + col_hcst = 'blue', col_fcst = 'red', + member_dim = 'member', + time_dim = 'year', + logo = NULL, caption = NULL, + caption_line = NULL, + legend_text = c('Observations', 'Hindcast', + 'Forecast'), + width = 1010, height = 510, res = NA, + fileout = NULL) { + n_obs <- as.numeric(dim(obs)[time_dim]) + n_fcst <- as.numeric(dim(fcst)[time_dim]) + + if (is.null(ylims)) { + if (all(is.na(fcst)) && all(is.na(obs))) { + # No data, return random ylims + ylims <- c(-1, 1) + } else { + ylims <- c(-max(abs(fcst) + 0.1, abs(obs) + 0.1, na.rm = TRUE), + max(abs(fcst) + 0.1, abs(obs) + 0.1, na.rm = TRUE)) + } + } + if (is.null(xlabs)) { + xlabs <- 1:n_fcst + } + if (is.null(xlabs_pos)) { + xlabs_pos <- -2 + seq(from = 1, to = n_fcst, by = 5) + } + if (is.null(ylabs_pos)) { + ylabs_pos <- ylims[1] - 0.045 + par('usr')[3] + } + ## Opening the output file + if (!is.null(fileout)) { + png(filename = fileout, width = width, height = height, res = res) + } + if (identical(style, 'boxplot')) { + if (!is.null(legend_text)) { + right_mar <- max(c(nchar(legend_text), 8)) + 1 + } else { + right_mar <- 8 + } + par(mar = c(7, 4.1, 4.1, right_mar)) + } + ## Empty figure with the labs + plot(x = 1:n_fcst, y = rep(0,n_fcst), col = col_obs, type = 'l', lwd = 
1, + xlab = '', ylab = ylab, main = title, xlim = c(1,n_fcst), ylim = ylims, + cex.lab = 1.2, cex.main = 1.2, cex.axis = 1.2, frame.plot = TRUE, + xaxt = 'n') + axis(1, at = 1:n_fcst, labels = xlabs, cex = 1.5) + #text(cex = 1.2, x = xlabs_pos, y = ylabs_pos, labels = xlabs[seq(from = 1, to = n_fcst, by = 5)], srt = 45, pos = 1, xpd = TRUE) + + ## Auxiliary lines + abline(h = 0, lw = 1) + abline(v = seq(from = 1, to = n_fcst, by = 5), lty = 2, lwd = 1, col = 'grey') + #abline(h = seq(from = floor(ylims[1]), to = ceiling(ylims[2]), by = 1), lty = 2, col = 'grey') + ## Ensemble spread + if (identical(style, 'boxplot')) { + ## Hindcast + for (y in 1:n_obs) { + boxplot(x = ClimProjDiags::Subset(x = fcst, + along = time_dim, + indices = y, + drop = 'selected'), + at = y, notch = FALSE, range = 0, add = TRUE, + col = adjustcolor(col_hcst, alpha.f = 0.60), + boxwex = 1, axes = F) + } + ## Forecast + if (n_fcst > n_obs) { + for (y in (n_obs+1):n_fcst) { + boxplot(x = ClimProjDiags::Subset(x = fcst, + along = time_dim, + indices = y, + drop = 'selected'), + at = y, notch = FALSE, range = 0, add = TRUE, + col = adjustcolor(col_fcst, alpha.f = 0.60), + boxwex = 1, axes = F) + } + } + } else if (identical(style, 'shading')) { + fcst_quantiles <- multiApply::Apply(data = fcst, target_dims = member_dim, + fun = function(x) {quantile(x = as.vector(x), type = 8, na.rm = FALSE)}, + ncores = 1)$output1 + ## Hindcast + polygon(x = c(1:n_obs, rev(1:n_obs)), + y = c(fcst_quantiles[4,1:n_obs], rev(fcst_quantiles[5,1:n_obs])), + col = adjustcolor(col_hcst, alpha.f = 0.10), + border = NA) + polygon(x = c(1:n_obs, rev(1:n_obs)), + y = c(fcst_quantiles[2,1:n_obs], rev(fcst_quantiles[4,1:n_obs])), + col = adjustcolor(col_hcst, alpha.f = 0.30), + border = NA) + polygon(x = c(1:n_obs, rev(1:n_obs)), + y = c(fcst_quantiles[1,1:n_obs], + rev(fcst_quantiles[2,1:n_obs])), + col = adjustcolor(col_hcst, alpha.f = 0.10), + border = NA) + ## Forecast + if (n_fcst > n_obs){ + polygon(x = c((n_obs):n_fcst, rev((n_obs):n_fcst)), + y = c(fcst_quantiles[4,(n_obs):n_fcst], + rev(fcst_quantiles[5,(n_obs):n_fcst])), + col = adjustcolor(col_fcst, alpha.f = 0.10), + border = NA) + polygon(x = c((n_obs):n_fcst, rev((n_obs):n_fcst)), + y = c(fcst_quantiles[2,(n_obs):n_fcst], + rev(fcst_quantiles[4,(n_obs):n_fcst])), + col = adjustcolor(col_fcst, alpha.f = 0.30), + border = NA) + polygon(x = c((n_obs):n_fcst, rev((n_obs):n_fcst)), + y = c(fcst_quantiles[1,(n_obs):n_fcst], + rev(fcst_quantiles[2,(n_obs):n_fcst])), + col = adjustcolor(col_fcst, alpha.f = 0.10), + border = NA) + } + ## NOTEV: What is 'fsct_stype'? 
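+    ## The argument validated below is 'style'; accepted values are 'boxplot' and 'shading'.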
+  } else {stop("'style' must be either 'boxplot' or 'shading'")}
+
+  ## Observations
+  lines(x = 1:n_obs, y = obs, col = col_obs, lty = 'solid', lwd = 5)
+
+  ## Ensemble mean
+  ## Hindcast
+  lines(x = 1:n_obs,
+        y = multiApply::Apply(data = fcst,
+                              target_dims = member_dim,
+                              fun = mean,
+                              na.rm = FALSE,
+                              ncores = 1)$output1[1:n_obs],
+        col = col_hcst, type = 'l', lwd = 5)
+  ## Forecast
+  if (n_fcst > n_obs) {
+    lines(x = (n_obs):n_fcst,
+          y = multiApply::Apply(data = fcst,
+                                target_dims = member_dim,
+                                fun = mean,
+                                na.rm = FALSE,
+                                ncores = 1)$output1[(n_obs):n_fcst],
+          col = col_fcst, type = 'l', lwd = 5)
+  }
+
+  ## Legend (observations and hindcast)
+  if (!is.null(legend_text)) {
+    par(xpd = TRUE)
+    legend(par('usr')[2], par('usr')[4], #x = 1, y = ylims[2],
+           text.col = c(col_obs, col_hcst), cex = 1.2,
+           legend = legend_text[1:2], lty = 1, lwd = 3,
+           col = c(col_obs, col_hcst), bg = "transparent", bty = 'n')
+  }
+  if (!is.null(caption)) {
+    if (is.null(caption_line)) {
+      caption_line <- 4.5
+    }
+    mtext(caption, side = 1, line = caption_line, cex = 1.2, adj = 0)
+  }
+  if (identical(style, 'boxplot')) {
+    if (!is.null(logo)) {
+      rasterImage(logo, ybottom = par('usr')[3] - 2, ytop = par('usr')[3] - 1.1, #ylims[2] + 2,
+                  xleft = par('usr')[2] - 1.5,
+                  xright = par('usr')[2] + dim(logo)[2]/dim(logo)[1] * width/height, xpd = T)
+    }
+    data_samp <- 1:100
+    data_samp <- (data_samp - mean(data_samp))/ sqrt(var(data_samp))
+    par(xpd = T)
+    box_params <- boxplot(data_samp, add = T, at = par('usr')[2] + n_fcst * 0.1)
+    text(x = par('usr')[2] + n_fcst * 0.1 + .5, y = box_params$stats[1,], "Minimum", adj = c(0,0))
+    text(x = par('usr')[2] + n_fcst * 0.1 + .5, y = box_params$stats[2,], "Q1", adj = c(0,0))
+    text(x = par('usr')[2] + n_fcst * 0.1 + .5, y = box_params$stats[3,], "Median", adj = c(0,0))
+    text(x = par('usr')[2] + n_fcst * 0.1 + .5, y = box_params$stats[4,], "Q3", adj = c(0,0))
+    text(x = par('usr')[2] + n_fcst * 0.1 + .5, y = box_params$stats[5,], "Maximum", adj = c(0,0))
+  }
+
+  ## Closing the output file
+  if (!is.null(fileout)) {dev.off()}
+
+}
+
diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 598ddd0e4050105138e6579c290d1ef416924ee2..63fee97bede51b72128b917af9b5171d83061d4f 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -1,418 +1,41 @@
-## TODO: remove paths to personal scratchs
-source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R")
-# Load required libraries/funs
-source("modules/Loading/dates2load.R")
-source("modules/Loading/check_latlon.R")
-## TODO: Move to prepare_outputs.R
 source("tools/libs.R")
-
-load_datasets <- function(recipe) {
-
-  # -------------------------------------------
-  # Set params -----------------------------------------
-
-  hcst.inityear <- recipe$Analysis$Time$hcst_start
-  hcst.endyear <- recipe$Analysis$Time$hcst_end
-  lats.min <- recipe$Analysis$Region$latmin
-  lats.max <- recipe$Analysis$Region$latmax
-  lons.min <- recipe$Analysis$Region$lonmin
-  lons.max <- recipe$Analysis$Region$lonmax
-  ref.name <- recipe$Analysis$Datasets$Reference$name
-  exp.name <- recipe$Analysis$Datasets$System$name
-  variable <- recipe$Analysis$Variables$name
-  store.freq <- recipe$Analysis$Variables$freq
-
-  # get sdates array
-  ## LOGGER: Change dates2load to extract logger from recipe?
- sdates <- dates2load(recipe, recipe$Run$logger) - - idxs <- NULL - idxs$hcst <- get_timeidx(sdates$hcst, - recipe$Analysis$Time$ftime_min, - recipe$Analysis$Time$ftime_max, - time_freq=store.freq) - - if (!(is.null(sdates$fcst))) { - idxs$fcst <- get_timeidx(sdates$fcst, - recipe$Analysis$Time$ftime_min, - recipe$Analysis$Time$ftime_max, - time_freq=store.freq) - } - - ## TODO: Examine this verifications part, verify if it's necessary - # stream <- verifications$stream - # sdates <- verifications$fcst.sdate - - ## TODO: define fcst.name - ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name - - # get esarchive datasets dict: - archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive - exp_descrip <- archive$System[[exp.name]] - - freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) - reference_descrip <- archive$Reference[[ref.name]] - freq.obs <- unlist(reference_descrip[[store.freq]][variable]) - obs.dir <- reference_descrip$src - fcst.dir <- exp_descrip$src - hcst.dir <- exp_descrip$src - fcst.nmember <- exp_descrip$nmember$fcst - hcst.nmember <- exp_descrip$nmember$hcst - - ## TODO: it is necessary? - ##if ("accum" %in% names(reference_descrip)) { - ## accum <- unlist(reference_descrip$accum[store.freq][[1]]) - ##} else { - ## accum <- FALSE - ##} - - # ----------- - obs.path <- paste0(archive$src, - obs.dir, store.freq, "/$var$", - reference_descrip[[store.freq]][[variable]], - "$var$_$file_date$.nc") - - hcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], - "$var$_$file_date$.nc") - - fcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], - "$var$_$file_date$.nc") - - # Define regrid parameters: - #------------------------------------------------------------------- - regrid_params <- get_regrid_params(recipe, archive) - - # Longitude circular sort and latitude check - #------------------------------------------------------------------- - circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) - - if (recipe$Analysis$Variables$freq == "monthly_mean"){ - split_multiselected_dims = TRUE +## TODO: Remove with the next release +source("modules/Loading/load_datasets.R") + +Loading <- function(recipe) { + # Source correct function depending on filesystem and time horizon + # Case: CERISE (Mars) + if (tolower(recipe$Run$filesystem) == "mars") { + source("modules/Loading/R/load_GRIB.R") + data <- load_GRIB(recipe) + } else if (tolower(recipe$Run$filesystem) == "sample") { + source("modules/Loading/R/load_sample.R") + data <- load_sample(recipe) } else { - split_multiselected_dims = FALSE - } - - # Load hindcast - #------------------------------------------------------------------- - hcst <- Start(dat = hcst.path, - var = variable, - file_date = sdates$hcst, - time = idxs$hcst, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = indices(1:hcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = 
TRUE) - - if (recipe$Analysis$Variables$freq == "daily_mean") { - # Adjusts dims for daily case, could be removed if startR allows - # multidim split - names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(hcst))] <- dim(hcst) - dim(hcst) <- default_dims - # Change time attribute dimensions - default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) - names(dim(attr(hcst, "Variables")$common$time))[which(names( - dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" - default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- - dim(attr(hcst, "Variables")$common$time) - dim(attr(hcst, "Variables")$common$time) <- default_time_dims - } - - # Convert hcst to s2dv_cube object - ## TODO: Give correct dimensions to $Dates$start - ## (sday, sweek, syear instead of file_date) - hcst <- as.s2dv_cube(hcst) - # Adjust dates for models where the time stamp goes into the next month - if (recipe$Analysis$Variables$freq == "monthly_mean") { - hcst$Dates$start[] <- hcst$Dates$start - seconds(exp_descrip$time_stamp_lag) - } - - # Load forecast - #------------------------------------------------------------------- - if (!is.null(recipe$Analysis$Time$fcst_year)) { - # the call uses file_date instead of fcst_syear so that it can work - # with the daily case and the current version of startR not allowing - # multiple dims split - - fcst <- Start(dat = fcst.path, - var = variable, - file_date = sdates$fcst, - time = idxs$fcst, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = indices(1:fcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) - - if (recipe$Analysis$Variables$freq == "daily_mean") { - # Adjusts dims for daily case, could be removed if startR allows - # multidim split - names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(fcst))] <- dim(fcst) - dim(fcst) <- default_dims - # Change time attribute dimensions - default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) - names(dim(attr(fcst, "Variables")$common$time))[which(names( - dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" - default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- - dim(attr(fcst, "Variables")$common$time) - dim(attr(fcst, "Variables")$common$time) <- default_time_dims + # Case: esarchive + time_horizon <- tolower(recipe$Analysis$Horizon) + if (time_horizon == "seasonal") { + if(recipe$Analysis$Variables$name == 'tas-tos') { + source("modules/Loading/R/load_tas_tos.R") + data <- load_tas_tos(recipe) + } else { + source("modules/Loading/R/load_seasonal.R") + data <- load_seasonal(recipe) + } + } else if (time_horizon == "decadal") { + 
source("modules/Loading/R/load_decadal.R") + data <- load_decadal(recipe) + } else { + stop("Incorrect time horizon.") } - - # Convert fcst to s2dv_cube - fcst <- as.s2dv_cube(fcst) - # Adjust dates for models where the time stamp goes into the next month - if (recipe$Analysis$Variables$freq == "monthly_mean") { - fcst$Dates$start[] <- - fcst$Dates$start - seconds(exp_descrip$time_stamp_lag) - } - - } else { - fcst <- NULL } - - # Load reference - #------------------------------------------------------------------- - - # Obtain dates and date dimensions from the loaded hcst data to make sure - # the corresponding observations are loaded correctly. - dates <- hcst$Dates$start - dim(dates) <- dim(Subset(hcst$data, - along=c('dat', 'var', - 'latitude', 'longitude', 'ensemble'), - list(1,1,1,1,1), drop="selected")) - - # Separate Start() call for monthly vs daily data - if (store.freq == "monthly_mean") { - - dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") - dim(dates_file) <- dim(dates) - - obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - } else if (store.freq == "daily_mean") { - - # Get year and month for file_date - dates_file <- sapply(dates, format, '%Y%m') - dim(dates_file) <- dim(dates) - # Set hour to 12:00 to ensure correct date retrieval for daily data - lubridate::hour(dates) <- 12 - lubridate::minute(dates) <- 00 - # Restore correct dimensions - dim(dates) <- dim(dates_file) - - obs <- Start(dat = obs.path, - var = variable, - file_date = sort(unique(dates_file)), - time = dates, - time_var = 'time', - time_across = 'file_date', - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - } - - # Adds ensemble dim to obs (for consistency with hcst/fcst) - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(obs))] <- dim(obs) - dim(obs) <- default_dims - - # Convert obs to s2dv_cube - obs <- as.s2dv_cube(obs) - - # Check for consistency between hcst and obs grid - if (!(recipe$Analysis$Regrid$type == 'none')) { - if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { - lat_error_msg <- paste("Latitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lat_error_msg) - hcst_lat_msg <- paste0("First hcst lat: ", 
hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) - info(recipe$Run$logger, hcst_lat_msg) - obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], - "; Last obs lat: ", obs$lat[length(obs$lat)]) - info(recipe$Run$logger, obs_lat_msg) - stop("hcst and obs don't share the same latitudes.") - } - if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { - lon_error_msg <- paste("Longitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lon_error_msg) - hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) - info(recipe$Run$logger, hcst_lon_msg) - obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], - "; Last obs lon: ", obs$lon[length(obs$lon)]) - info(recipe$Run$logger, obs_lon_msg) - stop("hcst and obs don't share the same longitudes.") - - } - } - - # Remove negative values in accumulative variables - dictionary <- read_yaml("conf/variable-dictionary.yml") - if (dictionary$vars[[variable]]$accum) { - info(recipe$Run$logger, - "Accumulated variable: setting negative values to zero.") - obs$data[obs$data < 0] <- 0 - hcst$data[hcst$data < 0] <- 0 - if (!is.null(fcst)) { - fcst$data[fcst$data < 0] <- 0 - } - } - - # Convert prlr from m/s to mm/day - ## TODO: Make a unit conversion function? - if (variable == "prlr") { - # Verify that the units are m/s and the same in obs and hcst - if (((attr(obs$Variable, "variable")$units == "m s-1") || - (attr(obs$Variable, "variable")$units == "m s**-1")) && - ((attr(hcst$Variable, "variable")$units == "m s-1") || - (attr(hcst$Variable, "variable")$units == "m s**-1"))) { - - info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") - obs$data <- obs$data*86400*1000 - attr(obs$Variable, "variable")$units <- "mm/day" - hcst$data <- hcst$data*86400*1000 - attr(hcst$Variable, "variable")$units <- "mm/day" - if (!is.null(fcst)) { - fcst$data <- fcst$data*86400*1000 - attr(fcst$Variable, "variable")$units <- "mm/day" - } - } - } - - # Compute anomalies if requested - # Print a summary of the loaded data for the user, for each object + # Display data summary if (recipe$Run$logger$threshold <= 2) { - data_summary(hcst, recipe) - data_summary(obs, recipe) - if (!is.null(fcst)) { - data_summary(fcst, recipe) + data_summary(data$hcst, recipe) + data_summary(data$obs, recipe) + if (!is.null(data$fcst)) { + data_summary(data$fcst, recipe) } } - - info(recipe$Run$logger, - "##### DATA LOADING COMPLETED SUCCESSFULLY #####") - - ############################################################################ - # - # CHECKS ON MISSING FILES - # - ############################################################################ - - #obs.NA_dates.ind <- Apply(obs, - # fun=(function(x){ all(is.na(x))}), - # target_dims=c('time', 'latitude', 'longitude'))[[1]] - #obs.NA_dates <- dates_file[obs.NA_dates.ind] - #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] - #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", - # freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") - # - #if (any(is.na(hcst))){ - # fatal(recipe$Run$logger, - # paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", - # " ################################################# ", - # " ###### MISSING FILES #### ", - # " ################################################# ", - # "hcst files:", - # hcst.NA_files, - # " ################################################# ", - # " ################################################# 
", - # sep="\n")) - # quit(status = 1) - #} - # - #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ - # fatal(recipe$logger, - # paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", - # " ################################################# ", - # " ###### MISSING FILES #### ", - # " ################################################# ", - # "obs files:", - # obs.NA_files, - # " ################################################# ", - # " ################################################# ", - # sep="\n")) - # quit(status=1) - #} - # - #info(recipe$logger, - # "######### DATA LOADING COMPLETED SUCCESFULLY ##############") - - ############################################################################ - ############################################################################ - - return(list(hcst = hcst, fcst = fcst, obs = obs)) - + return(data) } diff --git a/modules/Loading/R/GRIB/GrbLoad.R b/modules/Loading/R/GRIB/GrbLoad.R new file mode 100644 index 0000000000000000000000000000000000000000..ef1df0cb13b6eb46ac4b55de3fabee4dd204213a --- /dev/null +++ b/modules/Loading/R/GRIB/GrbLoad.R @@ -0,0 +1,248 @@ +#---------------------------------------------------------------------------------- +# Use gribr package to load GRIB files +# Assume that all the messages have the same metadata; one message is one time step +# If exp, has.memb is a number; if obs, has.memb = NULL +# syear_time_dim is the time attr dim of exp as the input for obs +#---------------------------------------------------------------------------------- +GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, + regrid = NULL) { + library(gribr) + + result <- vector('list', length = length(dat)) + times <- vector('list', length = length(dat)) + times <- lapply(times, '[<-', rep(NA, length(time_step))) #NOTE: length is 0 (slower in loop?) +# times <- lapply(times, '[<-', .POSIXct(rep(NA, length(time_step)), tz = 'UTC')) + + for (dat_i in 1:length(dat)) { + + file_to_load <- grib_open(dat[[dat_i]]) + + #---------------------------------------- + # HOW TO FIND THE VALUE OF EACH FTIME STEP? + #---------------------------------------- + #NOTE: ValidityTime is not considered now. So if the time frequency is less than daily, it has problem. + + # METHOD 1: Get first message to figure out the validityDate/Time of each message + #NOTE: gm1$validityDate should be "s", "m", "h", etc. according to document. But our files have "1". + gm1 <- grib_get_message(file_to_load, 1) + first_ftime <- as.character(gm1$validityDate) + first_ftime_hour <- gm1$validityTime + # For monthly data + #NOTE: may not be correct because it is calculated by the first message + cdo_time_attr <- clock::add_months(as.POSIXct(paste0(first_ftime, ' ', first_ftime_hour), + format = "%Y%m%d %H", tz = 'UTC'), time_step - 1) + cdo_time <- format(cdo_time_attr, "%Y%m%d") + +# # METHOD 2: Use cdo showtimestamp (DEPENDENCY!) 
+# #TODO: Change to method 1 because can't predict what cdo will produce +# cdo_time <- system(paste0("cdo showtimestamp ", dat[[dat_i]]), intern = T) +# cdo_time <- strsplit(cdo_time, " ")[[length(cdo_time)]] +# cdo_time <- cdo_time[which(cdo_time != "")] +## # Check if there is member dim or not +## has_memb <- ifelse((length(unique(cdo_time)) == length(cdo_time)), FALSE, TRUE) +# if (has.memb) memb_dim_length <- length(cdo_time)/length(unique(cdo_time)) +# cdo_time <- unique(cdo_time)[time_step] #"2000-12-01T00:00:00" +# cdo_time_attr <- as.POSIXct(gsub('T', ' ', cdo_time), tz = 'UTC') +# cdo_time <- sapply(sapply(cdo_time, strsplit, "T"), '[[', 1) +# cdo_time <- gsub('-', '', cdo_time) + + #---------------------------------------- + + # all members + ftimes: length should be memb*ftime (e.g., 51*7) + ## Method 1: use grib_select and real values to filter + memb_ftime <- grib_select(file_to_load, list(validityDate = cdo_time)) + if (inherits(memb_ftime, 'gribMessage')) memb_ftime <- list(memb_ftime) + +# ## Method 2: Calculate which messages are the desired ones +# gm <- grib_get_message(file_to_load, time_step) +# if (length(time_step) == 1) { +# gm <- list(gm) +# } + + ################################################################## + # Get data as an array [longitude, latitude, (memb*)time] + ################################################################## + if (grepl("reduced", gm1$gridType)) { + #NOTE: Need to call gribr::grib_expand_grids because I don't know how to make .Call("gribr_redtoreg") work outside that function + # https://github.com/nawendt/gribr/blob/main/src/redtoreg.c + values_l <- vector('list', length = length(memb_ftime)) + for (gm_i in 1:length(memb_ftime)) { + values_l[[gm_i]] <- grib_expand_grids(memb_ftime[[gm_i]]) + } + result[[dat_i]] <- array(unlist(values_l), dim = c(longitude = gm1$Nj * 2, latitude = gm1$Nj, time = length(values_l))) + # Save memory + rm(values_l); gc() + + } else { + result[[dat_i]] <- .grib_expand_grids(memb_ftime) + } + + ################################################################## + # Get metadata + ################################################################## + ## (1-1) Everything from the first message of first file + if (dat_i == 1) { + ## (1-1) Everything from the first message of first file +# dims <- dim(result[[dat_i]]) +# attributes(result) <- gm1 +# # turn result into array again +# dim(result[[dat_i]]) <- dims + + ## (1-2) Only save the necessary attributes + attr(result, 'edition') <- gm1$edition + attr(result, 'shortName') <- gm1$shortName + #NOTE: Tune varaible name!! 
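+      # Map the GRIB short name to the variable name used in the archive
+      # (only the '2t' -> 'tas' case is handled here)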
+ if (gm1$shortName == '2t') attr(result, 'shortName') <- 'tas' + attr(result, 'name') <- gm1$name + attr(result, 'units') <- gm1$units +# attr(result, 'validityDate') <- gm1$validityDate +# attr(result, 'validityTime') <- gm1$validityTime + + ## (2) Lat and lon + latlon <- grib_latlons(gm1, expand = TRUE) + attr(result, 'latitude') <- unique(as.vector(c(latlon$lats))) + attr(result, 'longitude') <- unique(as.vector(c(latlon$lons))) + # Save memory (though it's small) + rm(latlon); gc() + + #NOTE: Find another way to check regular grid; Ni/Nj not always exist +# if (has.key(gm1, "Nx") && has.key(gm1, "Ny")) { +# nx <- gm1$Nx +# ny <- gm1$Ny +# } else { +# nx <- gm1$Ni +# ny <- gm1$Nj +# } +# if (length(lats) != ny | length(lons) != nx) { +# stop("Latitude and Longitude seem to be non-regular grid.") +# } + + } + +#-------------------------------- +#NOTE: Just use cdo_time +# ## (3) Date and time: Need to get from each massage +# for (time_i in 1:length(time_step)) { +# gm1 <- gm[[time_i]] +# #NOTE: What's the correct time? +## dates <- gm1$validityDate #Date of validity of the forecast +## times <- gm1$validityTime +## dates <- gm1$dataDate # Reference date +# times[[dat_i]][time_i] <- as.POSIXct( +# lubridate::ymd_hms(paste0(paste(gm1$year,gm1$month,gm1$day, '-'), ' ', +# paste(gm1$hour, gm1$minute, gm1$second, ':'))) +# ) +# } + times[[dat_i]] <- cdo_time_attr +#-------------------------------- + + ################################################################## + # regrid + ################################################################## + if (!is.null(regrid)) { + # result[[dat_i]]: [longitude, latitude, time] + res_data <- s2dv::CDORemap(result[[dat_i]], lons = attr(result, 'longitude'), lats = attr(result, 'latitude'), + grid = regrid$type, method = regrid$method, force_remap = TRUE) + if (dat_i == length(dat)) { + attr(result, 'longitude') <- res_data$lons + attr(result, 'latitude') <- res_data$lats + } + result[[dat_i]] <- res_data$data_array + } + + + ################################################################## + # Save memory + rm(memb_ftime); rm(gm1); gc() + grib_close(file_to_load) # Doesn't impact memory + ################################################################## +} #for loop dat + + # Turn result list into array + attr <- attributes(result) + res_dim <- c(dim(result[[1]]), syear = length(result)) #[longitude, latitude, (memb*)time, syear] + result <- unlist(result) + dim(result) <- res_dim + + # Generate date/time attributes + times <- array(unlist(times), dim = c(time = length(time_step), syear = length(dat), + sday = 1, sweek = 1)) + times <- s2dv::Reorder(times, c('sday', 'sweek', 'syear', 'time')) + if (!is.null(syear_time_dim)) dim(times) <- syear_time_dim + times <- as.POSIXct(times, origin = '1970-01-01', tz = 'UTC') + + # Reshape and reorder array + if (is.null(has.memb)) { # obs doesn't have memb; reshape syear/time dim + result <- s2dv::Reorder(result, c("syear", "time", "latitude", "longitude")) + result <- array(result, dim = c(dat = 1, var = 1, + syear_time_dim, dim(result)[3:4], + ensemble = 1)) + } else { + result <- array(result, dim = c(dim(result)[1:2], ensemble = has.memb, + time = length(time_step), dim(result)[4])) + result <- s2dv::Reorder(result, c("syear", "time", "latitude", "longitude", "ensemble")) + dim(result) <- c(dat = 1, var = 1, sday = 1, sweek = 1, dim(result)) + } + + # Add attributes back + attr$dim <- dim(result) + attributes(result) <- attr + attr(result, 'time') <- times + + # Save memory + rm(times); rm(attr); gc() + 
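+  # Return the assembled array; the matching forecast dates are attached as a
+  # POSIXct array in attr(result, 'time')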
+ return(result) +} + +######################################################################### +######################################################################### + +.grib_expand_grids <- function(gribMessages, vector = FALSE) { + # gribMessages is a list of multiple messages + gribMessage <- gribMessages[[1]] + + if (gribr::has.key(gribMessage, "Nx") && gribr::has.key(gribMessage, "Ny")) { + nx <- gribMessage$Nx + ny <- gribMessage$Ny + } else { + nx <- gribMessage$Ni + ny <- gribMessage$Nj + } + + if (is.null(nx) || is.null(ny)) { + stop("Unsupported grid type: ", gribMessage$gridType) + } + + if (grepl("reduced", gribMessage$gridType)) { + #TODO: This part is not used now. + nx <- ny * 2 + values <- .Call("gribr_redtoreg", nx, gribMessage$pl, + gribMessage$values) + values <- matrix(values, nx, ny, + byrow = gribMessage$jPointsAreConsecutive) + +# values_l <- vector('list', length = length(gribMessages)) +# for (gm_i in 1:length(gribMessages)) { +# values <- .Call("gribr_redtoreg", nx, gribMessages[[gm_i]]$pl, +# gribMessages[[gm_i]]$values) +# values <- matrix(values, nx, ny, +# byrow = gribMessage$jPointsAreConsecutive) +# values_l[[gm_i]] <- values +# } + + } else { +# values <- matrix(gribMessage$values, nx, ny, +# byrow = gribMessage$jPointsAreConsecutive) + values_l <- lapply(gribMessages, '[[', 'values') + values_l <- lapply(values_l, matrix, nx, ny, byrow = gribMessage$jPointsAreConsecutive) + values <- array(unlist(values_l), dim = c(longitude = nx, latitude = ny, time = length(values_l))) + } + + if (vector) { + values <- as.numeric(values) + } + + values +} + diff --git a/modules/Loading/R/GRIB/s2dv_cube.R b/modules/Loading/R/GRIB/s2dv_cube.R new file mode 100644 index 0000000000000000000000000000000000000000..d44b99e2e35760e34c069815717ed803cc2f465d --- /dev/null +++ b/modules/Loading/R/GRIB/s2dv_cube.R @@ -0,0 +1,227 @@ +#'Creation of a 's2dv_cube' object +#' +#'@description This function allows to create an 's2dv_cube' object by passing +#'information through its parameters. This function will be needed if the data +#'hasn't been loaded using CST_Load or has been transformed with other methods. +#'An 's2dv_cube' object has many different components including metadata. This +#'function will allow to create 's2dv_cube' objects even if not all elements +#'are defined and for each expected missed parameter a warning message will be +#'returned. +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#' +#'@param data A multidimensional array with named dimensions, typically with +#' dimensions: dataset, member, sdate, ftime, lat and lon. +#'@param coords A named list with elements of the coordinates corresponding to +#' the dimensions of the data parameter. The names and length of each element +#' must correspond to the names of the dimensions. If any coordinate is not +#' provided, it is set as an index vector with the values from 1 to the length +#' of the corresponding dimension. +#'@param varName A character string indicating the abbreviation of the variable +#' name. +#'@param metadata A named list where each element is a variable containing the +#' corresponding information. The information can be contained in a list of +#' lists for each variable. +#'@param Datasets Character strings indicating the names of the dataset. It +#' there are multiple datasets it can be a vector of its names or a list of +#' lists with additional information. +#'@param Dates A POSIXct array of time dimensions containing the Dates. 
+#'@param when A time stamp of the date when the data has been loaded. This +#' parameter is also found in Load() and Start() functions output. +#'@param source_files A vector of character strings with complete paths to all +#' the found files involved in loading the data. +#'@param \dots Additional elements to be added in the object. They will be +#' stored in the end of 'attrs' element. Multiple elements are accepted. +#' +#'@return The function returns an object of class 's2dv_cube' with the following +#' elements in the structure:\cr +#'\itemize{ +#' \item{'data', array with named dimensions.} +#' \item{'dims', named vector of the data dimensions.} +#' \item{'coords', named list with elements of the coordinates corresponding to +#' the dimensions of the data parameter. If any coordinate is not provided, it +#' is set as an index vector with the values from 1 to the length of the +#' corresponding dimension. The attribute 'indices' indicates wether the +#' coordinate is an index vector (TRUE) or not (FALSE).} +#' \item{'attrs', named list with elements: +#' \itemize{ +#' \item{'Dates', array with named temporal dimensions of class 'POSIXct' from +#' time values in the data.} +#' \item{'Variable', has the following components: +#' \itemize{ +#' \item{'varName', with the short name of the loaded variable as specified +#' in the parameter 'var'.} +#' \item{''metadata', named list of elements with variable metadata. +#' They can be from coordinates variables (e.g. longitude) or +#' main variables (e.g. 'var').} +#' } +#' } +#' \item{'Datasets', character strings indicating the names of the dataset.} +#' \item{'source_files', a vector of character strings with complete paths to +#' all the found files involved in loading the data.} +#' \item{'when', a time stamp of the date issued by the Start() or Load() call to +#' obtain the data.} +#' \item{'load_parameters', it contains the components used in the arguments to +#' load the data from Start() or Load() functions.} +#' } +#' } +#'} +#' +#'@seealso \code{\link[s2dv]{Load}} and \code{\link{CST_Load}} +#'@examples +#'exp_original <- 1:100 +#'dim(exp_original) <- c(lat = 2, time = 10, lon = 5) +#'exp1 <- s2dv_cube(data = exp_original) +#'class(exp1) +#'coords <- list(lon = seq(-10, 10, 5), lat = c(45, 50)) +#'exp2 <- s2dv_cube(data = exp_original, coords = coords) +#'class(exp2) +#'metadata <- list(tas = list(level = '2m')) +#'exp3 <- s2dv_cube(data = exp_original, coords = coords, +#' varName = 'tas', metadata = metadata) +#'class(exp3) +#'Dates = as.POSIXct(paste0(rep("01", 10), rep("01", 10), 1990:1999), format = "%d%m%Y") +#'dim(Dates) <- c(time = 10) +#'exp4 <- s2dv_cube(data = exp_original, coords = coords, +#' varName = 'tas', metadata = metadata, +#' Dates = Dates) +#'class(exp4) +#'exp5 <- s2dv_cube(data = exp_original, coords = coords, +#' varName = 'tas', metadata = metadata, +#' Dates = Dates, when = "2019-10-23 19:15:29 CET") +#'class(exp5) +#'exp6 <- s2dv_cube(data = exp_original, coords = coords, +#' varName = 'tas', metadata = metadata, +#' Dates = Dates, +#' when = "2019-10-23 19:15:29 CET", +#' source_files = c("/path/to/file1.nc", "/path/to/file2.nc")) +#'class(exp6) +#'exp7 <- s2dv_cube(data = exp_original, coords = coords, +#' varName = 'tas', metadata = metadata, +#' Dates = Dates, +#' when = "2019-10-23 19:15:29 CET", +#' source_files = c("/path/to/file1.nc", "/path/to/file2.nc"), +#' Datasets = list( +#' exp1 = list(InitializationsDates = list(Member_1 = "01011990", +#' Members = "Member_1")))) +#'class(exp7) 
+#'dim(exp_original) <- c(dataset = 1, member = 1, time = 10, lat = 2, lon = 5) +#'exp8 <- s2dv_cube(data = exp_original, coords = coords, +#' varName = 'tas', metadata = metadata, +#' Dates = Dates, original_dates = Dates) +#'class(exp8) +#'@export +s2dv_cube <- function(data, coords = NULL, varName = NULL, metadata = NULL, + Datasets = NULL, Dates = NULL, when = NULL, + source_files = NULL, ...) { + + # data + if (is.null(data) | !is.array(data) | is.null(names(dim(data)))) { + stop("Parameter 'data' must be an array with named dimensions.") + } + # dims + dims <- dim(data) + + ## coords + if (!is.null(coords)) { + if (!all(names(coords) %in% names(dims))) { + coords <- coords[-which(!names(coords) %in% names(dims))] + } + for (i_coord in names(dims)) { + if (i_coord %in% names(coords)) { + if (length(coords[[i_coord]]) != dims[i_coord]) { + warning(paste0("Coordinate '", i_coord, "' has different lenght as ", + "its dimension and it will not be used.")) + coords[[i_coord]] <- 1:dims[i_coord] + attr(coords[[i_coord]], 'indices') <- TRUE + } else { + attr(coords[[i_coord]], 'indices') <- FALSE + } + } else { + warning(paste0("Coordinate '", i_coord, "' is not provided ", + "and it will be set as index in element coords.")) + coords[[i_coord]] <- 1:dims[i_coord] + attr(coords[[i_coord]], 'indices') <- TRUE + } + } + } else { + coords <- sapply(names(dims), function(x) 1:dims[x]) + for (i in 1:length(coords)) { + attr(coords[[i]], "indices") <- TRUE + } + } + + ## attrs + attrs <- list() + # Dates + if (is.null(Dates)) { + warning("Parameter 'Dates' is not provided so the metadata ", + "of 's2dv_cube' object will be incomplete.") + attrs$Dates <- NULL + } else if (length(Dates) == 1 & inherits(Dates[1], "POSIXct")) { + attrs$Dates <- Dates + } else { + if (!is.array(Dates)) { + warning("Parameter 'Dates' must be an array with named time dimensions.") + } else { + if (is.null(names(dim(Dates)))) { + warning("Parameter 'Dates' must have dimension names.") + } else if (!all(names(dim(Dates)) %in% names(dims))) { + warning("Parameter 'Dates' must have the corresponding time dimension names in 'data'.") + } else { + if (inherits(Dates[1], "POSIXct")) { + attrs$Dates <- Dates + } else { + warning("Parameter 'Dates' must be of class 'POSIXct'.") + } + } + } + } + # Variable + if (is.null(varName)) { + warning("Parameter 'varName' is not provided so the metadata ", + "of 's2dv_cube' object will be incomplete.") + attrs$Variable$varName <- NULL + } else { + if (!is.character(varName)) { + warning("Parameter 'varName' must be a character.") + } else { + attrs$Variable$varName <- varName + } + } + if (is.null(metadata)) { + warning("Parameter 'metadata' is not provided so the metadata ", + "of 's2dv_cube' object will be incomplete.") + attrs$Variable$metadata <- NULL + } else { + if (!is.list(metadata)) { + metadata <- list(metadata) + } + attrs$Variable$metadata <- metadata + } + # Datasets + if (!is.null(Datasets)) { + attrs$Datasets <- Datasets + } + # when + if (!is.null(when)) { + attrs$when <- when + } + # source_files + if (!is.null(source_files)) { + attrs$source_files <- source_files + } + # dots + dots <- list(...) 
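+  # Any additional named arguments are appended at the end of 'attrs'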
+ if (length(dots) != 0) { + for (i_arg in 1:length(dots)) { + attrs[[names(dots)[[i_arg]]]] <- dots[[i_arg]] + } + } + + ## object + object <- list(data = data, dims = dims, coords = coords, attrs = attrs) + class(object) <- 's2dv_cube' + return(object) +} + diff --git a/modules/Loading/check_latlon.R b/modules/Loading/R/check_latlon.R similarity index 100% rename from modules/Loading/check_latlon.R rename to modules/Loading/R/check_latlon.R diff --git a/modules/Loading/R/compare_exp_obs_grids.R b/modules/Loading/R/compare_exp_obs_grids.R new file mode 100644 index 0000000000000000000000000000000000000000..0270cd63a1193e8f444cbd19a9a05f6f78749bab --- /dev/null +++ b/modules/Loading/R/compare_exp_obs_grids.R @@ -0,0 +1,35 @@ +compare_exp_obs_grids <- function(exp, obs) { + # Check for consistency between exp and obs grid + if (!isTRUE(all.equal(as.vector(exp$coords$latitude), + as.vector(obs$coords$latitude)))) { + lat_error_msg <- paste("Latitude mismatch between exp and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + exp_lat_msg <- paste0("First exp lat: ", exp$coords$latitude[1], + "; Last exp lat: ", + exp$coords$latitude[length(exp$coords$latitude)]) + info(recipe$Run$logger, exp_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$coords$latitude[1], + "; Last obs lat: ", + obs$coords$latitude[length(obs$coords$latitude)]) + info(recipe$Run$logger, obs_lat_msg) + stop("exp and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(exp$coords$longitude), + as.vector(obs$coords$longitude)))) { + lon_error_msg <- paste("Longitude mismatch between exp and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + exp_lon_msg <- paste0("First exp lon: ", exp$coords$longitude[1], + "; Last exp lon: ", + exp$coords$longitude[length(exp$coords$longitude)]) + info(recipe$Run$logger, exp_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$coords$longitude[1], + "; Last obs lon: ", + obs$coords$longitude[length(obs$coords$longitude)]) + info(recipe$Run$logger, obs_lon_msg) + stop("exp and obs don't share the same longitudes.") + } +} diff --git a/modules/Loading/dates2load.R b/modules/Loading/R/dates2load.R similarity index 51% rename from modules/Loading/dates2load.R rename to modules/Loading/R/dates2load.R index 0e3613f3a3a7e6b09d8317cdadb8bcb850b2bccc..f084ce62fb1e4798e5dc3948fe0edd3b2c3bfdd6 100644 --- a/modules/Loading/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -49,56 +49,3 @@ dates2load <- function(recipe, logger) { dim(data) <- default_dims return(data) } - -# Gets the corresponding dates or indices according -# to the sdate/leadtimes requested in the recipe -# -# The leadtimes are defined by months -# Ex. 
20201101 with leadtimes 1-4 corresponds to -# the forecasting times covering December to March - -get_timeidx <- function(sdates, ltmin, ltmax, - time_freq="monthly_mean") { - - if (time_freq == "daily_mean") { - - sdates <- ymd(sdates) - idx_min <- sdates + months(ltmin - 1) - idx_max <- sdates + months(ltmax) - days(1) - - day_seq <- seq(idx_min[1], idx_max[1], by = 'days') - if (any("0229" %in% (format(day_seq, "%m%d")))) { - time_length <- as.integer(idx_max[1]-idx_min[1]) - } else { - time_length <- as.integer(idx_max[1]-idx_min[1]+1) - } - indxs <- array(numeric(), c(file_date = length(sdates), - time = time_length)) - #syear = length(sdates), - #sday = 1, sweek = 1, - - for (sdate in 1:length(sdates)) { - day_seq <- seq(idx_min[sdate], idx_max[sdate], by='days') - indxs[sdate,] <- day_seq[!(format(day_seq, "%m%d") == "0229")] - } - indxs <- as.POSIXct(indxs*86400, - tz = 'UTC', origin = '1970-01-01') - lubridate::hour(indxs) <- 12 - lubridate::minute(indxs) <- 00 - dim(indxs) <- c(file_date = length(sdates), - time = time_length) - - } else if (time_freq == "monthly_mean") { - - idx_min <- ltmin - idx_max <- ltmax - indxs <- indices(idx_min:idx_max) - - } - - # TODO: 6 hourly case - #idx1 <- (sdates + months(ltmin-1) - sdates)*4 - #idx2 <- idx1 + ndays*4 - 1 - - return(indxs) -} diff --git a/modules/Loading/R/get_regrid_params.R b/modules/Loading/R/get_regrid_params.R new file mode 100644 index 0000000000000000000000000000000000000000..ef08adcd11f609e08ccc484e0408bb87317eb9b3 --- /dev/null +++ b/modules/Loading/R/get_regrid_params.R @@ -0,0 +1,73 @@ +#'Read regrid parameters from recipe and returns a list for use with Start() +#' +#'The purpose of this function is to read the recipe and archive configuration +#'data for Auto-S2S workflows, retrieve the regridding parameters for hcst and +#'obs, and return an object that can be the input for 'transform' and +#''transform_params' when the data is loaded using Start(). +#'Requires CDORemapper. +#' +#'@param recipe Auto-S2S configuration recipe as returned by read_yaml() +#'@param archive Auto-S2S exp and obs archive as returned by read_yaml() +#' +#'@return A list containing regridding parameters for fcst and obs +#' +#'@import startR +#'@examples +#'setwd("/esarchive/scratch/vagudets/repos/auto-s2s/") +#'library(yaml) +#'library(startR) +#'recipe <- read_yaml("modules/data_load/recipe_1.yml") +#'archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive +#'regrid_params <- get_regrid_params(recipe, archive) +#' +#'@export +get_regrid_params <- function(recipe, archive) { + + ## TODO: Multi-model case + ## If multi-model, use the first system grid? + ## TODO: 'NULL' entries had to be removed due to bug in Start(). Rewrite when + ## the bug is fixed. 
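+  ## 'Regrid$type' can be 'to_reference', 'to_system', 'none', or an explicit CDO
+  ## grid name (e.g. 'r360x181'); each case fills the fcst.* and obs.* entries below.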
+ exp.name <- recipe$Analysis$Datasets$System$name + ref.name <- recipe$Analysis$Datasets$Reference$name + exp_descrip <- archive$System[[exp.name]] + reference_descrip <- archive$Reference[[ref.name]] + + if (tolower(recipe$Analysis$Regrid$type) == 'to_reference') { + + regrid_params <- list(fcst.gridtype = reference_descrip$reference_grid, + fcst.gridmethod = recipe$Analysis$Regrid$method, + fcst.transform = CDORemapper, + obs.gridtype = NULL, + obs.gridmethod = NULL, + obs.transform = NULL) + + } else if (tolower(recipe$Analysis$Regrid$type) == 'to_system') { + + regrid_params <- list(fcst.gridtype = NULL, + fcst.gridmethod = NULL, + fcst.transform = NULL, + obs.gridtype = exp_descrip$reference_grid, + obs.gridmethod = recipe$Analysis$Regrid$method, + obs.transform = CDORemapper) + + } else if (tolower(recipe$Analysis$Regrid$type) == 'none') { + + regrid_params <- list(fcst.gridtype = NULL, + fcst.gridmethod = NULL, + fcst.transform = NULL, + obs.gridtype = NULL, + obs.gridmethod = NULL, + obs.transform = NULL) + + } else { + regrid_params <- list(fcst.gridtype = recipe$Analysis$Regrid$type, + fcst.gridmethod = recipe$Analysis$Regrid$method, + fcst.transform = CDORemapper, + obs.gridtype = recipe$Analysis$Regrid$type, + obs.gridmethod = recipe$Analysis$Regrid$method, + obs.transform = CDORemapper) + } + + return(regrid_params) +} + diff --git a/modules/Loading/R/get_timeidx.R b/modules/Loading/R/get_timeidx.R new file mode 100644 index 0000000000000000000000000000000000000000..d2c66ddeaede994c5884c90609ed29e3b8515067 --- /dev/null +++ b/modules/Loading/R/get_timeidx.R @@ -0,0 +1,57 @@ +#'Gets the corresponding dates or indices according +#'to the start dates, min. and max. leadtimes and +#'time frequency. +# +#'The leadtimes are defined by months +#'Ex. 
20201101 with leadtimes 1-4 corresponds to +#'the forecasting times covering November to february +#' +#'@param sdates vector containind the start dates +#'@param ltmin first leadtime +#'@param ltmax last leadtime +#'@param time_freq time frequency ("monthly_mean" or "daily_mean" or "daily") + +get_timeidx <- function(sdates, ltmin, ltmax, + time_freq="monthly_mean") { + + if (time_freq %in% c("daily_mean", "daily")) { + + sdates <- ymd(sdates) + idx_min <- sdates + months(ltmin - 1) + idx_max <- sdates + months(ltmax) - days(1) + + day_seq <- seq(idx_min[1], idx_max[1], by = 'days') + if (any("0229" %in% (format(day_seq, "%m%d")))) { + time_length <- as.integer(idx_max[1]-idx_min[1]) + } else { + time_length <- as.integer(idx_max[1]-idx_min[1]+1) + } + indxs <- array(numeric(), c(file_date = length(sdates), + time = time_length)) + #syear = length(sdates), + #sday = 1, sweek = 1, + + for (sdate in 1:length(sdates)) { + day_seq <- seq(idx_min[sdate], idx_max[sdate], by='days') + indxs[sdate,] <- day_seq[!(format(day_seq, "%m%d") == "0229")] + } + indxs <- as.POSIXct(indxs*86400, + tz = 'UTC', origin = '1970-01-01') + lubridate::hour(indxs) <- 12 + lubridate::minute(indxs) <- 00 + dim(indxs) <- c(file_date = length(sdates), + time = time_length) + + } else if (time_freq == "monthly_mean") { + + idx_min <- ltmin + idx_max <- ltmax + indxs <- indices(idx_min:idx_max) + } + + # TODO: 6 hourly case + #idx1 <- (sdates + months(ltmin-1) - sdates)*4 + #idx2 <- idx1 + ndays*4 - 1 + + return(indxs) +} diff --git a/modules/Loading/helper_loading_decadal.R b/modules/Loading/R/helper_loading_decadal.R similarity index 83% rename from modules/Loading/helper_loading_decadal.R rename to modules/Loading/R/helper_loading_decadal.R index f4f1ec3246d675d7eb39809c8665fb13a19fe123..b93f32792359739042af1613b8ba4b2e26aff92c 100644 --- a/modules/Loading/helper_loading_decadal.R +++ b/modules/Loading/R/helper_loading_decadal.R @@ -106,22 +106,36 @@ correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) { #========================================== # This function generates the path for Start() call. It shouldn't be needed when Start() is improved. +# table, grid, version: A list with variables as name. 
E.g., list(tas = 'Amon') get_dcpp_path <- function(archive, exp.name, table, grid, version, sdates) { # Define path (monthly and daily) multi_path <- FALSE if (is.null(archive$System[[exp.name]]$src$first_dcppB_syear) | isTRUE(all(sdates < archive$System[[exp.name]]$src$first_dcppB_syear))) { # only dcppA - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, - '$ensemble$', table, '$var$', grid, version) - fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + if (length(table) == 1) { # only one variable + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', table, '$var$', grid, version) + fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + } else { # multiple vars + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', '$table$', '$var$', '$grid$', '$version$') + fcst.files <- paste0('$var$_', '$table$', '_*_dcppA-hindcast_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') + } path_list <- file.path(fcst.path, fcst.files) } else { if (all(sdates >= archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppB - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, - '$ensemble$', table, '$var$', grid) #, version) - fcst.files <- paste0('v*/$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + if (length(table) == 1) { # only one variable + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, + '$ensemble$', table, '$var$', grid, version) + + fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + } else { + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, + '$ensemble$', '$table$', '$var$', '$grid$', '$version$') + fcst.files <- paste0('$var$_', '$table$', '_*_dcppB-forecast_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') + } path_list <- file.path(fcst.path, fcst.files) } else { # have both dcppA and dcppB diff --git a/modules/Loading/R/load_GRIB.R b/modules/Loading/R/load_GRIB.R new file mode 100644 index 0000000000000000000000000000000000000000..8ae2a74d2efb2c3e5f82034ee1f647b6548620d7 --- /dev/null +++ b/modules/Loading/R/load_GRIB.R @@ -0,0 +1,279 @@ +################################################# +# Load GRIB files from MARS +################################################# + +source('modules/Loading/R/GRIB/GrbLoad.R') +source('tools/libs.R') + +load_GRIB <- function(recipe) { + + # Set params + #------------------------------------------------------------------- + + # get recipe info + hcst.inityear <- recipe$Analysis$Time$hcst_start + hcst.endyear <- recipe$Analysis$Time$hcst_end + hcst.sdate <- recipe$Analysis$Time$sdate + hcst.ftime <- as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max) + fcst.year <- recipe$Analysis$Time$fcst_year + lats.min <- recipe$Analysis$Region$latmin # can only be -90 + lats.max <- recipe$Analysis$Region$latmax # can only be 90 + lons.min <- recipe$Analysis$Region$lonmin # can only be 0 + lons.max <- recipe$Analysis$Region$lonmax # can only be 360 + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + variable <- recipe$Analysis$Variables$name #'tas' + store.freq <- recipe$Analysis$Variables$freq + + regrid.method <- recipe$Analysis$Regrid$method + regrid.type <- recipe$Analysis$Regrid$type + + # get MARS datasets dict: + archive <- 
read_yaml("conf/archive.yml")[[recipe$Run$filesystem]] + exp_descrip <- archive$System[[exp.name]] + freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) + reference_descrip <- archive$Reference[[ref.name]] + freq.obs <- unlist(reference_descrip[[store.freq]][variable]) + obs.dir <- reference_descrip$src + fcst.dir <- exp_descrip$src + hcst.dir <- exp_descrip$src + #NOTE: We can use this info in GrbLoad() to substitute param 'has.memb' + fcst.nmember <- exp_descrip$nmember$fcst + hcst.nmember <- exp_descrip$nmember$hcst + + info(recipe$Run$logger, + "========== PARAMETERS RETRIEVED. ==========") + + # Load hindcast + #------------------------------------------------------------------- + +## original file dir +#exp_path <- "/esarchive/exp/ecmwf/system5_m1/original_files/fcmean_od_sfc_msmm_ecmf/" +## soft link to original file dir +#exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas/" #files are not correct +# The correct files +#exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/" + + hcst.path <- paste0(archive$src, hcst.dir) + hcst.year <- paste0(as.numeric(hcst.inityear):as.numeric(hcst.endyear)) + hcst.files <- paste0(hcst.path, variable, '_', hcst.year, hcst.sdate, '.grb') + + if (!regrid.type %in% c('none', 'to_system')) { + if (regrid.type == 'to_reference') { + regrid_list <- c(method = regrid.method, type = reference_descrip$reference_grid) + } else { # e.g., "r360x181" + regrid_list <- list(method = regrid.method, type = regrid.type) + } + } else { + regrid_list <- NULL + } + + .log_memory_usage(recipe$Run$logger, when = "Before loading the data") + hcst <- GrbLoad(dat = as.list(hcst.files), time_step = hcst.ftime, has.memb = hcst.nmember, + syear_time_dim = NULL, regrid = regrid_list) + gc() + + info(recipe$Run$logger, + "========== HCST LOADED. ==========") + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(fcst.year)) { + fcst.path <- paste0(archive$src, hcst.dir) + fcst.files <- paste0(fcst.path, variable, '_', fcst.year, hcst.sdate, '.grb') + fcst <- GrbLoad(dat = as.list(fcst.files), time_step = hcst.ftime, has.memb = fcst.nmember, + syear_time_dim = NULL, regrid = regrid_list) + gc() + } else { + fcst <- NULL + } + + info(recipe$Run$logger, + "========== FCST LOADED. ==========") + + # Load reference + #------------------------------------------------------------------- +#obs_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_era5_tas/" + obs.path <- paste0(archive$src, obs.dir) + # Use hcst time attr to load obs + hcst_times <- attr(hcst, 'time') + hcst_times_strings <- format(hcst_times, '%Y%m') + + obs.files <- paste0(obs.path, variable, '_', hcst_times_strings, '.grb') + + if (!regrid.type %in% c('none', 'to_reference')) { + if (regrid.type == 'to_system') { + regrid_list <- c(method = regrid.method, type = exp_descrip$reference_grid) + } else { # e.g., "r360x181" + regrid_list <- list(method = regrid.method, type = regrid.type) + } + } else { + regrid_list <- NULL + } + + #NOTE: only 1 time step in each obs file + obs <- GrbLoad(dat = as.list(obs.files), time_step = 1, has.memb = NULL, + syear_time_dim = dim(hcst_times), regrid = regrid_list) + gc() + + .log_memory_usage(recipe$Run$logger, when = "After loading the data") + info(recipe$Run$logger, + "========== OBS LOADED. 
==========") + + +################################################################################# + +#dim(hcst) +# syear time latitude longitude ensemble +# 4 3 640 1280 51 + +##BEFORE TRANSFER TO S2DV_CUBE +#str(hcst) +# num [1:4, 1:3, 1:640, 1:1280, 1:51] 252 252 252 252 251 ... +# - attr(*, "edition")= num 1 +# - attr(*, "shortName")= chr "2t" +# - attr(*, "longitude")= num [1:1280] 0 0.281 0.563 0.844 1.125 ... +# - attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... +# - attr(*, "time")= POSIXct[1:12], format: "2000-12-01" "2001-12-01" ... + +#dim(attr(hcst, 'time')) +#syear time +# 4 3 + +##BEFORE TRANSFER TO S2DV_CUBE +#str(obs) +# num [1:4, 1:3, 1:640, 1:1280] 251 251 251 251 251 ... +# - attr(*, "edition")= num 1 +# - attr(*, "shortName")= chr "2t" +# - attr(*, "longitude")= num [1:1280] 0 0.281 0.562 0.844 1.125 ... +# - attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... +# - attr(*, "time")= POSIXct[1:12], format: "2000-12-01" "2001-12-01" ... + +################################################################################# + + info(recipe$Run$logger, + "========== REGRID DONE. ==========") + + + # Turn into s2dv_cube + #------------------------------------------------------------------- + # hcst + metadata_list <- vector("list", length = 1) + names(metadata_list) <- variable + metadata_list[[variable]] <- list(long_name = attr(hcst, 'name'), + units = attr(hcst, 'units')) + load_parameters_list <- list(dat1 = list(file_date = list(paste0(hcst.year, hcst.sdate)))) + + hcst <- s2dv_cube(data = array(hcst, dim = dim(hcst)), + coords = list(dat = 'dat1', + var = variable, + sday = 1, + sweek = 1, + syear = hcst.year, + time = hcst.ftime, + latitude = attr(hcst, 'latitude'), + longitude = attr(hcst, 'longitude'), + ensemble = 1:hcst.nmember), + varName = attr(hcst, 'shortName'), + metadata = metadata_list, + Dates = attributes(hcst)$time, + source_files = hcst.files, + load_parameters = load_parameters_list, + # extra attrs + gribEdition = attr(hcst, 'edition')) + + # fcst + if (!is.null(fcst)) { + metadata_list <- vector("list", length = 1) + names(metadata_list) <- variable + metadata_list[[variable]] <- list(long_name = attr(fcst, 'name'), + units = attr(fcst, 'units')) + load_parameters_list <- list(dat1 = list(file_date = list(paste0(fcst.year, hcst.sdate)))) + + fcst <- s2dv_cube(data = array(fcst, dim = dim(fcst)), + coords = list(dat = 'dat1', + var = variable, + sday = 1, + sweek = 1, + syear = fcst.year, + time = hcst.ftime, + latitude = attr(fcst, 'latitude'), + longitude = attr(fcst, 'longitude'), + ensemble = 1:fcst.nmember), + varName = attr(fcst, 'shortName'), + metadata = metadata_list, + Dates = attributes(fcst)$time, + source_files = fcst.files, + load_parameters = load_parameters_list, + gribEdition = attr(fcst, 'edition')) + } + + # obs + metadata_list <- vector("list", length = 1) + names(metadata_list) <- variable + metadata_list[[variable]] <- list(long_name = attr(obs, 'name'), + units = attr(obs, 'units')) + load_parameters_list <- list(dat1 = list(file_date = list(hcst_times_strings))) + + obs <- s2dv_cube(data = array(obs, dim = dim(obs)), + coords = list(dat = 'dat1', + var = variable, + sday = 1, + sweek = 1, + #NOTE: Can we directly use hcst info? 
+ syear = hcst.year, + time = hcst.ftime, + latitude = attr(obs, 'latitude'), + longitude = attr(obs, 'longitude'), + ensemble = 1), + varName = attr(obs, 'shortName'), + metadata = metadata_list, + Dates = attributes(obs)$time, + source_files = obs.files, + load_parameters = load_parameters_list, + gribEdition = attr(obs, 'edition')) + + +#str(hcst) +#List of 4 +# $ data : num [1, 1, 1, 1, 1:2, 1:2, 1:640, 1:1280, 1:51] 252 253 248 251 251 ... +# ..- attr(*, "edition")= num 1 +# ..- attr(*, "shortName")= chr "2t" +# ..- attr(*, "longitude")= num [1:1280] 0 0.281 0.563 0.844 1.125 ... +# ..- attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... +# ..- attr(*, "time")= POSIXct[1:4], format: "2000-12-01" "2001-12-01" ... +# $ dims : Named int [1:9] 1 1 1 1 2 2 640 1280 51 +# ..- attr(*, "names")= chr [1:9] "dat" "var" "sday" "sweek" ... +# $ coords:List of 9 +# ..$ dat : chr "dat1" +# .. ..- attr(*, "indices")= logi FALSE +# ..$ var : chr "tas" +# .. ..- attr(*, "indices")= logi FALSE +# ..$ sday : num 1 +# .. ..- attr(*, "indices")= logi FALSE +# ..$ sweek : num 1 +# .. ..- attr(*, "indices")= logi FALSE +# ..$ syear : chr [1:2] "2000" "2001" +# .. ..- attr(*, "indices")= logi FALSE +# ..$ time : int [1:2] 1 2 +# .. ..- attr(*, "indices")= logi FALSE +# ..$ latitude : num [1:640] 89.8 89.5 89.2 88.9 88.7 ... +# .. ..- attr(*, "indices")= logi FALSE +# ..$ longitude: num [1:1280] 0 0.281 0.563 0.844 1.125 ... +# .. ..- attr(*, "indices")= logi FALSE +# ..$ ensemble : int [1:51] 1 2 3 4 5 6 7 8 9 10 ... +# .. ..- attr(*, "indices")= logi FALSE +# $ attrs :List of 4 +# ..$ Dates : POSIXct[1:4], format: "2000-12-01" "2001-12-01" ... +# ..$ Variable :List of 1 +# .. ..$ varName: chr "2t" +# ..$ source_files: chr [1:2] "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/tas_20001101.grb" "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/tas_20011101.grb" +# ..$ gribEdition : num 1 +# - attr(*, "class")= chr "s2dv_cube" + .log_memory_usage(recipe$Run$logger, when = "After regridding") + info(recipe$Run$logger, + "##### GRIB DATA LOADED SUCCESSFULLY #####") + + return(list(hcst = hcst, fcst = fcst, obs = obs)) + +} diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/R/load_decadal.R similarity index 74% rename from modules/Loading/Loading_decadal.R rename to modules/Loading/R/load_decadal.R index e3677e1db3bceae19bfa8e3c27493575a43a00f6..9268b090fc70db506279c87cbc3a697dd763fe67 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -1,27 +1,19 @@ -# Loading module: -# 1. archive.yml -# 2. recipe.yml -# 3. 
Load_decadal.R (V) -#setwd('/esarchive/scratch/aho/git/auto-s2s/') - -## TODO: remove paths to personal scratchs -source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") # Load required libraries/funs -source("modules/Loading/helper_loading_decadal.R") -source("modules/Loading/dates2load.R") -source("modules/Loading/check_latlon.R") -source("tools/libs.R") -## TODO: Remove once the fun is included in CSTools -source("tools/tmp/as.s2dv_cube.R") +source("modules/Loading/R/get_regrid_params.R") +source("modules/Loading/R/helper_loading_decadal.R") +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/compare_exp_obs_grids.R") #==================================================================== -# recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml" -# recipe_file <- "modules/Loading/testing_recipes/recipe_decadal_daily.yml" +# recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" +# recipe_file <- "recipes/atomic_recipes/recipe_decadal_daily.yml" -load_datasets <- function(recipe) { - - archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive +load_decadal <- function(recipe) { + ## + archive <- read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] # Print Start() info or not DEBUG <- FALSE @@ -34,8 +26,9 @@ load_datasets <- function(recipe) { #------------------------- exp.name <- recipe$Analysis$Datasets$System$name #'HadGEM3' ref.name <- recipe$Analysis$Datasets$Reference$name #'era5' - member <- strsplit(recipe$Analysis$Datasets$System$member, ',')[[1]] #c("r1i1p1f2", "r2i1p1f2") - variable <- recipe$Analysis$Variables$name #'tas' + member <- strsplit(recipe$Analysis$Datasets$System$member, ', | |,')[[1]] #c("r1i1p1f2", "r2i1p1f2") +# variable <- recipe$Analysis$Variables$name #'tas' + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] store.freq <- recipe$Analysis$Variables$freq #monthly_mean lats.min <- as.numeric(recipe$Analysis$Region$latmin) #0 lats.max <- as.numeric(recipe$Analysis$Region$latmax) #10 @@ -64,12 +57,12 @@ load_datasets <- function(recipe) { # Read from archive: #------------------------- if (store.freq == "monthly_mean") { - table <- archive$System[[exp.name]][[store.freq]]$table[[variable]] #'Amon' + table <- archive$System[[exp.name]][[store.freq]]$table[variable] #list(tas = 'Amon') } else { table <- 'day' } - grid <- archive$System[[exp.name]][[store.freq]]$grid[[variable]] - version <- archive$System[[exp.name]][[store.freq]]$version[[variable]] + grid <- archive$System[[exp.name]][[store.freq]]$grid[variable] #list(tas = 'gr') + version <- archive$System[[exp.name]][[store.freq]]$version[variable] #list(tas = 'v20210910') if (identical(member, 'all')) { member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]] } @@ -84,7 +77,7 @@ load_datasets <- function(recipe) { regrid_params <- get_regrid_params(recipe, archive) # Only if the time length in each chunk may differ that we need largest_dims_length to be TRUE. Otherwise, set FALSE to increase efficiency. 
- need_largest_dims_length <- ifelse(exp.name == 'EC-Earth3-i2', TRUE, FALSE) + need_largest_dims_length <- ifelse(exp.name %in% c('HadGEM3-GC31-MM', 'EC-Earth3-i2'), TRUE, FALSE) #------------------------------------------- @@ -95,13 +88,14 @@ load_datasets <- function(recipe) { version = version, sdates = sdates_hcst) path_list <- tmp$path_list multi_path <- tmp$multi_path -# hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, -# '$ensemble$', table, '$var$', grid, version) -# hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + #TODO: to make this case work; enhance Start() if it's possible + if (multi_path & length(variable) > 1) { + stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") + } Start_default_arg_list <- list( - dat = path_list, #file.path(hcst.path, hcst.files), + dat = path_list, var = variable, syear = paste0(sdates_hcst), chunk = 'all', @@ -120,7 +114,7 @@ load_datasets <- function(recipe) { transform_params = list(grid = regrid_params$fcst.gridtype, method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), - path_glob_permissive = 2, # for version +# path_glob_permissive = 2, # for version synonims = list(longitude = c('lon', 'longitude'), latitude = c('lat', 'latitude')), return_vars = list(latitude = NULL, longitude = NULL, @@ -128,6 +122,13 @@ load_datasets <- function(recipe) { silent = !DEBUG, retrieve = T) + if (length(variable) > 1) { + Start_default_arg_list <- c(Start_default_arg_list, + list(table = table, grid = grid, version = version, + table_depends = 'var', grid_depends = 'var', version_depends = 'var', + metadata_dims = 'var')) + } + if (!multi_path) { Start_hcst_arg_list <- Start_default_arg_list hcst <- do.call(Start, Start_hcst_arg_list) @@ -189,10 +190,11 @@ load_datasets <- function(recipe) { version = version, sdates = sdates_fcst) path_list <- tmp$path_list multi_path <- tmp$multi_path -# fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, -# '$ensemble$', table, '$var$', grid, version) -# fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + #TODO: to make this case work; enhance Start() if it's possible + if (multi_path & length(variable) > 1) { + stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") + } # monthly & daily if (!multi_path) { @@ -268,14 +270,18 @@ load_datasets <- function(recipe) { #------------------------------------------- # Step 3. 
Load the reference #------------------------------------------- - obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, - paste0(variable, archive$Reference[[ref.name]][[store.freq]][[variable]])) - obs.files <- paste0('$var$_$file_date$.nc') + obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, + store.freq, "$var$$var_dir$", "$var$_$file_date$.nc") + var_dir_obs <- archive$Reference[[ref.name]][[store.freq]][variable] # list(tas = "_f1h-r1440x721cds", tos = "_f1h-r1440x721cds") + +# obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, +# paste0(variable, archive$Reference[[ref.name]][[store.freq]][[variable]])) +# obs.files <- paste0('$var$_$file_date$.nc') # Get from startR_cube # dates <- attr(hcst, 'Variables')$common$time # Get from s2dv_cube - dates <- hcst$Dates$start + dates <- hcst$attrs$Dates dates_file <- sapply(dates, format, '%Y%m') dim(dates_file) <- dim(dates) @@ -290,8 +296,10 @@ load_datasets <- function(recipe) { # Restore correct dimensions dim(dates) <- dim(dates_file) - obs <- Start(dat = file.path(obs.path, obs.files), + obs <- Start(dat = obs.path, var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', file_date = unique(format(dates, '%Y%m')), time = dates, # [sday, sweek, syear, time] time_across = 'file_date', @@ -318,8 +326,10 @@ load_datasets <- function(recipe) { # Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector #//////////////// - obs <- Start(dat = file.path(obs.path, obs.files), + obs <- Start(dat = obs.path, var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', file_date = dates_file, #dates_arr, # [sday, sweek, syear, time] split_multiselected_dims = TRUE, latitude = values(list(lats.min, lats.max)), @@ -335,6 +345,7 @@ load_datasets <- function(recipe) { longitude = c('lon','longitude')), return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), + metadata_dims = 'var', silent = !DEBUG, retrieve = TRUE) } @@ -344,6 +355,9 @@ load_datasets <- function(recipe) { # sday sweek syear time # 1 1 2 14 + # Remove var_dir dimension + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") + # Only ensemble dim could be different if (!identical(dim(obs), dim(hcst$data)[-9])) { error(recipe$Run$logger, @@ -358,7 +372,6 @@ load_datasets <- function(recipe) { obs <- as.s2dv_cube(obs) ) - #------------------------------------------- # Step 4. 
Verify the consistance between data #------------------------------------------- @@ -378,8 +391,8 @@ load_datasets <- function(recipe) { } # time attribute - if (!identical(format(hcst$Dates$start, '%Y%m'), - format(obs$Dates$start, '%Y%m'))) { + if (!identical(format(hcst$attrs$Dates, '%Y%m'), + format(obs$attrs$Dates, '%Y%m'))) { error(recipe$Run$logger, "hcst and obs don't share the same time.") stop() @@ -435,90 +448,38 @@ load_datasets <- function(recipe) { # lat and lon attributes if (!(recipe$Analysis$Regrid$type == 'none')) { - if (!identical(as.vector(hcst$lat), as.vector(fcst$lat))) { - lat_error_msg <- paste("Latitude mismatch between hcst and fcst.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lat_error_msg) - hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) - info(recipe$Run$logger, hcst_lat_msg) - fcst_lat_msg <- paste0("First fcst lat: ", fcst$lat[1], - "; Last fcst lat: ", fcst$lat[length(fcst$lat)]) - info(recipe$Run$logger, fcst_lat_msg) - stop("hcst and fcst don't share the same latitudes.") - } - - if (!identical(as.vector(hcst$lon), as.vector(fcst$lon))) { - lon_error_msg <- paste("Longitude mismatch between hcst and fcst.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lon_error_msg) - hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) - info(recipe$Run$logger, hcst_lon_msg) - fcst_lon_msg <- paste0("First fcst lon: ", fcst$lon[1], - "; Last fcst lon: ", fcst$lon[length(fcst$lon)]) - info(recipe$Run$logger, fcst_lon_msg) - stop("hcst and fcst don't share the same longitudes.") - } - } - - } - - -#------------------------------------------- -# Step 5. Tune data -#------------------------------------------- - # Remove negative values in accumulative variables - dictionary <- read_yaml("conf/variable-dictionary.yml") - if (dictionary$vars[[variable]]$accum) { - info(recipe$Run$logger, - " Accumulated variable: setting negative values to zero.") - obs$data[obs$data < 0] <- 0 - hcst$data[hcst$data < 0] <- 0 - if (!is.null(fcst)) { - fcst$data[fcst$data < 0] <- 0 - } - } - # Convert precipitation to mm/day - ## TODO: Make a function? - if (variable == "prlr") { - # Verify that the units are m/s and the same in obs and hcst - if (((attr(obs$Variable, "variable")$units == "m s-1") || - (attr(obs$Variable, "variable")$units == "m s**-1")) && - ((attr(hcst$Variable, "variable")$units == "m s-1") || - (attr(hcst$Variable, "variable")$units == "m s**-1"))) { - - info(recipe$Run$logger, - "Converting precipitation from m/s to mm/day.") - obs$data <- obs$data*86400*1000 - attr(obs$Variable, "variable")$units <- "mm/day" - hcst$data <- hcst$data*86400*1000 - attr(hcst$Variable, "variable")$units <- "mm/day" - if (!is.null(fcst)) { - fcst$data <- fcst$data*86400*1000 - attr(fcst$Variable, "variable")$units <- "mm/day" - } + compare_exp_obs_grids(hcst, obs) } } -#------------------------------------------- -# Step 6. Print summary -#------------------------------------------- + #------------------------------------------- + # Step 5. 
Tune data + #------------------------------------------- + # # Remove negative values in accumulative variables + # dictionary <- read_yaml("conf/variable-dictionary.yml") + # for (var_idx in 1:length(variable)) { + # var_name <- variable[var_idx] + # if (dictionary$vars[[var_name]]$accum) { + # info(recipe$Run$logger, + # paste0("Accumulated variable ", var_name, + # ": setting negative values to zero.")) + # # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, + # # along = "var", + # # indices = var_idx, F), 0) + # obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 + # hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 + # if (!is.null(fcst)) { + # fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 + # } + # } - # Print a summary of the loaded data for the user, for each object - if (recipe$Run$logger$threshold <= 2) { - data_summary(hcst, recipe) - data_summary(obs, recipe) - if (!is.null(fcst)) { - data_summary(fcst, recipe) - } - } + #------------------------------------------- + # Step 6. Print summary + #------------------------------------------- info(recipe$Run$logger, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") - + .log_memory_usage(recipe$Run$logger, when = "After loading") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/R/load_sample.R b/modules/Loading/R/load_sample.R new file mode 100644 index 0000000000000000000000000000000000000000..e0d906d3dbc6beb4f59a3fe0eece2f6bd5fea8a4 --- /dev/null +++ b/modules/Loading/R/load_sample.R @@ -0,0 +1,49 @@ +## TODO: remove paths to personal scratchs +source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") +# Load required libraries/funs +source("tools/CST_ChangeDimName.R") +source("modules/Loading/R/compare_exp_obs_grids.R") + +load_sample <- function(recipe) { + # Hindcast: + # Change hcst dimension names + hcst <- CST_ChangeDimName(lonlat_temp_st$exp, + original_dimnames = c("dataset", "sdate", "ftime", + "lat", "lon", "member"), + final_dimnames = c("dat", "syear", "time", + "latitude", "longitude", + "ensemble")) + # Add sday and sweek dimensions + hcst <- CST_InsertDim(hcst, posdim = 3, lendim = 1, name = "sday", values = 1) + hcst <- CST_InsertDim(hcst, posdim = 4, lendim = 1, name = "sweek", values = 1) + dim(hcst$attrs$Dates) <- c(sday = 1, sweek = 1, + syear = dim(hcst$attrs$Dates)[['syear']], + time = dim(hcst$attrs$Dates)[['time']]) + # Observations: + # Change obs dimension names + obs <- CST_ChangeDimName(lonlat_temp_st$obs, + original_dimnames = c("dataset", "sdate", "ftime", + "lat", "lon"), + final_dimnames = c("dat", "syear", "time", + "latitude", "longitude")) + # Add sday and sweek dimensions + obs <- CST_InsertDim(obs, posdim = 3, lendim = 1, name = "sday", values = 1) + obs <- CST_InsertDim(obs, posdim = 4, lendim = 1, name = "sweek", values = 1) + dim(obs$attrs$Dates) <- c(sday = 1, sweek = 1, + syear = dim(obs$attrs$Dates)[['syear']], + time = dim(obs$attrs$Dates)[['time']]) + # Add ensemble dimension to obs + obs <- CST_InsertDim(obs, posdim = 7, lendim = 1, name = "ensemble", values = 1) + # Adjust name of 'load_parameters$date' attribute + obs$attrs$load_parameters$dat1$file_date <- obs$attrs$load_parameters$dat1$date + + # Sample fcst NULL + fcst <- NULL + # Check for consistency between hcst and obs grid + compare_exp_obs_grids(hcst, obs) + + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + .log_memory_usage(recipe$Run$logger, 
when = "After loading (sample data)") + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R new file mode 100644 index 0000000000000000000000000000000000000000..2caa34a9ebe10315f94280cfbae44901045cd306 --- /dev/null +++ b/modules/Loading/R/load_seasonal.R @@ -0,0 +1,393 @@ +# Load required libraries/funs +source("modules/Loading/R/get_regrid_params.R") +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Loading/R/compare_exp_obs_grids.R") + +load_seasonal <- function(recipe) { + + # ------------------------------------------- + # Set params ----------------------------------------- + + hcst.inityear <- recipe$Analysis$Time$hcst_start + hcst.endyear <- recipe$Analysis$Time$hcst_end + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + store.freq <- recipe$Analysis$Variables$freq + + # get sdates array + ## LOGGER: Change dates2load to extract logger from recipe? + sdates <- dates2load(recipe, recipe$Run$logger) + + idxs <- NULL + idxs$hcst <- get_timeidx(sdates$hcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + + if (!(is.null(sdates$fcst))) { + idxs$fcst <- get_timeidx(sdates$fcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + } + + ## TODO: Examine this verifications part, verify if it's necessary + # stream <- verifications$stream + # sdates <- verifications$fcst.sdate + + ## TODO: define fcst.name + ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name + + # get esarchive datasets dict: + ## TODO: Adapt to 'filesystem' option in recipe + archive <- read_yaml("conf/archive.yml")$esarchive + exp_descrip <- archive$System[[exp.name]] + + freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]]) + reference_descrip <- archive$Reference[[ref.name]] + freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]]) + obs.dir <- reference_descrip$src + fcst.dir <- exp_descrip$src + hcst.dir <- exp_descrip$src + fcst.nmember <- exp_descrip$nmember$fcst + hcst.nmember <- exp_descrip$nmember$hcst + + ## TODO: it is necessary? 
+ ##if ("accum" %in% names(reference_descrip)) { + ## accum <- unlist(reference_descrip$accum[store.freq][[1]]) + ##} else { + ## accum <- FALSE + ##} + + if (store.freq %in% c("daily", "daily_mean")) { + frequency <- "daily_mean" + } else if (store.freq == "monthly_mean") { + frequency <- "monthly_mean" + } + var_dir_obs <- reference_descrip[[store.freq]][variable] + var_dir_exp <- exp_descrip[[store.freq]][variable] + + # ----------- + obs.path <- paste0(archive$src, obs.dir, "$var_dir$", + "$var$_$file_date$.nc") + + hcst.path <- paste0(archive$src, hcst.dir, "$var_dir$", + "$var$_$file_date$.nc") + + fcst.path <- paste0(archive$src, hcst.dir, "$var_dir$", + "$var$_$file_date$.nc") + + # Define regrid parameters: + #------------------------------------------------------------------- + regrid_params <- get_regrid_params(recipe, archive) + + # Longitude circular sort and latitude check + #------------------------------------------------------------------- + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean"){ + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Load hindcast + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = variable, + var_dir = var_dir_exp, + file_date = sdates$hcst, + time = idxs$hcst, + var_dir_depends = 'var', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:hcst.nmember), + metadata_dims = 'var', # change to just 'var'? 
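+                 # metadata_dims = 'var' makes Start() return one metadata entry per
+                 # requested variable, so that several variables can be loaded in a
+                 # single atomic recipe.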
+ return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(hcst))) { + hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + + # Convert hcst to s2dv_cube object + ## TODO: Give correct dimensions to $Dates + ## (sday, sweek, syear instead of file_date) + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + # the call uses file_date instead of fcst_syear so that it can work + # with the daily case and the current version of startR not allowing + # multiple dims split + + fcst <- Start(dat = fcst.path, + var = variable, + var_dir = var_dir_exp, + var_dir_depends = 'var', + file_date = sdates$fcst, + time = idxs$fcst, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + if ("var_dir" %in% names(dim(fcst))) { + fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, 
"Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + + # Convert fcst to s2dv_cube + fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$attrs$Dates[] <- + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. + dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Separate Start() call for monthly vs daily data + if (store.freq == "monthly_mean") { + + dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") + dim(dates_file) <- dim(dates) + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq %in% c("daily_mean", "daily")) { + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + } + + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(obs))) { + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") + } + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!(recipe$Analysis$Regrid$type == 'none')) { + compare_exp_obs_grids(hcst, obs) + } + + # Remove negative values in accumulative variables + 
dictionary <- read_yaml("conf/variable-dictionary.yml") + for (var_idx in 1:length(variable)) { + var_name <- variable[var_idx] + if (dictionary$vars[[var_name]]$accum) { + info(recipe$Run$logger, + paste0("Accumulated variable ", var_name, + ": setting negative values to zero.")) + # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, + # along = "var", + # indices = var_idx, F), 0) + obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 + hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 + } + } + } + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + ############################################################################ + # + # CHECKS ON MISSING FILES + # + ############################################################################ + + #obs.NA_dates.ind <- Apply(obs, + # fun=(function(x){ all(is.na(x))}), + # target_dims=c('time', 'latitude', 'longitude'))[[1]] + #obs.NA_dates <- dates_file[obs.NA_dates.ind] + #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] + #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", + # freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") + # + #if (any(is.na(hcst))){ + # fatal(recipe$Run$logger, + # paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "hcst files:", + # hcst.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status = 1) + #} + # + #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ + # fatal(recipe$logger, + # paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "obs files:", + # obs.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status=1) + #} + # + #info(recipe$logger, + # "######### DATA LOADING COMPLETED SUCCESFULLY ##############") + + ############################################################################ + ############################################################################ + .log_memory_usage(recipe$Run$logger, when = "After loading") + return(list(hcst = hcst, fcst = fcst, obs = obs)) + +} diff --git a/modules/Loading/R/load_tas_tos.R b/modules/Loading/R/load_tas_tos.R new file mode 100644 index 0000000000000000000000000000000000000000..ea231b56d8a00f3833dc7bf9eaa271bc9a97a097 --- /dev/null +++ b/modules/Loading/R/load_tas_tos.R @@ -0,0 +1,487 @@ +# Load required libraries/funs +source("modules/Loading/R/get_regrid_params.R") +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source('modules/Loading/R/mask_tas_tos.R') +source("modules/Loading/R/compare_exp_obs_grids.R") + +load_tas_tos <- function(recipe) { + + # ------------------------------------------- + # Set params ----------------------------------------- + + hcst.inityear <- recipe$Analysis$Time$hcst_start + hcst.endyear <- recipe$Analysis$Time$hcst_end + lats.min <- recipe$Analysis$Region$latmin + lats.max <- 
recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + + variable <- c("tas", "tos", "sic") + store.freq <- recipe$Analysis$Variables$freq + + if(is.null(recipe$Analysis$Variables$sic_threshold)){ + sic.threshold = 0.15 + } else { + sic.threshold <- recipe$Analysis$Variables$sic_threshold + } + + data_order <- c('dat', 'var', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble') + + + # get sdates array + ## LOGGER: Change dates2load to extract logger from recipe? + sdates <- dates2load(recipe, recipe$Run$logger) + + idxs <- NULL + idxs$hcst <- get_timeidx(sdates$hcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + + if (!(is.null(sdates$fcst))) { + idxs$fcst <- get_timeidx(sdates$fcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + } + + # get esarchive datasets dict: + archive <- read_yaml("conf/archive.yml")[[recipe$Run$filesystem]] + exp_descrip <- archive$System[[exp.name]] + + freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]]) + reference_descrip <- archive$Reference[[ref.name]] + freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]]) + obs.dir <- reference_descrip$src + fcst.dir <- exp_descrip$src + hcst.dir <- exp_descrip$src + fcst.nmember <- exp_descrip$nmember$fcst + hcst.nmember <- exp_descrip$nmember$hcst + + var_dir_obs <- reference_descrip[[store.freq]][variable] + var_dir_exp <- exp_descrip[[store.freq]][variable] + + # ----------- + obs.path <- paste0(archive$src, obs.dir, "$var_dir$", + "$var$_$file_date$.nc") + + hcst.path <- paste0(archive$src, hcst.dir, "$var_dir$", + "$var$_$file_date$.nc") + + fcst.path <- paste0(archive$src, hcst.dir, "$var_dir$", + "/$var$_$file_date$.nc") + + # Define regrid parameters: + #------------------------------------------------------------------- + regrid_params <- get_regrid_params(recipe, archive) + + # Longitude circular sort and latitude check + #------------------------------------------------------------------- + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean"){ + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Load hindcast data without regrid + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = variable, + var_dir = var_dir_exp, + file_date = sdates$hcst, + time = idxs$hcst, + var_dir_depends = 'var', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:hcst.nmember), + metadata_dims = 'var', # change to just 'var'? 
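+                 # NOTE: the 'var' dimension keeps the order of 'variable' (tas, tos, sic);
+                 # positions 1-3 are indexed below to apply the sea-ice mask.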
+ return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(hcst))) { + hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (recipe$Analysis$Variables$freq == "daily_mean") { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + + # Define sea-ice grid points based of sea-ice concentration threshold + ice_hcst <- hcst[,3,,,,,,,] >= sic.threshold + + # Replace Tos with Tas for data points with sea ice + hcst[,2,,,,,,,][ice_hcst] <- hcst[,1,,,,,,,][ice_hcst] + + + # Convert hcst to s2dv_cube object + ## TODO: Give correct dimensions to $Dates + ## (sday, sweek, syear instead of file_date) + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + # Combine hcst tas and tos data + #------------------------------------------------------------------- + + hcst <- mask_tas_tos(input_data = hcst, region = c(lons.min, lons.max,lats.min, lats.max), + mask_path = archive$System[[exp.name]]$land_sea_mask, lsm_var_name = 'lsm', + lon = hcst$coords$longitude, lat = hcst$coords$latitude, + lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) + + hcst$dims[['var']] <- dim(hcst$data)[['var']] + hcst$attrs$Variable$varName <- 'tas-tos' + + hcst$data <- Reorder(hcst$data, data_order) + + # Load forecast data without regrid + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + # the call uses file_date instead of fcst_syear so that it can work + # with the daily case and the current version of startR not allowing + # multiple dims split + + fcst <- Start(dat = fcst.path, + var = variable, + var_dir = var_dir_exp, + var_dir_depends = 'var', + file_date = sdates$fcst, + time = idxs$fcst, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + if ("var_dir" %in% names(dim(fcst))) { + fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (recipe$Analysis$Variables$freq == "daily_mean") { + # Adjusts dims for daily case, could be removed if startR allows + # 
multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + + # Define sea-ice grid points based of sea-ice concentration threshold + ice_fcst <- fcst[,3,,,,,,,] >= sic.threshold + + # Replace Tos with Tas for datapoints with sea ice + fcst[,2,,,,,,,][ice_fcst] <- fcst[,1,,,,,,,][ice_fcst] + + + # Convert fcst to s2dv_cube + fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$attrs$Dates[] <- + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + # Combine fcst tas and tos data + #------------------------------------------------------------------- + + fcst <- mask_tas_tos(input_data = fcst, region = c(lons.min, lons.max,lats.min, lats.max), + mask_path = archive$System[[exp.name]]$land_sea_mask, lsm_var_name = 'lsm', + lon = fcst$coords$longitude, lat = fcst$coords$latitude, + lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) + + fcst$dims[['var']] <- dim(fcst$data)[['var']] + fcst$attrs$Variable$varName <- 'tas-tos' + + fcst$data <- Reorder(fcst$data, data_order) + + } else { + fcst <- NULL + } + + # Load obs data without regrid + #------------------------------------------------------------------- + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. 
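+  # For example, a 1993-2016 monthly hindcast with two lead times would give a
+  # POSIXct array with dim c(sday = 1, sweek = 1, syear = 24, time = 2).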
+ dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Separate Start() call for monthly vs daily data + if (store.freq == "monthly_mean") { + + dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") + dim(dates_file) <- dim(dates) + + + # Define variables for blended tas-tos datasets + if (recipe$Analysis$Datasets$Reference$name == 'BEST'){ + variable <- 'tas' + var_dir_obs <- reference_descrip[[store.freq]][variable] + } + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + + } else if (store.freq == "daily_mean") { + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(obs))) { + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") + } + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + if(!recipe$Analysis$Datasets$Reference$name %in% c('HadCRUT4','HadCRUT5','BEST','GISTEMPv4')){ + + # Define sea-ice grid points based of sea-ice concentration threshold + ice_obs <- (obs[,3,,,,,,,]) >= sic.threshold + + # Replace NA values with False + ice_obs[is.na(ice_obs)] <- FALSE + + # Replace Tos with Tas for datapoints with sea ice + obs[,2,,,,,,,][ice_obs] <- obs[,1,,,,,,,][ice_obs] + } + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + + # Combine obs tas and tos data + #------------------------------------------------------------------- + ## TODO: Ask about this list + if(!recipe$Analysis$Datasets$Reference$name %in% c('HadCRUT4','HadCRUT5','BEST','GISTEMPv4')){ + + obs <- mask_tas_tos(input_data = obs, region = c(lons.min, lons.max,lats.min, lats.max), + mask_path = archive$Reference[[ref.name]]$land_sea_mask, lsm_var_name = 'sftof', + lon = obs$coords$longitude, lat = obs$coords$latitude, + lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) + + obs$dims[['var']] <- dim(obs$data)[['var']] + 
obs$attrs$Variable$varName <- 'tas-tos' + + } ## close if on reference name + + obs$data <- Reorder(obs$data, data_order) + + + # Regrid data + #------------------------------------------------------------------- + + # Regrid reference to system grid: + if(recipe$Analysis$Regrid$type == 'to_system'){ + + aux <- CDORemap(data_array = obs$data, ## Not regridding to desired grid when latitudes are ordered descending + lons = obs$coords$longitude, + lats = obs$coords$latitude, + grid = regrid_params$obs.gridtype, + method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, + crop = c(lons.min, lons.max,lats.min, lats.max), + force_remap = TRUE) + + obs$data <- aux$data_array + obs$coords$longitude <- aux$lons + obs$coords$latitude <- aux$lats + obs$dims['longitude'] <- dim(aux$data_array)['longitude'] + obs$dims['latitude'] <- dim(aux$data_array)['latitude'] + rm(aux) + } + + # Regrid system to reference grid: + if(recipe$Analysis$Regrid$type == 'to_reference'){ + + aux <- CDORemap(data_array = hcst$data, + lons = hcst$coords$longitude, lats = hcst$coords$latitude, + grid = regrid_params$fcst.gridtype, method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, crop = TRUE, + force_remap = TRUE) + + hcst$data <- aux$data_array + hcst$coords$longitude <- aux$lons + hcst$coords$latitude <- aux$lats + hcst$dims['longitude'] <- dim(aux$data_array)['longitude'] + hcst$dims['latitude'] <- dim(aux$data_array)['latitude'] + rm(aux) + + if (!is.null(recipe$Analysis$Time$fcst_year)) { + aux <- CDORemap(data_array = fcst$data, + lons = fcst$coords$longitude, + lats = fcst$coords$latitude, + grid = regrid_params$fcst.gridtype, + method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, crop = TRUE, + force_remap = TRUE) + + fcst$data <- aux$data_array + fcst$coords$longitude <- aux$lons + fcst$coords$latitude <- aux$lats + fcst$dims['longitude'] <- dim(aux$data_array)['longitude'] + fcst$dims['latitude'] <- dim(aux$data_array)['latitude'] + rm(aux) + } + } + + # Regrid all data to user defined grid: + if(!recipe$Analysis$Regrid$type %in% c('to_system','to_reference')){ + + aux <- CDORemap(data_array = hcst$data, + lons = hcst$coords$longitude, lats = hcst$coords$latitude, + grid = regrid_params$fcst.gridtype, method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, crop = TRUE, + force_remap = TRUE) + + hcst$data <- aux$data_array + hcst$coords$longitude <- aux$lons + hcst$coords$latitude <- aux$lats + hcst$dims['longitude'] <- dim(aux$data_array)['longitude'] + hcst$dims['latitude'] <- dim(aux$data_array)['latitude'] + rm(aux) + + if (!is.null(recipe$Analysis$Time$fcst_year)) { + aux <- CDORemap(data_array = fcst$data, + lons = fcst$coords$longitude, + lats = fcst$coords$latitude, + grid = regrid_params$fcst.gridtype, + method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, crop = TRUE, + force_remap = TRUE) + + fcst$data <- aux$data_array + fcst$coords$longitude <- aux$lons + fcst$coords$latitude <- aux$lats + fcst$dims['longitude'] <- dim(aux$data_array)['longitude'] + fcst$dims['latitude'] <- dim(aux$data_array)['latitude'] + rm(aux) + } + + aux <- CDORemap(data_array = obs$data, + lons = obs$coords$longitude, + lats = obs$coords$latitude, + grid = regrid_params$obs.gridtype, + method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, crop = TRUE, + force_remap = TRUE) + + obs$data <- aux$data_array + obs$coords$longitude <- aux$lons + obs$coords$latitude <- aux$lats + obs$dims['longitude'] <- dim(aux$data_array)['longitude'] + obs$dims['latitude'] <- 
dim(aux$data_array)['latitude'] + rm(aux) + } + + + # Check for consistency between hcst and obs grid + if (!(recipe$Analysis$Regrid$type == 'none')) { + compare_exp_obs_grids(exp = hcst, obs = obs) + } + + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/modules/Loading/R/mask_tas_tos.R b/modules/Loading/R/mask_tas_tos.R new file mode 100644 index 0000000000000000000000000000000000000000..cc6937957a4aa88993b95b905765622e852f9560 --- /dev/null +++ b/modules/Loading/R/mask_tas_tos.R @@ -0,0 +1,63 @@ +library(multiApply) +library(startR) +library(s2dv) + +mask_tas_tos <- function(input_data, mask_path, lsm_var_name = lsm_var_name, lon, lat, region = region, + lon_dim = 'lon', lat_dim = 'lat', + ncores = NULL){ + + + mask <- .load_mask(mask_path = mask_path, lsm_var_name = lsm_var_name, lon_dim = lon_dim, lat_dim = lat_dim, + sea_value = 1, land_value = 0, region = region) + + ## TO DO: improve the check and correct lats + stopifnot(all(lon == mask$lon)) + stopifnot(max(abs(as.numeric(round(lat,2) - round(mask$lat,2)))) < 0.1) # stopifnot(all(lat == mask$lat)) + + tas <- Subset(input_data$data, along = 'var', indices = 1) + tos <- Subset(input_data$data, along = 'var', indices = 2) + + input_data$data <- multiApply::Apply(data = list(tas, tos), + target_dims = c(lon_dim, lat_dim), + fun = .mask_tas_tos, + mask = mask$mask, + sea_value = 1, + ncores = ncores)$output1 + + return(input_data) +} + +.mask_tas_tos <- function(data_tas, data_tos, mask, sea_value){ + data_tas[mask == sea_value] <- data_tos[mask == sea_value] + return(data_tas) +} + +.load_mask <- function(mask_path = mask_path, lsm_var_name = lsm_var_name, land_value = 0, sea_value = 1, + lon_dim = 'lon', lat_dim = 'lat', region = region){ + + lons.min <- region[1] + lons.max <- region[2] + lats.min <- region[3] + lats.max <- region[4] + + data <- startR::Start(dat = mask_path, + var = lsm_var_name, + lon = values(list(lons.min, lons.max)), + lat = values(list(lats.min, lats.max)), + return_vars = list(lat = NULL, lon = NULL), + synonims = list(lon = c('lon','longitude'), + lat = c('lat','latitude')), + lat_reorder = Sort(decreasing = TRUE), + lon_reorder = CircularSort(0,360), + num_procs = 1, retrieve = TRUE) + + mask <- list(mask = drop(data), + lon = as.numeric(attr(data,'Variables')$common$lon), + lat = as.numeric(attr(data,'Variables')$common$lat)) + + mask$mask[data <= 0.5] <- sea_value + mask$mask[data > 0.5] <- land_value + names(dim(mask$mask)) <- c(lon_dim, lat_dim) + + return(mask) +} diff --git a/modules/Loading/load_datasets.R b/modules/Loading/load_datasets.R new file mode 100644 index 0000000000000000000000000000000000000000..86010780c1667b65406f14b57f7ff03ac1821809 --- /dev/null +++ b/modules/Loading/load_datasets.R @@ -0,0 +1,7 @@ +load_datasets <- function(recipe) { + warning(paste0("The function load_datasets() has been renamed to: ", + "'Loading()'. The name 'load_datasets' will be ", + "deprecated in the next release. Please change your scripts ", + "accordingly.")) + return(Loading(recipe)) +} diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml deleted file mode 100644 index cda98c913123a5ec1f9f194c21e26f2d3dcddea4..0000000000000000000000000000000000000000 --- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml +++ /dev/null @@ -1,49 +0,0 @@ -Description: - Author: V. 
Agudetse - -Analysis: - Horizon: Seasonal - Variables: - name: tas - freq: monthly_mean - Datasets: - System: - name: ECMWF-SEAS5.1 - Multimodel: False - Reference: - name: ERA5 - Time: - sdate: '0101' - fcst_year: - hcst_start: '2000' - hcst_end: '2015' - ftime_min: 1 - ftime_max: 2 - Region: - latmin: 30 - latmax: 50 - lonmin: -10 - lonmax: 30 - Regrid: - method: bilinear - type: to_system - Workflow: - Calibration: - method: raw - Anomalies: - compute: yes - cross_validation: yes - Skill: - metric: mean_bias EnsCorr RPSS CRPSS EnsSprErr # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS - Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] - Indicators: - index: no - ncores: 14 - remove_NAs: yes - Output_format: Scorecards -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml deleted file mode 100644 index 0cb2d29f1880041fc90e732a9b7e9cd3960cdfc6..0000000000000000000000000000000000000000 --- a/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml +++ /dev/null @@ -1,49 +0,0 @@ -Description: - Author: V. Agudetse - -Analysis: - Horizon: Seasonal - Variables: - name: rsds - freq: monthly_mean - Datasets: - System: - name: ECMWF-SEAS5 - Multimodel: False - Reference: - name: ERA5 - Time: - sdate: '1101' - fcst_year: '2020' - hcst_start: '1993' - hcst_end: '2016' - ftime_min: 1 - ftime_max: 3 - Region: - latmin: -10 - latmax: 10 - lonmin: 0 - lonmax: 20 - Regrid: - method: bilinear - type: to_system - Workflow: - Anomalies: - compute: no - cross_validation: - Calibration: - method: mse_min - Skill: - metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr - Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] - Indicators: - index: no - ncores: 1 - remove_NAs: yes - Output_format: S2S4E -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml deleted file mode 100644 index b14c90e13ffd42534689ec0f1c4a179321e23cca..0000000000000000000000000000000000000000 --- a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml +++ /dev/null @@ -1,50 +0,0 @@ -Description: - Author: V. Agudetse - Info: ECMWF System5 Seasonal Forecast Example recipe (daily mean, tas) - -Analysis: - Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal - Variables: - name: tas # Mandatory, str: variable name in /esarchive/ - freq: daily_mean # Mandatory, str: either monthly_mean or daily_mean - Datasets: - System: - name: ECMWF-SEAS5 # Mandatory, str: System codename. See docu. - Multimodel: no # Mandatory, bool: Either yes/true or no/false - Reference: - name: ERA5 # Mandatory, str: Reference codename. See docu. 
- Time: - sdate: '1101' - fcst_year: '2020' # Optional, int: Forecast year 'YYYY' - hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' - ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 2 # Mandatory, int: Last leadtime time step in months - Region: - latmin: -10 # Mandatory, int: minimum latitude - latmax: 10 # Mandatory, int: maximum latitude - lonmin: 0 # Mandatory, int: minimum longitude - lonmax: 20 # Mandatory, int: maximum longitude - Regrid: - method: bilinear # Mandatory, str: Interpolation method. See docu. - type: to_reference # Mandatory, str: to_system, to_reference, or CDO-accepted grid. - Workflow: - Anomalies: - compute: no # Whether to compute the anomalies and use them for skill metrics - cross_validation: # whether they should be computed in cross-validation - Calibration: - method: qmap # Mandatory, str: Calibration method. See docu. - Skill: - metric: RPSS FRPSS # str: Skill metric or list of skill metrics. See docu. - Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. - Indicators: - index: no - ncores: 4 # Optional, int: number of cores, defaults to 1 - remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE - Output_format: S2S4E -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml deleted file mode 100644 index 5c899f97273cf183518ee7ebb560ced275e2dc0a..0000000000000000000000000000000000000000 --- a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml +++ /dev/null @@ -1,47 +0,0 @@ -Description: - Author: V. Agudetse - -Analysis: - Horizon: Seasonal - Variables: - name: tas - freq: daily_mean - Datasets: - System: - name: ECMWF-SEAS5 - Multimodel: no - Reference: - name: ERA5 - Time: - sdate: '1101' - fcst_year: '2020' - hcst_start: '1993' - hcst_end: '2003' - ftime_min: 1 - ftime_max: 2 - Region: - latmin: -10 - latmax: 10 - lonmin: 0 - lonmax: 20 - Regrid: - method: bilinear - type: to_system - Workflow: - Anomalies: - compute: no - cross_validation: - Calibration: - method: qmap - Skill: - metric: FRPS RPSS - Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] - Indicators: - index: no - Output_format: S2S4E -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Saving/R/Utils.R b/modules/Saving/R/Utils.R new file mode 100644 index 0000000000000000000000000000000000000000..a5bd5d0c9d6d299f2b4fa8340a529a9eecb05459 --- /dev/null +++ b/modules/Saving/R/Utils.R @@ -0,0 +1,69 @@ +.get_global_attributes <- function(recipe, archive) { + # Generates metadata of interest to add to the global attributes of the + # netCDF files. 
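+  # It returns a named list with the hindcast period, the institutions of the
+  # system and the reference, their names, the calibration method and the
+  # creation time.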
+ parameters <- recipe$Analysis + hcst_period <- paste0(parameters$Time$hcst_start, " to ", + parameters$Time$hcst_end) + current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) + system_name <- parameters$Datasets$System$name + reference_name <- parameters$Datasets$Reference$name + + attrs <- list(reference_period = hcst_period, + institution_system = archive$System[[system_name]]$institution, + institution_reference = archive$Reference[[reference_name]]$institution, + system = system_name, + reference = reference_name, + calibration_method = parameters$Workflow$Calibration$method, + computed_on = current_time) + + return(attrs) +} + +.get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { + # Generates time dimensions and the corresponding metadata. + ## TODO: Subseasonal + + switch(fcst.horizon, + "seasonal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}, + "subseasonal" = {len <- 4; ref <- 'hours since '; + stdname <- ''}, + "decadal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}) + + dim(time) <- length(time) + sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting + metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), + calendar = calendar)) + attr(time, 'variables') <- metadata + names(dim(time)) <- 'time' + + sdate <- 1:length(sdate) + dim(sdate) <- length(sdate) + metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), + collapse=", "), + units = paste0('Init date'))) + attr(sdate, 'variables') <- metadata + names(dim(sdate)) <- 'sdate' + + return(list(time=time)) +} + +.get_latlon <- function(latitude, longitude) { + # Adds dimensions and metadata to lat and lon + # latitude: array containing the latitude values + # longitude: array containing the longitude values + + dim(longitude) <- length(longitude) + metadata <- list(longitude = list(units = 'degrees_east')) + attr(longitude, 'variables') <- metadata + names(dim(longitude)) <- 'longitude' + + dim(latitude) <- length(latitude) + metadata <- list(latitude = list(units = 'degrees_north')) + attr(latitude, 'variables') <- metadata + names(dim(latitude)) <- 'latitude' + + return(list(lat=latitude, lon=longitude)) + +} diff --git a/modules/Saving/R/drop_dims.R b/modules/Saving/R/drop_dims.R new file mode 100644 index 0000000000000000000000000000000000000000..7361faa8ea1056b414b5e2ed67740e4ceeb38bae --- /dev/null +++ b/modules/Saving/R/drop_dims.R @@ -0,0 +1,86 @@ +# version victoria https://earth.bsc.es/gitlab/es/auto-s2s/-/blob/dev-Loading-multivar/modules/Skill/Skill.R +.drop_dims <- function(metric_array) { + # Define dimensions that are not essential for saving + droppable_dims <- c("dat", "sday", "sweek", "ensemble", "nobs", + "nexp", "exp_memb", "obs_memb", "bin") + # Select non-essential dimensions of length 1 + dims_to_drop <- intersect(names(which(dim(metric_array) == 1)), + droppable_dims) + drop_indices <- grep(paste(dims_to_drop, collapse = "|"), + names(dim(metric_array))) + # Drop selected dimensions + metric_array <- abind::adrop(metric_array, drop = drop_indices) + # If array has memb dim (Corr case), change name to 'ensemble' + if ("exp_memb" %in% names(dim(metric_array))) { + names(dim(metric_array))[which(names(dim(metric_array)) == + "exp_memb")] <- "ensemble" + } + return(metric_array) +} + + + + + +## TODO: Replace with ClimProjDiags::Subset and add var and dat dimensions +#.drop_dims <- function(metric_array) { +# # Drop all singleton 
dimensions +# dims <- dim(metric_array) +# metric_array <- drop(metric_array) +# if ("region" %in% names(dims)) { +# if (!is.array(metric_array)) { +# dim(metric_array) <- length(metric_array) +# if (!all(dims > 1)) { +# if ("syear" %in% names(dims)) { +# names(dim(metric_array)) <- "syear" +# } else { +# names(dim(metric_array)) <- "time" +# } +# } else { +# names(dim(metric_array)) <- names(dims[which(dims > 1)]) +# } +# } +# if (!("time" %in% names(dim(metric_array)))) { +# dim(metric_array) <- c("time" = 1, dim(metric_array)) +# } +# # If latitude was singleton, add it back +# if (!("region" %in% names(dim(metric_array)))) { +# dim(metric_array) <- c(dim(metric_array), "region" = 1) +# } +# # If array has memb dim (Corr case), change name to 'ensemble' +# if ("exp_memb" %in% names(dim(metric_array))) { +# names(dim(metric_array))[which(names(dim(metric_array)) == +# "exp_memb")] <- "ensemble" +# } +# } else if (all(c("latitude", "longiguted") %in% names(dims))) { +# # If the array becomes a vector as a result, restore dimensions. +# ## This applies to the case of 'corr' with one lon and one lat +# if (!is.array(metric_array)) { +# dim(metric_array) <- length(metric_array) +# names(dim(metric_array)) <- names(dims[which(dims > 1)]) +# } +# # If the array becomes a vector as a result, restore dimensions. +# ## This applies to the case of 'corr' with one lon and one lat +# # If time happened to be a singleton dimension, add it back in the array +# if (!("time" %in% names(dim(metric_array)))) { +# dim(metric_array) <- c("time" = 1, dim(metric_array)) +# } +# # If latitude was singleton, add it back +# if (!("latitude" %in% names(dim(metric_array)))) { +# dim(metric_array) <- c(dim(metric_array), "latitude" = 1) +# } +# # If longitude was singleton, add it back +# if (!("longitude" %in% names(dim(metric_array)))) { +# dim(metric_array) <- c(dim(metric_array), "longitude" = 1) +# } +# # If array has memb dim (Corr case), change name to 'ensemble' +# if ("exp_memb" %in% names(dim(metric_array))) { +# names(dim(metric_array))[which(names(dim(metric_array)) == +# "exp_memb")] <- "ensemble" +# } else { +# stop("what dimensions") +# } +# } +# return(metric_array) +#} + diff --git a/modules/Saving/R/get_dir.R b/modules/Saving/R/get_dir.R new file mode 100644 index 0000000000000000000000000000000000000000..b8463114039f146bfbaf85c9b66a0e856abc31c2 --- /dev/null +++ b/modules/Saving/R/get_dir.R @@ -0,0 +1,60 @@ +## TODO: Separate by time aggregation + +get_dir <- function(recipe, variable, agg = "global") { + # This function builds the path for the output directory. The output + # directories will be subdirectories within outdir, organized by variable, + # startdate, and aggregation. 
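+  # Example (illustrative): with the default output format, monthly-mean tas
+  # from ECMWF-SEAS5, calibrated with mse_min and initialized on 20201101,
+  # would be written to
+  #   <output_dir>/ECMWF-SEAS5/mse_min-monthly_mean/tas/20201101/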
+ + ## TODO: Get aggregation from recipe + ## TODO: Change variable name to s2dv_cube name + # variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + outdir <- recipe$Run$output_dir + system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) + + if (tolower(recipe$Analysis$Output_format) == 'scorecards') { + # Define output dir name accordint to Scorecards format + dict <- read_yaml("conf/output_dictionaries/scorecards.yml") + # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name + dir <- paste0(outdir, "/", system, "/", variable, "/") + } else { + # Default generic output format based on FOCUS + # Get startdate or hindcast period + if (!is.null(recipe$Analysis$Time$fcst_year)) { + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + ## PROBLEM: decadal doesn't have sdate + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') + } else { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } + } else { + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + ## PROBLEM: decadal doesn't have sdate + fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$hcst_end, + sep = '_')) + } else { + fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) + } + } + ## TODO: Remove calibration method from output directory? + if (!is.null(recipe$Analysis$Workflow$Calibration$method)) { + calib.method <- paste0(tolower(recipe$Analysis$Workflow$Calibration$method), "-") + } else { + calib.method <- "" + } + store.freq <- recipe$Analysis$Variables$freq + if (!is.null(recipe$Analysis$Region$name)) { + outdir <- paste0(outdir, "/", recipe$Analysis$Region$name) + } + switch(tolower(agg), + "region" = {dir <- paste0(outdir, "/", system, "/", calib.method, + store.freq, "/", variable, + "_region/", fcst.sdate, "/")}, + "global" = {dir <- paste0(outdir, "/", system, "/", calib.method, + store.freq, "/", variable, "/", + fcst.sdate, "/")}) + } + + return(dir) +} diff --git a/modules/Saving/paths2save.R b/modules/Saving/R/get_filename.R similarity index 51% rename from modules/Saving/paths2save.R rename to modules/Saving/R/get_filename.R index 93196b86d194fa2dbed8913d6c675f84d038ec9b..54bea81cf6d3af9797daae015fd6432df6ca0fb2 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/R/get_filename.R @@ -16,7 +16,7 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { } switch(tolower(agg), - "country" = {gg <- "-country"}, + "region" = {gg <- "-region"}, "global" = {gg <- ""}) system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) @@ -55,54 +55,3 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { return(paste0(dir, file, ".nc")) } -get_dir <- function(recipe, agg = "global") { - # This function builds the path for the output directory. The output - # directories will be subdirectories within outdir, organized by variable, - # startdate, and aggregation. 
- - ## TODO: Get aggregation from recipe - outdir <- paste0(recipe$Run$output_dir, "/outputs/") - ## TODO: multivar case - variable <- recipe$Analysis$Variables$name - system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) - - if (tolower(recipe$Analysis$Output_format) == 'scorecards') { - # Define output dir name accordint to Scorecards format - dict <- read_yaml("conf/output_dictionaries/scorecards.yml") - # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name - dir <- paste0(outdir, "/", system, "/", variable, "/") - } else { - # Default generic output format based on FOCUS - # Get startdate or hindcast period - if (!is.null(recipe$Analysis$Time$fcst_year)) { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - # decadal doesn't have sdate - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') - } else { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } - } else { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - # decadal doesn't have sdate - fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$hcst_end, - sep = '_')) - } else { - fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) - } - } - - calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - store.freq <- recipe$Analysis$Variables$freq - ## TODO: Change "_country" - switch(tolower(agg), - "country" = {dir <- paste0(outdir, "/", system, "/", calib.method, - "-", store.freq, "/", variable, - "_country/", fcst.sdate, "/")}, - "global" = {dir <- paste0(outdir, "/", system, "/", calib.method, - "-", store.freq, "/", variable, "/", - fcst.sdate, "/")}) - } - return(dir) -} diff --git a/modules/Saving/R/get_global_attributes.R b/modules/Saving/R/get_global_attributes.R new file mode 100644 index 0000000000000000000000000000000000000000..25c47279c19911fbf47dd9de8d9be635913958ba --- /dev/null +++ b/modules/Saving/R/get_global_attributes.R @@ -0,0 +1,20 @@ +get_global_attributes <- function(recipe, archive) { + # Generates metadata of interest to add to the global attributes of the + # netCDF files. 
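+  # Note: this exported function mirrors .get_global_attributes() in
+  # modules/Saving/R/Utils.R; both build the same list of attributes.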
+ parameters <- recipe$Analysis + hcst_period <- paste0(parameters$Time$hcst_start, " to ", + parameters$Time$hcst_end) + current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) + system_name <- parameters$Datasets$System$name + reference_name <- parameters$Datasets$Reference$name + + attrs <- list(reference_period = hcst_period, + institution_system = archive$System[[system_name]]$institution, + institution_reference = archive$Reference[[reference_name]]$institution, + system = system_name, + reference = reference_name, + calibration_method = parameters$Workflow$Calibration$method, + computed_on = current_time) + + return(attrs) +} diff --git a/modules/Saving/R/get_latlon.R b/modules/Saving/R/get_latlon.R new file mode 100644 index 0000000000000000000000000000000000000000..25d536380bd67300fb0ca57a839f467470b6f9ea --- /dev/null +++ b/modules/Saving/R/get_latlon.R @@ -0,0 +1,17 @@ +get_latlon <- function(latitude, longitude) { + # Adds dimensions and metadata to lat and lon + # latitude: array containing the latitude values + # longitude: array containing the longitude values + dim(longitude) <- length(longitude) + metadata <- list(longitude = list(units = 'degrees_east')) + attr(longitude, 'variables') <- metadata + names(dim(longitude)) <- 'longitude' + + dim(latitude) <- length(latitude) + metadata <- list(latitude = list(units = 'degrees_north')) + attr(latitude, 'variables') <- metadata + names(dim(latitude)) <- 'latitude' + + return(list(lat=latitude, lon=longitude)) + +} diff --git a/modules/Saving/R/get_time.R b/modules/Saving/R/get_time.R new file mode 100644 index 0000000000000000000000000000000000000000..5426c177f1bd31e14b27ca9e831eb880da183f04 --- /dev/null +++ b/modules/Saving/R/get_time.R @@ -0,0 +1,30 @@ +get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { + # Generates time dimensions and the corresponding metadata. + ## TODO: Subseasonal + + switch(fcst.horizon, + "seasonal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}, + "subseasonal" = {len <- 4; ref <- 'hours since '; + stdname <- ''}, + "decadal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}) + + dim(time) <- length(time) + sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting + metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), + calendar = calendar)) + attr(time, 'variables') <- metadata + names(dim(time)) <- 'time' + + sdate <- 1:length(sdate) + dim(sdate) <- length(sdate) + metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), + collapse=", "), + units = paste0('Init date'))) + attr(sdate, 'variables') <- metadata + names(dim(sdate)) <- 'sdate' + + return(list(time=time)) +} + diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R new file mode 100644 index 0000000000000000000000000000000000000000..050fe68db9e2974d9568a611f2e69300ec76cb48 --- /dev/null +++ b/modules/Saving/R/save_corr.R @@ -0,0 +1,127 @@ +save_corr <- function(recipe, + skill, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the ensemble correlation in 'skill' + # and exports it to a netCDF file inside 'outdir'. 
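+  # recipe: the auto-s2s recipe
+  # skill: named list of ensemble correlation arrays (e.g. the
+  #        'corr_individual_members' metrics produced by the Skill module)
+  # data_cube: s2dv_cube used to retrieve the dates, coordinates and
+  #            variable names of the hindcast
+  # agg: aggregation, "global" or "country"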
+ + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Add global and variable attributes + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(global_attributes, + list(from_anomalies = "Yes")) + } else { + global_attributes <- c(global_attributes, + list(from_anomalies = "No")) + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Loop over variable dimension + for (var in 1:data_cube$dims[['var']]) { + # Subset skill arrays + subset_skill <- lapply(skill, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + + # Generate name of output file + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "corr") + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + subset_skill <- lapply(subset_skill, function(x) { + Reorder(x, c(lalo, 'ensemble', 'time'))}) + } + attr(subset_skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(subset_skill)) { + metric <- names(subset_skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(subset_skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'ensemble', 'time') + } else { + sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) + dims <- c(lalo, 'ensemble', 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + 
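+      # Attach the metadata and dimension names so that ArrayToNc() writes
+      # a named, documented variable for each metric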
attr(subset_skill[[i]], 'variables') <- metadata
+      names(dim(subset_skill[[i]])) <- dims
+    }
+    # Get grid data and metadata and export to netCDF
+    if (tolower(agg) == "country") {
+      country <- .get_countries(grid)
+      ArrayToNc(append(country, time, subset_skill), outfile)
+    } else {
+      latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
+      longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
+      latlon <- .get_latlon(latitude, longitude)
+      # Compile variables into a list and export to netCDF
+      vars <- list(latlon$lat, latlon$lon, time)
+      vars <- c(vars, subset_skill)
+      ArrayToNc(vars, outfile)
+    }
+  }
+  info(recipe$Run$logger,
+       "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####")
+}
diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R
new file mode 100644
index 0000000000000000000000000000000000000000..00a22850dfa13e5a3eadd73e55d24be5c10b5585
--- /dev/null
+++ b/modules/Saving/R/save_forecast.R
@@ -0,0 +1,147 @@
+save_forecast <- function(recipe,
+                          data_cube,
+                          type = "hcst",
+                          agg = "global",
+                          outdir = NULL) {
+  # Loops over the years in the s2dv_cube containing a hindcast or forecast
+  # and exports each year to a netCDF file.
+  # data_cube: s2dv_cube containing the data and metadata
+  # recipe: the auto-s2s recipe
+  # outdir: directory where the files should be saved
+  # agg: aggregation, "global" or "country"
+
+  lalo <- c('longitude', 'latitude')
+  archive <- get_archive(recipe)
+  dictionary <- read_yaml("conf/variable-dictionary.yml")
+  global_attributes <- .get_global_attributes(recipe, archive)
+  fcst.horizon <- tolower(recipe$Analysis$Horizon)
+  store.freq <- recipe$Analysis$Variables$freq
+  calendar <- archive$System[[global_attributes$system]]$calendar
+
+  # Generate vector containing leadtimes
+  dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1),
+                    cal = calendar)
+  if (fcst.horizon == 'decadal') {
+    ## Method 1: Use the first date as init_date.
But it may be better to use + ## the real initialized date (ask users) + # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + ## Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + if (type == 'hcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else if (type == 'fcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } + } else { + if (type == 'hcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } else if (type == 'fcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + } + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + # expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + + # Loop over variables + for (var in 1:data_cube$dims[['var']]) { + subset_cube <- CST_Subset(data_cube, along = 'var', indices = var, + drop = F, var_dim = 'var', dat_dim = 'dat') + variable <- subset_cube$attrs$Variable$varName + var.longname <- subset_cube$attrs$Variable$metadata[[variable]]$long_name + + # Create output directory + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + + # Loop over each year in the data and save independently + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(subset_cube$data, 'syear', i, drop = T) + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'ensemble', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- subset_cube$attrs$Variable$metadata[[variable]]$units + } else { + dims <- c(lalo, 'ensemble', 'time') + var.expname <- variable + var.sdname <- var.sdname + var.units <- subset_cube$attrs$Variable$metadata[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal + ## data has been reshaped + # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') + + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time 
<- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, variable, fcst.sdate, + agg, "exp") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + } + info(recipe$Run$logger, paste("#####", toupper(type), + "SAVED TO NETCDF FILE #####")) +} diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R new file mode 100644 index 0000000000000000000000000000000000000000..cd4252ab7365d038b9333f506604e2ad4d6b9afb --- /dev/null +++ b/modules/Saving/R/save_metrics.R @@ -0,0 +1,136 @@ +save_metrics <- function(recipe, + skill, + dictionary = NULL, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the skill metrics in 'skill' + # and exports them to a netCDF file inside 'outdir'. + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
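+  # The selected date is only used to label the output file and to anchor the
+  # time axis ('hours since <sdate>'); it does not affect the metric values.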
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + # Loop over variable dimension + for (var in 1:data_cube$dims[['var']]) { + # Subset skill arrays + subset_skill <- lapply(skill, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + # Generate name of output file + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "skill") + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + subset_skill <- lapply(subset_skill, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + attr(subset_skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(subset_skill)) { + metric <- names(subset_skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + subset_skill[[i]][is.na(subset_skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'time') + } else if (tolower(agg) == "region") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('region', 'time') + } else { + sdname <- paste0(metric) + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(subset_skill[[i]], 'variables') <- metadata + names(dim(subset_skill[[i]])) <- dims + } + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, subset_skill), outfile) + } else if (tolower(agg) == "region") { + region <- array(1:dim(skill[[1]])['region'], c(dim(skill[[1]])['region'])) + # TODO: check metadata when more than 1 region is store in the data array + metadata <- list(region = list(long_name = data_cube$attrs$Variable$metadata$region$name)) + attr(region, 'variables') <- metadata + vars <- list(region, time) + vars <- c(vars, subset_skill) + ArrayToNc(vars, outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, subset_skill) + ArrayToNc(vars, outfile) + } + } + info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") +} + diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R new file mode 100644 index 0000000000000000000000000000000000000000..127e98909d3e067c3516bc79aef8474e33cc7d17 --- /dev/null +++ b/modules/Saving/R/save_observations.R @@ -0,0 +1,145 @@ +save_observations <- function(recipe, + data_cube, + agg = "global", + outdir = 
NULL) { + # Loops over the years in the s2dv_cube containing the observations and + # exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + global_attributes <- .get_global_attributes(recipe, archive) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$Reference[[global_attributes$reference]]$calendar + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + ## Method 1: Use the first date as init_date. But it may be better to use + ## the real initialized date (ask users) + # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + ## Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + + # Loop over variables + for (var in 1:data_cube$dims[['var']]) { + subset_cube <- CST_Subset(data_cube, along = 'var', indices = var, + drop = F, var_dim = 'var', dat_dim = 'dat') + variable <- subset_cube$attrs$Variable$varName + var.longname <- subset_cube$attrs$Variable$metadata[[variable]]$long_name + + # Create output directory + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(subset_cube$data, 'syear', i, drop = T) + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } else { + dims <- c(lalo, 'time') + var.expname <- variable + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date. The date is computed for each year, and adapted for + # consistency with the hcst/fcst dates, so that both sets of files have + # the same name pattern. 
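+      # Example (illustrative): for a hindcast initialized on 1101, the
+      # observations for November 1993 are labelled 19931101, matching the
+      # name of the corresponding hcst file.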
+ ## Because observations are loaded differently in the daily vs. monthly + ## cases, different approaches are necessary. + if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + } else { + + if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) + } + } + + # Ensure the year is correct if the first leadtime goes to the next year + init_date <- as.POSIXct(init_date) + if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { + lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 + } + # Ensure that the initialization month is consistent with the hindcast + lubridate::month(fcst.sdate) <- lubridate::month(init_date) + fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "obs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + } + info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R new file mode 100644 index 0000000000000000000000000000000000000000..862ed5fff9fc4501050dacd51349949534309c8a --- /dev/null +++ b/modules/Saving/R/save_percentiles.R @@ -0,0 +1,117 @@ +save_percentiles <- function(recipe, + percentiles, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the percentiles + # and exports them to a netCDF file inside 'outdir'. 
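+  # recipe: the auto-s2s recipe
+  # percentiles: named list of percentile arrays (names such as
+  #              'percentile_33' are assumed when building the long_name)
+  # data_cube: s2dv_cube used to retrieve the dates, coordinates and
+  #            variable names of the hindcast
+  # agg: aggregation, "global" or "country"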
+ archive <- get_archive(recipe) + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Add global and variable attributes + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + for (var in 1:data_cube$dims[['var']]) { + # Subset arrays + subset_percentiles <- lapply(percentiles, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + # Generate name of output file + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "percentiles") + + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + subset_percentiles <- lapply(subset_percentiles, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + + attr(subset_percentiles[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(subset_percentiles)) { + ## TODO: replace with proper standard names + percentile <- names(subset_percentiles[i]) + long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = percentile, long_name = long_name)) + attr(subset_percentiles[[i]], 'variables') <- metadata + names(dim(subset_percentiles[[i]])) <- dims + } + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, subset_percentiles), outfile) + 
} else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, subset_percentiles) + ArrayToNc(vars, outfile) + } + } + info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R new file mode 100644 index 0000000000000000000000000000000000000000..b7da0449d489ae6f6ccb8e3737842195e5e1227a --- /dev/null +++ b/modules/Saving/R/save_probabilities.R @@ -0,0 +1,133 @@ +save_probabilities <- function(recipe, + probs, + data_cube, + agg = "global", + type = "hcst", + outdir = NULL) { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports the corresponding category probabilities to a netCDF file. + # probs: array containing the probability data + # recipe: the auto-s2s recipe + # data_cube: s2dv_cube containing the data and metadata + # outdir: directory where the files should be saved + # type: 'exp' (hcst and fcst) or 'obs' + # agg: aggregation, "global" or "country" + # type: 'hcst' or 'fcst' + + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + global_attributes <- .get_global_attributes(recipe, archive) + # Add anomaly computation to global attributes + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? 
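+  ## Leadtimes are expressed as hours since the initialization date, matching
+  ## the 'hours since' time units later written by .get_times()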
+ dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + + # Loop over variable dimension + for (var in 1:data_cube$dims[['var']]) { + subset_probs <- lapply(probs, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + # Create output directory + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + + # Loop over each year in the data and save independently + for (i in syears) { + # Select year from array and rearrange dimensions + probs_syear <- lapply(subset_probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') + if (tolower(agg) == "global") { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c(lalo, 'time'))}) + } else { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c('country', 'time'))}) + } + + for (bin in 1:length(probs_syear)) { + prob_bin <- names(probs_syear[bin]) + long_name <- paste0(prob_bin, " probability category") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = prob_bin, long_name = long_name)) + attr(probs_syear[[bin]], 'variables') <- metadata + names(dim(probs_syear[[bin]])) <- dims # is this necessary? 
+ } + + # Add global attributes + attr(probs_syear[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "probs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, probs_syear), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, probs_syear) + ArrayToNc(vars, outfile) + } + } + } + info(recipe$Run$logger, + paste("#####", toupper(type), + "PROBABILITIES SAVED TO NETCDF FILE #####")) +} diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 28b5e5529bad1ae5e4402b154b674500a175dd0d..fc9fe4eebd0d51f1b33a9a397a566d424215d99c 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -1,11 +1,25 @@ ## TODO: Save obs percentiles - -source("modules/Saving/paths2save.R") - -save_data <- function(recipe, data, - skill_metrics = NULL, - probabilities = NULL, - archive = NULL) { +## TODO: Insert vardim to simplify the code? + +source("modules/Saving/R/get_dir.R") +source("modules/Saving/R/get_filename.R") +source("modules/Saving/R/Utils.R") +source("modules/Saving/R/save_forecast.R") +source("modules/Saving/R/save_observations.R") +source("modules/Saving/R/save_metrics.R") +source("modules/Saving/R/save_corr.R") +source("modules/Saving/R/save_probabilities.R") +source("modules/Saving/R/save_percentiles.R") +source("modules/Saving/R/get_time.R") +source("modules/Saving/R/get_latlon.R") +source("modules/Saving/R/get_global_attributes.R") +source("modules/Saving/R/drop_dims.R") + +Saving <- function(recipe, data, + skill_metrics = NULL, + probabilities = NULL, + agg = 'global', + archive = NULL) { # Wrapper for the saving functions. # recipe: The auto-s2s recipe # archive: The auto-s2s archive @@ -15,45 +29,23 @@ save_data <- function(recipe, data, # probabilities: output of compute_probabilities() # mean_bias: output of compute_mean_bias() + # Sanity checks if (is.null(recipe)) { error(recipe$Run$logger, "The 'recipe' parameter is mandatory.") stop() } if (is.null(data)) { - error(recupe$Run$logger, - paste("The 'data' parameter is mandatory. It should be a list", - "of at least two s2dv_cubes containing the hcst and obs.")) + error(recipe$Run$logger, + paste("The 'data' parameter is mandatory. 
It should be a list", + "of at least two s2dv_cubes containing the hcst and obs.")) stop() } - if (is.null(archive)) { - if (tolower(recipe$Analysis$Horizon) == "seasonal") { - archive <- read_yaml(paste0(recipe$Run$code_dir, - "conf/archive.yml"))$archive - } else if (tolower(recipe$Analysis$Horizon) == "decadal") { - archive <- read_yaml(paste0(recipe$Run$code_dir, - "conf/archive_decadal.yml"))$archive - } - } - dict <- read_yaml("conf/variable-dictionary.yml") - - # Create output directory - outdir <- get_dir(recipe) - dir.create(outdir, showWarnings = FALSE, recursive = TRUE) - - # Export hindcast, forecast and observations onto outfile - save_forecast(data$hcst, recipe, dict, outdir, archive = archive, - type = 'hcst') - if (!is.null(data$fcst)) { - save_forecast(data$fcst, recipe, dict, outdir, - archive = archive, type = 'fcst') - } - save_observations(data$obs, recipe, dict, outdir, archive = archive) # Separate ensemble correlation from the rest of the metrics, as it has one # extra dimension "ensemble" and must be saved to a different file - if ("corr" %in% names(skill_metrics)) { - corr_metric_names <- grep("^corr", names(skill_metrics)) + if ("corr_individual_members" %in% names(skill_metrics)) { + corr_metric_names <- grep("^corr_individual_members", names(skill_metrics)) corr_metrics <- skill_metrics[corr_metric_names] skill_metrics <- skill_metrics[-corr_metric_names] if (length(skill_metrics) == 0) { @@ -63,844 +55,51 @@ save_data <- function(recipe, data, corr_metrics <- NULL } - # Export skill metrics onto outfile + # Iterate over variables to subset s2dv_cubes and save outputs + save_forecast(recipe = recipe, + data_cube = data$hcst, + outdir = outdir[var], + type = 'hcst') + if (!is.null(data$fcst)) { + save_forecast(recipe = recipe, + data_cube = data$fcst, + outdir = outdir[var], + type = 'fcst') + } + save_observations(recipe = recipe, + data_cube = data$obs, + outdir = outdir[var]) + + # Export skill metrics if (!is.null(skill_metrics)) { - save_metrics(skill_metrics, recipe, dict, data$hcst, outdir, - archive = archive) + save_metrics(recipe = recipe, + skill = skill_metrics, + data_cube = data$hcst, agg = agg, + outdir = outdir[var]) } if (!is.null(corr_metrics)) { - save_corr(corr_metrics, recipe, dict, data$hcst, outdir, - archive = archive) - } - + save_corr(recipe = recipe, + skill = corr_metrics, + data_cube = data$hcst, + outdir = outdir[var]) # Export probabilities onto outfile if (!is.null(probabilities)) { - save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir, - archive = archive) - save_probabilities(probabilities$probs, recipe, data$hcst, outdir, - archive = archive, type = "hcst") - if (!is.null(probabilities$probs_fcst)) { - save_probabilities(probabilities$probs_fcst, recipe, data$fcst, outdir, - archive = archive, type = "fcst") - } - } -} - -get_global_attributes <- function(recipe, archive) { - # Generates metadata of interest to add to the global attributes of the - # netCDF files. 
- parameters <- recipe$Analysis - hcst_period <- paste0(parameters$Time$hcst_start, " to ", - parameters$Time$hcst_end) - current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) - system_name <- parameters$Datasets$System$name - reference_name <- parameters$Datasets$Reference$name - - attrs <- list(reference_period = hcst_period, - institution_system = archive$System[[system_name]]$institution, - institution_reference = archive$Reference[[reference_name]]$institution, - system = system_name, - reference = reference_name, - calibration_method = parameters$Workflow$Calibration$method, - computed_on = current_time) - - return(attrs) -} - -get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { - # Generates time dimensions and the corresponding metadata. - ## TODO: Subseasonal - - switch(fcst.horizon, - "seasonal" = {time <- leadtimes; ref <- 'hours since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- 4; ref <- 'hours since '; - stdname <- ''}, - "decadal" = {time <- leadtimes; ref <- 'hours since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}) - - dim(time) <- length(time) - sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting - metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), - calendar = calendar)) - attr(time, 'variables') <- metadata - names(dim(time)) <- 'time' - - sdate <- 1:length(sdate) - dim(sdate) <- length(sdate) - metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), - collapse=", "), - units = paste0('Init date'))) - attr(sdate, 'variables') <- metadata - names(dim(sdate)) <- 'sdate' - - return(list(time=time)) -} - -get_latlon <- function(latitude, longitude) { - # Adds dimensions and metadata to lat and lon - # latitude: array containing the latitude values - # longitude: array containing the longitude values - - dim(longitude) <- length(longitude) - metadata <- list(longitude = list(units = 'degrees_east')) - attr(longitude, 'variables') <- metadata - names(dim(longitude)) <- 'longitude' - - dim(latitude) <- length(latitude) - metadata <- list(latitude = list(units = 'degrees_north')) - attr(latitude, 'variables') <- metadata - names(dim(latitude)) <- 'latitude' - - return(list(lat=latitude, lon=longitude)) - -} - -save_forecast <- function(data_cube, - recipe, - dictionary, - outdir, - agg = "global", - archive = NULL, - type = NULL) { - # Loops over the years in the s2dv_cube containing a hindcast or forecast - # and exports each year to a netCDF file. - # data_cube: s2dv_cube containing the data and metadata - # recipe: the auto-s2s recipe - # outdir: directory where the files should be saved - # agg: aggregation, "global" or "country" - - lalo <- c('longitude', 'latitude') - - variable <- data_cube$Variable$varName - var.longname <- attr(data_cube$Variable, 'variable')$long_name - global_attributes <- get_global_attributes(recipe, archive) - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - -# if (fcst.horizon == "seasonal") { -# calendar <- attr(data_cube$Variable, "variable")$dim$time$calendar -# } else { -# calendar <- attr(data_cube$Variable, "variable")$dim[[3]]$calendar -# } - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - ## Method 1: Use the first date as init_date. 
But it may be better to use - ## the real initialized date (ask users) -# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - if (type == 'hcst') { - ## PROBLEM for fcst!!!!!!!!!!!! - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else if (type == 'fcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } - } else { - if (type == 'hcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } else if (type == 'fcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - # expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) - } - - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'ensemble', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- attr(data_cube$Variable, 'variable')$units - } else { - dims <- c(lalo, 'ensemble', 'time') - var.expname <- variable - var.sdname <- var.sdname - var.units <- attr(data_cube$Variable, 'variable')$units - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date - if (fcst.horizon == 'decadal') { - ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal - ## data has been reshaped - # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') - - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - - } else { - fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] - } - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, - fcst.sdate, agg, "exp") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to 
netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) - } - } - info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) -} - - -save_observations <- function(data_cube, - recipe, - dictionary, - outdir, - agg = "global", - archive = NULL) { - # Loops over the years in the s2dv_cube containing the observations and - # exports each year to a netCDF file. - # data_cube: s2dv_cube containing the data and metadata - # recipe: the auto-s2s recipe - # outdir: directory where the files should be saved - # agg: aggregation, "global" or "country" - - lalo <- c('longitude', 'latitude') - - variable <- data_cube$Variable$varName - var.longname <- attr(data_cube$Variable, 'variable')$long_name - global_attributes <- get_global_attributes(recipe, archive) - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$Reference[[global_attributes$reference]]$calendar - - # Generate vector containing leadtimes - ## TODO: Move to a separate function? - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - ## Method 1: Use the first date as init_date. But it may be better to use - ## the real initialized date (ask users) -# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - ## expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'time'))) - } - - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- attr(data_cube$Variable, 'variable')$units - } else { - dims <- c(lalo, 'time') - var.expname <- variable - var.units <- attr(data_cube$Variable, 'variable')$units - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date. The date is computed for each year, and adapted for - # consistency with the hcst/fcst dates, so that both sets of files have - # the same name pattern. - ## Because observations are loaded differently in the daily vs. monthly - ## cases, different approaches are necessary. 
- if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - } else { - - if (store.freq == "monthly_mean") { - fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] - fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') - } else { - fcst.sdate <- as.Date(data_cube$Dates$start[i]) + save_percentiles(recipe = recipe, + percentiles = probabilities$percentiles, + data_cube = data$hcst, + outdir = outdir[var]) + save_probabilities(recipe = recipe, + probs = probabilities$probs, + data_cube = data$hcst, + outdir = outdir[var], + type = "hcst") + if (!is.null(probabilities$probs_fcst)) { + save_probabilities(recipe = recipe, + probs = probabilities$probs_fcst, + data_cube = data$fcst, + outdir = outdir[var], + type = "fcst") } } - - # Ensure the year is correct if the first leadtime goes to the next year - init_date <- as.POSIXct(init_date) - if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { - lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 - } - # Ensure that the initialization month is consistent with the hindcast - lubridate::month(fcst.sdate) <- lubridate::month(init_date) - fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, - fcst.sdate, agg, "obs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) - } - } - info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") -} - -## TODO: Place inside a function somewhere -# if (tolower(agg) == "country") { -# load(mask.path) -# grid <- europe.countries.iso -# } else { -# grid <- list(lon=attr(var.obs, 'Variables')$dat1$longitude, -# lat=attr(var.obs, 'Variables')$dat1$latitude) -# } - -save_metrics <- function(skill, - recipe, - dictionary, - data_cube, - outdir, - agg = "global", - archive = NULL) { - # This function adds metadata to the skill metrics in 'skill' - # and exports them to a netCDF file inside 'outdir'. 
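As an aside on the removed save_* functions above and below: they all encode the time axis as lead times in whole hours since the initialisation date, dividing the date difference by 3600. A toy equivalent of that calculation, using base POSIXct instead of the PCICt objects in the code (illustrative dates only):

```
# Toy illustration, not SUNSET code: lead times as hours since initialisation
init_date <- as.POSIXct("1993-11-01", tz = "UTC")
dates     <- as.POSIXct(c("1993-12-01", "1994-01-01", "1994-02-01"), tz = "UTC")
leadtimes <- as.numeric(difftime(dates, init_date, units = "hours"))
leadtimes  # 720 1464 2208
```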
- - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - attr(skill[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(skill)) { - metric <- names(skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - skill[[i]][is.na(skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'time') - } else { - sdname <- paste0(metric) #, " grid point metric") - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), - cal = calendar) - - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
- if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, - fcst.sdate, agg, "skill") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, skill), outfile) - } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") -} - -save_corr <- function(skill, - recipe, - dictionary, - data_cube, - outdir, - agg = "global", - archive = NULL) { - # This function adds metadata to the ensemble correlation in 'skill' - # and exports it to a netCDF file inside 'outdir'. - - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x) { - Reorder(x, c(lalo, 'ensemble', 'time'))}) - } - - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(global_attributes, - list(from_anomalies = "Yes")) - } else { - global_attributes <- c(global_attributes, - list(from_anomalies = "No")) - } - attr(skill[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(skill)) { - metric <- names(skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - skill[[i]][is.na(skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'ensemble', 'time') - } else { - sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) - dims <- c(lalo, 'ensemble', 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', 
init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. - if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, - fcst.sdate, agg, "corr") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, skill), outfile) - } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, - "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") -} - -save_percentiles <- function(percentiles, - recipe, - data_cube, - outdir, - agg = "global", - archive = NULL) { - # This function adds metadata to the percentiles - # and exports them to a netCDF file inside 'outdir'. 
- - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - percentiles <- lapply(percentiles, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - attr(percentiles[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(percentiles)) { - ## TODO: replace with proper standard names - percentile <- names(percentiles[i]) - long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = percentile, long_name = long_name)) - attr(percentiles[[i]], 'variables') <- metadata - names(dim(percentiles[[i]])) <- dims - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
- if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, - fcst.sdate, agg, "percentiles") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, percentiles), outfile) - } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, percentiles) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") -} - -save_probabilities <- function(probs, - recipe, - data_cube, - outdir, - agg = "global", - archive = NULL, - type = "hcst") { - # Loops over the years in the s2dv_cube containing a hindcast or forecast - # and exports the corresponding category probabilities to a netCDF file. - # probs: array containing the probability data - # recipe: the auto-s2s recipe - # data_cube: s2dv_cube containing the data and metadata - # outdir: directory where the files should be saved - # type: 'exp' (hcst and fcst) or 'obs' - # agg: aggregation, "global" or "country" - # type: 'hcst' or 'fcst' - - lalo <- c('longitude', 'latitude') - - variable <- data_cube$Variable$varName - var.longname <- attr(data_cube$Variable, 'variable')$long_name - global_attributes <- get_global_attributes(recipe, archive) - # Add anomaly computation to global attributes - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - ## TODO: Move to a separate function? 
- dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - ## expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') - if (tolower(agg) == "global") { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c(lalo, 'time'))}) - } else { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c('country', 'time'))}) - } - - ## TODO: Replace for loop with something more efficient? - for (bin in 1:length(probs_syear)) { - prob_bin <- names(probs_syear[bin]) - long_name <- paste0(prob_bin, " probability category") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = prob_bin, long_name = long_name)) - attr(probs_syear[[bin]], 'variables') <- metadata - names(dim(probs_syear[[bin]])) <- dims # is this necessary? - } - - # Add global attributes - attr(probs_syear[[1]], 'global_attrs') <- global_attributes - - # Select start date - if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - } else { - fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] - } - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, - fcst.sdate, agg, "probs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, probs_syear), outfile) - } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, probs_syear) - ArrayToNc(vars, outfile) - } - } - - info(recipe$Run$logger, - paste("#####", toupper(type), - "PROBABILITIES SAVED TO NETCDF FILE #####")) } diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R new file mode 100644 index 0000000000000000000000000000000000000000..e5e154213377be7ec82100507a414d93e091d0b6 --- /dev/null +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -0,0 +1,215 @@ +#' Scorecards load metrics from verification suite +#' +#'@description Scorecards function to load saved data files +#' +#'@param system A vector of character strings defining the names of the +#' system names following the archive.yml format from verification suite. 
+#' Accepted system names: 'ECMWF-SEAS5', 'DWD-GFCS2.1', 'CMCC-SPS3.5', +#' 'ecmwfs5','Meteo-France-System 7', 'UK-MetOffice-GloSea600', 'NCEP-CFSv2'. +#'@param reference A vector of character strings defining the names of +#' the references following the archive.yml format from verification suite +#' Pending to be test with more than one. The accepted names are: 'era5'. +#'@param var A character string following the format from +#' variable-dictionary.yml from verification suite (TO DO: multiple variables). +#' The accepted names are: 'psl', 'tas', 'sfcWind', 'prlr'. +#'@param start.year A numeric indicating the start year of the reference period +#'@param end.year A numeric indicating the end year of the reference period +#'@param start.months A vector indicating the numbers of the start months +#'@param forecast.months A vector indicating the numbers of the forecast months +#'@param input.path A character string indicating the path where metrics output +#' files from verification suite are saved (or any other compatible files) +#' +#'@return A is a list by system and reference containing an array of with +#' the following dimensions: longitude, latitude, forecast months, metrics, +#' start dates. + +#'@examples +#'\dontrun{ +#'loaded_metrics <- LoadMetrics(system = c('ECMWF-SEAS5','DWD-GFCS2.1'), +#' reference. = 'ERA5', +#' var = 'tas', +#' start.year = 1993, +#' end.year = 2016, +#' metrics = c('mean_bias', 'enscorr', 'rpss', 'crpss', 'enssprerr'), +#' start.months = sprintf("%02d", 1:12), +#' forecast.months = 1:6, +#' input.path = '/esarchive/scratch/nmilders/scorecards_data/input_data') +#'} +#'@import easyNCDF +#'@import multiApply +#'@export +LoadMetrics <- function(system, reference, var, start.year, end.year, + metrics, start.months, forecast.months, + inf_to_na = FALSE, + input.path) { + + # Initial checks + ## system + if (!is.character(system)) { + stop("Parameter 'system' must be a character vector with the system names.") + } + ## reference + if (!is.character(reference)) { + stop("Parameter 'reference' must be a character vector with the reference ", + "names.") + } + ## var + if (!is.character(var)) { + stop("Parameter 'var' must be a character vector with the var ", + "names.") + } + if (length(var) > 1) { + warning("Parameter 'var' must be of length one. 
Only the first value ", + "will be used.") + var <- var[1] + } + ## start.year + if (!is.numeric(start.year)) { + stop("Parameter 'start.year' must be a numeric value.") + } + ## end.year + if (!is.numeric(end.year)) { + stop("Parameter 'end.year' must be a numeric value.") + } + ## metrics + if (!is.character(metrics)) { + stop("Parameter 'metrics' cannot be NULL.") + } + ## start.months + if (is.character(start.months)) { + warning("Parameter 'start.months' must be a numeric vector indicating ", + "the starting months.") + start.months <- as.numeric(start.months) + } + if (!is.numeric(start.months)) { + stop("Parameter 'start.months' must be a numeric vector indicating ", + "the starting months.") + } + start.months <- sprintf("%02d", start.months) + ## Check if sdates are continuous or discrete + if (all(diff(as.numeric(start.months)) == 1)) { + consecutive_start.months <- TRUE + } else { + consecutive_start.months <- FALSE + } + ## forecast.months + if (!is.numeric(forecast.months)) { + stop("Parameter 'forecast.months' must be a numeric vector indicating ", + "the starting months.") + } + ## input.path + if (!is.character(input.path)) { + stop("Parameter 'input.path must be a character string.") + } + if (length(input.path) > 1) { + input.path <- input.path[1] + warning("Parameter 'input.path' has length greater than 1 and only the ", + "first element will be used.") + } + + ## Remove . from names + system <- gsub('.','', system, fixed = T) + reference <- gsub('.','', reference, fixed = T) + + period <- paste0(start.year, "-", end.year) + + ## Define empty list to saved data + all_metrics <- sapply(system, function(x) NULL) + ## Load data for each system + for (sys in 1:length(system)) { + ## Define empty list to saved data + by_reference <- sapply(reference, function(x) NULL) + ## Load data for each reference + for (ref in 1:length(reference)) { + ## Call function to load metrics data + met <- .Loadmetrics(input.path = input.path, # recipe$Run$output, + system = system[sys], + reference = reference[ref], + var = var, + period = period, + start.months = start.months, + forecast.months = forecast.months, + metrics = metrics) + ## Save metric data as array in reference list + by_reference[[reference[ref]]] <- met + ## Remove -Inf from crpss data if variable is precipitation + if (inf_to_na) { + by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA + } + } ## close loop on reference + ## Save reference data in list of system + all_metrics[[system[sys]]] <- by_reference + } ## close loop on system + + return(all_metrics) +} ## close function + +############################################################ + +.Loadmetrics <- function(input.path, system, reference, + var, period, start.months, + forecast.months, metrics) { + + ## Load data for each start date + allfiles <- sapply(start.months, function(m) { + paste0(input.path, "/", system, "/", var, + "/scorecards_", system, "_", reference, "_", + var, "-skill_", period, "_s", m, # mod.pressure, + ".nc")}) + allfiles_exist <- sapply(allfiles, file.exists) + + # Check dims + files_exist_by_month <- seq(1:length(allfiles))[allfiles_exist] + allfiledims <- sapply(allfiles[allfiles_exist], easyNCDF::NcReadDims) + if (length(files_exist_by_month) == 0) { + stop("No files are found.") + } + + num_dims <- numeric(dim(allfiledims)[1]) + for (i in 1:dim(allfiledims)[1]) { + if (length(unique(allfiledims[i,])) > 1) { + warning(paste0("Dimensions of system ", system," with var ", var, + " don't match.")) + } + num_dims[i] <- 
max(allfiledims[i,]) # We take the largest dimension + } + # dims: [metric, longitude, latitude, time, smonth] + # or [metric, region, time, smonth] + + # Loop for file + dim(allfiles) <- c(dat = 1, sdate = length(allfiles)) + + array_met_by_sdate <- Apply(data = allfiles, target_dims = 'dat', fun = function(x) { + if (file.exists(x)) { + res <- easyNCDF::NcToArray(x, vars_to_read = metrics, unlist = T, + drop_var_dim = T) + names(dim(res)) <- NULL + } else { + res <- array(dim = c(length(metrics), allfiledims[-1,1])) + names(dim(res)) <- NULL + } + res})$output1 + + dim(array_met_by_sdate) <- c(metric = length(metrics), allfiledims[-1,1], + sdate = length(allfiles)) + + # Attributes + # Read attributes from the first existing file + if ("region" %in% rownames(allfiledims)) { + file_for_att <- ncdf4::nc_open(allfiles[allfiles_exist[1]]) + region <- ncdf4::ncatt_get(file_for_att, 'region') + ncdf4::nc_close(file_for_att) + attributes(array_met_by_sdate)$region <- region + } else { + lon <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'longitude', + unlist = T, drop_var_dim = T) + lat <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'latitude', + unlist = T, drop_var_dim = T) + attributes(array_met_by_sdate)$lon <- lon + attributes(array_met_by_sdate)$lat <- lat + } + attributes(array_met_by_sdate)$metrics <- metrics + attributes(array_met_by_sdate)$start.months <- start.months + attributes(array_met_by_sdate)$forecast.months <- forecast.months + return(array_met_by_sdate) +} diff --git a/modules/Scorecards/R/tmp/SCPlotScorecard.R b/modules/Scorecards/R/tmp/SCPlotScorecard.R new file mode 100644 index 0000000000000000000000000000000000000000..4373057b6e0d3901abcb3fc27c09006f5156131d --- /dev/null +++ b/modules/Scorecards/R/tmp/SCPlotScorecard.R @@ -0,0 +1,444 @@ +#'Scorecards function create simple scorecards by region (types 1 & 3) +#' +#'@description This function creates a scorecard for a single system and +#'reference combination, showing data by region and forecast month. +#' +#'@param data A multidimensional array containing spatially aggregated metrics +#' data with dimensions: metric, region, sdate and ftime. +#'@param row.dim A character string indicating the dimension name to show in the +#' rows of the plot. +#'@param subrow.dim A character string indicating the dimension name to show in +#' the sub-rows of the plot. +#'@param col.dim A character string indicating the dimension name to show in the +#' columns of the plot. +#'@param subcol.dim A character string indicating the dimension name to show in +#' the sub-columns of the plot. +#'@param legend.dim A character string indicating the dimension name to use for +#' the legend. +#'@param row.names A vector of character strings with row display names. +#'@param subrow.names A vector of character strings with sub-row display names. +#'@param col.names A vector of character strings with column display names. +#'@param subcol.names A vector of character strings with sub-column display +#' names. +#'@param row.title A character string for the title of the row names. +#'@param subrow.title A character string for the title of the sub-row names. +#'@param table.title A character string for the title of the plot. +#'@param table.subtitle A character string for the sub-title of the plot. +#'@param legend.breaks A vector of numerics or a list of vectors of numerics, +#' containing the breaks for the legends. If a vector is given as input, then +#' these breaks will be repeated for each legend.dim. 
A list of vectors can be +#' given as input if the legend.dims require different breaks. This parameter +#' is required even if the legend is not plotted, to define the colors in the +#' scorecard table. +#'@param plot.legend A logical value to determine if the legend is plotted. +#'@param legend.width A numeric value to define the width of the legend bars. +#'@param legend.height A numeric value to define the height of the legend bars. +#'@param palette A vector of character strings or a list of vectors of +#' character strings containing the colors to use in the legends. If a vector +#' is given as input, then these colors will be used for each legend.dim. A +#' list of vectors can be given as input if different colors are desired for +#' the legend.dims. This parameter must be included even if the the legend is +#' not plotted, to define the colors in the scorecard table. +#'@param colorunder A character string or of vector of character strings +#' defining the colors to use for data values with are inferior to the lowest +#' breaks value. This parameter will also plot a inferior triangle in the +#' legend bar. The parameter can be set to NULL if there are no inferior values. +#' If a character string is given this color will be applied to all legend.dims. +#'@param colorsup A character string or of vector of character strings +#' defining the colors to use for data values with are superior to the highest +#' breaks value. This parameter will also plot a inferior triangle in the +#' legend bar. The parameter can be set to NULL if there are no superior values. +#' If a character string is given this color will be applied to all legend.dims. +#'@param round.decimal A numeric indicating to which decimal point the data +#' is to be displayed in the scorecard table. +#'@param font.size A numeric indicating the font size on the scorecard table. +#'@param fileout A path of the location to save the scorecard plots. +#' +#'@return An image file containing the scorecard. 
+#'@example +#'data <- array(rnorm(1000), dim = c('sdate' = 12, 'metric' = 4, 'region' = 3, +#' 'time' = 6)) +#'row.names <- c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH') +#'col.names <- c('Mean bias (K)', 'Correlation', 'RPSS','CRPSS') +#'SCPlotScorecard(data = data, row.names = row.names, col.names = col.names, +#' subcol.names = month.abb[as.numeric(1:12)], +#' row.title = 'Region', subrow.title = 'Forecast Month', +#' col.title = 'Start date', +#' table.title = "Temperature of ECMWF System 5", +#' table.subtitle = "(Ref: ERA5 1994-2016)", +#' fileout = 'test.png') +#' +#'@import kableExtra +#'@import s2dv +#'@import ClimProjDiags +#'@export +SCPlotScorecard <- function(data, row.dim = 'region', subrow.dim = 'time', + col.dim = 'metric', subcol.dim = 'sdate', + legend.dim = 'metric', row.names = NULL, + subrow.names = NULL, col.names = NULL, + subcol.names = NULL, row.title = NULL, + subrow.title = NULL, col.title = NULL, + table.title = NULL, table.subtitle = NULL, + legend.breaks = NULL, plot.legend = TRUE, + label.scale = NULL, legend.width = NULL, + legend.height = NULL, palette = NULL, + colorunder = NULL, colorsup = NULL, + round.decimal = 2, font.size = 1.1, + legend.white.space = NULL, + col1.width = NULL, col2.width = NULL, + fileout = './scorecard.png') { + # Input parameter checks + ## Check data + if (!is.array(data)) { + stop("Parameter 'data' must be a numeric array.") + } + ## Check row.dim + if (!is.character(row.dim)) { + stop("Parameter 'row.dim' must be a character string.") + } + if (!row.dim %in% names(dim(data))) { + stop("Parameter 'row.dim' is not found in 'data' dimensions.") + } + ## Check row.names + if (!is.null(row.names)) { + if (length(row.names) != as.numeric(dim(data)[row.dim])) { + stop("Parameter 'row.names' must have the same length of dimension 'row.dims'.") + } + } else { + row.names <- as.character(1:dim(data)[row.dim]) + } + ## Check subrow.dim + if (!is.character(subrow.dim)) { + stop("Parameter 'subrow.dim' must be a character string.") + } + if (!subrow.dim %in% names(dim(data))) { + stop("Parameter 'subrow.dim' is not found in 'data' dimensions.") + } + ## Check subrow.names + if (!is.null(subrow.names)) { + if (length(subrow.names) != as.numeric(dim(data)[subrow.dim])) { + stop("Parameter 'subrow.names' must have the same length of dimension 'subrow.dims'.") + } + } else { + subrow.names <- as.character(1:dim(data)[subrow.dim]) + } + ## Check col.dim + if (!is.character(col.dim)) { + stop("Parameter 'col.dim' must be a character string.") + } + if (!col.dim %in% names(dim(data))) { + stop("Parameter 'col.dim' is not found in 'data' dimensions.") + } + ## Check col.names + if (!is.null(col.names)) { + if (length(col.names) != as.numeric(dim(data)[col.dim])) { + stop("Parameter 'col.names' must have the same length of dimension 'col.dims'.") + } + } else { + col.names <- as.character(1:dim(data)[col.dim]) + } + ## Check subcol.dim + if (!is.character(subcol.dim)) { + stop("Parameter 'subcol.dim' must be a character string.") + } + if (!subcol.dim %in% names(dim(data))) { + stop("Parameter 'subcol.dim' is not found in 'data' dimensions.") + } + ## Check subcol.names + if (!is.null(subcol.names)) { + if (length(subcol.names) != as.numeric(dim(data)[subcol.dim])) { + stop("Parameter 'subcol.names' must have the same length of dimension 'subcol.dims'.") + } + } else { + subcol.names <- as.character(1:dim(data)[subcol.dim]) + } + ## Check legend.dim + if (!is.character(legend.dim)) { + stop("Parameter 'legend.dim' must be a character 
string.") + } + if (!legend.dim %in% names(dim(data))) { + stop("Parameter 'legend.dim' is not found in 'data' dimensions.") + } + ## Check row.title inputs + if (!is.null(row.title)) { + if (!is.character(row.title)) { + stop("Parameter 'row.title must be a character string.") + } + } else { + row.title <- "" + } + ## Check subrow.title + if (!is.null(subrow.title)) { + if (!is.character(subrow.title)) { + stop("Parameter 'subrow.title must be a character string.") + } + } else { + subrow.title <- "" + } + ## Check col.title + if (!is.null(col.title)) { + if (!is.character(col.title)) { + stop("Parameter 'col.title must be a character string.") + } + } else { + col.title <- "" + } + ## Check table.title + if (!is.null(table.title)) { + if (!is.character(table.title)) { + stop("Parameter 'table.title' must be a character string.") + } + } else { + table.title <- "" + } + ## Check table.subtitle + if (!is.null(table.subtitle)) { + if (!is.character(table.subtitle)) { + stop("Parameter 'table.subtitle' must be a character string.") + } + } else { + table.subtitle <- "" + } + # Check legend.breaks + if (is.vector(legend.breaks) && is.numeric(legend.breaks)) { + legend.breaks <- rep(list(legend.breaks), as.numeric(dim(data)[legend.dim])) + } else if (is.null(legend.breaks)) { + legend.breaks <- rep(list(seq(-1, 1, 0.2)), as.numeric(dim(data)[legend.dim])) + } else if (inherits(legend.breaks, 'list')) { + stopifnot(length(legend.breaks) == as.numeric(dim(data)[legend.dim])) + } else { + stop("Parameter 'legend.breaks' must be a numeric vector, a list or NULL.") + } + ## Check plot.legend + if (!inherits(plot.legend, 'logical')) { + stop("Parameter 'plot.legend' must be a logical value.") + } + ## Check label.scale + if (is.null(label.scale)) { + label.scale <- 1.4 + } else { + if (!is.numeric(label.scale) | length(label.scale) != 1) { + stop("Parameter 'label.scale' must be a numeric value of length 1.") + } + } + ## Check legend.width + if (is.null(legend.width)) { + legend.width <- length(subcol.names) * 46.5 + } else { + if (!is.numeric(legend.width) | length(legend.width) != 1) { + stop("Parameter 'legend.width' must be a numeric value of length 1.") + } + } + if (is.null(legend.height)) { + legend.height <- 50 + } else { + if (!is.numeric(legend.height) | length(legend.height) != 1) { + stop("Parameter 'legend.height' must be a numeric value of length 1.") + } + } + ## Check colour palette input + if (is.vector(palette)) { + palette <- rep(list(palette), as.numeric(dim(data)[legend.dim])) + } else if (is.null(palette)) { + palette <- rep(list(c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', + '#FEE0B6', '#FDB863', '#E08214', '#B35806', '#7F3B08')), + as.numeric(dim(data)[legend.dim])) + } else if (inherits(palette, 'list')) { + stopifnot(length(palette) == as.numeric(dim(data)[legend.dim])) + } else { + stop("Parameter 'palette' must be a numeric vector, a list or NULL.") + } + ## Check colorunder + if (is.null(colorunder)) { + colorunder <- rep("#04040E",as.numeric(dim(data)[legend.dim])) + } else if (is.character(colorunder) && length(colorunder) == 1) { + colorunder <- rep(colorunder, as.numeric(dim(data)[legend.dim])) + } else if (is.character(colorunder) && + length(colorunder) != as.numeric(dim(data)[legend.dim])) { + stop("Parameter 'colorunder' must be a numeric vector, a list or NULL.") + } + ## Check colorsup + if (is.null(colorsup)) { + colorsup <- rep("#730C04", as.numeric(dim(data)[legend.dim])) + } else if (is.character(colorsup) && length(colorsup) == 1) { + colorsup 
<- rep(colorsup,as.numeric(dim(data)[legend.dim])) + } else if (is.character(colorsup) && + length(colorsup) != as.numeric(dim(data)[legend.dim])) { + stop("Parameter 'colorsup' must be a numeric vector, a list or NULL.") + } + ## Check round.decimal + if (is.null(round.decimal)) { + round.decimal <- 2 + } else if (!is.numeric(round.decimal) | length(round.decimal) != 1) { + stop("Parameter 'round.decimal' must be a numeric value of length 1.") + } + ## Check font.size + if (is.null(font.size)) { + font.size <- 1 + } else if (!is.numeric(font.size) | length(font.size) != 1) { + stop("Parameter 'font.size' must be a numeric value of length 1.") + } + ## Check legend white space + if (is.null(legend.white.space)){ + legend.white.space <- 6 + } else { + legend.white.space <- legend.white.space + } + ## Check col1.width + if (is.null(col1.width)) { + if (max(nchar(row.names)) == 1 ) { + col1.width <- max(nchar(row.names)) + } else { + col1.width <- max(nchar(row.names))/4 + } + } else if (!is.numeric(col1.width)) { + stop("Parameter 'col1.width' must be a numeric value of length 1.") + } + ## Check col2.width + if (is.null(col2.width)) { + if (max(nchar(subrow.names)) == 1 ) { + col2.width <- max(nchar(subrow.names)) + } else { + col2.width <- max(nchar(subrow.names))/4 + } + } else if (!is.numeric(col2.width)) { + stop("Parameter 'col2.width' must be a numeric value of length 1.") + } + + + # Get dimensions of inputs + n.col.names <- length(col.names) + n.subcol.names <- length(subcol.names) + n.row.names <- length(row.names) + n.subrow.names <- length(subrow.names) + + # Define table size + n.rows <- n.row.names * n.subrow.names + n.columns <- 2 + (n.col.names * n.subcol.names) + + # Column names + row.names.table <- rep("", n.rows) + for (row in 1:n.row.names) { + row.names.table[floor(n.subrow.names/2) + (row - 1) * n.subrow.names] <- row.names[row] + } + + # Define scorecard table titles + column.titles <- c(row.title, subrow.title, rep(c(subcol.names), n.col.names)) + + # Round data + data <- round(data, round.decimal) + + # Define data inside the scorecards table + for (row in 1:n.row.names) { + table_temp <- data.frame(table_column_2 = as.character(subrow.names)) + for (col in 1:n.col.names) { + table_temp <- data.frame(table_temp, + Reorder(data = Subset(x = data, along = c(col.dim, row.dim), + indices = list(col, row), drop = 'selected'), + order = c(subrow.dim, subcol.dim))) + } + if (row == 1) { + table_data <- table_temp + } else { + table_data <- rbind(table_data, table_temp) + } + } + + # All data for plotting in table + table <- data.frame(table_column_1 = row.names.table, table_data) + table_temp <- array(unlist(table[3:n.columns]), dim = c(n.rows, n.columns - 2)) + # Define colors to show in table + table_colors <- .SCTableColors(table = table_temp, n.col = n.col.names, + n.subcol = n.subcol.names, n.row = n.row.names, + n.subrow = n.subrow.names, legend.breaks = legend.breaks, + palette = palette, colorunder = colorunder, + colorsup = colorsup) + metric.color <- table_colors$metric.color + metric.text.color <- table_colors$metric.text.color + # metric.text.bold <- table_colors$metric.text.bold + + options(stringsAsFactors = FALSE) + title <- data.frame(c1 = table.title, c2 = n.columns) + subtitle <- data.frame(c1 = table.subtitle, c2 = n.columns) + header.names <- as.data.frame(data.frame(c1 = c("", col.names), + c2 = c(2, rep(n.subcol.names, n.col.names)))) + header.names2 <- as.data.frame(data.frame(c1 = c("", paste0(rep(col.title, n.col.names))), + c2 = c(2, 
rep(n.subcol.names, n.col.names)))) + title.space <- data.frame(c1 = "\n", c2 = n.columns) + + # Hide NA values in table + options(knitr.kable.NA = '') + + # Create HTML table + table.html.part <- list() + table.html.part[[1]] <- kbl(table, escape = F, col.names = column.titles, align = rep("c", n.columns)) %>% + kable_paper("hover", full_width = F, font_size = 14 * font.size) %>% + add_header_above(header = header.names2, font_size = 16 * font.size) %>% + add_header_above(header = title.space, font_size = 10 * font.size) %>% + add_header_above(header = header.names, font_size = 20 * font.size) %>% + add_header_above(header = title.space, font_size = 10 * font.size) %>% + add_header_above(header = subtitle, font_size = 16 * font.size, align = "left") %>% + add_header_above(header = title.space, font_size = 10 * font.size) %>% + add_header_above(header = title, font_size = 22 * font.size, align = "left") + + for (i in 1:n.col.names) { + for (j in 1:n.subcol.names) { + my.background <- metric.color[, (i - 1) * n.subcol.names + j] + my.text.color <- metric.text.color[, (i - 1) * n.subcol.names + j] + # my.bold <- metric.text.bold[(i - 1) * n.subcol.names + j] + + table.html.part[[(i - 1) * n.subcol.names + j + 1]] <- + column_spec(table.html.part[[(i - 1) * n.subcol.names + j]], + 2 + n.subcol.names * (i - 1) + j, + background = my.background[1:n.rows], + color = my.text.color[1:n.rows], + bold = T) ## strsplit(toString(bold), ', ')[[1]] + } + } + + # Define position of table borders + column.borders <- NULL + for (i in 1:n.col.names) { + column.spacing <- (n.subcol.names * i) + 2 + column.borders <- c(column.borders, column.spacing) + } + + n.last.list <- n.col.names * n.subcol.names + 1 + + table.html <- column_spec(table.html.part[[n.last.list]], 1, bold = TRUE, width_min = paste0(col1.width, 'cm')) %>% + column_spec(2, bold = TRUE, width_min = paste0(col2.width, 'cm')) %>% + column_spec(3:n.columns, width_min = "1.2cm") %>% + column_spec(c(1, 2, column.borders), border_right = "2px solid black") %>% + column_spec(1, border_left = "2px solid black") %>% + column_spec(n.columns, border_right = "2px solid black") %>% + row_spec(seq(from = 0, to = n.subrow.names * n.row.names, by = n.subrow.names), + extra_css = "border-bottom: 2px solid black", hline_after = TRUE) + + if (plot.legend == TRUE) { + # Save the scorecard (without legend) + save_kable(table.html, file = paste0(fileout, '_tmpScorecard.png'), vheight = 1) + + # White space for legend + legend.white.space <- 37.8 * legend.white.space ## converting pixels to cm + + # Create and save color bar legend + scorecard_legend <- .SCLegend(legend.breaks = legend.breaks, + palette = palette, + colorunder = colorunder, + colorsup = colorsup, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + legend.white.space = legend.white.space, + fileout = fileout) + + # Add the legends below the scorecard table + system(paste0('convert -append ', fileout, '_tmpScorecard.png ', fileout, + '_tmpScorecardLegend.png ', fileout)) + # Remove temporary scorecard table + unlink(paste0(fileout, '_tmpScorecard*.png')) + } + if (plot.legend == FALSE) { + save_kable(table.html, file = fileout) + } +} diff --git a/modules/Scorecards/R/tmp/SCTransform.R b/modules/Scorecards/R/tmp/SCTransform.R new file mode 100644 index 0000000000000000000000000000000000000000..585e75766c5e157286c0e42c996134b9b6f8bfe5 --- /dev/null +++ b/modules/Scorecards/R/tmp/SCTransform.R @@ -0,0 +1,42 @@ +#' Scorecards spatially transform calculated means 
+#'
+#'@description Scorecards function to spatially transform the layout of the
+#'calculated metric means, to show 'Target Month' instead of 'Start Date'.
+#'
+#'@param data A multidimensional array of spatially aggregated data containing
+#'  the following dimensions: system, reference, metric, time, sdate, region.
+#'@param sdate_dim A character name referring to the dimension of start date
+#'  in the array aggregated_metrics.
+#'@param ftime_dim A character name referring to the dimension of forecast
+#'  time in the array aggregated_metrics.
+#'@param ncores An integer indicating the number of cores to use in parallel
+#'  computation. It is NULL by default (1 core).
+#'
+#'@examples
+#'transformed_data <- SCTransform(data = aggregated_metrics,
+#'                                sdate_dim = 'sdate',
+#'                                ftime_dim = 'time')
+#'@import multiApply
+#'@importFrom s2dv Reorder
+#'@export
+SCTransform <- function(data,
+                        sdate_dim,
+                        ftime_dim,
+                        ncores = NULL) {
+
+  output <- multiApply::Apply(data = data,
+                              target_dims = c(ftime_dim, sdate_dim),
+                              fun = .SCTransform,
+                              sdate_dim = sdate_dim, ftime_dim = ftime_dim,
+                              ncores = ncores)$output1
+
+  return(Reorder(data = output, order = names(dim(data))))
+}
+
+.SCTransform <- function(data, sdate_dim, ftime_dim) {
+  output <- data
+  n_sdates <- dim(data)[sdate_dim]
+  for (i in 2:dim(data)[ftime_dim]) {
+    # Cyclic shift so that each column holds the start date verifying in that target month
+    output[i, ] <- data[i, c((n_sdates - i + 2):n_sdates, 1:(n_sdates - i + 1))]
+  }
+  return(output)
+}
 diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R
new file mode 100644
index 0000000000000000000000000000000000000000..89f1df44d8302759caa6d3529e32a6fbda26dbca
--- /dev/null
+++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R
@@ -0,0 +1,360 @@
+#'Function to create all multi system/reference scorecards
+#'
+#'@description Scorecards function to create scorecard tables for multiple systems
+#'  and references (types 9 to 12).
+#'@param data An array of spatially aggregated metrics containing the
+#'  following dimensions: system, reference, metric, time, sdate, region.
+#'@param system A vector of character strings defining the systems following the
+#'  archive.yml format from verification suite.
+#'@param reference A vector of character strings defining the references
+#'  following the archive.yml format from verification suite.
+#'@param var A character string following the format from
+#'  variable-dictionary.yml from verification suite.
+#'@param start.year A numeric indicating the start year of the reference period.
+#'@param end.year A numeric indicating the end year of the reference period.
+#'@param start.months A vector of character strings indicating the start months.
+#'@param forecast.months A vector of numerics indicating the forecast months.
+#'@param region.names A vector of character strings containing names of the
+#'  regions corresponding to the input data.
+#'@param metrics A vector of character strings containing the metrics.
+#'@param table.label A character string containing additional information to
+#'  include in the scorecard title.
+#'@param fileout.label A character string containing additional information to
+#'  include in the output png file when saving the scorecard.
+#'@param output.path A path of the location to save the scorecard plots.
+#'
+#'@return
+#' This function returns 4 scorecard images for each region requested; the
+#' images are saved in the directory output.path. 
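Before the ScorecardsMulti code continues, a toy numeric illustration of the cyclic shift applied by .SCTransform() above, using a made-up 3 x 4 (forecast time x start date) matrix whose values encode "ftime.sdate":

```
# Toy example only: replicate the .SCTransform indexing on a plain matrix
data <- matrix(c(1.1, 1.2, 1.3, 1.4,
                 2.1, 2.2, 2.3, 2.4,
                 3.1, 3.2, 3.3, 3.4), nrow = 3, byrow = TRUE)
output <- data
n_sdates <- ncol(data)
for (i in 2:nrow(data)) {
  output[i, ] <- data[i, c((n_sdates - i + 2):n_sdates, 1:(n_sdates - i + 1))]
}
output
#      [,1] [,2] [,3] [,4]
# [1,]  1.1  1.2  1.3  1.4
# [2,]  2.4  2.1  2.2  2.3
# [3,]  3.3  3.4  3.1  3.2
```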
+#'
+#'@examples
+#' scorecard_multi <- ScorecardsMulti(data = aggregated_metrics,
+#'                                    system = c('ECMWF-SEAS5','DWD-GFCS2.1'),
+#'                                    reference = 'ERA5',
+#'                                    var = 'tas',
+#'                                    start.year = 1993,
+#'                                    end.year = 2016,
+#'                                    start.months = 1:12,
+#'                                    forecast.months = 1:6,
+#'                                    region.names = c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH'),
+#'                                    metrics = c('mean_bias', 'enscorr', 'rpss','crpss', 'enssprerr'),
+#'                                    table.label = '(Interpolation = to system, Aggregation level = skill, Cross-validation = terciles)',
+#'                                    fileout.label = '_crossval-terciles_agg-skill',
+#'                                    output.path = '/esarchive/scratch/nmilders/scorecards_images/testing')
+
+
+ScorecardsMulti <- function(data,
+                            system,
+                            reference,
+                            var,
+                            start.year,
+                            end.year,
+                            start.months,
+                            forecast.months,
+                            region.names,
+                            metrics,
+                            table.label,
+                            fileout.label,
+                            output.path){
+
+  ## Checks to apply:
+  #  first dimension in aggregated_metrics is system and second dimension is reference
+  #  either multi-system and one reference, or multi-reference and one system
+
+  ## Initial checks
+  if (is.null(table.label)){
+    table.label <- ""
+  }
+  if (is.null(fileout.label)){
+    fileout.label <- ""
+  }
+
+  ## Make sure data is in the correct order for using in functions:
+  data_order <- c('system','reference','metric','time','sdate','region')
+  data <- Reorder(data, data_order)
+
+  ## Identify metrics loaded
+  metrics_loaded <- attributes(data)$metrics
+
+  ## Select only the metrics to visualize from data
+  input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded))
+  attributes(input_data)$metrics <- metrics
+
+  ## Transform data for scorecards by forecast month (types 11 & 12)
+  transformed_data <- SCTransform(data = input_data,
+                                  sdate_dim = 'sdate',
+                                  ftime_dim = 'time')
+
+  ## Load configuration files
+  sys_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/conf/archive.yml")$esarchive
+  var_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/csscorecards/inst/config/variable-dictionary.yml")$vars
+
+  ## Get scorecards table display names from configuration files
+  var.name <- var_dict[[var]]$long_name
+  var.units <- var_dict[[var]]$units
+
+  system.name <- NULL
+  reference.name <- NULL
+
+  for(sys in 1:length(system)){
+    system.name1 <- sys_dict$System[[system[sys]]]$name
+    system.name <- c(system.name, system.name1)
+  }
+  for(ref in 1:length(reference)){
+    reference.name1 <- sys_dict$Reference[[reference[ref]]]$name
+    reference.name <- c(reference.name, reference.name1)
+  }
+
+  ## Get metric long names
+  metric.names.list <- .met_names(metrics, var.units)
+
+  ## Format the metric names as character instead of list
+  for(met in metrics){
+    if(met == metrics[1]){
+      metric.names <- metric.names.list[[met]]
+    } else {
+      metric.names <- c(metric.names, metric.names.list[[met]])
+    }
+  }
+
+  ## Define parameters depending on Multi-system or Multi-reference
+  if(length(system) > 1 && length(reference) == 1){
+    model <- 'system'
+    table.model.name <- 'System'
+    model.name <- system.name
+    eval.label <- 'Ref'
+    eval.name <- reference.name
+    eval.filename <- reference
+  } else if(length(system) == 1 && length(reference) > 1){
+    model <- 'reference'
+    table.model.name <- 'Reference'
+    model.name <- reference.name
+    eval.label <- 'Sys'
+    eval.name <- system.name
+    eval.filename <- system
+  } else {stop('Not multi system or multi reference')}
+
+  ## Define table colors
+  palette <- c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', '#FEE0B6', '#FDB863', '#E08214', 
'#B35806', '#7F3B08') + colorunder <- "#04040E" + colorsup <- "#730C04" + + ## Legend lower limit color + legend.col.inf <- .legend_col_inf(metrics, colorunder) + legend.col.inf <- legend.col.inf[metrics] + + ## Legend upper limit color + legend.col.sup <- .legend_col_sup(metrics, colorsup) + legend.col.sup <- legend.col.sup[metrics] + + ## Legend inputs + plot.legend = TRUE + label.scale = 1.4 + legend.width = 555 + legend.height = 50 + + ## Data display inputs + round.decimal = 2 + font.size = 1.1 + + legend.white.space <- col1.width <- col2.width <- NULL ## Use default values of function + + ## Loop over region + for(reg in 1:length(region.names)){ + + breaks_bias <- NULL + + ## Find position of mean bias metric to calculate breaks + if ('mean_bias' %in% metrics) { + pos_bias <- which(metrics == 'mean_bias') + if(var == 'psl'){ + data[,,pos_bias,,,] <- data[,,pos_bias,,,]/100 ## temporary + } + breaks_bias <- .SCBiasBreaks(Subset(data, along = c('metric','region'), + indices = list(pos_bias,reg))) + } + + ## Define breaks for each metric based of metric position: + legend.breaks <- .met_breaks(metrics, breaks_bias) + + ## Define scorecard titles + table.title <- paste0(var.name, ". Region: ", region.names[reg], " ", table.label) + table.subtitle <- paste0("(", eval.label, ": ", eval.name, " ", start.year, "-", end.year, ")") + + + #### Scorecard_type 9 #### + ## (no transformation or reorder) + fileout <- .Filename(model = model, eval.name = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 9, + region = sub(" ", "-", region.names[reg]), + fileout.label = fileout.label, output.path = output.path) + if(model == 'system'){ + data_sc_9 <- Subset(input_data, c('reference','region'), list(1, reg), drop = 'selected') + } else if(model == 'reference'){ + data_sc_9 <- Subset(input_data, c('system','region'), list(1, reg), drop = 'selected') + } + SCPlotScorecard(data = data_sc_9, + row.dim = model, + subrow.dim = 'time', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = model.name, + subrow.names = forecast.months, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = table.model.name, + subrow.title = 'Forecast Month', + col.title = 'Start date', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = 4, + col2.width = col2.width, + fileout = fileout) + + + #### Scorecard_type 10 #### + ## (reorder only) + fileout <- .Filename(model = model, eval.name = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 10, + region = sub(" ", "-", region.names[reg]), + fileout.label = fileout.label, output.path = output.path) + new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + if(model == 'system'){ + data_sc_10 <- Subset(Reorder(input_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') + } else if(model == 'reference'){ + data_sc_10 <- Subset(Reorder(input_data, new_order), c('system','region'), list(1, reg), drop = 'selected') + } + SCPlotScorecard(data = data_sc_10, + row.dim = 'time', + subrow.dim = model, + col.dim = 'metric', + subcol.dim = 
'sdate', + legend.dim = 'metric', + row.names = forecast.months, + subrow.names = model.name, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Forecast month', + subrow.title = table.model.name, + col.title = 'Start date', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = 4, + fileout = fileout) + + + #### Scorecard_type 11 #### + ## (transformation only) + fileout <- .Filename(model = model, eval.name = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 11, + region = sub(" ", "-", region.names[reg]), + fileout.label = fileout.label, output.path = output.path) + if(model == 'system'){ + data_sc_11 <- Subset(transformed_data, c('reference','region'), list(1, reg), drop = 'selected') + } else if(model == 'reference'){ + data_sc_11 <- Subset(transformed_data, c('system','region'), list(1, reg), drop = 'selected') + } + SCPlotScorecard(data = data_sc_11, + row.dim = model, + subrow.dim = 'time', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = model.name, + subrow.names = forecast.months, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = table.model.name, + subrow.title = 'Forecast Month', + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = 4, + col2.width = col2.width, + fileout = fileout) + + + #### Scorecard_type 12 #### + ## (transformation and reorder) + fileout <- .Filename(model = model, eval.name = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 12, + region = sub(" ", "-", region.names[reg]), + fileout.label = fileout.label, output.path = output.path) + new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + if(model == 'system'){ + data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') + } else if(model == 'reference'){ + data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('system','region'), list(1, reg), drop = 'selected') + } + SCPlotScorecard(data = data_sc_12, + row.dim = 'time', + subrow.dim = model, + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = forecast.months, + subrow.names = model.name, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Forecast Month', + subrow.title = table.model.name, + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + 
round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = 4, + fileout = fileout) + + } ## close loop on region + + print("All multi scorecard plots created") + +} ## close function + diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R new file mode 100644 index 0000000000000000000000000000000000000000..56f08204ad5443b94bbc281a31f3c75cf5cb7614 --- /dev/null +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -0,0 +1,343 @@ +#'Function to create all single system/reference scorecards +#' +#'@description Scorecards function to create scorecard tables for one system and +#' one reference combination (types 1 to 4). +#'@param input_data is an array of spatially aggregated metrics containing the +#' following dimensions; system, reference, metric, time, sdate, region. +#'@param system a vector of character strings defining the systems following the +#' archive.yml format from verification suite +#'@param reference a vector of character strings defining the references +#' following the archive.yml format from verification suite +#'@param var a character string following the format from +#' variable-dictionary.yml from verification suite +#'@param start.year a numeric indicating the start year of the reference period +#'@param end.year a numeric indicating the end year of the reference period +#'@param start.date a vector of character strings indicating the start months +#'@param forecast.month a vector of numeric indicating the forecast months +#'@param region.names a vector of character strings containing names of the +#' regions corresponding to the input data +#'@param metrics a vector of character strings containing the metrics. +#'@param table.label a character string containing additional information to +#' include in the scorecard title +#'@param fileout.label a character string containing additional information to +#' include in the output png file when saving the scorecard. 
+#'@param output.path a path of the location to save the scorecard plots
+#'
+#'@return
+#' This function returns 4 scorecard images, saved in the directory output.path
+#'@examples
+#' scorecard_single <- ScorecardsSingle(data = aggregated_metrics,
+#'                                      system = c('ECMWF-SEAS5','DWD-GFCS2.1'),
+#'                                      reference = 'ERA5',
+#'                                      var = 'tas',
+#'                                      start.year = 1993,
+#'                                      end.year = 2016,
+#'                                      start.months = 1:12,
+#'                                      forecast.months = 1:6,
+#'                                      region.names = c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH'),
+#'                                      metrics = c('mean_bias', 'enscorr', 'rpss','crpss', 'enssprerr'),
+#'                                      table.label = '(Interpolation = to system, Aggregation level = skill, Cross-validation = terciles)',
+#'                                      fileout.label = '_crossval-terciles_agg-skill',
+#'                                      output.path = '/esarchive/scratch/nmilders/scorecards_images/test'
+#'                                      )
+#'@export
+ScorecardsSingle <- function(data, system, reference, var, start.year, end.year,
+                             start.months, forecast.months, region.names,
+                             metrics, legend.breaks = NULL,
+                             table.label = NULL, fileout.label = NULL,
+                             legend.white.space = NULL,
+                             col1.width = NULL, col2.width = NULL,
+                             output.path){
+
+  ## Checks to apply:
+  # First dimension in aggregated_metrics is system and second dimension is reference
+  # To allow 1 region - if region = 1 --> only scorecards 1 & 3 need to be plotted
+  # If any dimension of input data is 1, make sure dimension is still present in array
+
+  ## Initial checks
+  # data
+  if (!is.array(data)) {
+    stop("Parameter 'data' must be an array with named dimensions.")
+  }
+  if (is.null(names(dim(data)))) {
+    stop("Parameter 'data' must have dimension names.")
+  }
+  if (!all(c('system','reference','metric','time','sdate','region') %in%
+           names(dim(data)))) {
+    stop("Dimension names of 'data' must be: 'system','reference','metric',
+         'time','sdate','region'.")
+  }
+  if (is.null(table.label)){
+    table.label <- ""
+  }
+  if (is.null(fileout.label)){
+    fileout.label <- ""
+  }
+
+  ## Make sure input_data is in correct order for using in functions:
+  data_order <- c('system', 'reference', 'metric', 'time', 'sdate', 'region')
+  data <- Reorder(data, data_order)
+
+  ## Identify metrics loaded
+  metrics_loaded <- attributes(data)$metrics
+
+  ## Select only the metrics to visualize from data
+  input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded))
+  attributes(input_data)$metrics <- metrics
+
+  ## Transform data for scorecards by forecast month (types 3 & 4)
+  transformed_data <- SCTransform(data = input_data,
+                                  sdate_dim = 'sdate',
+                                  ftime_dim = 'time')
+
+  ## Load configuration files
+  sys_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/conf/archive.yml")$esarchive
+  var_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/csscorecards/inst/config/variable-dictionary.yml")$vars
+
+  ## Get scorecards table display names from configuration files
+  var.name <- var_dict[[var]]$long_name
+  var.units <- var_dict[[var]]$units
+
+  ## Get metric long names
+  metric.names.list <- .met_names(metrics, var.units)
+
+  ## Format the metric names as character instead of list
+  for(met in metrics){
+    if(met == metrics[1]){
+      metric.names <- metric.names.list[[met]]
+    } else {
+      metric.names <- c(metric.names, metric.names.list[[met]])
+    }
+  }
+
+  ## Define table colors
+  palette <- c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', '#FEE0B6', '#FDB863', '#E08214', '#B35806', '#7F3B08')
+  colorunder <- "#04040E"
+
colorsup <- "#730C04" + + ## Legend lower limit color + legend.col.inf <- .legend_col_inf(metrics, colorunder) + legend.col.inf <- legend.col.inf[metrics] + + ## Legend upper limit color + legend.col.sup <- .legend_col_sup(metrics, colorsup) + legend.col.sup <- legend.col.sup[metrics] + + ## Legend inputs + plot.legend = TRUE + label.scale = 1.4 + legend.width = 555 + legend.height = 50 + + ## Data display inputs + round.decimal = 2 + font.size = 1.1 + + ## Loop over system and reference for each scorecard plot + for (sys in 1:dim(input_data)['system']) { + for (ref in 1:dim(input_data)['reference']) { + + ## TO DO: Apply check to each scorecard function + ## check dimension 'metric' exists: + if (!("metric" %in% names(dim(input_data)))) { + dim(input_data) <- c(metric = 1, dim(input_data)) + } + + ## Find position of mean bias metric to calculate breaks + breaks_bias <- NULL + if ('mean_bias' %in% metrics){ + stopifnot(identical(names(dim(Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('metric','time','sdate','region'))) + temp_data <- Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected') + pos_bias <- which(metrics == 'mean_bias') + if(var == 'psl'){ + temp_data[pos_bias,,,] <- temp_data[pos_bias,,,]/100 + } + breaks_bias <- .SCBiasBreaks(Subset(temp_data, along = 'metric', + indices = pos_bias)) + } + + ## Define breaks for each metric based of metric position: + legend.breaks <- .met_breaks(metrics, breaks_bias) + + ## Put breaks in same order as metrics + legend.breaks <- legend.breaks[metrics] + + ## Get scorecards table display names from configuration files + system.name <- sys_dict$System[[system[sys]]]$name + reference.name <- sys_dict$Reference[[reference[ref]]]$name + + ## Define scorecard titles + table.title <- paste0(var.name, " of ", system.name, " ", table.label) + table.subtitle <- paste0("(Ref: ", reference.name, " ", start.year, "-", end.year, ")") + + ############################################################################# + + #### Scorecard_type 1 #### + ## (no transformation or reorder) + fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, + start.year = start.year, end.year = end.year, scorecard.type = 1, + fileout.label = fileout.label, output.path = output.path) + data_sc_1 <- Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected') + SCPlotScorecard(data = data_sc_1, + row.dim = 'region', + subrow.dim = 'time', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = region.names, + subrow.names = forecast.months, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Region', + subrow.title = 'Forecast Month', + col.title = 'Start date', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + fileout = fileout) + + + #### Scorecard_type 2 #### + ## (reorder only) + ## Scorecard type 2 is same as type 1 for only one region, therefore is + ## only plotted if more that one region is requested + if(dim(input_data)['region'] > 1) { + fileout <- .Filename(system = system[sys], reference 
= reference[ref], var = var, + start.year = start.year, end.year = end.year, scorecard.type = 2, + fileout.label = fileout.label, output.path = output.path) + new_order <- c('metric', 'region', 'sdate', 'time') + data_sc_2 <- Reorder(Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + SCPlotScorecard(data = data_sc_2, + row.dim = 'time', + subrow.dim = 'region', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = forecast.months, + subrow.names = region.names, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Forecast Month', + subrow.title = 'Region', + col.title = 'Start date', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + fileout = fileout) + } ## close if + + + #### Scorecard_type 3 #### + ## (transformation only) + fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, + start.year = start.year, end.year = end.year, scorecard.type = 3, + fileout.label = fileout.label, output.path = output.path) + data_sc_3 <- Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected') + SCPlotScorecard(data = data_sc_3, + row.dim = 'region', + subrow.dim = 'time', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = region.names, + subrow.names = forecast.months, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Region', + subrow.title = 'Forecast Month', + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + fileout = fileout) + + + #### Scorecard_type 4 #### + ## (transformation and reorder) + ## Scorecard type 4 is same as type 3 for only one region, therefore is + ## only plotted if more that one region is requested + if(dim(input_data)['region'] > 1) { + fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, + start.year = start.year, end.year = end.year, scorecard.type = 4, + fileout.label = fileout.label, output.path = output.path) + new_order <- c('metric', 'region', 'sdate', 'time') + data_sc_4 <- Reorder(Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + SCPlotScorecard(data = data_sc_4, + row.dim = 'time', + subrow.dim = 'region', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = forecast.months, + subrow.names = region.names, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Forecast Month', + subrow.title = 'Region', + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = 
plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + fileout = fileout) + } ## close if + + } ## close loop on ref + } ## close loop on sys + + print("All single system scorecard plots created") + +} ## close function + + + diff --git a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R new file mode 100644 index 0000000000000000000000000000000000000000..a19b1651140a5a3bcc174cc39f81117ac937d361 --- /dev/null +++ b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R @@ -0,0 +1,350 @@ +#'Function to create all multi system/reference scorecards +#' +#'@description Scorecards function to create scorecard tables for multiple systems +#' and references (types 9 to 12). +#'@param input_data is an array of spatially aggregated metrics containing the +#' following dimensions; system, reference, metric, time, sdate, region. +#'@param system a vector of character strings defining the systems following the +#' archive.yml format from verification suite +#'@param reference a vector of character strings defining the references +#' following the archive.yml format from verification suite +#'@param var a character string following the format from +#' variable-dictionary.yml from verification suite +#'@param start.year a numeric indicating the start year of the reference period +#'@param end.year a numeric indicating the end year of the reference period +#'@param start.date a vector of character strings indicating the start months +#'@param forecast.month a vector of numeric indicating the forecast months +#'@param region.names a vector of character strings containing names of the +#' regions corresponding to the input data +#'@param metrics a vector of character strings containing the metrics. +#'@param table.label a character string containing additional information to +#' include in the scorecard title +#'@param fileout.label a character string containing additional information to +#' include in the output png file when saving the scorecard. +#'@param output.path a path of the location to save the scorecard plots. +#' +#'@return +#' This function returns 4 scorecard images for each region requested, the +#' images are saved in the directory output.path. 
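The difference scorecards compare a pair of systems (or a pair of references) metric by metric, after folding the negatively oriented quantities into distances (mean bias towards zero, spread-to-error ratio towards one). Below is a minimal sketch of that comparison on a toy aggregated-metrics array; the array contents, metric list and dimension sizes are invented for illustration and are not the module's API.

```r
# Sketch of the system-difference step (toy data; mirrors the logic of the function below).
library(ClimProjDiags)  # for Subset()

metrics <- c('mean_bias', 'rpss', 'enssprerr')
agg <- array(rnorm(24),
             dim = c(system = 2, reference = 1, metric = 3,
                     time = 2, sdate = 2, region = 1))

ds1 <- Subset(agg, c('system', 'reference'), list(1, 1), drop = 'selected')
ds2 <- Subset(agg, c('system', 'reference'), list(2, 1), drop = 'selected')

# Fold negatively oriented metrics into distances before differencing:
pos_bias <- which(metrics == 'mean_bias')
pos_spr  <- which(metrics == 'enssprerr')
ds1[pos_bias, , , ] <- abs(ds1[pos_bias, , , ])
ds2[pos_bias, , , ] <- abs(ds2[pos_bias, , , ])
ds1[pos_spr, , , ]  <- abs(1 - ds1[pos_spr, , , ])
ds2[pos_spr, , , ]  <- abs(1 - ds2[pos_spr, , , ])

diff_data <- ds1 - ds2  # dimensions: metric x time x sdate x region
```

The sign of the resulting differences has to be read per metric, since some scores are positively and some negatively oriented.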
+ +#'@example +#' scorecard_diff <- ScorecardsDiff(data = aggregated_metrics, +#' system.name = c('ECMWF-SEAS5','DWD-GFCS2.1'), +#' reference.name = 'ERA5', +#' var = 'tas', +#' start.year = 1993, +#' end.year = 2016, +#' start.months = 1:12, +#' forecast.months = 1:6, +#' region.names = c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH') +#' metrics = c('mean_bias', 'enscorr', 'rpss','crpss', 'enssprerr'), +#' table.label = '(Interpolation = to system, Aggregation level = skill, Cross-validation = terciles)', +#' fileout.label = '_crossval-terciles_agg-skill', +#' output.path = '/esarchive/scratch/nmilders/scorecards_images/testing' +#' ) + + +ScorecardsSystemDiff <- function(data, + system, + reference, + var, + start.year, + end.year, + start.months, + forecast.months, + region.names, + metrics, + table.label = NULL, + fileout.label = NULL, + legend.white.space = NULL, + col1.width = NULL, + col2.width = NULL, + output.path){ + + ## Checks to apply: + # first dimension in aggregated_metrics is system and second dimension is reference + # either multi-system and one reference, or multi-reference and one system + + ## Initial checks + if (is.null(table.label)){ + table.label <- "" + } + if (is.null(fileout.label)){ + fileout.label <- "" + } + + ## Make sure input_data is in correct order for using in functions: + data_order <- c('system','reference','metric','time','sdate','region') + data <- Reorder(data, data_order) + + ## Identify metrics loaded + metrics_loaded <- attributes(data)$metrics + + ## Select only the metrics to visualize from data + input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(input_data)$metrics <- metrics + + ## Calculate difference between two systems/references + if(length(system) == 2 && length(reference) == 1){ + dataset1 <- Subset(input_data, c('system', 'reference'), list(1, 1), drop = 'selected') + dataset2 <- Subset(input_data, c('system', 'reference'), list(2, 1), drop = 'selected') + } + else if(length(system) == 1 && length(reference) == 2){ + dataset1 <- Subset(input_data, c('system', 'reference'), list(1, 1), drop = 'selected') + dataset2 <- Subset(input_data, c('system', 'reference'), list(1, 2), drop = 'selected') + } + + ## Calculate difference of mean_bias from 0 for each dataset + if ('mean_bias' %in% metrics){ + pos_bias <- which(metrics == 'mean_bias') + dataset1[pos_bias,,,] <- abs(dataset1[pos_bias,,,]) + dataset2[pos_bias,,,] <- abs(dataset2[pos_bias,,,]) + } + + ## Calculate difference of enssprerr from 1 for each dataset + if ('enssprerr' %in% metrics){ + pos_enssprerr <- which(metrics == 'enssprerr') + dataset1[pos_enssprerr,,,] <- abs(1-dataset1[pos_enssprerr,,,]) + dataset2[pos_enssprerr,,,] <- abs(1-dataset2[pos_enssprerr,,,]) + } + + diff_data <- dataset1 - dataset2 + + # Transform data for scorecards by forecast month (types 3 & 4) + transformed_data <- SCTransform(data = diff_data, + sdate_dim = 'sdate', + ftime_dim = 'time') + + ## Load configuration files + sys_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/conf/archive.yml")$esarchive + var_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/csscorecards/inst/config/variable-dictionary.yml")$vars + + ## Get scorecards table display names from configuration files + var.name <- var_dict[[var]]$long_name + var.units <- var_dict[[var]]$units + + system.name <- NULL + reference.name <- NULL + + for(sys in 1:length(system)){ + system.name1 <- sys_dict$System[[system[sys]]]$name + system.name <- 
c(system.name, system.name1) + } + for(ref in 1:length(reference)){ + reference.name1 <- sys_dict$Reference[[reference[ref]]]$name + reference.name <- c(reference.name, reference.name1) + } + + ## Get metric long names + metric.names.list <- .met_names(metrics, var.units) + + ## format the metric names as character instead of list + for(met in metrics){ + if(met == metrics[1]){ + metric.names <- metric.names.list[[met]] + } else { + metric.names <- c(metric.names, metric.names.list[[met]]) + } + } + + ## Define parameters depending on system compariosn or reference comparison + if(length(system) > 1 && length(reference) == 1){ + comparison <- system + model <- 'system' + table.model.name <- 'System' + model.name <- system.name + eval.label <- 'Ref' + eval.name <- reference.name + eval.filename <- reference + } else if(length(system) == 1 && length(reference) > 1){ + comparison <- reference + model <- 'reference' + table.model.name <- 'Reference' + model.name <- reference.name + eval.label <- 'Sys' + eval.name <- system.name + eval.filename <- system + } else {stop('Not multi system or multi reference')} + + ## Define table colors + palette <- c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', '#FEE0B6', '#FDB863', '#E08214', '#B35806', '#7F3B08') + colorunder <- "#04040E" + colorsup <- "#730C04" + + ## Legend inputs + plot.legend = TRUE + label.scale = 1.4 + legend.width = 555 + legend.height = 50 + + ## Data display inputs + round.decimal = 2 + font.size = 1.1 + + ## Define breaks for each metric based of metric position: + legend.breaks <- c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5) + + ## Define scorecard titles + table.title <- paste0(var.name, " - Difference: ", model.name[1], " - ", model.name[2], " ",table.label) + table.subtitle <- paste0("(", eval.label, ": ", eval.name, " ", start.year, "-", end.year, ")") + + #### Scorecard_type 1 #### + ## (no transformation or reorder) + + fileout <- .Filename(system = paste0("diff_",comparison[1],"_",comparison[2]), reference = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 1, + fileout.label = fileout.label, output.path = output.path) + SCPlotScorecard(data = diff_data, + row.dim = 'region', + subrow.dim = 'time', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = region.names, + subrow.names = forecast.months, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Region', + subrow.title = 'Forecast Month', + col.title = 'Start date', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = colorunder, + colorsup = colorsup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + fileout = fileout) + + + #### Scorecard_type 2 #### + ## (reorder only) + ## Scorecard type 2 is same as type 1 for only one region, therefore is + ## only plotted if more that one region is requested + if(dim(input_data)['region'] > 1) { + fileout <- .Filename(system = paste0("diff_",comparison[1],"_",comparison[2]), reference = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 2, + fileout.label = fileout.label, output.path = output.path) + new_order <- c('metric', 'region', 'sdate', 
'time') + data_sc_2 <- Reorder(diff_data, new_order) + SCPlotScorecard(data = data_sc_2, + row.dim = 'time', + subrow.dim = 'region', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = forecast.months, + subrow.names = region.names, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Forecast Month', + subrow.title = 'Region', + col.title = 'Start date', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = colorunder, + colorsup = colorsup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + fileout = fileout) + } ## close if + + + #### Scorecard_type 3 #### + ## (transformation only) + fileout <- .Filename(system = paste0("diff_",comparison[1],"_",comparison[2]), reference = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 3, + fileout.label = fileout.label, output.path = output.path) + SCPlotScorecard(data = transformed_data, + row.dim = 'region', + subrow.dim = 'time', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = region.names, + subrow.names = forecast.months, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Region', + subrow.title = 'Forecast Month', + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = colorunder, + colorsup = colorsup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + fileout = fileout) + + + #### Scorecard_type 4 #### + ## (transformation and reorder) + ## Scorecard type 4 is same as type 3 for only one region, therefore is + ## only plotted if more that one region is requested + if(dim(input_data)['region'] > 1) { + fileout <- .Filename(system = paste0("diff_",comparison[1],"_",comparison[2]), reference = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 4, + fileout.label = fileout.label, output.path = output.path) + new_order <- c('metric', 'region', 'sdate', 'time') + data_sc_4 <- Reorder(transformed_data, new_order) + SCPlotScorecard(data = data_sc_4, + row.dim = 'time', + subrow.dim = 'region', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = forecast.months, + subrow.names = region.names, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Forecast Month', + subrow.title = 'Region', + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = colorunder, + colorsup = colorsup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + fileout = fileout) + } ## close if + +print("All 
system difference scorecard plots created") + +} ## close function + diff --git a/modules/Scorecards/R/tmp/Utils.R b/modules/Scorecards/R/tmp/Utils.R new file mode 100644 index 0000000000000000000000000000000000000000..6ba49e8c5c887c938902e30d200e49bff1af142e --- /dev/null +++ b/modules/Scorecards/R/tmp/Utils.R @@ -0,0 +1,361 @@ +############### FUNCTIONS FOR SCORECARDS ################ + + +## Define metric names + +.met_names <- function(metrics, var.units) { # metrics is a object with the names of the metrics to be displayed + result <- list() + if ('mean_bias' %in% metrics) { + result <- append(result, list('mean_bias' = paste0('Mean bias (', var.units,')'))) + } + if ('enscorr' %in% metrics) { + result <- append(result, list('enscorr' = 'Correlation')) + } + if ('rps' %in% metrics ) { + result <- append(result, list('rps' = 'RPS')) + } + if ('frps' %in% metrics ) { + result <- append(result, list('frps' = 'Fair RPS')) + } + if ('rpss' %in% metrics) { + result <- append(result, list('rpss' = 'RPSS')) + } + if ('rpss_score_aggr' %in% metrics) { + result <- append(result, list('rpss_score_aggr' = 'RPSS')) + } + if ('frpss' %in% metrics) { + result <- append(result, list('frpss' = 'Fair RPSS')) + } + if ('crps' %in% metrics) { + result <- append(result, list('crps' = 'CRPS')) + } + if ('crpss' %in% metrics) { + result <- append(result, list('crpss' = 'CRPSS')) + } + if ('crpss_score_aggr' %in% metrics) { + result <- append(result, list('crpss_score_aggr' = 'CRPSS')) + } + if ('bss10' %in% metrics) { + result <- append(result, list('bss10' = 'Brier skill score 10%')) + } + if ('bss90' %in% metrics) { + result <- append(result, list('bss90' = 'Brier skill score 90%')) + } + if ('enssprerr' %in% metrics ) { + result <- append(result, list('enssprerr' = 'Spread-to-error ratio')) + } + if ('rmsss' %in% metrics ) { + result <- append(result, list('rmsss' = 'RMSSS')) + } + return(result) +} + + + +## Define metrics breaks for each input metric + +.met_breaks <- function(metrics, breaks_bias) { # metrics is a object with the names of the metrics to be displayed + result <- list() + if ('mean_bias' %in% metrics) { + result <- append(result, list('mean_bias' = breaks_bias)) + } + if ('enscorr' %in% metrics) { + result <- append(result, list('enscorr' = c(-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1))) + } + if ('rps' %in% metrics ) { + result <- append(result, list('rps' = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.8, 1))) + } + if ('frps' %in% metrics ) { + result <- append(result, list('frps' = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5))) + } + if ('rpss' %in% metrics) { + result <- append(result, list('rpss' = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5))) + } + if ('rpss_score_aggr' %in% metrics) { + result <- append(result, list('rpss_score_aggr' = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5))) + } + if ('frpss' %in% metrics) { + result <- append(result, list('frpss' = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5))) + } + if ('crps' %in% metrics) { + result <- append(result, list('crps' = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5))) + } + if ('crpss' %in% metrics) { + result <- append(result, list('crpss' = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5))) + } + if ('crpss_score_aggr' %in% metrics) { + result <- append(result, list('crpss_score_aggr' = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5))) + } + if ('bss10' %in% metrics) { + result <- append(result, list('bss10' = 
c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5))) + } + if ('bss90' %in% metrics) { + result <- append(result, list('bss90' = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5))) + } + if ('enssprerr' %in% metrics ) { + result <- append(result, list('enssprerr' = c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2))) + } + if ('rmsss' %in% metrics ) { + result <- append(result, list('rmsss' = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5))) + } + return(result) +} + +## Define legend lower limit color + +.legend_col_inf <- function(metrics, colorunder) { + result <- list() + if ('mean_bias' %in% metrics) { + result <- append(result, list('mean_bias' = colorunder)) + } + if ('enscorr' %in% metrics) { + result <- append(result, list('enscorr' = NULL)) + } + if ('rps' %in% metrics ) { + result <- append(result, list('rps' = NULL)) + } + if ('frps' %in% metrics ) { + result <- append(result, list('frps' = NULL)) + } + if ('rpss' %in% metrics) { + result <- append(result, list('rpss' = colorunder)) + } + if ('rpss_score_aggr' %in% metrics) { + result <- append(result, list('rpss_score_aggr' = colorunder)) + } + if ('frpss' %in% metrics) { + result <- append(result, list('frpss' = colorunder)) + } + if ('crps' %in% metrics) { + result <- append(result, list('crps' = NULL)) + } + if ('crpss' %in% metrics) { + result <- append(result, list('crpss' = colorunder)) + } + if ('crpss_score_aggr' %in% metrics) { + result <- append(result, list('crpss_score_aggr' = colorunder)) + } + if ('bss10' %in% metrics) { + result <- append(result, list('bss10' = colorunder)) + } + if ('bss90' %in% metrics) { + result <- append(result, list('bss90' = colorunder)) + } + if ('enssprerr' %in% metrics ) { + result <- append(result, list('enssprerr' = NULL)) + } + if ('rmsss' %in% metrics ) { + result <- append(result, list('rmsss' = colorunder)) + } + return(result) +} + + +## Define legend upper limit color + +.legend_col_sup <- function(metrics, colorsup) { + result <- list() + if ('mean_bias' %in% metrics) { + result <- append(result, list('mean_bias' = colorsup)) + } + if ('enscorr' %in% metrics) { + result <- append(result, list('enscorr' = NULL)) + } + if ('rps' %in% metrics ) { + result <- append(result, list('rps' = NULL)) + } + if ('frps' %in% metrics ) { + result <- append(result, list('frps' = NULL)) + } + if ('rpss' %in% metrics) { + result <- append(result, list('rpss' = colorsup)) + } + if ('rpss_score_aggr' %in% metrics) { + result <- append(result, list('rpss_score_aggr' = colorsup)) + } + if ('frpss' %in% metrics) { + result <- append(result, list('frpss' = colorsup)) + } + if ('crps' %in% metrics) { + result <- append(result, list('crps' = colorsup)) + } + if ('crpss' %in% metrics) { + result <- append(result, list('crpss' = colorsup)) + } + if ('crpss_score_aggr' %in% metrics) { + result <- append(result, list('crpss_score_aggr' = colorsup)) + } + if ('bss10' %in% metrics) { + result <- append(result, list('bss10' = colorsup)) + } + if ('bss90' %in% metrics) { + result <- append(result, list('bss90' = colorsup)) + } + if ('enssprerr' %in% metrics ) { + result <- append(result, list('enssprerr' = colorsup)) + } + if ('rmsss' %in% metrics ) { + result <- append(result, list('rmsss' = colorsup)) + } + return(result) +} + + + + +## Output file name to save scorecard +.Filename <- function(system = NULL, reference = NULL, model = NULL, eval.name = NULL, + var = NULL, start.year = NULL, end.year = NULL, scorecard.type = NULL, + region = NULL, fileout.label = NULL, 
output.path = NULL) { + + ## Remove . from names + system <- gsub('.','', system, fixed = T) + reference <- gsub('.','', reference, fixed = T) + + period <- paste0(start.year, "-", end.year) + + if (scorecard.type == 1 || scorecard.type == 2 || scorecard.type == 3 || scorecard.type == 4 ) { + scorecard_save_path <- paste0(output.path, + "/scorecard-", scorecard.type, "_", system, "_", + reference, "_", var, "_", period, + fileout.label, ".png") + } else { + scorecard_save_path <- paste0(output.path, + "/scorecard-", scorecard.type, "_multi-", + tolower(model), "_", eval.name, "_", + var, "_", period, "_", region, + fileout.label, ".png") + } + + return(scorecard_save_path) +} + +# Scorecards function to assign background color of table cells, +# color of text in table and to bold the text. +# +# It will return a list with 2 arrays: +# (1) metric.color, A 2-dimensional array with character strings containing the +# color codes for each cell background. +# (2) metric.text.color, A 2-dimensional array with character strings +# containing the color codes for each cell text. +.SCTableColors <- function(table, n.col, n.subcol, n.row, n.subrow, + legend.breaks, palette, colorunder, colorsup) { + # Define rows and columns + n.rows <- n.row * n.subrow + n.columns <- n.col * n.subcol + + ## Set table background colors + metric.color <- array(colorunder, c(n.row * n.subrow, n.columns)) + metric.text.color <- array("#2A2A2A", c(n.row * n.subrow , n.columns)) + # metric.text.bold <- array(TRUE, c(n.row * n.subrow , n.columns - 2)) ## Setting all values to bold + + ## Define cell and text colors to show in table + for (i in 1:n.col) { + metric.int <- legend.breaks[[i]] + for (rr in 1:n.rows) { + for (j in 1:n.subcol) { + for (pp in 1:(length(metric.int) - 1)) { + if (is.nan(table[rr,((i - 1) * n.subcol + j)])) { + metric.color[rr,((i - 1) * n.subcol + j)] <- "gray" + } else { + if (table[rr,((i - 1) * n.subcol + j)] >= + metric.int[pp] && table[rr,((i - 1) * n.subcol + j)] <= + metric.int[pp+1]) { + metric.color[rr,((i - 1) * n.subcol + j)] <- palette[[i]][pp] #palette[pp] + } + if (table[rr,((i - 1) * n.subcol + j)] < metric.int[1]) { + metric.color[rr,((i - 1) * n.subcol + j)] <- colorunder[i] + } + if (table[rr,((i - 1) * n.subcol + j)] >= + metric.int[length(metric.int)]) { + metric.color[rr,((i - 1) * n.subcol + j)] <- colorsup[i] + } + } + ## color text in white and bold if background is white or dark blue or dark red: + if (is.nan(table[rr,((i - 1) * n.subcol + j)]) || + (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == 1 && + table[rr,((i - 1) * n.subcol + j)] < metric.int[2]) || + (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == 2 && + table[rr,((i - 1) * n.subcol + j)] < metric.int[3]) || + (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == (length(metric.int) - 1) && + table[rr,((i - 1) * n.subcol + j)] >= metric.int[length(metric.int) - 1]) || + (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == (length(metric.int) - 2) && + table[rr,((i - 1) * n.subcol + j)] >= metric.int[length(metric.int) - 2])) { + metric.text.color[rr,((i - 1) * n.subcol + j)] <- "white" + #metric.text.bold[rr,((i - 1) * n.subcol + j)] <- TRUE + } + } + } + } + } + + return(list(metric.color = metric.color, + metric.text.color = metric.text.color)) + +} + +# Scorecards function to create the color bar legends for the required metrics +# and paste them below the scorecard table +.SCLegend <- function(legend.breaks, palette, colorunder, colorsup, + label.scale, legend.width, legend.height, + 
legend.white.space, fileout) { + + ## Create color bar legends for each metric + for (i in 1:length(palette)) { + png(filename = paste0(fileout, '_tmpLegend', i, '.png'), width = legend.width, + height = legend.height) + ColorBar(brks = legend.breaks[[i]], cols = palette[[i]], vertical = FALSE, + label_scale = label.scale, col_inf = colorunder[[i]], + col_sup = colorsup[[i]]) + dev.off() + if (i == 1) { + ## Add white space to the left of the first color bar legend + system(paste0('convert ', fileout, '_tmpLegend1.png -background white -splice ', + legend.white.space, 'x0 ', fileout, '_tmpScorecardLegend.png')) + } else { + system(paste0('convert +append ', fileout, '_tmpScorecardLegend.png ', + fileout, '_tmpLegend', i, '.png ', fileout, + '_tmpScorecardLegend.png')) + } + } + unlink(paste0(fileout,'_tmpLegend*.png')) +} + +# Function to calculate color bar breaks for bias metric +.SCBiasBreaks <- function(data){ + + bias.minmax_max <- quantile(data, 0.98, na.rm = TRUE) + bias.minmax_min <- quantile(data, 0.02, na.rm = TRUE) + bias.max <- max(abs(bias.minmax_min), abs(bias.minmax_max)) + + ## one point more than the colors below (the intervals) + bias.int <- bias.max * c(-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1) + + ## round to 2 significance figures + bias.int <- signif(bias.int, digits = 2) + + return(bias.int) +} + +# Function to calculate color bar breaks for CRPS metric +.SCCrpsBreaks <- function(data){ + + crps.max <- quantile(data, 0.98, na.rm = TRUE) + + crps.int <- crps.max * c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1) + + ## round to 2 significance figures + crps.int <- signif(crps.int, digits = 2) + + return(crps.int) +} + +# Auxiliary function to get the names of the longitude coordinate +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'lons', 'longitude', 'x', 'i', 'nav_lon') +} + +# Auxiliary function to get the names of the latitude coordinate +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'lats', 'latitude', 'y', 'j', 'nav_lat') +} diff --git a/modules/Scorecards/R/tmp/WeightedMetrics.R b/modules/Scorecards/R/tmp/WeightedMetrics.R new file mode 100644 index 0000000000000000000000000000000000000000..aea23c566851f523cc8ee19ae2336861e329d0c0 --- /dev/null +++ b/modules/Scorecards/R/tmp/WeightedMetrics.R @@ -0,0 +1,134 @@ +#' Scorecards spatial aggregation of loaded metrics +#' +#'@description Scorecards function to perform the spatial aggregation of the +#' loaded metrics for the specified regions. +#' +#'@param loaded_metrics is a list of arrays containing the metrics loaded by the +#' function SC_load_metrics. +#'@param region is a named list of vectors containing the desired regions to +#' analyze. For each region the following should be specified in this order: +#' lon_min, lon_max, lat_min, lat_max. +#'@param metric.aggregation a character indicating whether the skill score RPS +#' and CRPSS are calculated from aggregated scores or aggregated skill score +#' directly, either 'score' or 'skill' respectively +#'@param ncores is the number of cores to use for the calculation. +#' +#'@return An array with the following dimensions: system, reference, metrics, +#' time, sdate, region. 
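Before the function itself, here is a rough sketch of the regional aggregation it performs, using ClimProjDiags::WeightedMean() with the same argument names as the call inside the loop below. The grid, field and region are made up, and it assumes a ClimProjDiags version that accepts dimension names for 'londim' and 'latdim', as the call in the function does.

```r
# Sketch: latitude-weighted regional mean of one metric field (toy data).
library(ClimProjDiags)

metric_field <- array(rnorm(360 * 180), dim = c(longitude = 360, latitude = 180))
lon <- seq(0.5, 359.5, by = 1)
lat <- seq(-89.5, 89.5, by = 1)
tropics <- c(lon.min = 0, lon.max = 360, lat.min = -30, lat.max = 30)

regional_mean <- WeightedMean(data = metric_field, lon = lon, lat = lat,
                              region = tropics,
                              londim = 'longitude', latdim = 'latitude',
                              na.rm = TRUE)
regional_mean  # single area-weighted value for the region
```

The score-aggregation branch of the function then derives the skill scores from these aggregated means, e.g. RPSS = 1 - RPS / RPS_clim.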
+#'
+#'@examples
+#'regions <- list('global' = c(lon.min = 0, lon.max = 360, lat.min = -90, lat.max = 90),
+#'                'europe' = c(lon.min = -10, lon.max = 40, lat.min = 30, lat.max = 70))
+#'aggregated_metrics <- WeightedMetrics(loaded_metrics,
+#'                                      regions = regions,
+#'                                      metric.aggregation = 'skill',
+#'                                      ncores = 4)
+#'@import multiApply
+#'@importFrom ClimProjDiags WeightedMean
+#'@importFrom s2dv Reorder
+#'@export
+WeightedMetrics <- function(loaded_metrics, regions, metric.aggregation,
+                            ncores = NULL, na.rm = TRUE) {
+  ## Initial checks
+  # loaded_metrics
+  if (any(sapply(loaded_metrics, function(x) {
+    sapply(x, function(y) {is.null(dim(y))})
+  }))) {
+    stop(paste0("Parameter 'loaded_metrics' must be a list of lists of arrays ",
+                "with named dimensions."))
+  }
+  # regions
+  if (!all(sapply(regions, is.numeric))) {
+    stop(paste0("Parameter 'regions' must be a named list of vectors ",
+                "containing the desired regions to analyze."))
+  }
+  # metric.aggregation
+  if (!is.character(metric.aggregation)) {
+    stop("Parameter 'metric.aggregation' must be a character string, either 'score' or 'skill'.")
+  }
+  # ncores
+  if (!is.null(ncores) && !is.numeric(ncores)) {
+    stop("Parameter 'ncores' must be NULL or an integer.")
+  }
+
+  ## Get metric names
+  ## TO DO: check all metrics are in the same order for all systems
+  metrics <- attributes(loaded_metrics[[1]][[1]])$metrics
+  forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months
+  start.months <- attributes(loaded_metrics[[1]][[1]])$start.months
+
+  all_metric_means <- array(dim = c(metric = length(metrics),
+                                    time = length(forecast.months),
+                                    sdate = length(start.months),
+                                    region = length(regions),
+                                    reference = length(loaded_metrics[[1]]),
+                                    system = length(loaded_metrics)))
+
+  ## Loop over system
+  for (sys in 1:length(loaded_metrics)) {
+    ## Loop over reference
+    for (ref in 1:length(loaded_metrics[[sys]])) {
+      dimnames <- names(dim(loaded_metrics[[sys]][[ref]]))
+      lon_dim_name <- dimnames[which(dimnames %in% .KnownLonNames())]
+      lat_dim_name <- dimnames[which(dimnames %in% .KnownLatNames())]
+      ## Get latitude and longitude from attributes of loaded metrics
+      ## Loop over region
+      for (reg in 1:length(regions)) {
+        ## Calculate weighted means for defined regions for each system and reference
+        weighted.mean <- WeightedMean(data = loaded_metrics[[sys]][[ref]],
+                                      lon = as.vector(attributes(loaded_metrics[[sys]][[ref]])$lon),
+                                      lat = as.vector(attributes(loaded_metrics[[sys]][[ref]])$lat),
+                                      region = regions[[reg]],
+                                      londim = lon_dim_name,
+                                      latdim = lat_dim_name,
+                                      na.rm = na.rm,
+                                      ncores = ncores)
+        all_metric_means[, , , reg, ref, sys] <- weighted.mean
+      } ## close loop on region
+    } ## close loop on reference
+  } ## close loop on system
+
+  ## Score aggregation:
+  if (metric.aggregation == 'score') {
+    if (all(c("rps", "rps_clim") %in% metrics)) {
+      ## Calculate RPSS from aggregated RPS and RPS_clim
+      all_metric_means <- multiApply::Apply(data = all_metric_means,
+                                            target_dims = 'metric',
+                                            fun = function(x, met) {
+                                              res <- 1 - x[which(met == 'rps')] / x[which(met == 'rps_clim')]
+                                              c(x, res)}, met = metrics,
+                                            output_dims = 'metric',
+                                            ncores = ncores)$output1
+      ## Define name of newly calculated RPSS metric
+      metrics <- c(metrics, "rpss_score_aggr")
+    }
+    if (all(c("crps", "crps_clim") %in% metrics)) {
+      ## Calculate CRPSS from aggregated CRPS and CRPS_clim
+      all_metric_means <- multiApply::Apply(data = all_metric_means,
+                                            target_dims = 'metric',
+                                            fun = function(x, met) {
+                                              res <- 1 - x[which(met == 'crps')] / x[which(met == 'crps_clim')]
+                                              c(x, res)},
+                                            met = metrics,
+
output_dims = 'metric', + ncores = ncores)$output1 + ## Define name of newly calculated CRPSS metric + metrics <- c(metrics, "crpss_score_aggr") + } + ## Add warning in case metric.aggregation == 'score' but 1 of the metrics from each pair is missing + } + ## reorder dimensions in array + all_metric_means <- s2dv::Reorder(all_metric_means, c('system','reference','metric','time','sdate','region')) + + ## Add attributes + attributes(all_metric_means)$metrics <- metrics + attributes(all_metric_means)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months + attributes(all_metric_means)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months + attributes(all_metric_means)$regions <- regions + attributes(all_metric_means)$system.name <- names(loaded_metrics) + attributes(all_metric_means)$reference.name <- names(loaded_metrics[[1]]) + + return(all_metric_means) + +} ## close function + diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R new file mode 100644 index 0000000000000000000000000000000000000000..0dbcd9210c9227200c872204526ea4e6df05adcb --- /dev/null +++ b/modules/Scorecards/Scorecards.R @@ -0,0 +1,192 @@ +############################################################################### +##################### SCORECARDS MODULE FOR SUNSET SUITE ###################### +############################################################################### + +##### Load source functions ##### +source('modules/Scorecards/R/tmp/LoadMetrics.R') +source('modules/Scorecards/R/tmp/WeightedMetrics.R') +source('modules/Scorecards/R/tmp/Utils.R') +source('modules/Scorecards/R/tmp/SCTransform.R') +source('modules/Scorecards/R/tmp/ScorecardsSingle.R') +source('modules/Scorecards/R/tmp/ScorecardsMulti.R') +source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R') +source('modules/Scorecards/R/tmp/SCPlotScorecard.R') + + +## TODO: Change function name to 'Scorecards'? 
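The wrapper defined below reads its options from the recipe; the start months and the metric lists arrive as comma- or space-separated strings and are parsed with the strsplit() pattern shown in this sketch (the input strings here are invented):

```r
# Sketch of the recipe-string parsing used below (example inputs are invented).
start_months_str <- "1, 2,3 4"
metrics_str <- "RPSS, CRPSS mean_bias"

as.numeric(unlist(strsplit(start_months_str, split = ", | |,")))
#> [1] 1 2 3 4
unlist(strsplit(tolower(metrics_str), ", | |,"))
#> [1] "rpss" "crpss" "mean_bias"
```

When metric_aggregation is set to 'score', the load list is extended with 'rps_clim' and 'crps_clim' so that RPSS and CRPSS can later be rebuilt from the aggregated scores, and the visualized metric names are switched to their '_score_aggr' counterparts.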
+## Define function +Scorecards <- function(recipe) { + + ## set parameters + input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") + output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") + dir.create(output.path, recursive = T, showWarnings = F) + + system <- recipe$Analysis$Datasets$System$name + reference <- recipe$Analysis$Datasets$Reference$name + var <- recipe$Analysis$Variables$name + start.year <- as.numeric(recipe$Analysis$Time$hcst_start) + end.year <- as.numeric(recipe$Analysis$Time$hcst_end) + forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max + + if (recipe$Analysis$Workflow$Scorecards$start_months == 'all') { + start.months <- 1:12 + } else { + start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, + split = ", | |,")[[1]]) + } + + regions <- recipe$Analysis$Workflow$Scorecards$regions + for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} + + metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation + metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) + + ## Define skill scores in score aggregation has been requested + + if(metric.aggregation == 'score'){ + if('rps' %in% metrics.load){ + metrics.load <- c(metrics.load, 'rps_clim') + } + if('crps' %in% metrics.load){ + metrics.load <- c(metrics.load, 'crps_clim') + } + } + + metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + + ## Define skill scores in score aggregation has been requested + + if(metric.aggregation == 'score'){ + if('rpss' %in% metrics.visualize){ + metrics.visualize[metrics.visualize == 'rpss'] <- 'rpss_score_aggr' + } + if('crpss' %in% metrics.visualize){ + metrics.visualize[metrics.visualize == 'crpss'] <- 'crpss_score_aggr' + } + } + + inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na + table.label <- recipe$Analysis$Workflow$Scorecards$table_label + fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label + legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space + col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width + col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width + calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff + ncores <- 1 # recipe$Analysis$ncores + + ## Load data files + loaded_metrics <- LoadMetrics(system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + metrics = metrics.load, + start.months = start.months, + forecast.months = forecast.months, + inf_to_na = inf.to.na, + input.path = input.path) + + + if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ + + ### Convert loaded metrics to array for allready aggregated data + metrics.dim <- attributes(loaded_metrics[[1]][[1]])$metrics + forecast.months.dim <- attributes(loaded_metrics[[1]][[1]])$forecast.months + start.months.dim <- attributes(loaded_metrics[[1]][[1]])$start.months + regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) + + aggregated_metrics <- array(dim = c(system = length(loaded_metrics), + reference = length(loaded_metrics[[1]]), + metric = length(metrics.dim), + time = length(forecast.months.dim), + sdate = length(start.months.dim), + region = length(regions.dim))) + + + for (sys in 1:length(names(loaded_metrics))){ + for (ref in 1:length(names(loaded_metrics[[sys]]))){ + aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = 
loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) + } + } + + ## Add attributes + attributes(aggregated_metrics)$metrics <- metrics.load + attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months + attributes(aggregated_metrics)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months + attributes(aggregated_metrics)$regions <- regions + attributes(aggregated_metrics)$system.name <- names(loaded_metrics) + attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]]) + + + } else { + ## Calculate weighted mean of spatial aggregation + aggregated_metrics <- WeightedMetrics(loaded_metrics, + regions = regions, + metric.aggregation = metric.aggregation, + ncores = ncores) + }## close if + + ## Create simple scorecard tables + ## (one system only) + ## Metrics input must be in the same order as function SC_spatial_aggregation + scorecard_single <- ScorecardsSingle(data = aggregated_metrics, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = names(regions), + metrics = metrics.visualize, + table.label = table.label, + fileout.label = fileout.label, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + output.path = output.path) + + ## Create multi system/reference scorecard tables + ## (multiple systems with one reference or one system with multiple references) + ## Metrics input must be in the same order as function SC_spatial_aggregation + if(length(system) > 1 || length(reference) > 1){ + scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = attributes(regions)$names, + metrics = metrics.visualize, + table.label = table.label, + fileout.label = fileout.label, + output.path = output.path) + } ## close if + + + if(calculate.diff == TRUE){ + if(length(system) == 2 || length(reference) == 2){ + scorecard_diff <- ScorecardsSystemDiff(data = aggregated_metrics, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = attributes(regions)$names, + metrics = metrics.visualize, + table.label = table.label, + fileout.label = fileout.label, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + output.path = output.path) + } else {stop ("Difference scorecard can only be computed with two systems or two references.")} + } ## close if on calculate.diff + +} + diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R new file mode 100644 index 0000000000000000000000000000000000000000..2c54c48f45ca0f70ecb26370e396d624948b7c0b --- /dev/null +++ b/modules/Scorecards/execute_scorecards.R @@ -0,0 +1,31 @@ +source('tools/libs.R') +source('modules/Scorecards/Scorecards.R') + +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +output_dir <- args[2] + +## TODO: Replace with function +# Read recipe and set outdir +recipe <- read_yaml(recipe_file) +recipe$Run$output_dir <- output_dir + +## Loop over variables +datasets <- recipe$Analysis$Datasets +## TODO: Improve dependency system? 
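For context, a sketch of how this launcher is expected to be invoked and how its two positional arguments are consumed; the recipe and output paths are placeholders, not paths from the repository.

```r
# Hypothetical invocation (paths are placeholders):
#   Rscript modules/Scorecards/execute_scorecards.R recipes/my_recipe.yml /path/to/output_dir
# which corresponds to the argument handling at the top of this script:
args <- commandArgs(trailingOnly = TRUE)
recipe_file <- args[1]  # recipe YAML, read with read_yaml()
output_dir  <- args[2]  # written into recipe$Run$output_dir before calling Scorecards()
```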
+for (system in 1:length(datasets$System)) { + for (reference in 1:length(datasets$Reference)) { + for (variable in 1:length(recipe$Analysis$Variables)) { + scorecard_recipe <- recipe + scorecard_recipe$Analysis$Datasets$System <- + recipe$Analysis$Datasets$System[[system]] + scorecard_recipe$Analysis$Datasets$Reference <- + recipe$Analysis$Datasets$Reference[[reference]] + scorecard_recipe$Analysis$Variables <- + recipe$Analysis$Variables[[variable]] + # Plot Scorecards + Scorecards(scorecard_recipe) + } + } +} +print("##### SCORECARDS SAVED TO THE OUTPUT DIRECTORY #####") diff --git a/modules/Skill/R/CRPS_clim.R b/modules/Skill/R/CRPS_clim.R new file mode 100644 index 0000000000000000000000000000000000000000..b66cab7872bf91102766a9124c82830deba90d24 --- /dev/null +++ b/modules/Skill/R/CRPS_clim.R @@ -0,0 +1,27 @@ +# CRPS version for climatology +CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross.val= TRUE){ + time_dim <- names(dim(obs)) + obs_time_len <- dim(obs)[time_dim] + + if (isFALSE(clim.cross.val)) { ## Without cross-validation + ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len)) + } else if (isTRUE(clim.cross.val)) { ## With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1)) + for (i in 1:obs_time_len) { + ref[i, ] <- obs[-i] + } + } + + names(dim(ref)) <- c(time_dim, memb_dim) + # ref: [sdate, memb] + # obs: [sdate] + crps_ref <- s2dv:::.CRPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + dat_dim = NULL, Fair = FALSE) + + # crps_ref should be [sdate] + if (return_mean == TRUE) { + return(mean(crps_ref)) + } else { + return(crps_ref) + } +} diff --git a/modules/Skill/R/RPS_clim.R b/modules/Skill/R/RPS_clim.R new file mode 100644 index 0000000000000000000000000000000000000000..c93c67476ffcdcbf450f3f2efb37d9f6a2c69b7e --- /dev/null +++ b/modules/Skill/R/RPS_clim.R @@ -0,0 +1,20 @@ +# RPS version for climatology +RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3), cross.val = TRUE) { + + if (is.null(indices_for_clim)){ + indices_for_clim <- 1:length(obs) + } + + obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: + prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + # clim_probs: [bin, sdate] + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) + + return(mean(rps_ref)) +} diff --git a/modules/Skill/compute_probs.R b/modules/Skill/R/compute_probs.R similarity index 100% rename from modules/Skill/compute_probs.R rename to modules/Skill/R/compute_probs.R diff --git a/modules/Skill/compute_quants.R b/modules/Skill/R/compute_quants.R similarity index 100% rename from modules/Skill/compute_quants.R rename to modules/Skill/R/compute_quants.R diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/R/s2s.metrics.R similarity index 100% rename from modules/Skill/s2s.metrics.R rename to modules/Skill/R/s2s.metrics.R diff --git a/modules/Skill/R/tmp/GetProbs.R b/modules/Skill/R/tmp/GetProbs.R new file mode 100644 index 
0000000000000000000000000000000000000000..59304b4e673775438231f94bfcc1d3f5aabeee19 --- /dev/null +++ b/modules/Skill/R/tmp/GetProbs.R @@ -0,0 +1,258 @@ +#'Compute probabilistic forecasts or the corresponding observations +#' +#'Compute probabilistic forecasts from an ensemble based on the relative +#'thresholds, or the probabilistic observations (i.e., which probabilistic +#'category was observed). A reference period can be specified to calculate the +#'absolute thresholds between each probabilistic category. The absolute +#'thresholds can be computed in cross-validation mode. If data is an ensemble, +#'the probabilities are calculated as the percentage of members that fall into +#'each category. For observations (or forecast without member dimension), 1 +#'means that the event happened, while 0 indicates that the event did not +#'happen. Weighted probabilities can be computed if the weights are provided for +#'each ensemble member and time step. +#' +#'@param data A named numerical array of the forecasts or observations with, at +#' least, time dimension. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast, or NULL if there is no member +#' dimension (e.g., for observations, or for forecast with only one ensemble +#' member). The default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_quantiles A vector of the indices to be taken along +#' 'time_dim' for computing the absolute thresholds between the probabilistic +#' categories. If NULL, the whole period is used. The default value is NULL. +#'@param weights A named numerical array of the weights for 'data' with +#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value +#' is NULL. The ensemble should have at least 70 members or span at least 10 +#' time steps and have more than 45 members if consistency between the weighted +#' and unweighted methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation mode. The default value +#' is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of probabilities with dimensions c(bin, the rest dimensions +#'of 'data' except 'memb_dim'). 'bin' dimension has the length of probabilistic +#'categories, i.e., \code{length(prob_thresholds) + 1}. 
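A tiny worked example of the probability definition described above (share of members falling in each tercile category); the ensemble values are invented and the boundary handling is simplified compared with the function below.

```r
# Worked illustration: fraction of ensemble members per tercile category.
ens <- c(0.2, 0.5, 0.9, 1.4, 2.0)                    # 5 members, one start date
thr <- quantile(ens, probs = c(1/3, 2/3), type = 8)  # absolute category thresholds
probs <- c(mean(ens <= thr[1]),
           mean(ens > thr[1] & ens <= thr[2]),
           mean(ens > thr[2]))
probs  # three probabilities summing to 1
```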
+#' +#'@examples +#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) +#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' indices_for_quantiles = 4:17) +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), weights = NULL, cross.val = FALSE, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimensions.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimensions. If no member ", + "dimension exists, set it as NULL.") + } + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- 1:dim(data)[time_dim] + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop("Parameter 'weights' must be a named numeric array.") + +# if (is.null(dat_dim)) { + if (!is.null(memb_dim)) { + lendim_weights <- 2 + namesdim_weights <- c(time_dim, memb_dim) + } else { + lendim_weights <- 1 + namesdim_weights <- c(time_dim) + } + if (length(dim(weights)) != lendim_weights | + any(!names(dim(weights)) %in% namesdim_weights)) { + stop(paste0("Parameter 'weights' must have dimension ", + paste0(namesdim_weights, collapse = ' and '), ".")) + } + if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { + stop(paste0("Parameter 'weights' must have the same dimension length as ", + paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.")) + } + weights <- Reorder(weights, namesdim_weights) + +# } else { +# if (length(dim(weights)) != 3 | any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) +# stop("Parameter 'weights' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") +# if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | +# dim(weights)[time_dim] != dim(exp)[time_dim] | +# dim(weights)[dat_dim] != dim(exp)[dat_dim]) { +# stop(paste0("Parameter 'weights' must have the same dimension lengths ", +# "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) +# } +# weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) +# } + } + ## 
cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), #, dat_dim), + output_dims = c("bin", time_dim), + fun = .GetProbs, +# dat_dim = dat_dim, + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + + return(res) +} + +.GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), weights = NULL, cross.val = FALSE) { + # .GetProbs() is used in RPS, RPSS, ROCSS + # data + ## if data is exp: [sdate, memb] + ## if data is obs: [sdate, (memb)] + # weights: [sdate, (memb)], same as data + + # Add dim [memb = 1] to data if it doesn't have memb_dim + if (length(dim(data)) == 1) { + dim(data) <- c(dim(data), 1) + if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) + } + # Absolute thresholds + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in 1:dim(data)[1]) { + if (is.null(weights)) { + quantiles[, i_time] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ], + weights[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + } + + } else { + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]), dim = c(bin = length(quantiles), dim(data)[1])) + } + # quantiles: [bin-1, sdate] + + # Probabilities + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] + for (i_time in 1:dim(data)[1]) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = dim(quantiles)[1] + 1) + } else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[, i_time])) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in 1:dim(quantiles)[1]) { + # for thresholds falling under the distribution + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 + # for 
thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 + } else { + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, + quantiles[i_quant, i_time], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs[, i_time], 1) - append(0, integrated_probs[, i_time]) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) + } + } + } + } + + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) +} + diff --git a/modules/Skill/R/tmp/RandomWalkTest.R b/modules/Skill/R/tmp/RandomWalkTest.R new file mode 100644 index 0000000000000000000000000000000000000000..8d5f67f361a679dc078b8de2c692dc3f692fb0fb --- /dev/null +++ b/modules/Skill/R/tmp/RandomWalkTest.R @@ -0,0 +1,184 @@ +#'Random Walk test for skill differences +#' +#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a +#'common observational reference) based on Random Walks (DelSole and Tippett, +#'2016). +#' +#'@param skill_A A numerical array of the time series of the scores obtained +#' with the forecaster A. +#'@param skill_B A numerical array of the time series of the scores obtained +#' with the forecaster B. The dimensions should be identical as parameter +#' 'skill_A'. +#'@param time_dim A character string indicating the name of the dimension along +#' which the tests are computed. The default value is 'sdate'. +#'@param test.type A character string indicating the type of significance test. +#' It can be "two.sided.approx" (to assess whether forecaster A and forecaster +#' B are significantly different in terms of skill with a two-sided test using +#' the approximation of DelSole and Tippett, 2016), "two.sided" (to assess +#' whether forecaster A and forecaster B are significantly different in terms +#' of skill with an exact two-sided test), "greater" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for negatively oriented scores), or "less" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for positively oriented scores). The default value is +#' "two.sided.approx". +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test (output "sign"). The default value is 0.05. +#'@param pval A logical value indicating whether to return the p-value of the +#' significance test. The default value is TRUE. +#'@param sign A logical value indicating whether to return the statistical +#' significance of the test based on 'alpha'. The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. 
+#' +#'@return A list with: +#'\item{$score}{ +#' A numerical array with the same dimensions as the input arrays except +#' 'time_dim'. The number of times that forecaster A has been better than +#' forecaster B minus the number of times that forecaster B has been better +#' than forecaster A (for skill negatively oriented, i.e., the lower the +#' better). If $score is positive, forecaster A has been better more times +#' than forecaster B. If $score is negative, forecaster B has been better more +#' times than forecaster A. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance with the same dimensions +#' as the input arrays except "time_dim". Returned only if "sign" is TRUE. +#'} +#'\item{$p.val}{ +#' A numeric array of the p-values with the same dimensions as the input arrays +#' except "time_dim". Returned only if "pval" is TRUE. +#'} +#' +#'@details +#' Null and alternative hypothesis for "two-sided" test (regardless of the +#' orientation of the scores):\cr +#' H0: forecaster A and forecaster B are not different in terms of skill\cr +#' H1: forecaster A and forecaster B are different in terms of skill +#' +#' Null and alternative hypothesis for one-sided "greater" (for negatively +#' oriented scores, i.e., the lower the better) and "less" (for positively +#' oriented scores, i.e., the higher the better) tests:\cr +#' H0: forecaster A is not better than forecaster B\cr +#' H1: forecaster A is better than forecaster B +#' +#' Examples of negatively oriented scores are the RPS, RMSE and the Error, while +#' the ROC score is a positively oriented score. +#' +#' DelSole and Tippett (2016) approximation for two-sided test at 95% confidence +#' level: significant if the difference between the number of times that +#' forecaster A has been better than forecaster B and forecaster B has been +#' better than forecaster A is above 2sqrt(N) or below -2sqrt(N). 
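A minimal illustrative sketch (not taken from the patch) of the approximate two-sided criterion described above, using invented, negatively oriented toy scores (lower is better): the statistic counts how often each forecaster wins, and the difference is compared against 2*sqrt(N).

# Hypothetical per-start-date scores for two forecasters
set.seed(1)
N <- 30
scores_A <- abs(rnorm(N))             # forecaster A
scores_B <- abs(rnorm(N, sd = 1.5))   # forecaster B, worse on average
score <- sum(scores_B > scores_A) - sum(scores_A > scores_B)  # A wins minus B wins
significant <- abs(score) > 2 * sqrt(N)  # DelSole and Tippett (2016) 95% criterion
c(score = score, significant = significant)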
+#' +#'@references +#'DelSole and Tippett (2016): https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#' fcst_A <- array(data = 11:50, dim = c(sdate = 10, lat = 2, lon = 2)) +#' fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2)) +#' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) +#' scores_A <- abs(fcst_A - reference) +#' scores_B <- abs(fcst_B - reference) +#' res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = FALSE, sign = TRUE) +#' res2 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, test.type = 'greater') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', + test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE, + sign = FALSE, ncores = NULL) { + + # Check inputs + ## skill_A and skill_B + if (is.null(skill_A) | is.null(skill_B)) { + stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") + } + if(!is.numeric(skill_A) | !is.numeric(skill_B)) { + stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") + } + if (!identical(dim(skill_A), dim(skill_B))) { + stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))) { + stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") + } + ## alpha + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { + stop("Parameter 'alpha' must be a number between 0 and 1.") + } + ## test.type + if (!test.type %in% c('two.sided.approx','two.sided','greater','less')) { + stop("Parameter 'test.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.") + } + if (test.type == 'two.sided.approx') { + if (alpha != 0.05) { + .warning("DelSole and Tippett (2016) approximation is valid for alpha ", + "= 0.05 only. Returning the significance at the 0.05 significance level.") + } + if (pval) { + .warning("p-value cannot be returned with the DelSole and Tippett (2016) ", + "approximation. 
Returning the significance at the 0.05 significance level.") + } + sign <- TRUE + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ## Compute the Random Walk Test + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B), + target_dims = list(skill_A = time_dim, + skill_B = time_dim), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + ncores = ncores) + + return(res) +} + +.RandomWalkTest <- function(skill_A, skill_B, test.type = 'two.sided.approx', + alpha = 0.05, pval = TRUE, sign = FALSE) { + #skill_A and skill_B: [sdate] + + N.eff <- length(skill_A) + + A_better <- sum(skill_B > skill_A) + B_better <- sum(skill_B < skill_A) + + output <- NULL + output$score <- A_better - B_better + + if (test.type == 'two.sided.approx') { + output$sign <- ifelse(test = abs(output$score) > (2 * sqrt(N.eff)), yes = TRUE, no = FALSE) + + } else { + + if (!is.na(output$score)) { + p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha, + alternative = test.type)$p.value + + } else { + p.val <- NA + } + + if (pval) { + output$p.val <- p.val + } + if (sign) { + output$sign <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) + } + + } + + return(output) +} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 9f97e688167ab992515641234b9d05efe86ee354..04fbc52efb5246e86c601007be1ced893439fc14 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -7,56 +7,19 @@ # - reliability diagram # - ask Carlos which decadal metrics he is currently using -source("modules/Skill/compute_quants.R") -source("modules/Skill/compute_probs.R") -source("modules/Skill/s2s.metrics.R") -## TODO: Remove when new version of s2dv is released -source("modules/Skill/tmp/RandomWalkTest.R") -source("modules/Skill/tmp/Bias.R") -source("modules/Skill/tmp/AbsBiasSS.R") -source("modules/Skill/tmp/RMSSS.R") -source("modules/Skill/tmp/Corr.R") +source("modules/Skill/R/compute_quants.R") +source("modules/Skill/R/compute_probs.R") +source("modules/Skill/R/s2s.metrics.R") +source("modules/Saving/R/drop_dims.R") +## TODO: Remove when they are included in s2dv +source("modules/Skill/R/RPS_clim.R") +source("modules/Skill/R/CRPS_clim.R") +source("modules/Skill/R/tmp/GetProbs.R") +## TODO: Remove in the next release +source("modules/Skill/compute_skill_metrics.R") +source("modules/Skill/compute_probabilities.R") -## TODO: Implement this in the future -## Which parameter are required? -# if (!("obs" %in% ls()) || is.null(obs)) { -# error(logger, -# "There is no object 'obs' in the global environment or it is NULL") -# } -# if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) { -# error(logger, -# "There is no object 'fcst' in the global environment or it is NULL") -# } -# if (!("hcst" %in% ls()) || is.null(hcst)) { -# error(logger, -# "There is no object 'hcst' in the global environment or it is NULL") -# } -# if (!("metric" %in% ls()) || is.null(metric)) { -# warn(logger, -# "Verification metric not found and it is set as 'EnsCorr'.") -# metric <- 'EnsCorr' -# } -# if (metric %in% c('FRPSS', 'RPSS')) { -# metric_fun <- "veriApply" -# metric_method <- "FairRpss" -# } else if (metric %in% c("FCRPSS", "CRPSS")) { -# metric_fun <- "veriApply" -# } else if (metric %in% c("EnsCorr", "EnsCor")) { -# metric_fun <- "veriApply" -# metric_method <- "EnsCorr" -# #... 
-# } else { -# error(logger, "Unknown verification metric defined in the recipe.") -# metric_fun <- 'NotFound' -# } -# info(logger, paste("#-------------------------- ", "\n", -# " running Skill module ", "\n", -# " it can call ", metric_fun )) - -# compute_skill_metrics <- function(recipe, data$hcst, obs, -# clim_data$hcst = NULL, -# clim_obs = NULL) { -compute_skill_metrics <- function(recipe, data) { +Skill <- function(recipe, data, agg = 'global') { # data$hcst: s2dv_cube containing the hindcast # obs: s2dv_cube containing the observations @@ -70,14 +33,14 @@ compute_skill_metrics <- function(recipe, data) { # if (recipe$Analysis$Workflow$Anomalies$compute) { # if (is.null(clim_data$hcst) || is.null(clim_obs)) { # warn(recipe$Run$logger, "Anomalies have been requested in the recipe, -# but the climatologies have not been provided in the -# compute_skill_metrics call. Be aware that some metrics like the -# Mean Bias may not be correct.") +# but the climatologies have not been provided in the +# compute_skill_metrics call. Be aware that some metrics like the +# Mean Bias may not be correct.") # } # } else { # warn(recipe$Run$logger, "Anomaly computation was not requested in the -# recipe. Be aware that some metrics, such as the CRPSS may not be -# correct.") +# recipe. Be aware that some metrics, such as the CRPSS may not be +# correct.") # } time_dim <- 'syear' memb_dim <- 'ensemble' @@ -92,17 +55,24 @@ compute_skill_metrics <- function(recipe, data) { } else { na.rm = recipe$Analysis$remove_NAs } + if (is.null(recipe$Analysis$Workflow$Skill$cross_validation)) { + warn(recipe$Run$logger, + "cross_validation parameter not defined, setting it to FALSE.") + cross.val <- FALSE + } else { + cross.val <- recipe$Analysis$Workflow$Skill$cross_validation + } skill_metrics <- list() for (metric in strsplit(metrics, ", | |,")[[1]]) { # Whether the fair version of the metric is to be computed if (metric %in% c('frps', 'frpss', 'bss10', 'bss90', - 'fcrps', 'fcrpss')) { + 'fcrps', 'fcrpss')) { Fair <- T } else { Fair <- F } # Whether to compute correlation for the ensemble mean or for each member - if (metric == 'corr') { + if (metric == 'corr_individual_members') { memb <- T } else if (metric == 'enscorr') { memb <- F @@ -110,149 +80,200 @@ compute_skill_metrics <- function(recipe, data) { # Ranked Probability Score and Fair version if (metric %in% c('rps', 'frps')) { skill <- RPS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + cross.val = cross.val, + ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill + rps_clim <- Apply(list(data$obs$data), + target_dims = c(time_dim, memb_dim), + cross.val = cross.val, + RPS_clim)$output1 + rps_clim <- .drop_dims(rps_clim) + skill_metrics[[paste0(metric, "_clim")]] <- rps_clim # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + cross.val = cross.val, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 10th percentile } else if (metric == 'bss10') { skill <- RPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - 
memb_dim = memb_dim, - prob_thresholds = 0.1, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.1, + cross.val = cross.val, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 90th percentile } else if (metric == 'bss90') { skill <- RPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - prob_thresholds = 0.9, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.9, + cross.val = cross.val, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # CRPS and FCRPS } else if (metric %in% c('crps', 'fcrps')) { skill <- CRPS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill + crps_clim <- Apply(list(data$obs$data), target_dims = time_dim, + fun = CRPS_clim, memb_dim = memb_dim, + clim.cross.val = cross.val)$output1 + crps_clim <- .drop_dims(crps_clim) + skill_metrics[['crps_clim']] <- crps_clim # CRPSS and FCRPSS } else if (metric %in% c('crpss', 'fcrpss')) { skill <- CRPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + clim.cross.val = cross.val, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$crpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + } else if (metric == 'rms') { + source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RMS.R") + hcst_mean <- Apply(list(data$hcst$data), target_dims = memb_dim, + fun = mean, na.rm = na.rm, ncores = ncores)$output1 + hcst_mean <- InsertDim(hcst_mean, pos = 1, len = 1, name = memb_dim) + skill <- RMS(exp = hcst_mean, + obs = data$obs$data, + time_dim = time_dim, dat_dim = NULL, comp_dim = NULL, + limits = NULL, conf = FALSE, alpha = 0.05, ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rms # Mean bias (climatology) } else if (metric == 'mean_bias') { ## TODO: Eliminate option to compute from anomalies # Compute from full field if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, - time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) } else { skill <- Bias(data$hcst$data, data$obs$data, time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + memb_dim = memb_dim, + ncores = ncores) } skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # Mean bias skill score } else if (metric == 'mean_bias_ss') { if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - skill <- AbsBiasSS(data$hcst.full_val$data, 
data$obs.full_val$data, - time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) } else { skill <- AbsBiasSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) } skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$biasSS skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Ensemble mean correlation - } else if (metric %in% c('enscorr', 'corr')) { + } else if (metric %in% c('enscorr', 'corr_individual_members')) { ## TODO: Return significance ## TODO: Implement option for Kendall and Spearman methods? - skill <- Corr(data$hcst$data, data$obs$data, - dat_dim = 'dat', - time_dim = time_dim, - method = 'pearson', - memb_dim = memb_dim, - memb = memb, - conf = F, - pval = F, - sign = T, - alpha = 0.05, - ncores = ncores) + skill <- s2dv::Corr(data$hcst$data, data$obs$data, + dat_dim = 'dat', + time_dim = time_dim, + method = 'pearson', + memb_dim = memb_dim, + memb = memb, + conf = F, + pval = F, + sign = T, + alpha = 0.05, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$corr skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'rmsss') { - # Compute RMSS + # Compute RMSSS skill <- RMSSS(data$hcst$data, data$obs$data, - dat_dim = 'dat', - time_dim = time_dim, - memb_dim = memb_dim, - pval = FALSE, - sign = TRUE, - sig_method = 'Random Walk', - ncores = ncores) + dat_dim = 'dat', + time_dim = time_dim, + memb_dim = memb_dim, + pval = FALSE, + sign = TRUE, + sig_method = 'Random Walk', + ncores = ncores) # Compute ensemble mean and modify dimensions skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rmsss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + } else if (metric == 'mse') { + skill <- MSE(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + dat_dim = 'dat', + comp_dim = NULL, + conf = FALSE, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$mse + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + } else if (metric == 'msss') { + skill <- MSSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + dat_dim = 'dat', + sign = TRUE, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$msss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'enssprerr') { # Remove ensemble dim from obs to avoid veriApply warning obs_noensdim <- ClimProjDiags::Subset(data$obs$data, "ensemble", 1, - drop = "selected") + drop = "selected") capture.output( skill <- easyVerification::veriApply(verifun = 'EnsSprErr', - fcst = data$hcst$data, - obs = obs_noensdim, - tdim = which(names(dim(data$hcst$data))==time_dim), - ensdim = which(names(dim(data$hcst$data))==memb_dim), - na.rm = na.rm, - ncpus = ncores) + fcst = data$hcst$data, + obs = obs_noensdim, + tdim = which(names(dim(data$hcst$data))==time_dim), + ensdim = which(names(dim(data$hcst$data))==memb_dim), + na.rm = na.rm, + ncpus = ncores) ) remove(obs_noensdim) skill <- .drop_dims(skill) @@ 
-263,32 +284,59 @@ compute_skill_metrics <- function(recipe, data) { ## Retain _specs in metric name for clarity metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', - 'rpss'))) { - warn(recipe$Run$logger, - "Some of the requested SpecsVerification metrics are not available.") + 'rpss'))) { + warn(recipe$Run$logger, + "Some of the requested SpecsVerification metrics are not available.") } capture.output( skill <- Compute_verif_metrics(data$hcst$data, data$obs$data, - skill_metrics = metric_name, - verif.dims=c("syear", "sday", "sweek"), - na.rm = na.rm, - ncores = ncores) + skill_metrics = metric_name, + verif.dims=c("syear", "sday", "sweek"), + na.rm = na.rm, + ncores = ncores) ) skill <- .drop_dims(skill) if (metric_name == "frps") { - # Compute yearly mean for FRPS - skill <- colMeans(skill, dims = 1) + # Compute yearly mean for FRPS + skill <- colMeans(skill, dims = 1) } skill_metrics[[ metric ]] <- skill } } info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####") + .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") + # Save outputs + #NURIA: I think change the output_dir is a problem for future savings + if (recipe$Analysis$Workflow$Skill$save != 'none') { + info(recipe$Run$logger, "##### START SAVING SKILL METRIC #####") + } + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Skill/") + # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim + if (recipe$Analysis$Workflow$Skill$save == 'all') { + corr_metric_names <- grep("^corr_individual_members", names(skill_metrics)) + if (length(corr_metric_names) == 0) { + save_metrics(recipe = recipe, skill = skill_metrics, + data_cube = data$hcst, agg = agg) + } else { + # Save corr + if (length(skill_metrics[corr_metric_names]) > 0) { + save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names], + data_cube = data$hcst) + } + # Save other skill metrics + if (length(skill_metrics[-corr_metric_names]) > 0) { + save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], + data_cube = data$hcst, agg = agg) + } + } + } + # Return results return(skill_metrics) } -compute_probabilities <- function(recipe, data) { - ## TODO: Do hcst and fcst at the same time - +Probabilities <- function(recipe, data) { + ## TODO: Do hcst and fcst at the same time if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { @@ -307,46 +355,46 @@ compute_probabilities <- function(recipe, data) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, "Quantiles and probability bins have been - requested, but no thresholds are provided in the recipe.") + requested, but no thresholds are provided in the recipe.") stop() } else { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { # Parse thresholds in recipe thresholds <- sapply(element, function (x) eval(parse(text = x))) quants <- compute_quants(data$hcst$data, thresholds, - ncores = ncores, - na.rm = na.rm) + ncores = ncores, + na.rm = na.rm) probs <- compute_probs(data$hcst$data, quants, - ncores = ncores, - na.rm = na.rm) + ncores = ncores, + na.rm = na.rm) for (i in seq(1:dim(quants)['bin'][[1]])) { - named_quantiles <- append(named_quantiles, - list(ClimProjDiags::Subset(quants, - 'bin', i))) - names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", - as.integer(thresholds[i]*100)) + named_quantiles <- append(named_quantiles, + 
list(ClimProjDiags::Subset(quants, + 'bin', i))) + names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", + as.integer(thresholds[i]*100)) } for (i in seq(1:dim(probs)['bin'][[1]])) { - if (i == 1) { - name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) - } else if (i == dim(probs)['bin'][[1]]) { - name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) - } else { - name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", - as.integer(thresholds[i]*100)) - } - named_probs <- append(named_probs, - list(ClimProjDiags::Subset(probs, - 'bin', i))) - names(named_probs)[length(named_probs)] <- name_i + if (i == 1) { + name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == dim(probs)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs <- append(named_probs, + list(ClimProjDiags::Subset(probs, + 'bin', i))) + names(named_probs)[length(named_probs)] <- name_i } # Compute fcst probability bins if (!is.null(data$fcst)) { - probs_fcst <- compute_probs(data$fcst$data, quants, - ncores = ncores, - na.rm = na.rm) + probs_fcst <- compute_probs(data$fcst$data, quants, + ncores = ncores, + na.rm = na.rm) for (i in seq(1:dim(probs_fcst)['bin'][[1]])) { if (i == 1) { @@ -355,54 +403,63 @@ compute_probabilities <- function(recipe, data) { name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) } else { name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", - as.integer(thresholds[i]*100)) + as.integer(thresholds[i]*100)) } named_probs_fcst <- append(named_probs_fcst, - list(ClimProjDiags::Subset(probs_fcst, - 'bin', i))) + list(ClimProjDiags::Subset(probs_fcst, + 'bin', i))) names(named_probs_fcst)[length(named_probs_fcst)] <- name_i } } } - # Rearrange dimensions and return probabilities named_probs <- lapply(named_probs, function(x) {.drop_dims(x)}) named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)}) if (!is.null(data$fcst)) { fcst_years <- dim(data$fcst$data)[['syear']] - named_probs_fcst <- lapply(named_probs_fcst, - function(x) {Subset(x, - along = 'syear', - indices = 1:fcst_years, - drop = 'non-selected')}) + named_probs_fcst <- lapply(named_probs_fcst, function(x) {.drop_dims(x)}) + # function(x) {Subset(x, + # along = 'syear', + # indices = 1:fcst_years, + # drop = 'non-selected')}) results <- list(probs = named_probs, - probs_fcst = named_probs_fcst, - percentiles = named_quantiles) + probs_fcst = named_probs_fcst, + percentiles = named_quantiles) } else { results <- list(probs = named_probs, - percentiles = named_quantiles) + percentiles = named_quantiles) } info(recipe$Run$logger, "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") + .log_memory_usage(recipe$Run$logger, when = "After anomaly computation") + # Save outputs + if (recipe$Analysis$Workflow$Probabilities$save != 'none') { + info(recipe$Run$logger, + "##### START SAVING PERCENTILES AND PROBABILITY CATEGORIES #####") + + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Skill/") + # Save percentiles + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'percentiles_only')) { + save_percentiles(recipe = recipe, percentiles = results$percentiles, + data_cube = data$hcst) + } + # Save probability bins + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = results$probs, + data_cube = 
data$hcst, type = "hcst") + if (!is.null(results$probs_fcst)) { + save_probabilities(recipe = recipe, probs = results$probs_fcst, + data_cube = data$fcst, type = "fcst") + } + } + } + + # Return results return(results) } } -## TODO: Replace with ClimProjDiags::Subset -.drop_dims <- function(metric_array) { - # Drop all singleton dimensions - metric_array <- drop(metric_array) - # If time happened to be a singleton dimension, add it back in the array - if (!("time" %in% names(dim(metric_array)))) { - dim(metric_array) <- c("time" = 1, dim(metric_array)) - } - # If array has memb dim (Corr case), change name to 'ensemble' - if ("exp_memb" %in% names(dim(metric_array))) { - names(dim(metric_array))[which(names(dim(metric_array)) == - "exp_memb")] <- "ensemble" - # } else { - # dim(metric_array) <- c(dim(metric_array), "ensemble" = 1) - } - return(metric_array) -} diff --git a/modules/Skill/compute_probabilities.R b/modules/Skill/compute_probabilities.R new file mode 100644 index 0000000000000000000000000000000000000000..44a91b96e8573d4dac77cb1f02b59fa9a3c3b3cc --- /dev/null +++ b/modules/Skill/compute_probabilities.R @@ -0,0 +1,7 @@ +compute_probabilities <- function(recipe, data) { + warning(paste0("The function compute_probabilities() has been renamed to: ", + "'Probabilities()'. The name 'compute_probabilities' will be ", + "deprecated in the next release. Please change your scripts ", + "accordingly.")) + return(Probabilities(recipe, data)) +} diff --git a/modules/Skill/compute_skill_metrics.R b/modules/Skill/compute_skill_metrics.R new file mode 100644 index 0000000000000000000000000000000000000000..98f0f2ebd2f0975fa483bc808e3f1a9df8ecfec4 --- /dev/null +++ b/modules/Skill/compute_skill_metrics.R @@ -0,0 +1,7 @@ +compute_skill_metrics <- function(recipe, data) { + warning(paste0("The function compute_skill_metrics() has been renamed to: ", + "'Skill()'. The name 'compute_skill_metrics' will be ", + "deprecated in the next release. Please change your scripts ", + "accordingly.")) + return(Skill(recipe, data)) +} diff --git a/modules/Skill/tmp/AbsBiasSS.R b/modules/Skill/tmp/AbsBiasSS.R deleted file mode 100644 index 0ceb009c7b4dc33b6f9788dc6cb8459f0e25767b..0000000000000000000000000000000000000000 --- a/modules/Skill/tmp/AbsBiasSS.R +++ /dev/null @@ -1,281 +0,0 @@ -#'Compute the Absolute Mean Bias Skill Score -#' -#'The Absolute Mean Bias Skill Score is based on the Absolute Mean Error (Wilks, -#' 2011) between the ensemble mean forecast and the observations. It measures -#'the accuracy of the forecast in comparison with a reference forecast to assess -#'whether the forecast presents an improvement or a worsening with respect to -#'that reference. The Mean Bias Skill Score ranges between minus infinite and 1. -#'Positive values indicate that the forecast has higher skill than the reference -#'forecast, while negative values indicate that it has a lower skill. Examples -#'of reference forecasts are the climatological forecast (average of the -#'observations), a previous model version, or another model. It is computed as -#'\code{AbsBiasSS = 1 - AbsBias_exp / AbsBias_ref}. The statistical significance -#'is obtained based on a Random Walk test at the 95% confidence level (DelSole -#'and Tippett, 2016). If there is more than one dataset, the result will be -#'computed for each pair of exp and obs data. -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. 
The dimensions must be the same as 'exp' except 'memb_dim' and -#' 'dat_dim'. -#'@param ref A named numerical array of the reference forecast data with at -#' least time dimension. The dimensions must be the same as 'exp' except -#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should -#' not have dataset dimension. If there is corresponding reference for each -#' experiement, the dataset dimension must have the same length as in 'exp'. If -#' 'ref' is NULL, the climatological forecast is used as reference forecast. -#' The default value is NULL. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' -#' and 'ref' are already the ensemble mean. The default value is NULL. -#'@param dat_dim A character string indicating the name of dataset dimension. -#' The length of this dimension can be different between 'exp' and 'obs'. -#' The default value is NULL. -#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or -#' kept (FALSE) for computation. The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'\item{$biasSS}{ -#' A numerical array of BiasSS with dimensions nexp, nobs and the rest -#' dimensions of 'exp' except 'time_dim' and 'memb_dim'. -#'} -#'\item{$sign}{ -#' A logical array of the statistical significance of the BiasSS -#' with the same dimensions as $biasSS. nexp is the number of -#' experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation -#' (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. 
-#'} -#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) -#'ref <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) -#'biasSS1 <- AbsBiasSS(exp = exp, obs = obs, ref = ref, memb_dim = 'member') -#'biasSS2 <- AbsBiasSS(exp = exp, obs = obs, ref = NULL, memb_dim = 'member') -#' -#'@import multiApply -#'@export -AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL, - dat_dim = NULL, na.rm = FALSE, ncores = NULL) { - - # Check inputs - ## exp, obs, and ref (1) - if (!is.array(exp) | !is.numeric(exp)) { - stop("Parameter 'exp' must be a numeric array.") - } - if (!is.array(obs) | !is.numeric(obs)) { - stop("Parameter 'obs' must be a numeric array.") - } - if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names.") - } - if (!is.null(ref)) { - if (!is.array(ref) | !is.numeric(ref)) - stop("Parameter 'ref' must be a numeric array.") - if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { - stop("Parameter 'ref' must have dimension names.") - } - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - if (!is.null(ref) & !time_dim %in% names(dim(ref))) { - stop("Parameter 'time_dim' is not found in 'ref' dimension.") - } - ## memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", - "but it should be of length = 1).") - } - } - } - ## dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim) | length(dat_dim) > 1) { - stop("Parameter 'dat_dim' must be a character string.") - } - if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", - " Set it as NULL if there is no dataset dimension.") - } - } - ## exp, obs, and ref (2) - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] - } - if (!is.null(dat_dim)) { - name_exp <- name_exp[-which(name_exp == dat_dim)] - name_obs <- name_obs[-which(name_obs == dat_dim)] - } - if (!identical(length(name_exp), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) - } - if (!is.null(ref)) { - name_ref <- sort(names(dim(ref))) - if (!is.null(memb_dim) && memb_dim %in% name_ref) { - name_ref <- name_ref[-which(name_ref == memb_dim)] - } 
- if (!is.null(dat_dim)) { - if (dat_dim %in% name_ref) { - if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be", - " equal to dataset dimension of 'exp'.")) - } - name_ref <- name_ref[-which(name_ref == dat_dim)] - } - } - if (!identical(length(name_exp), length(name_ref)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) - } - } - ## na.rm - if (!is.logical(na.rm) | length(na.rm) > 1) { - stop("Parameter 'na.rm' must be one logical value.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################ - - ## Ensemble mean - if (!is.null(memb_dim)) { - exp <- MeanDims(exp, memb_dim, na.rm = na.rm) - if (!is.null(ref) & memb_dim %in% names(dim(ref))) { - ref <- MeanDims(ref, memb_dim, na.rm = na.rm) - } - } - - ## Mean bias skill score - if (!is.null(ref)) { # use "ref" as reference forecast - if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { - target_dims_ref <- c(time_dim, dat_dim) - } else { - target_dims_ref <- c(time_dim) - } - data <- list(exp = exp, obs = obs, ref = ref) - target_dims = list(exp = c(time_dim, dat_dim), - obs = c(time_dim, dat_dim), - ref = target_dims_ref) - } else { - data <- list(exp = exp, obs = obs) - target_dims = list(exp = c(time_dim, dat_dim), - obs = c(time_dim, dat_dim)) - } - - output <- Apply(data, - target_dims = target_dims, - fun = .AbsBiasSS, - dat_dim = dat_dim, - na.rm = na.rm, - ncores = ncores) - - return(output) -} - -.AbsBiasSS <- function(exp, obs, ref = NULL, dat_dim = NULL, na.rm = FALSE) { - # exp and obs: [sdate, (dat_dim)] - # ref: [sdate, (dat_dim)] or NULL - - # Adjust exp, obs, ref to have dat_dim temporarily - if (is.null(dat_dim)) { - nexp <- 1 - nobs <- 1 - exp <- InsertDim(exp, posdim = 2, lendim = 1, name = 'dataset') - obs <- InsertDim(obs, posdim = 2, lendim = 1, name = 'dataset') - if (!is.null(ref)) { - ref <- InsertDim(ref, posdim = 2, lendim = 1, name = 'dataset') - } - ref_dat_dim <- FALSE - } else { - nexp <- as.numeric(dim(exp)[dat_dim]) - nobs <- as.numeric(dim(obs)[dat_dim]) - if (length(dim(ref)) == 1) { # ref: [sdate] - ref_dat_dim <- FALSE - } else { - ref_dat_dim <- TRUE - } - } - - biasSS <- array(dim = c(nexp = nexp, nobs = nobs)) - sign <- array(dim = c(nexp = nexp, nobs = nobs)) - - for (i in 1:nexp) { - exp_data <- exp[, i] - if (isTRUE(ref_dat_dim)) { - ref_data <- ref[, i] - } else { - ref_data <- ref - } - for (j in 1:nobs) { - obs_data <- obs[, j] - - if (isTRUE(na.rm)) { - if (is.null(ref)) { - good_values <- !is.na(exp_data) & !is.na(obs_data) - exp_data <- exp_data[good_values] - obs_data <- obs_data[good_values] - } else { - good_values <- !is.na(exp_data) & !is.na(ref_data) & !is.na(obs_data) - exp_data <- exp_data[good_values] - ref_data <- ref_data[good_values] - obs_data <- obs_data[good_values] - } - } - - ## Bias of the exp - bias_exp <- .Bias(exp = exp_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = FALSE) - ## Bias of the ref - if (is.null(ref)) { ## Climatological forecast - ref_data <- rep(mean(obs_data, na.rm = na.rm), length(obs_data)) - } - bias_ref <- .Bias(exp = ref_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = FALSE) - ## 
Skill score and significance - biasSS[i, j] <- 1 - mean(bias_exp) / mean(bias_ref) - sign[i, j] <- .RandomWalkTest(skill_A = bias_exp, skill_B = bias_ref)$signif - } - } - - if (is.null(dat_dim)) { - dim(biasSS) <- NULL - dim(sign) <- NULL - } - - - return(list(biasSS = biasSS, sign = sign)) -} diff --git a/modules/Skill/tmp/Bias.R b/modules/Skill/tmp/Bias.R deleted file mode 100644 index 0319a0f08e23e388046ce895e7a06aa82a0d9a41..0000000000000000000000000000000000000000 --- a/modules/Skill/tmp/Bias.R +++ /dev/null @@ -1,189 +0,0 @@ -#'Compute the Mean Bias -#' -#'The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference -#'between the ensemble mean forecast and the observations. It is a deterministic -#'metric. Positive values indicate that the forecasts are on average too high -#'and negative values indicate that the forecasts are on average too low. -#'It also allows to compute the Absolute Mean Bias or bias without temporal -#'mean. If there is more than one dataset, the result will be computed for each -#'pair of exp and obs data. -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -#' 'dat_dim'. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param dat_dim A character string indicating the name of dataset dimension. -#' The length of this dimension can be different between 'exp' and 'obs'. -#' The default value is NULL. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean; it should be set to NULL if the parameter -#' 'exp' is already the ensemble mean. The default value is NULL. -#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or -#' kept (FALSE) for computation. The default value is FALSE. -#'@param absolute A logical value indicating whether to compute the absolute -#' bias. The default value is FALSE. -#'@param time_mean A logical value indicating whether to compute the temporal -#' mean of the bias. The default value is TRUE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of -#''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number -#'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation -#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. 
-#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) -#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') -#' -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#'@export -Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE, - absolute = FALSE, time_mean = TRUE, ncores = NULL) { - - # Check inputs - ## exp and obs (1) - if (!is.array(exp) | !is.numeric(exp)) - stop("Parameter 'exp' must be a numeric array.") - if (!is.array(obs) | !is.numeric(obs)) - stop("Parameter 'obs' must be a numeric array.") - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop("Parameter 'time_dim' must be a character string.") - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - ## memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", - "but it should be of length = 1).") - } - } - } - ## dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim) | length(dat_dim) > 1) { - stop("Parameter 'dat_dim' must be a character string.") - } - if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", - " Set it as NULL if there is no dataset dimension.") - } - } - ## exp and obs (2) - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] - } - if (!is.null(dat_dim)) { - name_exp <- name_exp[-which(name_exp == dat_dim)] - name_obs <- name_obs[-which(name_obs == dat_dim)] - } - if (!identical(length(name_exp), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) - } - ## na.rm - if (!is.logical(na.rm) | length(na.rm) > 1) { - stop("Parameter 'na.rm' must be one logical value.") - } - ## absolute - if (!is.logical(absolute) | length(absolute) > 1) { - stop("Parameter 'absolute' must be one logical value.") - } - ## time_mean - if (!is.logical(time_mean) | length(time_mean) > 1) { - stop("Parameter 'time_mean' must be one logical value.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - - ## Ensemble mean - if (!is.null(memb_dim)) { - exp <- MeanDims(exp, memb_dim, na.rm = na.rm) - } - - ## (Mean) Bias - bias <- Apply(data = list(exp, obs), - 
target_dims = c(time_dim, dat_dim), - fun = .Bias, - time_dim = time_dim, - dat_dim = dat_dim, - na.rm = na.rm, - absolute = absolute, - time_mean = time_mean, - ncores = ncores)$output1 - - return(bias) -} - - -.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE, - absolute = FALSE, time_mean = TRUE) { - # exp and obs: [sdate, (dat)] - - if (is.null(dat_dim)) { - bias <- exp - obs - - if (isTRUE(absolute)) { - bias <- abs(bias) - } - - if (isTRUE(time_mean)) { - bias <- mean(bias, na.rm = na.rm) - } - - } else { - nexp <- as.numeric(dim(exp)[dat_dim]) - nobs <- as.numeric(dim(obs)[dat_dim]) - bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) - - for (i in 1:nexp) { - for (j in 1:nobs) { - bias[, i, j] <- exp[, i] - obs[, j] - } - } - - if (isTRUE(absolute)) { - bias <- abs(bias) - } - - if (isTRUE(time_mean)) { - bias <- MeanDims(bias, time_dim, na.rm = na.rm) - } - } - - return(bias) -} diff --git a/modules/Skill/tmp/Corr.R b/modules/Skill/tmp/Corr.R deleted file mode 100644 index c95b103492a9ea67f302c83a9bbe9e8e0bdc61fc..0000000000000000000000000000000000000000 --- a/modules/Skill/tmp/Corr.R +++ /dev/null @@ -1,463 +0,0 @@ -#'Compute the correlation coefficient between an array of forecast and their corresponding observation -#' -#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for -#'an array of forecast and an array of observation. The correlations are -#'computed along 'time_dim' that usually refers to the start date dimension. If -#''comp_dim' is given, the correlations are computed only if obs along comp_dim -#'dimension are complete between limits[1] and limits[2], i.e., there is no NA -#'between limits[1] and limits[2]. This option can be activated if the user -#'wants to account only for the forecasts which the corresponding observations -#'are available at all leadtimes.\cr -#'The confidence interval is computed by the Fisher transformation and the -#'significance level relies on an one-sided student-T distribution.\cr -#'The function can calculate ensemble mean before correlation by 'memb_dim' -#'specified and 'memb = F'. If ensemble mean is not calculated, correlation will -#'be calculated for each member. -#'If there is only one dataset for exp and obs, you can simply use cor() to -#'compute the correlation. -#' -#'@param exp A named numeric array of experimental data, with at least dimension -#' 'time_dim'. -#'@param obs A named numeric array of observational data, same dimensions as -#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. -#'@param time_dim A character string indicating the name of dimension along -#' which the correlations are computed. The default value is 'sdate'. -#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) -#' dimension. The default value is 'dataset'. If there is no dataset -#' dimension, set NULL. -#'@param comp_dim A character string indicating the name of dimension along which -#' obs is taken into account only if it is complete. The default value -#' is NULL. -#'@param limits A vector of two integers indicating the range along comp_dim to -#' be completed. The default is c(1, length(comp_dim dimension)). -#'@param method A character string indicating the type of correlation: -#' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'. -#'@param memb_dim A character string indicating the name of the member -#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no -#' member dimension, set NULL. 
The default value is NULL. -#'@param memb A logical value indicating whether to remain 'memb_dim' dimension -#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only functional when -#' 'memb_dim' is not NULL. The default value is TRUE. -#'@param pval A logical value indicating whether to return or not the p-value -#' of the test Ho: Corr = 0. The default value is TRUE. -#'@param conf A logical value indicating whether to return or not the confidence -#' intervals. The default value is TRUE. -#'@param sign A logical value indicating whether to retrieve the statistical -#' significance of the test Ho: Corr = 0 based on 'alpha'. The default value is -#' FALSE. -#'@param alpha A numeric indicating the significance level for the statistical -#' significance test. The default value is 0.05. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A list containing the numeric arrays with dimension:\cr -#' c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except -#' time_dim and memb_dim).\cr -#'nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is the -#'number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and -#'nobs are omitted. exp_memb is the number of member in experiment (i.e., -#''memb_dim' in exp) and obs_memb is the number of member in observation (i.e., -#''memb_dim' in obs). If memb = F, exp_memb and obs_memb are omitted.\cr\cr -#'\item{$corr}{ -#' The correlation coefficient. -#'} -#'\item{$p.val}{ -#' The p-value. Only present if \code{pval = TRUE}. -#'} -#'\item{$conf.lower}{ -#' The lower confidence interval. Only present if \code{conf = TRUE}. -#'} -#'\item{$conf.upper}{ -#' The upper confidence interval. Only present if \code{conf = TRUE}. -#'} -#'\item{$sign}{ -#' The statistical significance. Only present if \code{sign = TRUE}. 
-#'} -#' -#'@examples -#'# Case 1: Load sample data as in Load() example: -#'example(Load) -#'clim <- Clim(sampleData$mod, sampleData$obs) -#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) -#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) -#'runmean_months <- 12 -#' -#'# Smooth along lead-times -#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months) -#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months) -#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times -#'leadtimes_per_startdate <- 60 -#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'), -#' MeanDims(smooth_ano_obs, 'member'), -#' comp_dim = 'ftime', -#' limits = c(ceiling((runmean_months + 1) / 2), -#' leadtimes_per_startdate - floor(runmean_months / 2))) -#' -#'# Case 2: Keep member dimension -#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member') -#'# ensemble mean -#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE) -#' -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#'@importFrom stats cor pt qnorm -#'@export -Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', - comp_dim = NULL, limits = NULL, method = 'pearson', - memb_dim = NULL, memb = TRUE, - pval = TRUE, conf = TRUE, sign = FALSE, - alpha = 0.05, ncores = NULL) { - - # Check inputs - ## exp and obs (1) - if (is.null(exp) | is.null(obs)) { - stop("Parameter 'exp' and 'obs' cannot be NULL.") - } - if (!is.numeric(exp) | !is.numeric(obs)) { - stop("Parameter 'exp' and 'obs' must be a numeric array.") - } - if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", - "containing time_dim and dat_dim.")) - } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names.") - } - if(!all(names(dim(exp)) %in% names(dim(obs))) | - !all(names(dim(obs)) %in% names(dim(exp)))) { - stop("Parameter 'exp' and 'obs' must have same dimension name") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) > 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - ## dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim) | length(dat_dim) > 1) { - stop("Parameter 'dat_dim' must be a character string or NULL.") - } - if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", - " Set it as NULL if there is no dataset dimension.") - } - } - ## comp_dim - if (!is.null(comp_dim)) { - if (!is.character(comp_dim) | length(comp_dim) > 1) { - stop("Parameter 'comp_dim' must be a character string.") - } - if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) { - stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") - } - } - ## limits - if (!is.null(limits)) { - if (is.null(comp_dim)) { - stop("Paramter 'comp_dim' cannot be NULL if 'limits' is assigned.") - } - if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | - length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { - stop(paste0("Parameter 'limits' must be a vector of two positive ", - "integers smaller than the length of paramter 'comp_dim'.")) - } - } - ## method - if (!(method %in% c("kendall", "spearman", "pearson"))) { 
- stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.") - } - ## memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { - stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") - } - } - ## memb - if (!is.logical(memb) | length(memb) > 1) { - stop("Parameter 'memb' must be one logical value.") - } - ## pval - if (!is.logical(pval) | length(pval) > 1) { - stop("Parameter 'pval' must be one logical value.") - } - ## conf - if (!is.logical(conf) | length(conf) > 1) { - stop("Parameter 'conf' must be one logical value.") - } - ## sign - if (!is.logical(sign) | length(sign) > 1) { - stop("Parameter 'sign' must be one logical value.") - } - ## alpha - if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { - stop("Parameter 'alpha' must be a numeric number between 0 and 1.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - ## exp and obs (2) - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - if (!is.null(dat_dim)) { - name_exp <- name_exp[-which(name_exp == dat_dim)] - name_obs <- name_obs[-which(name_obs == dat_dim)] - } - if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] - name_obs <- name_obs[-which(name_obs == memb_dim)] - } - if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'dat_dim' and 'memb_dim'.")) - } - if (dim(exp)[time_dim] < 3) { - stop("The length of time_dim must be at least 3 to compute correlation.") - } - - - ############################### - # Sort dimension - name_exp <- names(dim(exp)) - name_obs <- names(dim(obs)) - order_obs <- match(name_exp, name_obs) - obs <- Reorder(obs, order_obs) - - - ############################### - # Calculate Corr - - # Remove data along comp_dim dim if there is at least one NA between limits - if (!is.null(comp_dim)) { - pos <- which(names(dim(obs)) == comp_dim) - if (is.null(limits)) { - obs_sub <- obs - } else { - obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) - } - outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) - outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) - obs[which(outrows)] <- NA - rm(obs_sub, outrows) - } - - if (!is.null(memb_dim)) { - if (!memb) { #ensemble mean - exp <- MeanDims(exp, memb_dim, na.rm = TRUE) - obs <- MeanDims(obs, memb_dim, na.rm = TRUE) -# name_exp <- names(dim(exp)) -# margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)] -# exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here -# obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE) - memb_dim <- NULL - } - } - - res <- Apply(list(exp, obs), - target_dims = list(c(time_dim, dat_dim, memb_dim), - c(time_dim, dat_dim, memb_dim)), - fun = .Corr, - dat_dim = dat_dim, memb_dim = memb_dim, - time_dim = time_dim, method = method, - pval = pval, conf = conf, sign = sign, alpha = alpha, - ncores = ncores) - - return(res) -} - -.Corr <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', - time_dim = 'sdate', method = 'pearson', - conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { - if (is.null(memb_dim)) { - if (is.null(dat_dim)) { - # exp: [sdate] - # obs: [sdate] 
- nexp <- 1 - nobs <- 1 - CORR <- array(dim = c(nexp = nexp, nobs = nobs)) - if (any(!is.na(exp)) && sum(!is.na(obs)) > 2) { - CORR <- cor(exp, obs, use = "pairwise.complete.obs", method = method) - } - } else { - # exp: [sdate, dat_exp] - # obs: [sdate, dat_obs] - nexp <- as.numeric(dim(exp)[dat_dim]) - nobs <- as.numeric(dim(obs)[dat_dim]) - CORR <- array(dim = c(nexp = nexp, nobs = nobs)) - for (j in 1:nobs) { - for (y in 1:nexp) { - if (any(!is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { - CORR[y, j] <- cor(exp[, y], obs[, j], - use = "pairwise.complete.obs", - method = method) - } - } - } -#---------------------------------------- -# Same as above calculation. -#TODO: Compare which is faster. -# CORR <- sapply(1:nobs, function(i) { -# sapply(1:nexp, function (x) { -# if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { -# cor(exp[, x], obs[, i], -# use = "pairwise.complete.obs", -# method = method) -# } else { -# NA -# } -# }) -# }) -#----------------------------------------- - } - - } else { # memb_dim != NULL - exp_memb <- as.numeric(dim(exp)[memb_dim]) # memb_dim - obs_memb <- as.numeric(dim(obs)[memb_dim]) - - if (is.null(dat_dim)) { - # exp: [sdate, memb_exp] - # obs: [sdate, memb_obs] - nexp <- 1 - nobs <- 1 - CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) - - for (j in 1:obs_memb) { - for (y in 1:exp_memb) { - - if (any(!is.na(exp[,y])) && sum(!is.na(obs[, j])) > 2) { - CORR[, , y, j] <- cor(exp[, y], obs[, j], - use = "pairwise.complete.obs", - method = method) - } - - } - } - } else { - # exp: [sdate, dat_exp, memb_exp] - # obs: [sdate, dat_obs, memb_obs] - nexp <- as.numeric(dim(exp)[dat_dim]) - nobs <- as.numeric(dim(obs)[dat_dim]) - - CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) - - for (j in 1:obs_memb) { - for (y in 1:exp_memb) { - CORR[, , y, j] <- sapply(1:nobs, function(i) { - sapply(1:nexp, function (x) { - if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { - cor(exp[, x, y], obs[, i, j], - use = "pairwise.complete.obs", - method = method) - } else { - NA - } - }) - }) - - } - } - } - - } - - -# if (pval) { -# for (i in 1:nobs) { -# p.val[, i] <- try(sapply(1:nexp, -# function(x) {(cor.test(exp[, x], obs[, i], -# use = "pairwise.complete.obs", -# method = method)$p.value)/2}), silent = TRUE) -# if (class(p.val[, i]) == 'character') { -# p.val[, i] <- NA -# } -# } -# } - - if (pval || conf || sign) { - if (method == "kendall" | method == "spearman") { - if (!is.null(dat_dim) | !is.null(memb_dim)) { - tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) - names(dim(tmp))[1] <- time_dim - eno <- Eno(tmp, time_dim) - } else { - tmp <- rank(obs) - tmp <- array(tmp) - names(dim(tmp)) <- time_dim - eno <- Eno(tmp, time_dim) - } - } else if (method == "pearson") { - eno <- Eno(obs, time_dim) - } - - if (is.null(memb_dim)) { - eno_expand <- array(dim = c(nexp = nexp, nobs = nobs)) - for (i in 1:nexp) { - eno_expand[i, ] <- eno - } - } else { #member - eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) - for (i in 1:nexp) { - for (j in 1:exp_memb) { - eno_expand[i, , j, ] <- eno - } - } - } - - } - -#############old################# -#This doesn't return error but it's diff from cor.test() when method is spearman and kendall - if (pval || sign) { - t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) - p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) - if (sign) signif 
<- !is.na(p.val) & p.val <= alpha - } -################################### - if (conf) { - conf.lower <- alpha / 2 - conf.upper <- 1 - conf.lower - suppressWarnings({ - conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) - confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) - }) - } - -################################### - # Remove nexp and nobs if dat_dim = NULL - if (is.null(dat_dim) & !is.null(memb_dim)) { - dim(CORR) <- dim(CORR)[3:length(dim(CORR))] - if (pval) { - dim(p.val) <- dim(p.val)[3:length(dim(p.val))] - } - if (conf) { - dim(conflow) <- dim(conflow)[3:length(dim(conflow))] - dim(confhigh) <- dim(confhigh)[3:length(dim(confhigh))] - } - } - -################################### - - res <- list(corr = CORR) - if (pval) { - res <- c(res, list(p.val = p.val)) - } - if (conf) { - res <- c(res, list(conf.lower = conflow, conf.upper = confhigh)) - } - if (sign) { - res <- c(res, list(sign = signif)) - } - - return(res) - -} diff --git a/modules/Skill/tmp/RMSSS.R b/modules/Skill/tmp/RMSSS.R deleted file mode 100644 index d2ff4861e3547a32d84bdbb268df0e7eebcd8e9f..0000000000000000000000000000000000000000 --- a/modules/Skill/tmp/RMSSS.R +++ /dev/null @@ -1,448 +0,0 @@ -#'Compute root mean square error skill score -#' -#'Compute the root mean square error skill score (RMSSS) between an array of -#'forecast 'exp' and an array of observation 'obs'. The two arrays should -#'have the same dimensions except along dat_dim, where the length can be -#'different, with the number of experiments/models (nexp) and the number of -#'observational datasets (nobs).\cr -#'RMSSS computes the root mean square error skill score of each jexp in 1:nexp -#'against each job in 1:nobs which gives nexp * nobs RMSSS for each grid point -#'of the array.\cr -#'The RMSSS are computed along the time_dim dimension which should correspond -#'to the start date dimension.\cr -#'The p-value and significance test are optionally provided by an one-sided -#'Fisher test or Random Walk test.\cr -#' -#'@param exp A named numeric array of experimental data which contains at least -#' two dimensions for dat_dim and time_dim. It can also be a vector with the -#' same length as 'obs', then the vector will automatically be 'time_dim' and -#' 'dat_dim' will be 1. -#'@param obs A named numeric array of observational data which contains at least -#' two dimensions for dat_dim and time_dim. The dimensions should be the same -#' as paramter 'exp' except the length of 'dat_dim' dimension. The order of -#' dimension can be different. It can also be a vector with the same length as -#' 'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will -#' be 1. -#'@param ref A named numerical array of the reference forecast data with at -#' least time dimension, or 0 (typical climatological forecast) or 1 -#' (normalized climatological forecast). If it is an array, the dimensions must -#' be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one -#' reference dataset, it should not have dataset dimension. If there is -#' corresponding reference for each experiment, the dataset dimension must -#' have the same length as in 'exp'. If 'ref' is NULL, the typical -#' climatological forecast is used as reference forecast (equivelant to 0.) -#' The default value is NULL. -#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) -#' dimension. The default value is 'dataset'. 
-#'@param time_dim A character string indicating the name of dimension along -#' which the RMSSS are computed. The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' -#' and 'ref' are already the ensemble mean. The default value is NULL. -#'@param pval A logical value indicating whether to compute or not the p-value -#' of the test Ho: RMSSS = 0. The default value is TRUE. -#'@param sign A logical value indicating whether to compute or not the -#' statistical significance of the test Ho: RMSSS = 0. The default value is -#' FALSE. -#'@param alpha A numeric of the significance level to be used in the -#' statistical significance test. The default value is 0.05. -#'@param sig_method A character string indicating the significance method. The -#' options are "one-sided Fisher" (default) and "Random Walk". -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A list containing the numeric arrays with dimension:\cr -#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr -#'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the -#'number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and -#'nobs are omitted.\cr -#'\item{$rmsss}{ -#' A numerical array of the root mean square error skill score. -#'} -#'\item{$p.val}{ -#' A numerical array of the p-value with the same dimensions as $rmsss. -#' Only present if \code{pval = TRUE}. -#'} -#'\item{sign}{ -#' A logical array of the statistical significance of the RMSSS with the same -#' dimensions as $rmsss. Only present if \code{sign = TRUE}. -#'} -#' -#'@examples -#' set.seed(1) -#' exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) -#' set.seed(2) -#' obs <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) -#' res <- RMSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset') -#' -#'@rdname RMSSS -#'@import multiApply -#'@importFrom stats pf -#'@export -RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', - memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, - sig_method = 'one-sided Fisher', ncores = NULL) { - - # Check inputs - ## exp, obs, and ref (1) - if (is.null(exp) | is.null(obs)) { - stop("Parameter 'exp' and 'obs' cannot be NULL.") - } - if (!is.numeric(exp) | !is.numeric(obs)) { - stop("Parameter 'exp' and 'obs' must be a numeric array.") - } - if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector - if (length(exp) == length(obs)) { - exp <- array(exp, dim = c(length(exp), 1)) - names(dim(exp)) <- c(time_dim, dat_dim) - obs <- array(obs, dim = c(length(obs), 1)) - names(dim(obs)) <- c(time_dim, dat_dim) - } else { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) - } - } else if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) - } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names.") - } - if(!all(names(dim(exp)) %in% names(dim(obs))) | - !all(names(dim(obs)) %in% names(dim(exp)))) { - stop("Parameter 'exp' and 'obs' must have same dimension name.") - } 
- if (!is.null(ref)) { - if (!is.numeric(ref)) { - stop("Parameter 'ref' must be numeric.") - } - if (is.array(ref)) { - if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { - stop("Parameter 'ref' must have dimension names.") - } - } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { - stop("Parameter 'ref' must be a numeric array or number 0 or 1.") - } - } - - ## time_dim - if (!is.character(time_dim) | length(time_dim) > 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - ## dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim) | length(dat_dim) > 1) { - stop("Parameter 'dat_dim' must be a character string or NULL.") - } - if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", - " Set it as NULL if there is no dataset dimension.") - } - } - ## memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", - "but it should be of length = 1).") - } - } - } - ## pval - if (!is.logical(pval) | length(pval) > 1) { - stop("Parameter 'pval' must be one logical value.") - } - ## sign - if (!is.logical(sign) | length(sign) > 1) { - stop("Parameter 'sign' must be one logical value.") - } - ## alpha - if (!is.numeric(alpha) | length(alpha) > 1) { - stop("Parameter 'alpha' must be one numeric value.") - } - ## sig_method - if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { - stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") - } - if (sig_method == "Random Walk" & pval == T) { - warning("p-value cannot be calculated by significance method 'Random Walk'.") - pval <- FALSE - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - ## exp and obs (2) - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - if (!is.null(memb_dim)) { - name_exp <- name_exp[-which(name_exp == memb_dim)] - } - if (!is.null(dat_dim)) { - name_exp <- name_exp[-which(name_exp == dat_dim)] - name_obs <- name_obs[-which(name_obs == dat_dim)] - } - if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'memb_dim' and 'dat_dim'.")) - } - if (!is.null(ref)) { - name_ref <- sort(names(dim(ref))) - if (!is.null(memb_dim) && memb_dim %in% name_ref) { - name_ref <- name_ref[-which(name_ref == memb_dim)] - } - if (!is.null(dat_dim)) { - if (dat_dim %in% name_ref) { - if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be ", - "equal to dataset dimension of 'exp'.")) - } - name_ref <- name_ref[-which(name_ref == dat_dim)] - } - } - if (!identical(length(name_exp), length(name_ref)) | - 
!identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) - } - } - - if (dim(exp)[time_dim] <= 2) { - stop("The length of time_dim must be more than 2 to compute RMSSS.") - } - - - ############################### -# # Sort dimension -# name_exp <- names(dim(exp)) -# name_obs <- names(dim(obs)) -# order_obs <- match(name_exp, name_obs) -# obs <- Reorder(obs, order_obs) - - - ############################### - # Create ref array if needed - if (is.null(ref)) ref <- 0 - if (!is.array(ref)) { - ref <- array(data = ref, dim = dim(exp)) - } - - ############################### - ## Ensemble mean - if (!is.null(memb_dim)) { - exp <- MeanDims(exp, memb_dim, na.rm = T) - if (!is.null(ref) & memb_dim %in% names(dim(ref))) { - ref <- MeanDims(ref, memb_dim, na.rm = T) - } - } - - ############################### - # Calculate RMSSS - -# if (!is.null(ref)) { # use "ref" as reference forecast -# if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { -# target_dims_ref <- c(time_dim, dat_dim) -# } else { -# target_dims_ref <- c(time_dim) -# } -# data <- list(exp = exp, obs = obs, ref = ref) -# target_dims = list(exp = c(time_dim, dat_dim), -# obs = c(time_dim, dat_dim), -# ref = target_dims_ref) -# } else { -# data <- list(exp = exp, obs = obs) -# target_dims = list(exp = c(time_dim, dat_dim), -# obs = c(time_dim, dat_dim)) -# } - data <- list(exp = exp, obs = obs, ref = ref) - if (!is.null(dat_dim)) { - if (dat_dim %in% names(dim(ref))) { - target_dims <- list(exp = c(time_dim, dat_dim), - obs = c(time_dim, dat_dim), - ref = c(time_dim, dat_dim)) - } else { - target_dims <- list(exp = c(time_dim, dat_dim), - obs = c(time_dim, dat_dim), - ref = c(time_dim)) - } - } else { - target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) - } - - res <- Apply(data, - target_dims = target_dims, - fun = .RMSSS, - time_dim = time_dim, dat_dim = dat_dim, - pval = pval, sign = sign, alpha = alpha, - sig_method = sig_method, - ncores = ncores) - - return(res) -} - -.RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, - sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher') { - # exp: [sdate, (dat)] - # obs: [sdate, (dat)] - # ref: [sdate, (dat)] or NULL - - if (is.null(ref)) { - ref <- array(data = 0, dim = dim(obs)) - } else if (identical(ref, 0) | identical(ref, 1)) { - ref <- array(ref, dim = dim(exp)) - } - - if (is.null(dat_dim)) { - # exp: [sdate] - # obs: [sdate] - nexp <- 1 - nobs <- 1 - nref <- 1 - # Add dat dim back temporarily - dim(exp) <- c(dim(exp), dat = 1) - dim(obs) <- c(dim(obs), dat = 1) - dim(ref) <- c(dim(ref), dat = 1) - - } else { - # exp: [sdate, dat_exp] - # obs: [sdate, dat_obs] - nexp <- as.numeric(dim(exp)[2]) - nobs <- as.numeric(dim(obs)[2]) - if (dat_dim %in% names(dim(ref))) { - nref <- as.numeric(dim(ref)[2]) - } else { - dim(ref) <- c(dim(ref), dat = 1) - nref <- 1 - } - } - - nsdate <- as.numeric(dim(exp)[1]) - - # RMS of forecast - dif1 <- array(dim = c(nsdate, nexp, nobs)) - names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') - - for (i in 1:nobs) { - dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) - } - - rms_exp <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) - - # RMS of reference -# if (!is.null(ref)) { - dif2 <- array(dim = c(nsdate, nref, nobs)) - names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') - for (i in 
1:nobs) { - dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) - } - rms_ref <- apply(dif2^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nref, nobs)) - if (nexp != nref) { - # expand rms_ref to nexp (nref is 1) - rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) - rms_ref <- Reorder(rms_ref, c(2, 1)) - } -# } else { -# rms_ref <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs, nexp = nexp)) -## rms_ref[which(abs(rms_ref) <= (max(abs(rms_ref), na.rm = TRUE) / 1000))] <- max(abs( -## rms_ref), na.rm = TRUE) / 1000 -# rms_ref <- Reorder(rms_ref, c(2, 1)) -# #rms_ref above: [nexp, nobs] -# } - - rmsss <- 1 - rms_exp / rms_ref - -################################################# - -# if (conf) { -# conflow <- (1 - conf.lev) / 2 -# confhigh <- 1 - conflow -# conf_low <- array(dim = c(nexp = nexp, nobs = nobs)) -# conf_high <- array(dim = c(nexp = nexp, nobs = nobs)) -# } - - if (sig_method == 'one-sided Fisher') { - p_val <- array(dim = c(nexp = nexp, nobs = nobs)) - ## pval and sign - if (pval || sign) { - eno1 <- Eno(dif1, time_dim) - if (is.null(ref)) { - eno2 <- Eno(obs, time_dim) - eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) - eno2 <- Reorder(eno2, c(2, 1)) - } else { - eno2 <- Eno(dif2, time_dim) - if (nref != nexp) { - eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) - eno2 <- Reorder(eno2, c(2, 1)) - } - } - - F.stat <- (eno2 * rms_ref^2 / (eno2 - 1)) / ((eno1 * rms_exp^2 / (eno1- 1))) - tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 - p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) - if (sign) signif <- p_val <= alpha - # If there isn't enough valid data, return NA - p_val[which(!tmp)] <- NA - if (sign) signif[which(!tmp)] <- NA - - # change not enough valid data rmsss to NA - rmsss[which(!tmp)] <- NA - } - - } else if (sig_method == "Random Walk") { - signif <- array(dim = c(nexp = nexp, nobs = nobs)) - for (i in 1:nexp) { - for (j in 1:nobs) { - - # Error - error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) - if (nref == nexp) { - error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) - } else { - # nref = 1 - error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) - } - signif[i, j] <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref)$signif - } - } - } - - ################################### - # Remove extra dimensions if dat_dim = NULL - if (is.null(dat_dim)) { - dim(rmsss) <- NULL - dim(p_val) <- NULL - if (sign) dim(signif) <- NULL - } - ################################### - - # output - res <- list(rmsss = rmsss) - if (pval) { - p.val <- list(p.val = p_val) - res <- c(res, p.val) - } - if (sign) { - signif <- list(sign = signif) - res <- c(res, signif) - } - - return(res) -} diff --git a/modules/Skill/tmp/RandomWalkTest.R b/modules/Skill/tmp/RandomWalkTest.R deleted file mode 100644 index adeadc1ec94b62920c885640938f966c91e75ddc..0000000000000000000000000000000000000000 --- a/modules/Skill/tmp/RandomWalkTest.R +++ /dev/null @@ -1,82 +0,0 @@ -#'Random walk test for skill differences -#' -#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a -#'common reference) based on Random Walks, with significance estimate at the 95% -#'confidence level, as in DelSole and Tippett (2016). -#' -#'@param skill_A A numerical array of the time series of the skill with the -#' forecaster A's. -#'@param skill_B A numerical array of the time series of the skill with the -#' forecaster B's. 
The dimensions should be identical as parameter 'skill_A'.
-#'@param time_dim A character string indicating the name of the dimension along
-#' which the tests are computed. The default value is 'sdate'.
-#'@param ncores An integer indicating the number of cores to use for parallel
-#' computation. The default value is NULL.
-#'
-#'@return A list of 2:
-#'\item{$score}{
-#' A numerical array with the same dimensions as the input arrays except
-#' 'time_dim'. The number of times that forecaster A has been better than
-#' forecaster B minus the number of times that forecaster B has been better
-#' than forecaster A (for skill positively oriented). If $score is positive
-#' forecaster A is better than forecaster B, and if $score is negative
-#' forecaster B is better than forecaster A.
-#'}
-#'\item{$signif}{
-#' A logical array with the same dimensions as the input arrays except
-#' 'time_dim'. Whether the difference is significant or not at the 5%
-#' significance level.
-#'}
-#'
-#'@examples
-#' fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2))
-#' fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2))
-#' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2))
-#' skill_A <- abs(fcst_A - reference)
-#' skill_B <- abs(fcst_B - reference)
-#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1)
-#'
-#'@import multiApply
-#'@export
-RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){
-
-  ## Check inputs
-  if (is.null(skill_A) | is.null(skill_B)){
-    stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.")
-  }
-  if(!is.numeric(skill_A) | !is.numeric(skill_B)){
-    stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.")
-  }
-  if (!identical(dim(skill_A),dim(skill_B))){
-    stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.")
-  }
-  if(!is.character(time_dim)){
-    stop("Parameter 'time_dim' must be a character string.")
-  }
-  if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){
-    stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.")
-  }
-  if (!is.null(ncores)){
-    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){
-      stop("Parameter 'ncores' must be a positive integer.")
-    }
-  }
-
-  ## Compute the Random Walk Test
-  res <- multiApply::Apply(data = list(skill_A, skill_B),
-                           target_dims = time_dim,
-                           fun = .RandomWalkTest,
-                           ncores = ncores)
-  return(res)
-}
-
-.RandomWalkTest <- function(skill_A, skill_B){
-
-  score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B)
-
-  ## TRUE if significant (if last value is above or below 2*sqrt(N))
-  signif <- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(length(skill_A))) | (score[length(skill_A)] > 2*sqrt(length(skill_A))),
-                   yes = TRUE, no = FALSE)
-
-  return(list("score" = score[length(skill_A)], "signif" = signif))
-}
diff --git a/modules/Units/R/deaccumulate.R b/modules/Units/R/deaccumulate.R
new file mode 100644
index 0000000000000000000000000000000000000000..e1fcd6f59cc246b4c99267aba35291e622d6e173
--- /dev/null
+++ b/modules/Units/R/deaccumulate.R
@@ -0,0 +1,7 @@
+deaccumulate <- function(data, ncores = NULL) {
+  data[[1]]$data <- Apply(list(data[[1]]$data), target_dims = 'time',
+                          fun = function(x) {
+                            c(x[1], diff(x))
+                          }, ncores = ncores)$output1
+  return(data)
+}
diff --git a/modules/Units/R/transform_units_precipitation.R b/modules/Units/R/transform_units_precipitation.R
new file mode 100644 index
0000000000000000000000000000000000000000..d0dd7ffd50dfb6f04db35006a06cd61b1d8f43e9 --- /dev/null +++ b/modules/Units/R/transform_units_precipitation.R @@ -0,0 +1,120 @@ +transform_units_precipitation <- function(data, original_units, new_units, + var_name, freq, flux = FALSE, ncores = NULL, + var_index = 1) { + +## TODO consider higher frequencies (e.g. 6hourly) +## could create a constant hours <- 24 or hours <- 6 and use the same code + + data_arr <- data[[1]]$data + data_list <- asplit(data_arr, which(names(dim(data_arr)) == "var")) + + + if (original_units == "ms-1") { + if (new_units == "mm") { + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 * 1000 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "mm" + } else if (new_units == "m") { + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm' + } else if (new_units == "kgm-2s-1") { + data_list[[var_index]] <- data_list[[var_index]] * 1000 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'kg m-2 s-1' + } else { + stop(paste("Unknown transformation from", original_units, "to", new_units)) + } + } else if (original_units == "mm") { + if (new_units == "ms-1") { + data_list[[var_index]] <- data_list[[var_index]] / (3600 * 24 * 1000) + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm s-1' + } else if (new_units == "m") { + data_list[[var_index]] <- data_list[[var_index]] / 1000 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm' + } else if (new_units == "kgm-2s-1") { + data_list[[var_index]] <- data_list[[var_index]] / (3600 * 24) + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'kg m-2 s-1' + } else { + stop(paste("Unknown transformation from", original_units, "to", new_units)) + } + } else if (original_units == "m") { + if (new_units == "ms-1") { + data_list[[var_index]] <- data_list[[var_index]] / (3600 * 24) + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm s-1' + } else if (new_units == "mm") { + data_list[[var_index]] <- data_list[[var_index]] * 1000 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'mm' + } else if (new_units == "kgm-2s-1") { + data_list[[var_index]] <- data_list[[var_index]] * 1000 / (3600 * 24) + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'kg m-2 s-1' + } else { + stop(paste("Unknown transformation from", original_units, "to", new_units)) + } + } else if (original_units == "kgm-2s-1") { + if (new_units == "ms-1") { + data_list[[var_index]] <- data_list[[var_index]] / 1000 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm s-1' + } else if (new_units == "mm") { + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'mm' + } else if (new_units == "m") { + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 / 1000 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm' + } else { + stop(paste("Unknown transformation from", original_units, "to", new_units)) + } + } else { + stop("Unknown precipitation units transformation") + } + + if (flux) { + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- paste0( + data[[1]]$attrs$Variable$metadata[[var_name]]$units, "/day") + } else { + ## TODO: Shouldn't use time dimension, need it for Compute() + if (freq == "monthly_mean") { # could it not be mean? 
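+      # For monthly means of non-flux variables, turn the per-day amounts into
+      # monthly accumulations by multiplying each time step by the number of
+      # days in its month, using the calendar read from the metadata
+      # (e.g. 5 mm/day over a 31-day month becomes 155 mm).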
+ time_pos <- which(lapply(data[[1]]$attrs$Variable$metadata[[var_name]]$dim, + function(x) {x$name}) == 'time') + cal <- tolower(data[[1]]$attrs$Variable$metadata[[var_name]]$dim[[time_pos]]$calendar) + data_list[[var_index]] <- + Apply(list(data_list[[var_index]], data[[1]]$attrs$Dates), + target_dim = list(c('syear'), c('syear')), + extra_info = list(cal = cal, days_in_month = .days_in_month), + fun = function(x, y) { + date <- as.Date(y, "%Y-%m-%d") + num_days <- .days_in_month(date, cal = .cal) + res <- x * num_days + }, ncores = ncores)$output1 + } + } + + # Combine list back to array + data_arr <- array(unlist(data_list), + dim = c(dim(data_list[[1]]), var = length(data_list))) + data[[1]]$data <- aperm(data_arr, match(names(dim(data[[1]]$data)), + names(dim(data_arr)))) + + return(data) +} + + +.days_in_month <- function(dates, cal) { + n_days <- array() + for (x in 1:length(dates)) { + if (cal %in% c('gregorian', 'standard', 'proleptic_gregorian')) { + N_DAYS_IN_MONTHS <- lubridate:::N_DAYS_IN_MONTHS + if (leap_year(year(dates[x]))) { + N_DAYS_IN_MONTHS[2] <- N_DAYS_IN_MONTHS[2] + 1 + } + } else if (cal %in% c('360', '360_day')) { + N_DAYS_IN_MONTHS <- rep(30, 12) + names(N_DAYS_IN_MONTHS) <- month.abb + } else if (cal %in% c('365', '365_day')) { + N_DAYS_IN_MONTHS <- lubridate:::N_DAYS_IN_MONTHS + } else { + stop("Unknown calendar") + } + month_x <- month(dates[x], label = TRUE, locale = "C") + n_days[x] <- N_DAYS_IN_MONTHS[month_x] + # n_days[month_x == "Feb" & leap_year(x)] <- 29L + } + return(n_days) +} diff --git a/modules/Units/R/transform_units_pressure.R b/modules/Units/R/transform_units_pressure.R new file mode 100644 index 0000000000000000000000000000000000000000..58d51b1e98df3a5f1758a45ab7ee7817cc670267 --- /dev/null +++ b/modules/Units/R/transform_units_pressure.R @@ -0,0 +1,37 @@ +transform_units_pressure <- function(data, original_units, new_units, var_name, + var_index = 1) { + + data_arr <- data[[1]]$data + data_list <- asplit(data_arr, which(names(dim(data_arr)) == "var")) + + if (original_units == 'pa') { + data_list[[var_index]] <- data_list[[var_index]] / 100 + if (new_units == 'hpa') { + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'hPa' + } else if (new_units == 'mb') { + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'mb' + } + } else if (original_units == 'hpa') { + if (new_units == 'pa') { + data_list[[var_index]] <- data_list[[var_index]] * 100 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "Pa" + } else if (new_units == "mb") { + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "mb" + } + } else if (original_units == "mb") { + if (new_units == 'pa') { + data_list[[var_index]] <- data_list[[var_index]] * 100 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "Pa" + } else if (new_units == "hPa") { + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "hPa" + } + } + + # Combine list back to array + data_arr <- array(unlist(data_list), + dim = c(dim(data_list[[1]]), var = length(data_list))) + data[[1]]$data <- aperm(data_arr, match(names(dim(data[[1]]$data)), + names(dim(data_arr)))) + + return(data) +} diff --git a/modules/Units/R/transform_units_temperature.R b/modules/Units/R/transform_units_temperature.R new file mode 100644 index 0000000000000000000000000000000000000000..985483f08897d6f0e84a40f2db66aab89f2c3bec --- /dev/null +++ b/modules/Units/R/transform_units_temperature.R @@ -0,0 +1,22 @@ +transform_units_temperature <- function(data, original_units, new_units, + var_name, var_index = 1) { + + data_arr 
<- data[[1]]$data + data_list <- asplit(data_arr, which(names(dim(data_arr)) == "var")) + if (original_units == 'c' & new_units == 'k') { + data_list[[var_index]] <- data_list[[var_index]] + 273.15 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "K" + } + if (original_units == 'k' & new_units == 'c') { + data_list[[var_index]] <- data_list[[var_index]] - 273.15 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "C" + } + + # Combine list back to array + data_arr <- array(unlist(data_list), + dim = c(dim(data_list[[1]]), var = length(data_list))) + data[[1]]$data <- aperm(data_arr, match(names(dim(data[[1]]$data)), + names(dim(data_arr)))) + + return(data) +} diff --git a/modules/Units/R/units_transform.R b/modules/Units/R/units_transform.R new file mode 100644 index 0000000000000000000000000000000000000000..11e05f3116ca2870e742606db99276064592aa85 --- /dev/null +++ b/modules/Units/R/units_transform.R @@ -0,0 +1,37 @@ +# data is a s2dv_cube object +# units as character +units_transform <- function(data, orig_units, user_units, var_name, freq, flux = FALSE, + ncores = NULL) { + ## TODO: Change how argument 'flux' works + for (i in 1:length(var_name)) { + if (!(orig_units[i] == user_units[i])) { + if (orig_units[i] %in% c("c", "k")) { + if (user_units[i] %in% c("c", "k")) { + data <- transform_units_temperature(data, orig_units[i], user_units[i], + var_name[i], var_index = i) + } else { + stop("Transformation temperature units not available.") + } + } else if (orig_units[i] %in% c("ms-1", "kgm-2s-1", "mm", "m")) { + if (user_units[i] %in% c("ms-1", "kgm-2s-1", "mm", "m")) { + data <- transform_units_precipitation(data, orig_units[i], user_units[i], + var_name[i], freq, flux, + ncores = ncores, var_index = i) + } else { + stop("Transformation precipitation units not available.") + } + } else if (orig_units[i] %in% c("pa", "hpa", "mb")) { + if (user_units[i] %in% c("pa", "hpa", "mb")) { + data <- transform_units_pressure(data, orig_units[i], user_units[i], + var_name[i]) + } else { + stop("Transformation precipitation units not available.") + } + } else { + stop(paste("Transformation unknown from", orig_units[i], "to", user_units[i])) + } + } + } + return(data) +} + diff --git a/modules/Units/Units.R b/modules/Units/Units.R new file mode 100644 index 0000000000000000000000000000000000000000..a143c0a2037684594cd7642149fa039240e4784c --- /dev/null +++ b/modules/Units/Units.R @@ -0,0 +1,92 @@ +# This function aims to convert units +## +source("modules/Units/R/units_transform.R") +source("modules/Units/R/transform_units_temperature.R") +source("modules/Units/R/transform_units_precipitation.R") +source("modules/Units/R/transform_units_pressure.R") +# Requires recipe to include: +# recipe$Analysis$Variables: +# $freq +# $flux (if precipitation) +# $units (optional) +Units <- function(recipe, data) { + # from recipe read the user defined units + # from data the original units + # deaccumulate option for CDS accumulated variables? + ncores <- recipe$Analysis$ncores + ## Do we need to convert other than ECVs? 
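+  # Gather the original units stored in the metadata of each loaded object
+  # (typically hcst, fcst and obs) and the units requested by the user in the
+  # recipe. The recipe can provide them either as a named list or as a
+  # comma-separated string, e.g. a units entry like {tas: C, prlr: mm} or "C, mm".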
+ var_names <- lapply(data, function(x) {x$attrs$Variable$varName}) + freq <- recipe$Analysis$Variable$freq + if (is.null(recipe$Analysis$Variable$flux)) { + flux <- FALSE + } else { + flux <- recipe$Analysis$Variable$flux + } + orig_units <- vector('list', length = length(data)) + names(orig_units) <- names(var_names) + for (element in names(var_names)) { + for (x in var_names[[element]]) { + orig_units[[element]] <- c(orig_units[[element]], + data[[element]]$attrs$Variable$metadata[[x]]$units) + } + } + if (is.null(recipe$Analysis$Variables$units)) { + user_units <- orig_units[[which(!is.null(orig_units))[1]]] + } else { + user_units <- recipe$Analysis$Variables$units + if (!is.null(names(user_units))) { + # Change to vector and ensure that the units are in the correct order + user_units <- unlist(user_units[var_names[[1]]]) + } else if (length(var_names[[1]] > 1)) { + ## TODO: How to handle spaces in multi-var case? + user_units <- strsplit(recipe$Analysis$Variables$units, ", |,")[[1]] + } + } + # remove spaces, "**", "*" and "per" from units + user_units <- tolower(user_units) + user_units <- gsub(" ", "", user_units) + user_units <- gsub("\\**", "", user_units) + user_units <- gsub("\\*", "", user_units) + orig_units <- lapply(orig_units, function(x) { + if (!is.null(x)) { + x <- tolower(x) + x <- gsub(" ", "", x) + x <- gsub("\\**", "", x) + x <- gsub("\\*", "", x) + } + }) + ## TODO: + ## if "/" appears substitute by -1 in at the end of next unit. How to know? + # Check if all units are equal (if so, no conversion needed; else convert) + if (length(unique(c(unlist(orig_units), user_units))) == length(user_units)) { + info(recipe$Run$logger, "##### NO UNIT CONVERSION NEEDED #####") + res <- data + } else { + if (recipe$Run$filesystem == 'esarchive' && + (sum(!sapply(unique(orig_units), is.null)) != 1)) { + warn(recipe$Run$logger, + paste("The units in", paste(names(orig_units), collapse = ', '), + "were not all equal and will be uniformized.", + "If this is not expected, please contact the ES data team.")) + } + res <- sapply(1:length(data), function(x) { + if (!is.null(data[[x]])) { + if (!all(orig_units[x] == user_units)) { + result <- units_transform(data[x], + orig_units = orig_units[[x]], + user_units = user_units, + var_names[[x]], freq = freq, + flux = flux, + ncores = ncores) + } else { + result <- data[x] + } + } else { + result <- data[x] + } + return(result) + }, simplify = TRUE) # instead of lapply to get the named list directly + info(recipe$Run$logger, "##### UNIT CONVERSION COMPLETE #####") + } + return(res) +} diff --git a/modules/Visualization/R/get_proj_code.R b/modules/Visualization/R/get_proj_code.R new file mode 100644 index 0000000000000000000000000000000000000000..ba040c0089b87dc6295751445871a1d97eafaf62 --- /dev/null +++ b/modules/Visualization/R/get_proj_code.R @@ -0,0 +1,22 @@ +# This function returns the crs code for the projection indicated in +# 'proj_name' depending on the version of GDAL and proj.4, to be used +# in the PlotRobinson() function + +get_proj_code <- function(proj_name) { + # Define list with the grs codes for each projection + code_list <- list(robinson = c(54030, "ESRI:53030"), + stereographic = c(3995, "EPSG:3995"), + lambert_europe = c(102014, paste("+proj=lcc +lat_0=30", + "+lon_0=10 +lat_1=43", + "+lat_2=62 +x_0=0 +y_0=0", + "+ellps=intl +units=m", + "+no_defs +type=crs"))) + # Get crs code version depending on GDAL and PROJ version + if ((sf_extSoftVersion()[['GDAL']] < "3.0.0") && + (sf_extSoftVersion()[['PROJ']] < "6.0.0")) { + 
proj_code <- as.integer(code_list[[proj_name]][1]) + } else { + proj_code <- code_list[[proj_name]][2] + } + return(proj_code) +} diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R new file mode 100644 index 0000000000000000000000000000000000000000..3d00742dd4de5443c9cafddd13a7990ffafc1dd4 --- /dev/null +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -0,0 +1,221 @@ +plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, output_conf) { + ## TODO: Add 'anomaly' to plot title + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { + stop("Visualization functions not yet implemented for daily data.") + } + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon + archive <- get_archive(recipe) + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, + start = 1, stop = 2)) + } else { + ## TODO: Sort out decadal initial month (is it always January?) + init_month <- 1 + } + if (!is.null(recipe$Analysis$Workflow$Visualization$projection)) { + projection <- tolower(recipe$Analysis$Workflow$Visualization$projection) + } else { + projection <- "cylindrical_equidistant" + } + # Compute ensemble mean + ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') + # Loop over variable dimension + for (var in 1:fcst$dims[['var']]) { + variable <- fcst$attrs$Variable$varName[[var]] + var_long_name <- fcst$attrs$Variable$metadata[[variable]]$long_name + units <- fcst$attrs$Variable$metadata[[variable]]$units + # Subset ensemble mean by variable + var_ens_mean <- ClimProjDiags::Subset(ensemble_mean, + along = c("dat", "var", + "sday", "sweek"), + indices = list(1, var, 1, 1), + drop = 'selected') + + var_ens_mean <- Reorder(var_ens_mean, c("syear", + "time", + "longitude", + "latitude")) + ## TODO: Redefine column colors, possibly depending on variable + if (variable == 'prlr') { + palette = "BrBG" + rev = F + } else { + palette = "RdBu" + rev = T + } + # Define brks, centered around zero in the case of anomalies + if (grepl("anomaly", var_long_name)) { + variable <- paste(variable, "anomaly") + max_value <- max(abs(var_ens_mean)) + ugly_intervals <- seq(-max_value, max_value, max_value/20) + brks <- pretty(ugly_intervals, n = 12, min.n = 8) + cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) + } else { + if (variable == 'prlr') { + cols <- c("#FFAB38", "white", "#41CBC9") + col_fun <- colorRampPalette(cols) + } else { + cols <- c("#33BFD1", "white", "#FF764D") + col_fun <- colorRampPalette(cols) + } + if (all(is.na(var_ens_mean))) { + brks <- NULL + } else { + brks <- pretty(range(var_ens_mean, na.rm = T), n = 15, min.n = 8) + } + if (is.null(brks)) { + cols <- NULL + } else { + cols <- col_fun(length(brks) - 1) + } + } + + for (i_syear in start_date) { + if (length(start_date) == 1) { + i_var_ens_mean <- ClimProjDiags::Subset(var_ens_mean, + along = c("syear"), + indices = which(start_date == i_syear), + drop = 'selected') + outfile <- paste0(outdir[[var]], "forecast_ensemble_mean-", start_date) + } else { + i_var_ens_mean <- ClimProjDiags::Subset(var_ens_mean, + along = c("syear"), + indices = which(start_date == i_syear), + drop = 'selected') + outfile <- paste0(outdir[[var]], "forecast_ensemble_mean-", i_syear) + } + # Mask + if 
(!is.null(mask)) { + outfile <- paste0(outfile, "_enscormask") + var_mask <- ClimProjDiags::Subset(mask, + along = c("var"), + indices = var, + drop = 'selected') + dim_mask <- dim(var_mask) + var_mask <- as.numeric(var_mask <= 0) + dim(var_mask) <- dim_mask + } + # Dots + if (!is.null(dots)) { + outfile <- paste0(outfile, "_enscordots") + var_dots <- ClimProjDiags::Subset(dots, + along = c("var"), + indices = var, + drop = 'selected') + dim_dots <- dim(var_dots) + var_dots <- as.numeric(var_dots <= 0) + dim(var_dots) <- dim_dots + } + toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + "\n", "Forecast Ensemble Mean / ", "Init.: ", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) + + if (recipe$Analysis$Workflow$Visualization$multi_panel) { + # Define name of output file and titles + titles <- as.vector(months) + # Plots + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + i_var_ens_mean, longitude, latitude, + mask = mask, + dots = dots, + filled.continents = F, + toptitle = toptitle, + title_scale = 0.7, + subtitle_scale = 0.8, + subtitle_margin_scale = 2, + titles = titles, + units = units, + cols = cols, + brks = brks, + fileout = paste0(outfile, ".png"), + bar_label_digits = 4, + extra_margin = rep(1, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.1) + } else { + # Define function and parameters depending on projection + if (projection == 'cylindrical_equidistant') { + fun <- PlotEquiMap + output_configuration <- output_conf$PlotEquiMap$forecast_ensemble_mean + base_args <- list(var = NULL, dots = NULL, mask = NULL, + lon = longitude, lat = latitude, + dot_symbol = 20, title_scale = 0.6, + font.main = 2, + filled.continents = F, brks = brks, cols = cols, + bar_label_digits = 4, bar_label_scale = 1.5, + axes_label_scale = 1, units = units) + base_args[names(output_configuration)] <- output_configuration + } else { + fun <- PlotRobinson + common_projections <- c("robinson", "stereographic", "lambert_europe") + if (projection %in% common_projections) { + target_proj <- get_proj_code(projection) + } else { + target_proj <- projection + } + base_args <- list(data = NULL, mask = NULL, dots = NULL, + lon = longitude, lat = latitude, + lon_dim = 'longitude', lat_dim = 'latitude', + target_proj = target_proj, legend = 's2dv', + style = 'point', brks = brks, cols = cols) + } + # Loop over forecast times + for (i in 1:length(months)) { + # Get forecast time label + forecast_time <- match(months[i], month.name) - init_month + 1 + if (forecast_time < 1) { + forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + # Get mask subset + if (!is.null(mask)) { + mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE) + } else { + mask_i <- NULL + } + # Get dots subset + if (!is.null(dots)) { + dots_i <- Subset(var_dots, along = 'time', indices = i, drop = TRUE) + } else { + dots_i <- NULL + } + # Define plot title + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Ensemble Mean / ", + months[i], " ", years[i], + " / Start date: ", + format(as.Date(i_syear, format="%Y%m%d"), + "%d-%m-%Y")) + # Define caption + if (identical(fun, PlotRobinson)) { + ## TODO: Customize technical details + base_args[['caption']] <- + paste0("Nominal start date: ", start_date, "\n", + "Forecast month: ", sprintf("%02d", i), "\n", + "Reference: ", 
recipe$Analysis$Datasets$Reference) + } + # Modify base arguments + base_args[[1]] <- i_var_ens_mean[i, , ] + fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".png") + base_args$mask <- mask_i + base_args$dots <- dots_i + # Plot + do.call(fun, + args = c(base_args, + list(toptitle = toptitle, + fileout = fileout))) + } + } + } + } + info(recipe$Run$logger, + "##### FORECAST ENSEMBLE MEAN PLOTS SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R new file mode 100644 index 0000000000000000000000000000000000000000..5d60f8c1747c8ec8b5435902a3c44107d9a4873f --- /dev/null +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -0,0 +1,233 @@ +## Functions required for normal cat and triangles end. +## See https://earth.bsc.es/gitlab/external/cstools/-/issues/125 +source("modules/Visualization/R/tmp/PlotMostLikelyQuantileMap.R") +source("modules/Visualization/R/tmp/PlotCombinedMap.R") +source("modules/Visualization/R/tmp/ColorBar.R") +source("modules/Visualization/R/tmp/clim.palette.R") +source("modules/Visualization/R/tmp/Utils.R") +source("modules/Visualization/R/tmp/PlotEquiMap.R") +source("modules/Visualization/R/tmp/ColorBar_onebox.R") +source("modules/Visualization/R/tmp/GradientCatsColorBar.R") + +## TODO: Change name +plot_most_likely_terciles <- function(recipe, + fcst, + probabilities, + mask, + dots, + outdir, + output_conf) { + + ## TODO: Add 'anomaly' to plot title + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { + stop("Visualization functions not yet implemented for daily data.") + } + + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon + archive <- get_archive(recipe) + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, + start = 1, stop = 2)) + } else { + ## TODO: Sort out decadal initial month (is it always January?) 
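+    # Decadal forecasts are assumed to be initialized in January; init_month
+    # is only used below to derive the forecast time labels of the output files.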
+ init_month <- 1 + } + + # Retrieve and rearrange probability bins for the forecast + if (is.null(probabilities$probs_fcst$prob_b33) || + is.null(probabilities$probs_fcst$prob_33_to_66) || + is.null(probabilities$probs_fcst$prob_a66)) { + stop("The forecast tercile probability bins are not present inside ", + "'probabilities', the most likely tercile map cannot be plotted.") + } + + probs_fcst <- abind(probabilities$probs_fcst$prob_b33, + probabilities$probs_fcst$prob_33_to_66, + probabilities$probs_fcst$prob_a66, + along = 0) + names(dim(probs_fcst)) <- c("bin", + names(dim(probabilities$probs_fcst$prob_b33))) + + ## TODO: Improve this section + # Drop extra dims, add time dim if missing: + for (var in 1:fcst$dims[['var']]) { + variable <- fcst$attrs$Variable$varName[[var]] + var_long_name <- fcst$attrs$Variable$metadata[[variable]]$long_name + # Choose colors depending on the variable + if (variable %in% c('prlr')) { ## add others + cols <- list(c("#FFC473", "#FFAB38"), + c("grey"), + c("#A0E5E4","#41CBC9")) + col_sup <- list("darkorange1", "grey", "deepskyblue3") + } else { + cols <- list(c("#A0E5E4","#33BFD1"), + c("grey"), + c("#FFB19A", "#FF764D")) + col_sup <- list("deepskyblue3", "grey", "indianred3") + } + var_probs <- ClimProjDiags::Subset(probs_fcst, + along = c("var"), + indices = var, + drop = 'selected') + var_probs <- Reorder(var_probs, + c("syear", "time", "bin", "longitude", "latitude")) + for (i_syear in start_date) { + # Define name of output file and titles + i_var_probs <- ClimProjDiags::Subset(var_probs, + along = c("syear"), + indices = which(start_date == i_syear), + drop = 'selected') + outfile <- paste0(outdir[[var]], "forecast_most_likely_tercile-", i_syear) + # Mask + if (!is.null(mask)) { + outfile <- paste0(outfile, "_rpssmask") + var_mask <- ClimProjDiags::Subset(mask, + along = c("var"), + indices = var, + drop = 'selected') + dim_mask <- dim(var_mask) + var_mask <- as.numeric(var_mask <= 0) + dim(var_mask) <- dim_mask + } + # Dots + if (!is.null(dots)) { + outfile <- paste0(outfile, "_rpssdots") + var_dots <- ClimProjDiags::Subset(dots, + along = c("var"), + indices = var, + drop = 'selected') + dim_dots <- dim(var_dots) + var_dots <- as.numeric(var_dots <= 0) + dim(var_dots) <- dim_dots + } + toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + "\n", "Most Likely Tercile / Initialization: ", + i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F,locale = "en_GB") + years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) + if (recipe$Analysis$Workflow$Visualization$multi_panel) { + ## TODO: Ensure this works for daily and sub-daily cases + titles <- as.vector(months) + # Plots + ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked + ## on. 
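+        # Silence the warnings raised by these in-progress functions while the
+        # multi-panel layout is drawn.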
+ suppressWarnings( + PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), + cat_dim = 'bin', + i_var_probs, longitude, latitude, + mask = mask, + dots = dots, + coast_width = 1.5, + title_scale = 0.6, + title_margin_scale = 0.7, + subtitle_scale = 1, + legend_scale = 0.8, #cex_bar_titles = 0.6, + toptitle = toptitle, + titles = titles, + fileout = paste0(outfile, ".png"), + bar_label_digits = 2, + bar_scale = rep(0.7, 4), + extra_margin = rep(1, 4), + bar_label_scale = 1.2, + axes_label_scale = 1.1, + triangle_ends = c(F, F)) # , width = 11, height = 8) + ) + } else { + output_configuration <- output_conf$PlotEquiMap$most_likely_terciles + base_args <- list(cat_dim = 'bin', + probs = NULL, + lon = longitude, lat = latitude, + coast_width = 1.5, + mask = NULL, + dots = NULL, + dot_symbol = 4, + dot_size = 1, + col_mask = 'antiquewhite', + cols = cols, + col_sup = col_sup, + title_scale = 1, + legend_scale = 0.8, + cex_bar_titles = 0.9, + bar_label_digits = 2, + bar_label_scale = 0.7, + bar_limits = list(c(40, 85), c(40, 85), c(40, 85)), + brks = list(4,2,4), + axes_label_scale = 1.1, + plot_margin = c(5.1, 4.1, 4.1, 2.1), + return_leg = T, + triangle_ends = c(F, T) , width = 10, height = 8) + base_args[names(output_configuration)] <- output_configuration + for (i in 1:length(months)) { + # Get forecast time label + forecast_time <- match(months[i], month.name) - init_month + 1 + if (forecast_time < 1) { + forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + # Get mask subset + if (!is.null(mask)) { + mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE) + } else { + mask_i <- NULL + } + # Get dots subset + if (!is.null(dots)) { + dots_i <- Subset(var_dots, along = 'time', indices = i, drop = TRUE) + } else { + dots_i <- NULL + } + # Define plot title + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Most Likely Tercile / ", + months[i], " ", years[i], + " / Start date: ", + format(as.Date(i_syear, format="%Y%m%d"), + "%d-%m-%Y")) + # Plot + fileout <- paste0(outfile, "_ft", forecast_time, ".png") + base_args$probs <- i_var_probs[i, , , ] + base_args$mask <- mask_i + base_args$dots <- dots_i + cb_info <- do.call(PlotMostLikelyQuantileMap, + args = c(base_args, + list(toptitle = toptitle, + fileout = fileout))) + # Add color bars with 1 range for normal category: + for (i_bar in 1:cb_info$nmap) { + tmp <- cb_info + tmp$brks <- tmp$brks[[i_bar]] + tmp$cols <- tmp$cols[[i_bar]] + tmp$bar_limits <- tmp$bar_limits[[i_bar]] + tmp$col_sup <- tmp$col_sup[[i_bar]] + tmp$title <- tmp$bar_titles[i_bar] + tmp$bar_titles <- NULL + tmp$nmap <- NULL + tmp$var_limits <- NULL + if (length(cb_info$brks[[i_bar]]) > 2) { + # plot colorbar as normal + do.call(ColorBar, tmp) + } else { + # plot a square + tmp$brks <- 4 + tmp$draw_ticks <- F + tmp$box_label <- "> 40" + tmp$triangle_ends <- c(F, F) + tmp$draw_separators <- FALSE + do.call(ColorBar_onebox, tmp) + } + } + dev.off() + } + } + } + } + info(recipe$Run$logger, + "##### MOST LIKELY TERCILE PLOTS SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R new file mode 100644 index 0000000000000000000000000000000000000000..b4c2b273dffb70f3d413b33b9575b0c7e1662bc6 --- /dev/null +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -0,0 +1,258 @@ +library(stringr) + +plot_skill_metrics <- function(recipe, data_cube, skill_metrics, + outdir, significance = F, output_conf) { 
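+  ## Example call (sketch; the object names 'data' and 'skill_metrics' are
+  ## illustrative and depend on the calling module):
+  ##   plot_skill_metrics(recipe, data$hcst, skill_metrics, outdir,
+  ##                      significance = TRUE, output_conf = output_conf)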
+ # recipe: Auto-S2S recipe + # archive: Auto-S2S archive + # data_cube: s2dv_cube object with the corresponding hindcast data + # skill_metrics: list of named skill metrics arrays + # outdir: output directory + # significance: T/F, whether to display the significance dots in the plots + + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { + error(recipe$Run$logger, "Visualization functions not yet implemented + for daily data.") + stop() + } + # Abort if skill_metrics is not list + if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { + stop("The element 'skill_metrics' must be a list of named arrays.") + } + + latitude <- data_cube$coords$lat + longitude <- data_cube$coords$lon + archive <- get_archive(recipe) + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, + start = 1, stop = 2)) + } else { + ## TODO: Sort out decadal initial month (is it always January?) + init_month <- 1 + } + month_label <- tolower(month.name[init_month]) + month_abbreviation <- month.abb[init_month] + # Get months + months <- lubridate::month(Subset(data_cube$attrs$Dates, + "syear", indices = 1), + label = T, abb = F,locale = "en_GB") + if (!is.null(recipe$Analysis$Workflow$Visualization$projection)) { + projection <- tolower(recipe$Analysis$Workflow$Visualization$projection) + } else { + projection <- "cylindrical_equidistant" + } + + # Define color palette and number of breaks according to output format + if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { + diverging_palette <- "purpleorange" + sequential_palette <- "Oranges" + } else { + diverging_palette <- "bluered" + sequential_palette <- "Reds" + } + # Group different metrics by type + skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", + "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", + "enscorr_specs", "rmsss", "msss") + scores <- c("rps", "frps", "crps", "frps_specs", "mse") + # Loop over variables and assign colorbar and plot parameters to each metric + for (var in 1:data_cube$dims[['var']]) { + var_skill <- lapply(skill_metrics, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { + if (name %in% names(skill_metrics)) { + units <- NULL + # Define plot characteristics and metric name to display in plot + if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", + "rpss_specs", "bss90_specs", "bss10_specs", + "rmsss", "msss")) { + display_name <- toupper(strsplit(name, "_")[[1]][1]) + skill <- var_skill[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } else if (name == "mean_bias_ss") { + display_name <- "Mean Bias Skill Score" + skill <- var_skill[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } else if (name %in% c("enscorr", "enscorr_specs")) { + display_name <- "Ensemble Mean Correlation" + skill <- var_skill[[name]] + brks <- seq(-1, 1, by = 0.2) + cols <- clim.colors(length(brks) - 1, diverging_palette) 
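+          # Note (illustrative): seq(-1, 1, by = 0.2) yields 11 breaks, so
+          # clim.colors(10, diverging_palette) returns one colour per interval;
+          # correlation is bounded in [-1, 1], so no triangle-end colours are
+          # assigned below.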
+ col_inf <- NULL + col_sup <- NULL + } else if (name %in% scores) { + skill <- var_skill[[name]] + display_name <- toupper(strsplit(name, "_")[[1]][1]) + brks <- seq(0, 1, by = 0.1) + colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) + cols <- colorbar[1:(length(colorbar) - 1)] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name == "enssprerr") { + skill <- var_skill[[name]] + display_name <- "Spread-to-Error Ratio" + brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) + colorbar <- clim.colors(length(brks), diverging_palette) + cols <- colorbar[1:length(colorbar) - 1] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name %in% "mean_bias") { + skill <- var_skill[[name]] + display_name <- "Mean Bias" + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + units <- data_cube$attrs$Variable$metadata[[var_name]]$units + } + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. + significance_name <- paste0(name, "_significance") + if ((significance) && (significance_name %in% names(skill_metrics))) { + skill_significance <- var_skill[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) + # Split skill significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. + skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + dim = "time", + level = "sublist", + names = "dots") + } else { + skill_significance <- NULL + } + # Define output file name and titles + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + outfile <- paste0(outdir[var], name, "-", month_label) + } else { + outfile <- paste0(outdir[var], name) + } + # Get variable name and long name + var_name <- data_cube$attrs$Variable$varName[[var]] + var_long_name <- data_cube$attrs$Variable$metadata[[var_name]]$long_name + # Multi-panel or single-panel plots + if (recipe$Analysis$Workflow$Visualization$multi_panel) { + # Define titles + toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + "\n", display_name, " / ", hcst_period) + titles <- as.vector(months) + ## TODO: Combine PlotLayout with PlotRobinson? 
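+        ## Illustrative note: asplit(skill, MARGIN = 1) below splits the
+        ## c(time, longitude, latitude) array into a list with one
+        ## longitude x latitude matrix per forecast month, e.g.
+        ##   x <- array(rnorm(6 * 10 * 5), dim = c(time = 6, longitude = 10, latitude = 5))
+        ##   length(asplit(x, MARGIN = 1))  # 6 maps, one per panel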
+ suppressWarnings( + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + asplit(skill, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = skill_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents = F, + brks = brks, + cols = cols, + col_inf = col_inf, + col_sup = col_sup, + fileout = paste0(outfile, ".png"), + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + extra_margin = rep(1, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3, + width = 11,#default i + height = 11) + ) + } else { + # Define function and parameters depending on projection + if (projection == 'cylindrical_equidistant') { + fun <- PlotEquiMap + output_configuration <- output_conf$PlotEquiMap$skill_metric + base_args <- list(var = NULL, dots = NULL, + lon = longitude, lat = latitude, + dot_symbol = 20, dot_size = 1, + title_scale = 0.6, + filled.continents = F, brks = brks, cols = cols, + col_inf = col_inf, col_sup = col_sup, + units = units, font.main = 2, + bar_label_digits = 3, bar_label_scale = 1.5, + axes_label_scale = 1, width = 8, height = 5) + base_args[names(output_configuration)] <- output_configuration + } else { + fun <- PlotRobinson + common_projections <- c("robinson", "stereographic", "lambert_europe") + if (projection %in% common_projections) { + target_proj <- get_proj_code(projection) + } else { + target_proj <- projection + } + ## TODO: handle output_conf + base_args <- list(data = NULL, mask = NULL, + lon = longitude, lat = latitude, + lon_dim = 'longitude', lat_dim = 'latitude', + target_proj = target_proj, legend = 's2dv', + style = 'point', brks = brks, cols = cols, + col_inf = col_inf, col_sup = col_sup) + } + # Loop over forecast times + for (i in 1:dim(skill)[['time']]) { + # Get forecast time label + forecast_time <- match(months[i], month.name) - init_month + 1 + + if (forecast_time < 1) { + forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + # Define plot title + toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", months[i], "/", + hcst_period) + # Modify base arguments + base_args[[1]] <- skill[i, , ] + if (!is.null(skill_significance)) { + base_args[[2]] <- skill_significance[[i]][[1]] + significance_caption <- "alpha = 0.05" + } else { + significance_caption <- NULL + } + if (identical(fun, PlotRobinson)) { + ## TODO: Customize alpha and other technical details depending on the metric + base_args[['caption']] <- + paste0("Nominal start date: ", + "1st of ", str_to_title(month_label), "\n", + "Forecast month: ", forecast_time, "\n", + "Reference: ", recipe$Analysis$Datasets$Reference, "\n", + significance_caption) + } + fileout <- paste0(outfile, "_ft", forecast_time, ".png") + # Plot + + do.call(fun, + args = c(base_args, + list(toptitle = toptitle, + fileout = fileout))) + } + } + } + } + } + info(recipe$Run$logger, + "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/R/tmp/ColorBar.R b/modules/Visualization/R/tmp/ColorBar.R new file mode 100644 index 0000000000000000000000000000000000000000..22261c168848241f35fc3061d6b38a4d22ae2a67 --- /dev/null +++ b/modules/Visualization/R/tmp/ColorBar.R @@ -0,0 +1,593 @@ +#'Draws a Color Bar +#' +#'Generates a color bar to use as colouring function for map plots and +#'optionally draws it (horizontally or vertically) to be added to map +#'multipanels or plots. 
It is possible to draw triangles at the ends of the +#'colour bar to represent values that go beyond the range of interest. A +#'number of options is provided to adjust the colours and the position and +#'size of the components. The drawn colour bar spans a whole figure region +#'and is compatible with figure layouts.\cr\cr +#'The generated colour bar consists of a set of breaks that define the +#'length(brks) - 1 intervals to classify each of the values in each of the +#'grid cells of a two-dimensional field. The corresponding grid cell of a +#'given value of the field will be coloured in function of the interval it +#'belongs to.\cr\cr +#'The only mandatory parameters are 'var_limits' or 'brks' (in its second +#'format, see below). +#' +#'@param brks Can be provided in two formats: +#'\itemize{ +#' \item{A single value with the number of breaks to be generated +#' automatically, between the minimum and maximum specified in 'var_limits' +#' (both inclusive). Hence the parameter 'var_limits' is mandatory if 'brks' +#' is provided with this format. If 'bar_limits' is additionally provided, +#' values only between 'bar_limits' will be generated. The higher the value +#' of 'brks', the smoother the plot will look.} +#' \item{A vector with the actual values of the desired breaks. Values will +#' be reordered by force to ascending order. If provided in this format, no +#' other parameters are required to generate/plot the colour bar.} +#'} +#' This parameter is optional if 'var_limits' is specified. If 'brks' not +#' specified but 'cols' is specified, it will take as value length(cols) + 1. +#' If 'cols' is not specified either, 'brks' will take 21 as value. +#'@param cols Vector of length(brks) - 1 valid colour identifiers, for each +#' interval defined by the breaks. This parameter is optional and will be +#' filled in with a vector of length(brks) - 1 colours generated with the +#' function provided in 'color_fun' (\code{clim.colors} by default).\cr 'cols' +#' can have one additional colour at the beginning and/or at the end with the +#' aim to colour field values beyond the range of interest represented in the +#' colour bar. If any of these extra colours is provided, parameter +#' 'triangle_ends' becomes mandatory in order to disambiguate which of the +#' ends the colours have been provided for. +#'@param vertical TRUE/FALSE for vertical/horizontal colour bar +#' (disregarded if plot = FALSE). +#'@param subsampleg The first of each subsampleg breaks will be ticked on the +#' colorbar. Takes by default an approximation of a value that yields a +#' readable tick arrangement (extreme breaks always ticked). If set to 0 or +#' lower, no labels are drawn. See the code of the function for details or +#' use 'extra_labels' for customized tick arrangements. +#'@param bar_limits Vector of two numeric values with the extremes of the +#' range of values represented in the colour bar. If 'var_limits' go beyond +#' this interval, the drawing of triangle extremes is triggered at the +#' corresponding sides, painted in 'col_inf' and 'col_sup'. Either of them +#' can be set as NA and will then take as value the corresponding extreme in +#' 'var_limits' (hence a triangle end won't be triggered for these sides). +#' Takes as default the extremes of 'brks' if available, else the same values +#' as 'var_limits'. +#'@param var_limits Vector of two numeric values with the minimum and maximum +#' values of the field to represent. 
These are used to know whether to draw +#' triangle ends at the extremes of the colour bar and what colour to fill +#' them in with. If not specified, take the same value as the extremes of +#' 'brks'. Hence the parameter 'brks' is mandatory if 'var_limits' is not +#' specified. +#'@param triangle_ends Vector of two logical elements, indicating whether to +#' force the drawing of triangle ends at each of the extremes of the colour +#' bar. This choice is automatically made from the provided 'brks', +#' 'bar_limits', 'var_limits', 'col_inf' and 'col_sup', but the behaviour +#' can be manually forced to draw or not to draw the triangle ends with this +#' parameter. If 'cols' is provided, 'col_inf' and 'col_sup' will take +#' priority over 'triangle_ends' when deciding whether to draw the triangle +#' ends or not. +#'@param col_inf Colour to fill the inferior triangle end with. Useful if +#' specifying colours manually with parameter 'cols', to specify the colour +#' and to trigger the drawing of the lower extreme triangle, or if 'cols' is +#' not specified, to replace the colour automatically generated by ColorBar(). +#'@param col_sup Colour to fill the superior triangle end with. Useful if +#' specifying colours manually with parameter 'cols', to specify the colour +#' and to trigger the drawing of the upper extreme triangle, or if 'cols' is +#' not specified, to replace the colour automatically generated by ColorBar(). +#'@param color_fun Function to generate the colours of the color bar. Must +#' take an integer and must return as many colours. The returned colour vector +#' can have the attribute 'na_color', with a colour to draw NA values. This +#' parameter is set by default to clim.palette(). +#'@param plot Logical value indicating whether to only compute its breaks and +#' colours (FALSE) or to also draw it on the current device (TRUE). +#'@param draw_ticks Whether to draw ticks for the labels along the colour bar +#' (TRUE) or not (FALSE). TRUE by default. Disregarded if 'plot = FALSE'. +#'@param draw_separators Whether to draw black lines in the borders of each of +#' the colour rectancles of the colour bar (TRUE) or not (FALSE). FALSE by +#' default. Disregarded if 'plot = FALSE'. +#'@param triangle_ends_scale Scale factor for the drawn triangle ends of the +#' colour bar, if drawn at all. Takes 1 by default (rectangle triangle +#' proportional to the thickness of the colour bar). Disregarded if +#' 'plot = FALSE'. +#'@param extra_labels Numeric vector of extra labels to draw along axis of +#' the colour bar. The number of provided decimals will be conserved. +#' Disregarded if 'plot = FALSE'. +#'@param title Title to draw on top of the colour bar, most commonly with the +#' units of the represented field in the neighbour figures. Empty by default. +#'@param title_scale Scale factor for the 'title' of the colour bar. +#' Takes 1 by default. +#'@param label_scale Scale factor for the labels of the colour bar. +#' Takes 1 by default. +#'@param tick_scale Scale factor for the length of the ticks of the labels +#' along the colour bar. Takes 1 by default. +#'@param extra_margin Extra margins to be added around the colour bar, +#' in the format c(y1, x1, y2, x2). The units are margin lines. Takes +#' rep(0, 4) by default. +#'@param label_digits Number of significant digits to be displayed in the +#' labels of the colour bar, usually to avoid too many decimal digits +#' overflowing the figure region. This does not have effect over the labels +#' provided in 'extra_labels'. 
Takes 4 by default. +#'@param ... Arguments to be passed to the method. Only accepts the following +#' graphical parameters:\cr adj ann ask bg bty cex.lab cex.main cex.sub cin +#' col.axis col.lab col.main col.sub cra crt csi cxy err family fg fig fin +#' font font.axis font.lab font.main font.sub lend lheight ljoin lmitre lty +#' lwd mai mex mfcol mfrow mfg mkh oma omd omi page pch pin plt pty smo srt +#' tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog.\cr For more +#' information about the parameters see `par`. +#' +#'@return +#'\item{brks}{ +#' Breaks used for splitting the range in intervals. +#'} +#'\item{cols}{ +#' Colours generated for each of the length(brks) - 1 intervals. +#' Always of length length(brks) - 1. +#'} +#'\item{col_inf}{ +#' Colour used to draw the lower triangle end in the colour +#' bar (NULL if not drawn at all). +#'} +#'\item{col_sup}{ +#' Colour used to draw the upper triangle end in the colour +#' bar (NULL if not drawn at all). +#'} +#' +#'@examples +#'cols <- c("dodgerblue4", "dodgerblue1", "forestgreen", "yellowgreen", "white", +#' "white", "yellow", "orange", "red", "saddlebrown") +#'lims <- seq(-1, 1, 0.2) +#'ColorBar(lims, cols) +#'@importFrom grDevices col2rgb rgb +#'@export +ColorBar <- function(brks = NULL, cols = NULL, vertical = TRUE, + subsampleg = NULL, bar_limits = NULL, var_limits = NULL, + triangle_ends = NULL, col_inf = NULL, col_sup = NULL, + color_fun = clim.palette(), plot = TRUE, + draw_ticks = TRUE, draw_separators = FALSE, + triangle_ends_scale = 1, extra_labels = NULL, + title = NULL, title_scale = 1, + label_scale = 1, tick_scale = 1, + extra_margin = rep(0, 4), label_digits = 4, ...) { + # Required checks + if ((is.null(brks) || length(brks) < 2) && is.null(bar_limits) && is.null(var_limits)) { + stop("At least one of 'brks' with the desired breaks, 'bar_limits' or ", + "'var_limits' must be provided to generate the colour bar.") + } + + # Check brks + if (!is.null(brks)) { + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be numeric if specified.") + } else if (length(brks) > 1) { + reorder <- sort(brks, index.return = TRUE) + if (!is.null(cols)) { + cols <- cols[reorder$ix[which(reorder$ix <= length(cols))]] + } + brks <- reorder$x + } + } + + # Check bar_limits + if (!is.null(bar_limits)) { + if (!(all(is.na(bar_limits) | is.numeric(bar_limits)) && (length(bar_limits) == 2))) { + stop("Parameter 'bar_limits' must be a vector of two numeric elements or NAs.") + } + } + + # Check var_limits + if (!is.null(var_limits)) { + if (!(is.numeric(var_limits) && (length(var_limits) == 2))) { + stop("Parameter 'var_limits' must be a numeric vector of length 2.") + } else if (anyNA(var_limits)) { + stop("Parameter 'var_limits' must not contain NA values.") + } else if (any(is.infinite(var_limits))) { + stop("Parameter 'var_limits' must not contain infinite values.") + } + } + + # Check cols + if (!is.null(cols)) { + if (!is.character(cols)) { + stop("Parameter 'cols' must be a vector of character strings.") + } else if (any(!sapply(cols, .IsColor))) { + stop("Parameter 'cols' must contain valid colour identifiers.") + } + } + + # Check color_fun + if (!is.function(color_fun)) { + stop("Parameter 'color_fun' must be a colour-generator function.") + } + + # Check integrity among brks, bar_limits and var_limits + if (is.null(brks) || (length(brks) < 2)) { + if (is.null(brks)) { + if (is.null(cols)) { + brks <- 21 + } else { + brks <- length(cols) + 1 + } + } + if (is.null(bar_limits) || anyNA(bar_limits)) { + # var_limits is 
defined + if (is.null(bar_limits)) { + bar_limits <- c(NA, NA) + } + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (brks - 1) + bar_limits[which(is.na(bar_limits))] <- c(var_limits[1] - half_width, var_limits[2] + half_width)[which(is.na(bar_limits))] + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + } else if (is.null(var_limits)) { + # bar_limits is defined + var_limits <- bar_limits + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (brks - 1) + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + var_limits[1] <- var_limits[1] + half_width / 50 + } else { + # both bar_limits and var_limits are defined + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + } + } else if (is.null(bar_limits)) { + if (is.null(var_limits)) { + # brks is defined + bar_limits <- c(head(brks, 1), tail(brks, 1)) + var_limits <- bar_limits + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (length(brks) - 1) + var_limits[1] <- var_limits[1] + half_width / 50 + } else { + # brks and var_limits are defined + bar_limits <- c(head(brks, 1), tail(brks, 1)) + } + } else { + # brks and bar_limits are defined + # or + # brks, bar_limits and var_limits are defined + if (head(brks, 1) != bar_limits[1] || tail(brks, 1) != bar_limits[2]) { + stop("Parameters 'brks' and 'bar_limits' are inconsistent.") + } + } + + # Check col_inf + if (!is.null(col_inf)) { + if (!.IsColor(col_inf)) { + stop("Parameter 'col_inf' must be a valid colour identifier.") + } + } + + # Check col_sup + if (!is.null(col_sup)) { + if (!.IsColor(col_sup)) { + stop("Parameter 'col_sup' must be a valid colour identifier.") + } + } + + # Check triangle_ends + if (!is.null(triangle_ends) && (!is.logical(triangle_ends) || length(triangle_ends) != 2)) { + stop("Parameter 'triangle_ends' must be a logical vector with two elements.") + } + teflc <- triangle_ends_from_limit_cols <- c(!is.null(col_inf), !is.null(col_sup)) + if (is.null(triangle_ends) && is.null(col_inf) && is.null(col_sup)) { + triangle_ends <- c(FALSE, FALSE) + if (bar_limits[1] >= var_limits[1]) { + triangle_ends[1] <- TRUE + } + if (bar_limits[2] < var_limits[2]) { + triangle_ends[2] <- TRUE + } + } else if (!is.null(triangle_ends) && is.null(col_inf) && is.null(col_sup)) { + triangle_ends <- triangle_ends + } else if (is.null(triangle_ends) && (!is.null(col_inf) || !is.null(col_sup))) { + triangle_ends <- teflc + } else if (any(teflc != triangle_ends)) { + if (!is.null(brks) && length(brks) > 1 && !is.null(cols) && length(cols) >= length(brks)) { + triangle_ends <- teflc + } else if (!is.null(cols)) { + triangle_ends <- teflc + } else { + triangle_ends <- triangle_ends + } + } + if (plot && !is.null(var_limits)) { + if ((bar_limits[1] >= var_limits[1]) && !triangle_ends[1]) { + .warning("There are variable values smaller or equal to the lower limit ", + "of the colour bar and the lower triangle end has been ", + "disabled. These will be painted in the colour for NA values.") + } + if ((bar_limits[2] < var_limits[2]) && !triangle_ends[2]) { + .warning("There are variable values greater than the higher limit ", + "of the colour bar and the higher triangle end has been ", + "disabled. 
These will be painted in the colour for NA values.") + } + } + + # Generate colours if needed + if (is.null(cols)) { + cols <- color_fun(length(brks) - 1 + sum(triangle_ends)) + attr_bk <- attributes(cols) + if (triangle_ends[1]) { + if (is.null(col_inf)) col_inf <- head(cols, 1) + cols <- cols[-1] + } + if (triangle_ends[2]) { + if (is.null(col_sup)) col_sup <- tail(cols, 1) + cols <- cols[-length(cols)] + } + attributes(cols) <- attr_bk + } else if ((length(cols) != (length(brks) - 1))) { + stop("Incorrect number of 'brks' and 'cols'. There must be one more break than the number of colours.") + } + + # Check vertical + if (!is.logical(vertical)) { + stop("Parameter 'vertical' must be TRUE or FALSE.") + } + + # Check extra_labels + if (is.null(extra_labels)) { + extra_labels <- numeric(0) + } + if (!is.numeric(extra_labels)) { + stop("Parameter 'extra_labels' must be numeric.") + } else { + if (any(extra_labels > bar_limits[2]) || any(extra_labels < bar_limits[1])) { + stop("Parameter 'extra_labels' must not contain ticks beyond the color bar limits.") + } + } + extra_labels <- sort(extra_labels) + + # Check subsampleg + primes <- function(x) { + # Courtesy of Chase. See http://stackoverflow.com/questions/6424856/r-function-for-returning-all-factors + x <- as.integer(x) + div <- seq_len(abs(x)) + factors <- div[x %% div == 0L] + factors <- list(neg = -factors, pos = factors) + return(factors) + } + remove_final_tick <- FALSE + added_final_tick <- TRUE + if (is.null(subsampleg)) { + subsampleg <- 1 + while (length(brks) / subsampleg > 15 - 1) { + next_factor <- primes((length(brks) - 1) / subsampleg)$pos + next_factor <- next_factor[length(next_factor) - ifelse(length(next_factor) > 2, 1, 0)] + subsampleg <- subsampleg * next_factor + } + if (subsampleg > (length(brks) - 1) / 4) { + subsampleg <- max(1, round(length(brks) / 4)) + extra_labels <- c(extra_labels, bar_limits[2]) + added_final_tick <- TRUE + if ((length(brks) - 1) %% subsampleg < (length(brks) - 1) / 4 / 2) { + remove_final_tick <- TRUE + } + } + } else if (!is.numeric(subsampleg)) { + stop("Parameter 'subsampleg' must be numeric.") + } + subsampleg <- round(subsampleg) + draw_labels <- TRUE + if ((subsampleg) < 1) { + draw_labels <- FALSE + } + + # Check plot + if (!is.logical(plot)) { + stop("Parameter 'plot' must be logical.") + } + + # Check draw_separators + if (!is.logical(draw_separators)) { + stop("Parameter 'draw_separators' must be logical.") + } + + # Check triangle_ends_scale + if (!is.numeric(triangle_ends_scale)) { + stop("Parameter 'triangle_ends_scale' must be numeric.") + } + + # Check draw_ticks + if (!is.logical(draw_ticks)) { + stop("Parameter 'draw_ticks' must be logical.") + } + + # Check title + if (is.null(title)) { + title <- '' + } + if (!is.character(title)) { + stop("Parameter 'title' must be a character string.") + } + + # Check title_scale + if (!is.numeric(title_scale)) { + stop("Parameter 'title_scale' must be numeric.") + } + + # Check label_scale + if (!is.numeric(label_scale)) { + stop("Parameter 'label_scale' must be numeric.") + } + + # Check tick_scale + if (!is.numeric(tick_scale)) { + stop("Parameter 'tick_scale' must be numeric.") + } + + # Check extra_margin + if (!is.numeric(extra_margin) || length(extra_margin) != 4) { + stop("Parameter 'extra_margin' must be a numeric vector of length 4.") + } + + # Check label_digits + if (!is.numeric(label_digits)) { + stop("Parameter 'label_digits' must be numeric.") + } + label_digits <- round(label_digits) + + # Process the user graphical 
parameters that may be passed in the call + ## Graphical parameters to exclude + excludedArgs <- c("cex", "cex.axis", "col", "lab", "las", "mar", "mgp", "new", "ps") + userArgs <- .FilterUserGraphicArgs(excludedArgs, ...) + + # + # Plotting colorbar + # ~~~~~~~~~~~~~~~~~~~ + # + if (plot) { + pars_to_save <- c('mar', 'cex', names(userArgs), 'mai', 'mgp', 'las', 'xpd') + saved_pars <- par(pars_to_save) + par(mar = c(0, 0, 0, 0), cex = 1) + image(1, 1, t(t(1)), col = rgb(0, 0, 0, 0), axes = FALSE, xlab = '', ylab = '') + # Get the availale space + figure_size <- par('fin') + cs <- par('csi') + # This allows us to assume we always want to plot horizontally + if (vertical) { + figure_size <- rev(figure_size) + } +# pannel_to_redraw <- par('mfg') +# .SwitchToFigure(pannel_to_redraw[1], pannel_to_redraw[2]) + # Load the user parameters + par(new = TRUE) + par(userArgs) + # Set up color bar plot region + margins <- c(0.0, 0, 0.0, 0) + cex_title <- 1 * title_scale + cex_labels <- 0.9 * label_scale + cex_ticks <- -0.3 * tick_scale + spaceticklab <- max(-cex_ticks, 0) + if (vertical) { + margins[1] <- margins[1] + (1.2 * cex_labels * 3 + spaceticklab) * cs + margins <- margins + extra_margin[c(4, 1:3)] * cs + } else { + margins[1] <- margins[1] + (1.2 * cex_labels * 1 + spaceticklab) * cs + margins <- margins + extra_margin * cs + } + if (title != '') { + margins[3] <- margins[3] + (1.0 * cex_title) * cs + } + margins[3] <- margins[3] + sqrt(figure_size[2] / (margins[1] + margins[3])) * + figure_size[2] / 6 * ifelse(title != '', 0.5, 0.8) + # Set side margins + margins[2] <- margins[2] + figure_size[1] / 16 + margins[4] <- margins[4] + figure_size[1] / 16 + triangle_ends_prop <- 1 / 32 * triangle_ends_scale + triangle_ends_cex <- triangle_ends_prop * figure_size[2] + if (triangle_ends[1]) { + margins[2] <- margins[2] + triangle_ends_cex + } + if (triangle_ends[2]) { + margins[4] <- margins[4] + triangle_ends_cex + } + ncols <- length(cols) + # Set up the points of triangles + # Compute the proportion of horiz. 
space occupied by one plot unit + prop_unit <- (1 - (margins[2] + margins[4]) / figure_size[1]) / ncols + # Convert triangle height to plot inits + triangle_height <- triangle_ends_prop / prop_unit + left_triangle <- list(x = c(1, 1 - triangle_height, 1) - 0.5, + y = c(1.4, 1, 0.6)) + right_triangle <- list(x = c(ncols, ncols + triangle_height, ncols) + 0.5, + y = c(1.4, 1, 0.6)) + # Draw the color squares and title + if (vertical) { + par(mai = c(margins[2:4], margins[1]), + mgp = c(0, spaceticklab + 0.2, 0), las = 1) + d <- 4 + image(1, 1:ncols, t(1:ncols), axes = FALSE, col = cols, + xlab = '', ylab = '') + title(ylab = title, line = cex_title * (0.2 + 0.1), cex.lab = cex_title) + # Draw top and bottom border lines + lines(c(0.6, 0.6), c(1 - 0.5, ncols + 0.5)) + lines(c(1.4, 1.4), c(1 - 0.5, ncols + 0.5)) + # Rotate triangles + names(left_triangle) <- rev(names(left_triangle)) + names(right_triangle) <- rev(names(right_triangle)) + } else { + # The term - cex_labels / 4 * (3 / cex_labels - 1) was found by + # try and error + par(mai = margins, + mgp = c(0, cex_labels / 2 + spaceticklab + - cex_labels / 4 * (3 / cex_labels - 1), 0), + las = 1) + d <- 1 + image(1:ncols, 1, t(t(1:ncols)), axes = FALSE, col = cols, + xlab = '', ylab = '') + title(title, line = cex_title * (0.2 + 0.1), cex.main = cex_title) + # Draw top and bottom border lines + lines(c(1 - 0.5, ncols + 0.5), c(0.6, 0.6)) + lines(c(1 - 0.5, ncols + 0.5), c(1.4, 1.4)) + tick_length <- -0.4 + } + # Draw the triangles + par(xpd = TRUE) + if (triangle_ends[1]) { + # Draw left triangle + polygon(left_triangle$x, left_triangle$y, col = col_inf, border = NA) + lines(left_triangle$x, left_triangle$y) + } + if (triangle_ends[2]) { + # Draw right triangle + polygon(right_triangle$x, right_triangle$y, col = col_sup, border = NA) + lines(right_triangle$x, right_triangle$y) + } + par(xpd = FALSE) + + # Put the separators + if (vertical) { + if (draw_separators) { + for (i in 1:(ncols - 1)) { + lines(c(0.6, 1.4), c(i, i) + 0.5) + } + } + if (draw_separators || is.null(col_inf)) { + lines(c(0.6, 1.4), c(0.5, 0.5)) + } + if (draw_separators || is.null(col_sup)) { + lines(c(0.6, 1.4), c(ncols + 0.5, ncols + 0.5)) + } + } else { + if (draw_separators) { + for (i in 1:(ncols - 1)) { + lines(c(i, i) + 0.5, c(0.6, 1.4)) + } + } + if (draw_separators || is.null(col_inf)) { + lines(c(0.5, 0.5), c(0.6, 1.4)) + } + if (draw_separators || is.null(col_sup)) { + lines(c(ncols + 0.5, ncols + 0.5), c(0.6, 1.4)) + } + } + # Put the ticks + plot_range <- length(brks) - 1 + var_range <- tail(brks, 1) - head(brks, 1) + extra_labels_at <- ((extra_labels - head(brks, 1)) / var_range) * plot_range + 0.5 + at <- seq(1, length(brks), subsampleg) + labels <- brks[at] + # Getting rid of next-to-last tick if too close to last one + if (remove_final_tick) { + at <- at[-length(at)] + labels <- labels[-length(labels)] + } + labels <- signif(labels, label_digits) + if (added_final_tick) { + extra_labels[length(extra_labels)] <- signif(tail(extra_labels, 1), label_digits) + } + at <- at - 0.5 + at <- c(at, extra_labels_at) + labels <- c(labels, extra_labels) + tick_reorder <- sort(at, index.return = TRUE) + at <- tick_reorder$x + if (draw_labels) { + labels <- labels[tick_reorder$ix] + } else { + labels <- FALSE + } + axis(d, at = at, tick = draw_ticks, labels = labels, cex.axis = cex_labels, tcl = cex_ticks) + par(saved_pars) + } + invisible(list(brks = brks, cols = cols, col_inf = col_inf, col_sup = col_sup)) +} + diff --git 
a/modules/Visualization/R/tmp/ColorBar_onebox.R b/modules/Visualization/R/tmp/ColorBar_onebox.R new file mode 100644 index 0000000000000000000000000000000000000000..679600df6633e019ca995e8f6cf2f5655029c29e --- /dev/null +++ b/modules/Visualization/R/tmp/ColorBar_onebox.R @@ -0,0 +1,496 @@ +#============================================================================ +# This function is adapted from s2dv::ColorBar. It plots only one box with the proper +# width, which is manipulated by the length of breaks. +#============================================================================ +ColorBar_onebox <- function(brks = NULL, cols = NULL, vertical = TRUE, + subsampleg = NULL, bar_limits = NULL, var_limits = NULL, + triangle_ends = NULL, col_inf = NULL, col_sup = NULL, + color_fun = clim.palette(), plot = TRUE, + draw_ticks = TRUE, draw_separators = FALSE, + triangle_ends_scale = 1, extra_labels = NULL, + title = NULL, title_scale = 1, + label_scale = 1, tick_scale = 1, + extra_margin = rep(0, 4), label_digits = 4, box_label = NULL, ...) { + # Required checks +#---------NEW---------- +# if ((is.null(brks) || length(brks) < 2) && is.null(bar_limits) && is.null(var_limits)) { +# stop("At least one of 'brks' with the desired breaks, 'bar_limits' or ", +# "'var_limits' must be provided to generate the colour bar.") +# } + + # Check brks + if (is.null(brks)) { + stop("Parameter 'brks' must be provided.") + } + if (!is.numeric(brks) | length(brks) > 1) { + stop("Parameter 'brks' must be a number.") + } + + # Check bar_limits + if (!is.null(bar_limits)) { + if (!(all(is.na(bar_limits) | is.numeric(bar_limits)) && (length(bar_limits) == 2))) { + stop("Parameter 'bar_limits' must be a vector of two numeric elements or NAs.") + } +#-------NEW---------- + } else { + # randomly set a value + bar_limits <- c(0, 100) + } +#-------NEW_END----------- + + # Check var_limits + if (!is.null(var_limits)) { + if (!(is.numeric(var_limits) && (length(var_limits) == 2))) { + stop("Parameter 'var_limits' must be a numeric vector of length 2.") + } else if (anyNA(var_limits)) { + stop("Parameter 'var_limits' must not contain NA values.") + } else if (any(is.infinite(var_limits))) { + stop("Parameter 'var_limits' must not contain infinite values.") + } + } + + # Check cols + if (!is.null(cols)) { + if (!is.character(cols)) { + stop("Parameter 'cols' must be a vector of character strings.") + } else if (any(!sapply(cols, .IsColor))) { + stop("Parameter 'cols' must contain valid colour identifiers.") + } + } + + # Check color_fun + if (!is.function(color_fun)) { + stop("Parameter 'color_fun' must be a colour-generator function.") + } + + # Check integrity among brks, bar_limits and var_limits + if (is.null(brks) || (length(brks) < 2)) { + if (is.null(brks)) { + if (is.null(cols)) { + brks <- 21 + } else { + brks <- length(cols) + 1 + } + } + if (is.null(bar_limits) || anyNA(bar_limits)) { + # var_limits is defined + if (is.null(bar_limits)) { + bar_limits <- c(NA, NA) + } + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (brks - 1) + bar_limits[which(is.na(bar_limits))] <- c(var_limits[1] - half_width, var_limits[2] + half_width)[which(is.na(bar_limits))] + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + } else if (is.null(var_limits)) { + # bar_limits is defined + var_limits <- bar_limits + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (brks - 1) + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + var_limits[1] <- var_limits[1] + half_width / 50 + } else { + # both bar_limits 
and var_limits are defined + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + } + } else if (is.null(bar_limits)) { + if (is.null(var_limits)) { + # brks is defined + bar_limits <- c(head(brks, 1), tail(brks, 1)) + var_limits <- bar_limits + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (length(brks) - 1) + var_limits[1] <- var_limits[1] + half_width / 50 + } else { + # brks and var_limits are defined + bar_limits <- c(head(brks, 1), tail(brks, 1)) + } + } else { + # brks and bar_limits are defined + # or + # brks, bar_limits and var_limits are defined + if (head(brks, 1) != bar_limits[1] || tail(brks, 1) != bar_limits[2]) { + stop("Parameters 'brks' and 'bar_limits' are inconsistent.") + } + } + +#--------NEW------------ + white_num <- length(brks) / 2 - 1 + if (length(brks) != 2 & length(cols) == 1) { + # Add whites at two sides + if (white_num %% 1 == 0) { + cols <- c(rep("white", white_num), cols, rep("white", white_num)) + } else { + warning("Set brks length even number so the color box can be in the center.") + cols <- c(rep("white", white_num), cols, rep("white", white_num), "white") + } + } +#------NEW_END---------- + + # Check col_inf + if (!is.null(col_inf)) { + if (!.IsColor(col_inf)) { + stop("Parameter 'col_inf' must be a valid colour identifier.") + } + } + + # Check col_sup + if (!is.null(col_sup)) { + if (!.IsColor(col_sup)) { + stop("Parameter 'col_sup' must be a valid colour identifier.") + } + } + + # Check triangle_ends + if (is.null(triangle_ends)) { + triangle_ends <- c(F, F) + } else if (!identical(triangle_ends, c(FALSE, FALSE))) { + warning("Plotting triangle ends is not a valid option. Set parameter 'triangle_ends' to c(F, F).") + triangle_ends <- c(F, F) + } +#------------NEW----------------- +# if (!is.null(triangle_ends) && (!is.logical(triangle_ends) || length(triangle_ends) != 2)) { +# stop("Parameter 'triangle_ends' must be a logical vector with two elements.") +# } +# teflc <- triangle_ends_from_limit_cols <- c(!is.null(col_inf), !is.null(col_sup)) +# if (is.null(triangle_ends) && is.null(col_inf) && is.null(col_sup)) { +# triangle_ends <- c(FALSE, FALSE) +# if (bar_limits[1] >= var_limits[1]) { +# triangle_ends[1] <- TRUE +# } +# if (bar_limits[2] < var_limits[2]) { +# triangle_ends[2] <- TRUE +# } +# } else if (!is.null(triangle_ends) && is.null(col_inf) && is.null(col_sup)) { +# triangle_ends <- triangle_ends +# } else if (is.null(triangle_ends) && (!is.null(col_inf) || !is.null(col_sup))) { +# triangle_ends <- teflc +# } else if (any(teflc != triangle_ends)) { +# if (!is.null(brks) && length(brks) > 1 && !is.null(cols) && length(cols) >= length(brks)) { +# triangle_ends <- teflc +# } else if (!is.null(cols)) { +# triangle_ends <- teflc +# } else { +# triangle_ends <- triangle_ends +# } +# } +# if (plot) { +# if ((bar_limits[1] >= var_limits[1]) && !triangle_ends[1]) { +# .warning("There are variable values smaller or equal to the lower limit ", +# "of the colour bar and the lower triangle end has been ", +# "disabled. These will be painted in the colour for NA values.") +# } +# if ((bar_limits[2] < var_limits[2]) && !triangle_ends[2]) { +# .warning("There are variable values greater than the higher limit ", +# "of the colour bar and the higher triangle end has been ", +# "disabled. 
These will be painted in the colour for NA values.") +# } +# } +#--------NEW_END------------ + + # Generate colours if needed + if (is.null(cols)) { + cols <- color_fun(length(brks) - 1 + sum(triangle_ends)) + attr_bk <- attributes(cols) + if (triangle_ends[1]) { + if (is.null(col_inf)) col_inf <- head(cols, 1) + cols <- cols[-1] + } + if (triangle_ends[2]) { + if (is.null(col_sup)) col_sup <- tail(cols, 1) + cols <- cols[-length(cols)] + } + attributes(cols) <- attr_bk + } else if ((length(cols) != (length(brks) - 1))) { + stop("Incorrect number of 'brks' and 'cols'. There must be one more break than the number of colours.") + } + + # Check vertical + if (!is.logical(vertical)) { + stop("Parameter 'vertical' must be TRUE or FALSE.") + } + + # Check extra_labels + if (is.null(extra_labels)) { + extra_labels <- numeric(0) + } + if (!is.numeric(extra_labels)) { + stop("Parameter 'extra_labels' must be numeric.") + } else { + if (any(extra_labels > bar_limits[2]) || any(extra_labels < bar_limits[1])) { + stop("Parameter 'extra_labels' must not contain ticks beyond the color bar limits.") + } + } + extra_labels <- sort(extra_labels) + + # Check subsampleg + primes <- function(x) { + # Courtesy of Chase. See http://stackoverflow.com/questions/6424856/r-function-for-returning-all-factors + x <- as.integer(x) + div <- seq_len(abs(x)) + factors <- div[x %% div == 0L] + factors <- list(neg = -factors, pos = factors) + return(factors) + } + remove_final_tick <- FALSE + added_final_tick <- TRUE + if (is.null(subsampleg)) { + subsampleg <- 1 + while (length(brks) / subsampleg > 15 - 1) { + next_factor <- primes((length(brks) - 1) / subsampleg)$pos + next_factor <- next_factor[length(next_factor) - ifelse(length(next_factor) > 2, 1, 0)] + subsampleg <- subsampleg * next_factor + } + if (subsampleg > (length(brks) - 1) / 4) { + subsampleg <- max(1, round(length(brks) / 4)) + extra_labels <- c(extra_labels, bar_limits[2]) + added_final_tick <- TRUE + if ((length(brks) - 1) %% subsampleg < (length(brks) - 1) / 4 / 2) { + remove_final_tick <- TRUE + } + } + } else if (!is.numeric(subsampleg)) { + stop("Parameter 'subsampleg' must be numeric.") + } + subsampleg <- round(subsampleg) + draw_labels <- TRUE + if ((subsampleg) < 1) { + draw_labels <- FALSE + } + + # Check plot + if (!is.logical(plot)) { + stop("Parameter 'plot' must be logical.") + } + + # Check draw_separators + if (!is.logical(draw_separators)) { + stop("Parameter 'draw_separators' must be logical.") + } +#--------NEW---------- + if (draw_separators) { + warning("Draw only one box. 
Parameter 'draw_separators' is not effective.") + draw_separators <- FALSE + } +#--------NEW_END---------- + + # Check triangle_ends_scale + if (!is.numeric(triangle_ends_scale)) { + stop("Parameter 'triangle_ends_scale' must be numeric.") + } + + # Check draw_ticks + if (!is.logical(draw_ticks)) { + stop("Parameter 'draw_ticks' must be logical.") + } + + # Check title + if (is.null(title)) { + title <- '' + } + if (!is.character(title)) { + stop("Parameter 'title' must be a character string.") + } + + # Check title_scale + if (!is.numeric(title_scale)) { + stop("Parameter 'title_scale' must be numeric.") + } + + # Check label_scale + if (!is.numeric(label_scale)) { + stop("Parameter 'label_scale' must be numeric.") + } + + # Check tick_scale + if (!is.numeric(tick_scale)) { + stop("Parameter 'tick_scale' must be numeric.") + } + + # Check extra_margin + if (!is.numeric(extra_margin) || length(extra_margin) != 4) { + stop("Parameter 'extra_margin' must be a numeric vector of length 4.") + } + + # Check label_digits + if (!is.numeric(label_digits)) { + stop("Parameter 'label_digits' must be numeric.") + } + label_digits <- round(label_digits) + + # Process the user graphical parameters that may be passed in the call + ## Graphical parameters to exclude + excludedArgs <- c("cex", "cex.axis", "col", "lab", "las", "mar", "mgp", "new", "ps") + userArgs <- .FilterUserGraphicArgs(excludedArgs, ...) + + # + # Plotting colorbar + # ~~~~~~~~~~~~~~~~~~~ + # + if (plot) { + pars_to_save <- c('mar', 'cex', names(userArgs), 'mai', 'mgp', 'las', 'xpd') + saved_pars <- par(pars_to_save) + par(mar = c(0, 0, 0, 0), cex = 1) + image(1, 1, t(t(1)), col = rgb(0, 0, 0, 0), axes = FALSE, xlab = '', ylab = '') + # Get the availale space + figure_size <- par('fin') + cs <- par('csi') + # This allows us to assume we always want to plot horizontally + if (vertical) { + figure_size <- rev(figure_size) + } +# pannel_to_redraw <- par('mfg') +# .SwitchToFigure(pannel_to_redraw[1], pannel_to_redraw[2]) + # Load the user parameters + par(new = TRUE) + par(userArgs) + # Set up color bar plot region + margins <- c(0.0, 0, 0.0, 0) + cex_title <- 1 * title_scale + cex_labels <- 0.9 * label_scale + cex_ticks <- -0.3 * tick_scale + spaceticklab <- max(-cex_ticks, 0) + if (vertical) { + margins[1] <- margins[1] + (1.2 * cex_labels * 3 + spaceticklab) * cs + margins <- margins + extra_margin[c(4, 1:3)] * cs + } else { + margins[1] <- margins[1] + (1.2 * cex_labels * 1 + spaceticklab) * cs + margins <- margins + extra_margin * cs + } + if (title != '') { + margins[3] <- margins[3] + (1.0 * cex_title) * cs + } + margins[3] <- margins[3] + sqrt(figure_size[2] / (margins[1] + margins[3])) * + figure_size[2] / 6 * ifelse(title != '', 0.5, 0.8) + # Set side margins + margins[2] <- margins[2] + figure_size[1] / 16 + margins[4] <- margins[4] + figure_size[1] / 16 + triangle_ends_prop <- 1 / 32 * triangle_ends_scale + triangle_ends_cex <- triangle_ends_prop * figure_size[2] + if (triangle_ends[1]) { + margins[2] <- margins[2] + triangle_ends_cex + } + if (triangle_ends[2]) { + margins[4] <- margins[4] + triangle_ends_cex + } + ncols <- length(cols) + # Set up the points of triangles + # Compute the proportion of horiz. 
space occupied by one plot unit + prop_unit <- (1 - (margins[2] + margins[4]) / figure_size[1]) / ncols + # Convert triangle height to plot inits + triangle_height <- triangle_ends_prop / prop_unit + left_triangle <- list(x = c(1, 1 - triangle_height, 1) - 0.5, + y = c(1.4, 1, 0.6)) + right_triangle <- list(x = c(ncols, ncols + triangle_height, ncols) + 0.5, + y = c(1.4, 1, 0.6)) + # Draw the color squares and title + if (vertical) { + par(mai = c(margins[2:4], margins[1]), + mgp = c(0, spaceticklab + 0.2, 0), las = 1) + d <- 4 + image(1, 1:ncols, t(1:ncols), axes = FALSE, col = cols, + xlab = '', ylab = '') + title(ylab = title, line = cex_title * (0.2 + 0.1), cex.lab = cex_title) + # Draw top and bottom border lines + lines(c(0.6, 0.6), c(1 - 0.5, ncols + 0.5)) + lines(c(1.4, 1.4), c(1 - 0.5, ncols + 0.5)) + # Rotate triangles + names(left_triangle) <- rev(names(left_triangle)) + names(right_triangle) <- rev(names(right_triangle)) + } else { + # The term - cex_labels / 4 * (3 / cex_labels - 1) was found by + # try and error + par(mai = margins, + mgp = c(0, cex_labels / 2 + spaceticklab + - cex_labels / 4 * (3 / cex_labels - 1), 0), + las = 1) + d <- 1 + image(1:ncols, 1, t(t(1:ncols)), axes = FALSE, col = cols, + xlab = '', ylab = '') + title(title, line = cex_title * (0.2 + 0.1), cex.main = cex_title) +#---------NEW--------------- + # Draw top and bottom border lines + lines(c(1 - 0.5 + white_num, ncols + 0.5 - white_num), c(0.6, 0.6)) + lines(c(1 - 0.5 + white_num, ncols + 0.5 - white_num), c(1.4, 1.4)) +#---------NEW_END-------------- + tick_length <- -0.4 + } + # Draw the triangles + par(xpd = TRUE) + if (triangle_ends[1]) { + # Draw left triangle + polygon(left_triangle$x, left_triangle$y, col = col_inf, border = NA) + lines(left_triangle$x, left_triangle$y) + } + if (triangle_ends[2]) { + # Draw right triangle + polygon(right_triangle$x, right_triangle$y, col = col_sup, border = NA) + lines(right_triangle$x, right_triangle$y) + } + par(xpd = FALSE) + + # Put the separators + if (vertical) { + if (draw_separators) { + for (i in 1:(ncols - 1)) { + lines(c(0.6, 1.4), c(i, i) + 0.5) + } + } + if (draw_separators || is.null(col_inf)) { + lines(c(0.6, 1.4), c(0.5, 0.5)) + } + if (draw_separators || is.null(col_sup)) { + lines(c(0.6, 1.4), c(ncols + 0.5, ncols + 0.5)) + } + } else { + if (draw_separators) { + for (i in 1:(ncols - 1)) { + lines(c(i, i) + 0.5, c(0.6, 1.4)) + } + } +#----------NEW------------- +# if (draw_separators || is.null(col_inf)) { +# lines(c(0.5, 0.5), c(0.6, 1.4)) +# } +# if (draw_separators || is.null(col_sup)) { +# lines(c(ncols + 0.5, ncols + 0.5), c(0.6, 1.4)) +# } +#---------NEW_END------------- + } +#---------NEW------------- + # Draw vertical border lines + lines(c(0.5 + white_num, 0.5 + white_num), c(0.6, 1.4)) + lines(c(ncols + 0.5 - white_num, ncols + 0.5 - white_num), c(0.6, 1.4)) +#-------NEW_END----------- + + # Put the ticks + plot_range <- length(brks) - 1 + var_range <- tail(brks, 1) - head(brks, 1) + extra_labels_at <- ((extra_labels - head(brks, 1)) / var_range) * plot_range + 0.5 + at <- seq(1, length(brks), subsampleg) + labels <- brks[at] + # Getting rid of next-to-last tick if too close to last one + if (remove_final_tick) { + at <- at[-length(at)] + labels <- labels[-length(labels)] + } + labels <- signif(labels, label_digits) + if (added_final_tick) { + extra_labels[length(extra_labels)] <- signif(tail(extra_labels, 1), label_digits) + } + at <- at - 0.5 + at <- c(at, extra_labels_at) + labels <- c(labels, extra_labels) + tick_reorder <- 
sort(at, index.return = TRUE) + at <- tick_reorder$x + if (draw_labels) { + labels <- labels[tick_reorder$ix] + } else { + labels <- FALSE + } +#---------NEW---------- + if (is.null(box_label)) box_label <- "" + axis(d, at = mean(at), tick = draw_ticks, labels = box_label, cex.axis = cex_labels, tcl = cex_ticks) +# axis(d, at = at, tick = draw_ticks, labels = labels, cex.axis = cex_labels, tcl = cex_ticks) +#-------NEW_END---------- + par(saved_pars) + } + invisible(list(brks = brks, cols = cols, col_inf = col_inf, col_sup = col_sup)) +} + diff --git a/modules/Visualization/R/tmp/GradientCatsColorBar.R b/modules/Visualization/R/tmp/GradientCatsColorBar.R new file mode 100644 index 0000000000000000000000000000000000000000..00c89b1d76be2e36c75529cb3eaa407602a8e3ec --- /dev/null +++ b/modules/Visualization/R/tmp/GradientCatsColorBar.R @@ -0,0 +1,139 @@ +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, col_inf = NULL, col_sup = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, label_scale = 1, extra_margin = rep(0, 4), + ...) { + + # bar_limits: a vector of 2 or a list + if (!is.list(bar_limits)) { + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2 or a list containing that.") + } + # turn into list + bar_limits <- rep(list(bar_limits), nmap) + } else { + if (any(!sapply(bar_limits, is.numeric)) || any(sapply(bar_limits, length) != 2)) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2 or a list containing that.") + } + if (length(bar_limits) != nmap) { + stop("Parameter 'bar_limits' must have the length of 'nmap'.") + } + } + # Check brks + if (!is.list(brks)) { + if (is.null(brks)) { + brks <- 5 + } else if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Turn it into list + brks <- rep(list(brks), nmap) + } else { + if (length(brks) != nmap) { + stop("Parameter 'brks' must have the length of 'nmap'.") + } + } + for (i_map in 1:nmap) { + if (length(brks[[i_map]]) == 1) { + brks[[i_map]] <- seq(from = bar_limits[[i_map]][1], to = bar_limits[[i_map]][2], length.out = brks[[i_map]]) + } + } + + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(1:length(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + + # Set triangle_ends, col_sup, col_inf + #NOTE: The "col" input of ColorBar() later is not NULL (since we determine it here) + # so ColorBar() cannot decide these parameters for us. + #NOTE: Here, col_inf and col_sup are prior to triangle_ends, which is consistent with ColorBar(). 
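+    #Illustrative example: with nmap = 3 and the 5 predefined col_sets above,
+    #chosen_sets is 1:3 + floor((5 - 3) / 2) = 2:4, i.e. the blue, yellow/orange
+    #and red palettes are picked for the three categories.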
+ #TODO: Make triangle_ends a list + if (is.null(triangle_ends)) { + if (!is.null(var_limits)) { + triangle_ends <- c(FALSE, FALSE) + #TODO: bar_limits is a list + if (bar_limits[1] >= var_limits[1] | !is.null(col_inf)) { + triangle_ends[1] <- TRUE + if (is.null(col_inf)) { + col_inf <- lapply(cols, head, 1) + cols <- lapply(cols, '[', -1) + } + } + if (bar_limits[2] < var_limits[2] | !is.null(col_sup)) { + triangle_ends[2] <- TRUE + if (is.null(col_sup)) { + col_sup <- lapply(cols, tail, 1) + cols <- lapply(cols, '[', -length(cols[[1]])) + } + } + } else { + triangle_ends <- c(!is.null(col_inf), !is.null(col_sup)) + } + } else { # triangle_ends has values + if (triangle_ends[1] & is.null(col_inf)) { + col_inf <- lapply(cols, head, 1) + cols <- lapply(cols, '[', -1) + } + if (triangle_ends[2] & is.null(col_sup)) { + col_sup <- lapply(cols, tail, 1) + cols <- lapply(cols, '[', -length(cols[[1]])) + } + } + + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as 'nmap'.") + } + } + for (i_map in 1:length(cols)) { + if (length(cols[[i_map]]) != (length(brks[[i_map]]) - 1)) { + cols[[i_map]] <- grDevices::colorRampPalette(cols[[i_map]])(length(brks[[i_map]]) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } else { + bar_titles <- paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { +#TODO: Add s2dv:: + ColorBar(brks = brks[[k]], cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, + bar_limits = bar_limits[[k]], #var_limits = var_limits, + triangle_ends = triangle_ends, col_inf = col_inf[[k]], col_sup = col_sup[[k]], plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + return(list(brks = brks, cols = cols, col_inf = col_inf, col_sup = col_sup)) + } + +} + diff --git a/modules/Visualization/R/tmp/PlotCombinedMap.R b/modules/Visualization/R/tmp/PlotCombinedMap.R new file mode 100644 index 0000000000000000000000000000000000000000..e4d6d7f33db3805e5a956a21230148767d3aa2b8 --- /dev/null +++ b/modules/Visualization/R/tmp/PlotCombinedMap.R @@ -0,0 +1,525 @@ +#'Plot Multiple Lon-Lat Variables In a Single Map According to a Decision Function +#' +#'@description Plot a number a two dimensional matrices with (longitude, +#'latitude) dimensions on a single map with the cylindrical equidistant +#'latitude and longitude projection. +#' +#'@author Nicolau Manubens, \email{nicolau.manubens@bsc.es} +#'@author Veronica Torralba, \email{veronica.torralba@bsc.es} +#' +#'@param maps List of matrices to plot, each with (longitude, latitude) +#' dimensions, or 3-dimensional array with the dimensions (longitude, latitude, +#' map). Dimension names are required. +#'@param lon Vector of longitudes. Must match the length of the corresponding +#' dimension in 'maps'. +#'@param lat Vector of latitudes. Must match the length of the corresponding +#' dimension in 'maps'. +#'@param map_select_fun Function that selects, for each grid point, which value +#' to take among all the provided maps. 
This function receives as input a +#' vector of values for a same grid point for all the provided maps, and must +#' return a single selected value (not its index!) or NA. For example, the +#' \code{min} and \code{max} functions are accepted. +#'@param display_range Range of values to be displayed for all the maps. This +#' must be a numeric vector c(range min, range max). The values in the +#' parameter 'maps' can go beyond the limits specified in this range. If the +#' selected value for a given grid point (according to 'map_select_fun') falls +#' outside the range, it will be coloured with 'col_unknown_map'. +#'@param map_dim Optional name for the dimension of 'maps' along which the +#' multiple maps are arranged. Only applies when 'maps' is provided as a +#' 3-dimensional array. Takes the value 'map' by default. +#'@param brks Colour levels to be sent to PlotEquiMap. This parameter is +#' optional and adjusted automatically by the function. +#'@param cols List of vectors of colours to be sent to PlotEquiMap for the +#' colour bar of each map. This parameter is optional and adjusted +#' automatically by the function (up to 5 maps). The colours provided for each +#' colour bar will be automatically interpolated to match the number of breaks. +#' Each item in this list can be named, and the name will be used as title for +#' the corresponding colour bar (equivalent to the parameter 'bar_titles'). +#'@param col_unknown_map Colour to use to paint the grid cells for which a map +#' is not possible to be chosen according to 'map_select_fun' or for those +#' values that go beyond 'display_range'. Takes the value 'white' by default. +#'@param mask Optional numeric array with dimensions (latitude, longitude), with +#' values in the range [0, 1], indicating the opacity of the mask over each +#' grid point. Cells with a 0 will result in no mask, whereas cells with a 1 +#' will result in a totally opaque superimposed pixel coloured in 'col_mask'. +#'@param col_mask Colour to be used for the superimposed mask (if specified in +#' 'mask'). Takes the value 'grey' by default. +#'@param dots Array of same dimensions as 'var' or with dimensions +#' c(n, dim(var)), where n is the number of dot/symbol layers to add to the +#' plot. A value of TRUE at a grid cell will draw a dot/symbol on the +#' corresponding square of the plot. By default all layers provided in 'dots' +#' are plotted with dots, but a symbol can be specified for each of the +#' layers via the parameter 'dot_symbol'. +#'@param bar_titles Optional vector of character strings providing the titles to +#' be shown on top of each of the colour bars. +#'@param legend_scale Scale factor for the size of the colour bar labels. Takes +#' 1 by default. +#'@param cex_bar_titles Scale factor for the sizes of the bar titles. Takes 1.5 +#' by default. +#'@param plot_margin Numeric vector of length 4 for the margin sizes in the +#' following order: bottom, left, top, and right. If not specified, use the +#' default of par("mar"), c(5.1, 4.1, 4.1, 2.1). Used as 'margin_scale' in +#' s2dv::PlotEquiMap. +#'@param fileout File where to save the plot. If not specified (default) a +#' graphics device will pop up. Extensions allowed: eps/ps, jpeg, png, pdf, bmp +#' and tiff +#'@param width File width, in the units specified in the parameter size_units +#' (inches by default). Takes 8 by default. +#'@param height File height, in the units specified in the parameter size_units +#' (inches by default). Takes 5 by default. 
+#'@param size_units Units of the size of the device (file or window) to plot in. +#' Inches ('in') by default. See ?Devices and the creator function of the +#' corresponding device. +#'@param res Resolution of the device (file or window) to plot in. See ?Devices +#' and the creator function of the corresponding device. +#'@param drawleg Where to draw the common colour bar. Can take values TRUE, +#' FALSE or:\cr +#' 'up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N'\cr +#' 'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr +#' 'right', 'r', 'R', 'east', 'e', 'E'\cr +#' 'left', 'l', 'L', 'west', 'w', 'W' +#'@param return_leg A logical value indicating if the color bars information +#' should be returned by the function. If TRUE, the function doesn't plot the +#' color bars but still creates the layout with color bar areas, and the +#' arguments for GradientCatsColorBar() or ColorBar() will be returned. It is +#' convenient for users to adjust the color bars manually. The default is +#' FALSE, the color bars will be plotted directly. +#'@param ... Additional parameters to be passed on to \code{PlotEquiMap}. +#' +#'@examples +#'# Simple example +#'x <- array(1:(20 * 10), dim = c(lat = 10, lon = 20)) / 200 +#'a <- x * 0.6 +#'b <- (1 - x) * 0.6 +#'c <- 1 - (a + b) +#'lons <- seq(0, 359.5, length = 20) +#'lats <- seq(-89.5, 89.5, length = 10) +#'\dontrun{ +#'PlotCombinedMap(list(a, b, c), lons, lats, +#' toptitle = 'Maximum map', +#' map_select_fun = max, +#' display_range = c(0, 1), +#' bar_titles = paste('% of belonging to', c('a', 'b', 'c')), +#' brks = 20, width = 12, height = 10) +#'} +#' +#'Lon <- c(0:40, 350:359) +#'Lat <- 51:26 +#'data <- rnorm(51 * 26 * 3) +#'dim(data) <- c(map = 3, lon = 51, lat = 26) +#'mask <- sample(c(0,1), replace = TRUE, size = 51 * 26) +#'dim(mask) <- c(lat = 26, lon = 51) +#'\dontrun{ +#'PlotCombinedMap(data, lon = Lon, lat = Lat, map_select_fun = max, +#' display_range = range(data), mask = mask, +#' width = 14, height = 10) +#'} +#' +#'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} +#' +#'@importFrom s2dv PlotEquiMap ColorBar +#'@importFrom maps map +#'@importFrom graphics box image layout mtext par plot.new +#'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off +#' hcl jpeg pdf png postscript svg tiff +#'@export +PlotCombinedMap <- function(maps, lon, lat, + map_select_fun, display_range, + map_dim = 'map', + brks = NULL, cols = NULL, + bar_limits = NULL, triangle_ends = c(F, F), col_inf = NULL, col_sup = NULL, + col_unknown_map = 'white', + mask = NULL, col_mask = 'grey', + dots = NULL, + bar_titles = NULL, legend_scale = 1, + cex_bar_titles = 1.5, + plot_margin = NULL, bar_extra_margin = c(2, 0, 2, 0), + fileout = NULL, width = 8, height = 5, + size_units = 'in', res = 100, drawleg = T, return_leg = FALSE, + ...) { + args <- list(...) 
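+  # The extra arguments are kept in 'args' so that 'toptitle' and 'title_scale' can
+  # be inspected further down, when the top margin is adjusted before drawing the
+  # colour bars; everything in '...' is also forwarded to PlotEquiMap().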
+ + # If there is any filenames to store the graphics, process them + # to select the right device + if (!is.null(fileout)) { + .SelectDevice <- utils::getFromNamespace(".SelectDevice", "s2dv") + deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, + units = size_units, res = res) + saveToFile <- deviceInfo$fun + fileout <- deviceInfo$files + } + + # Check probs + error <- FALSE + # Change list into an array + if (is.list(maps)) { + if (length(maps) < 1) { + stop("Parameter 'maps' must be of length >= 1 if provided as a list.") + } + check_fun <- function(x) { + is.numeric(x) && (length(dim(x)) == 2) + } + if (!all(sapply(maps, check_fun))) { + error <- TRUE + } + ref_dims <- dim(maps[[1]]) + equal_dims <- all(sapply(maps, function(x) identical(dim(x), ref_dims))) + if (!equal_dims) { + stop("All arrays in parameter 'maps' must have the same dimension ", + "sizes and names when 'maps' is provided as a list of arrays.") + } + num_maps <- length(maps) + maps <- unlist(maps) + dim(maps) <- c(ref_dims, map = num_maps) + map_dim <- 'map' + } + if (!is.numeric(maps)) { + error <- TRUE + } + if (is.null(dim(maps))) { + error <- TRUE + } + if (length(dim(maps)) != 3) { + error <- TRUE + } + if (error) { + stop("Parameter 'maps' must be either a numeric array with 3 dimensions ", + " or a list of numeric arrays of the same size with the 'lon' and ", + "'lat' dimensions.") + } + dimnames <- names(dim(maps)) + + # Check map_dim + if (is.character(map_dim)) { + if (is.null(dimnames)) { + stop("Specified a dimension name in 'map_dim' but no dimension names provided ", + "in 'maps'.") + } + map_dim <- which(dimnames == map_dim) + if (length(map_dim) < 1) { + stop("Dimension 'map_dim' not found in 'maps'.") + } else { + map_dim <- map_dim[1] + } + } else if (!is.numeric(map_dim)) { + stop("Parameter 'map_dim' must be either a numeric value or a ", + "dimension name.") + } + if (length(map_dim) != 1) { + stop("Parameter 'map_dim' must be of length 1.") + } + map_dim <- round(map_dim) + + # Work out lon_dim and lat_dim + lon_dim <- NULL + if (!is.null(dimnames)) { + lon_dim <- which(dimnames %in% c('lon', 'longitude'))[1] + } + if (length(lon_dim) < 1) { + lon_dim <- (1:3)[-map_dim][1] + } + lon_dim <- round(lon_dim) + + lat_dim <- NULL + if (!is.null(dimnames)) { + lat_dim <- which(dimnames %in% c('lat', 'latitude'))[1] + } + if (length(lat_dim) < 1) { + lat_dim <- (1:3)[-map_dim][2] + } + lat_dim <- round(lat_dim) + + # Check lon + if (!is.numeric(lon)) { + stop("Parameter 'lon' must be a numeric vector.") + } + if (length(lon) != dim(maps)[lon_dim]) { + stop("Parameter 'lon' does not match the longitude dimension in 'maps'.") + } + + # Check lat + if (!is.numeric(lat)) { + stop("Parameter 'lat' must be a numeric vector.") + } + if (length(lat) != dim(maps)[lat_dim]) { + stop("Parameter 'lat' does not match the longitude dimension in 'maps'.") + } + + # Check map_select_fun + if (is.numeric(map_select_fun)) { + if (length(dim(map_select_fun)) != 2) { + stop("Parameter 'map_select_fun' must be an array with dimensions ", + "'lon' and 'lat' if provided as an array.") + } + if (!identical(dim(map_select_fun), dim(maps)[-map_dim])) { + stop("The dimensions 'lon' and 'lat' in the 'map_select_fun' array must ", + "have the same size, name and order as in the 'maps' parameter.") + } + } + if (!is.function(map_select_fun)) { + stop("The parameter 'map_select_fun' must be a function or a numeric array.") + } + + # Generate the desired brks and cols. 
Only nmap, brks, cols, bar_limits, and + # bar_titles matter here because plot = F. + var_limits_maps <- range(maps, na.rm = TRUE) + if (is.null(bar_limits)) bar_limits <- display_range + nmap <- dim(maps)[map_dim] + colorbar <- GradientCatsColorBar(nmap = nmap, + brks = brks, cols = cols, vertical = FALSE, + subsampleg = NULL, bar_limits = bar_limits, + var_limits = var_limits_maps, + triangle_ends = triangle_ends, col_inf = col_inf, col_sup = col_sup, plot = FALSE, draw_separators = TRUE, + bar_titles = bar_titles, title_scale = cex_bar_titles, label_scale = legend_scale * 1.5, + extra_margin = bar_extra_margin) + + # Check legend_scale + if (!is.numeric(legend_scale)) { + stop("Parameter 'legend_scale' must be numeric.") + } + + # Check col_unknown_map + if (!is.character(col_unknown_map)) { + stop("Parameter 'col_unknown_map' must be a character string.") + } + + # Check col_mask + if (!is.character(col_mask)) { + stop("Parameter 'col_mask' must be a character string.") + } + + # Check mask + if (!is.null(mask)) { + if (!is.numeric(mask)) { + stop("Parameter 'mask' must be numeric.") + } + if (length(dim(mask)) != 2) { + stop("Parameter 'mask' must have two dimensions.") + } + if ((dim(mask)[1] != dim(maps)[lat_dim]) || + (dim(mask)[2] != dim(maps)[lon_dim])) { + stop("Parameter 'mask' must have dimensions c(lat, lon).") + } + } + # Check dots + if (!is.null(dots)) { + if (length(dim(dots)) != 2) { + stop("Parameter 'mask' must have two dimensions.") + } + if ((dim(dots)[1] != dim(maps)[lat_dim]) || + (dim(dots)[2] != dim(maps)[lon_dim])) { + stop("Parameter 'mask' must have dimensions c(lat, lon).") + } + } + + #---------------------- + # Identify the most likely map + #---------------------- + if (!is.null(colorbar$col_sup[[1]])) { + + brks_norm <- vector('list', length = nmap) + range_width <- vector('list', length = nmap) + slightly_tune_val <- vector('list', length = nmap) + for (ii in 1:nmap) { + brks_norm[[ii]] <- seq(0, 1, length.out = length(colorbar$brks[[ii]]) + 1) # add one break for col_sup + slightly_tune_val[[ii]] <- brks_norm[[ii]][2] / (length(brks_norm[[ii]]) * 2) + range_width[[ii]] <- diff(range(colorbar$brks[[ii]])) + } + ml_map <- apply(maps, c(lat_dim, lon_dim), function(x) { + if (any(is.na(x))) { + res <- NA + } else { + res <- which(x == map_select_fun(x)) + if (length(res) > 0) { + res <- res_ind <- res[1] + if (map_select_fun(x) < display_range[1] || map_select_fun(x) > display_range[2]) { + res <- -0.5 + } else { + if (map_select_fun(x) > tail(colorbar$brks[[res_ind]], 1)) { # col_sup + res <- res + 1 - slightly_tune_val[[res_ind]] + } else { + res <- res + ((map_select_fun(x) - colorbar$brks[[res_ind]][1]) / + range_width[[res_ind]] * ((length(brks_norm[[res_ind]]) - 2) / (length(brks_norm[[res_ind]]) - 1))) + if (map_select_fun(x) == colorbar$brks[[res_ind]][1]) { + res <- res + slightly_tune_val[[res_ind]] + } + } + } + } else { + res <- -0.5 + } + } + res + }) + + } else { + + brks_norm <- vector('list', length = nmap) + range_width <- display_range[2] - display_range[1] #vector('list', length = nmap) + slightly_tune_val <- vector('list', length = nmap) + for (ii in 1:nmap) { + brks_norm[[ii]] <- seq(0, 1, length.out = length(colorbar$brks[[ii]])) + slightly_tune_val[[ii]] <- brks_norm[[ii]][2] / (length(brks_norm[[ii]]) * 2) + } + ml_map <- apply(maps, c(lat_dim, lon_dim), function(x) { + if (any(is.na(x))) { + res <- NA + } else { + res <- which(x == map_select_fun(x)) + if (length(res) > 0) { + res <- res[1] + if (map_select_fun(x) < display_range[1] 
|| + map_select_fun(x) > display_range[2]) { + res <- -0.5 + } else { + res <- res + (map_select_fun(x) - display_range[1]) / range_width + if (map_select_fun(x) == display_range[1]) { + res <- res + slightly_tune_val + } + } + } else { + res <- -0.5 + } + } + res + }) + } + + nlat <- length(lat) + nlon <- length(lon) + + #---------------------- + # Set latitudes from minimum to maximum + #---------------------- + if (lat[1] > lat[nlat]) { + lat <- lat[nlat:1] + indices <- list(nlat:1, TRUE) + ml_map <- do.call("[", c(list(x = ml_map), indices)) + if (!is.null(mask)){ + mask <- mask[nlat:1, ] + } + if (!is.null(dots)){ + dots <- dots[nlat:1,] + } + } + + #---------------------- + # Set layout and parameters + #---------------------- + # Open connection to graphical device + if (!is.null(fileout)) { + saveToFile(fileout) + } else if (names(dev.cur()) == 'null device') { + dev.new(units = size_units, res = res, width = width, height = height) + } + #NOTE: I think plot.new() is not necessary in any case. +# plot.new() + #TODO: Don't hardcoded. Let users decide. + par(font.main = 1) + # If colorbars need to be plotted, re-define layout. + if (drawleg) { + layout(matrix(c(rep(1, nmap),2:(nmap + 1)), 2, nmap, byrow = TRUE), heights = c(6, 1.5)) + } + + #---------------------- + # Set colors and breaks and then PlotEquiMap + #---------------------- + if (!is.null(colorbar$col_sup[[1]])) { + tcols <- c(col_unknown_map, colorbar$cols[[1]], colorbar$col_sup[[1]]) + tbrks <- c(-1, brks_norm[[1]] + rep(1, each = length(brks_norm[[1]]))) + for (k in 2:nmap) { + tcols <- append(tcols, c(col_unknown_map, colorbar$cols[[k]], colorbar$col_sup[[k]])) + tbrks <- c(tbrks, brks_norm[[k]] + rep(k, each = length(brks_norm[[k]]))) + } + } else { # original code + tcols <- c(col_unknown_map, colorbar$cols[[1]]) + tbrks <- c(-1, brks_norm[[1]] + rep(1, each = length(brks_norm[[1]]))) + for (k in 2:nmap) { + tcols <- append(tcols, c(col_unknown_map, colorbar$cols[[k]])) + tbrks <- c(tbrks, brks_norm[[k]] + rep(k, each = length(brks_norm[[k]]))) + } + } + + if (is.null(plot_margin)) { + plot_margin <- c(5, 4, 4, 2) + 0.1 # default of par()$mar + } + + PlotEquiMap(var = ml_map, lon = lon, lat = lat, + brks = tbrks, cols = tcols, drawleg = FALSE, + filled.continents = FALSE, dots = dots, margin_scale = plot_margin, ...) + + #---------------------- + # Add overplot on top + #---------------------- + if (!is.null(mask)) { + dims_mask <- dim(mask) + latb <- sort(lat, index.return = TRUE) + dlon <- lon[2:dims_mask[2]] - lon[1:(dims_mask[2] - 1)] + wher <- which(dlon > (mean(dlon) + 1)) + if (length(wher) > 0) { + lon[(wher + 1):dims_mask[2]] <- lon[(wher + 1):dims_mask[2]] - 360 + } + lonb <- sort(lon, index.return = TRUE) + + cols_mask <- sapply(seq(from = 0, to = 1, length.out = 10), + function(x) adjustcolor(col_mask, alpha.f = x)) + image(lonb$x, latb$x, t(mask)[lonb$ix, latb$ix], + axes = FALSE, col = cols_mask, + breaks = seq(from = 0, to = 1, by = 0.1), + xlab='', ylab='', add = TRUE, xpd = TRUE) + if (!exists('coast_color')) { + coast_color <- 'black' + } + if (min(lon) < 0) { + map('world', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon -180 to 180). + } else { + map('world2', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon 0 to 360). 
+ } + box() + } + + #---------------------- + # Add colorbars + #---------------------- + if ('toptitle' %in% names(args)) { + size_title <- 1 + if ('title_scale' %in% names(args)) { + size_title <- args[['title_scale']] + } + old_mar <- par('mar') + old_mar[3] <- old_mar[3] - (2 * size_title + 1) + par(mar = old_mar) + } + + if (drawleg & !return_leg) { + GradientCatsColorBar(nmap = dim(maps)[map_dim], + brks = colorbar$brks, cols = colorbar$cols, vertical = FALSE, + subsampleg = NULL, bar_limits = bar_limits, + var_limits = var_limits_maps, + triangle_ends = triangle_ends, col_inf = colorbar$col_inf, col_sup = colorbar$col_sup, + plot = TRUE, draw_separators = TRUE, + bar_titles = bar_titles, title_scale = cex_bar_titles, label_scale = legend_scale * 1.5, + extra_margin = bar_extra_margin) + } + + if (!return_leg) { + # If the graphic was saved to file, close the connection with the device + if (!is.null(fileout)) dev.off() + } + + if (return_leg) { + tmp <- list(nmap = dim(maps)[map_dim], + brks = colorbar$brks, cols = colorbar$cols, vertical = FALSE, + subsampleg = NULL, bar_limits = bar_limits, + var_limits = var_limits_maps, + triangle_ends = triangle_ends, col_inf = colorbar$col_inf, col_sup = colorbar$col_sup, + plot = TRUE, draw_separators = TRUE, + bar_titles = bar_titles, title_scale = cex_bar_titles, label_scale = legend_scale * 1.5, + extra_margin = bar_extra_margin) + return(tmp) + #NOTE: The device is not off! Can keep plotting the color bars. + } + +} + + diff --git a/modules/Visualization/R/tmp/PlotEquiMap.R b/modules/Visualization/R/tmp/PlotEquiMap.R new file mode 100644 index 0000000000000000000000000000000000000000..78025a02b8bff01139eef256f203750f45c5f00c --- /dev/null +++ b/modules/Visualization/R/tmp/PlotEquiMap.R @@ -0,0 +1,1267 @@ +#'Maps A Two-Dimensional Variable On A Cylindrical Equidistant Projection +#' +#'Map longitude-latitude array (on a regular rectangular or gaussian grid) +#'on a cylindrical equidistant latitude and longitude projection with coloured +#'grid cells. Only the region for which data has been provided is displayed. +#'A colour bar (legend) can be plotted and adjusted. It is possible to draw +#'superimposed arrows, dots, symbols, contour lines and boxes. A number of +#'options is provided to adjust the position, size and colour of the +#'components. Some parameters are provided to add and adjust the masks that +#'include continents, oceans, and lakes. This plot function is compatible with +#'figure layouts if colour bar is disabled. +#' +#'@param var Array with the values at each cell of a grid on a regular +#' rectangular or gaussian grid. The array is expected to have two +#' dimensions: c(latitude, longitude). Longitudes can be in ascending or +#' descending order and latitudes in any order. It can contain NA values +#' (coloured with 'colNA'). Arrays with dimensions c(longitude, latitude) +#' will also be accepted but 'lon' and 'lat' will be used to disambiguate so +#' this alternative is not appropriate for square arrays. It is allowed that +#' the positions of the longitudinal and latitudinal coordinate dimensions +#' are interchanged. +#'@param lon Numeric vector of longitude locations of the cell centers of the +#' grid of 'var', in ascending or descending order (same as 'var'). Expected +#' to be regularly spaced, within either of the ranges [-180, 180] or +#' [0, 360]. Data for two adjacent regions split by the limits of the +#' longitude range can also be provided, e.g. 
\code{lon = c(0:50, 300:360)} +#' ('var' must be provided consitently). +#'@param lat Numeric vector of latitude locations of the cell centers of the +#' grid of 'var', in any order (same as 'var'). Expected to be from a regular +#' rectangular or gaussian grid, within the range [-90, 90]. +#'@param varu Array of the zonal component of wind/current/other field with +#' the same dimensions as 'var'. It is allowed that the positions of the +#' longitudinal and latitudinal coordinate dimensions are interchanged. +#'@param varv Array of the meridional component of wind/current/other field +#' with the same dimensions as 'var'. It is allowed that the positions of the +#' longitudinal and latitudinal coordinate dimensions are interchanged. +#'@param toptitle Top title of the figure, scalable with parameter +#' 'title_scale'. +#'@param sizetit Scale factor for the figure top title provided in parameter +#' 'toptitle'. Deprecated. Use 'title_scale' instead. +#'@param units Title at the top of the colour bar, most commonly the units of +#' the variable provided in parameter 'var'. +#'@param brks,cols,bar_limits,triangle_ends Usually only providing 'brks' is +#' enough to generate the desired colour bar. These parameters allow to +#' define n breaks that define n - 1 intervals to classify each of the values +#' in 'var'. The corresponding grid cell of a given value in 'var' will be +#' coloured in function of the interval it belongs to. These parameters are +#' sent to \code{ColorBar()} to generate the breaks and colours. Additional +#' colours for values beyond the limits of the colour bar are also generated +#' and applied to the plot if 'bar_limits' or 'brks' and 'triangle_ends' are +#' properly provided to do so. See ?ColorBar for a full explanation. +#'@param col_inf,col_sup,colNA Colour identifiers to colour the values in +#' 'var' that go beyond the extremes of the colour bar and to colour NA +#' values, respectively. 'colNA' takes attr(cols, 'na_color') if available by +#' default, where cols is the parameter 'cols' if provided or the vector of +#' colors returned by 'color_fun'. If not available, it takes 'pink' by +#' default. 'col_inf' and 'col_sup' will take the value of 'colNA' if not +#' specified. See ?ColorBar for a full explanation on 'col_inf' and 'col_sup'. +#'@param color_fun,subsampleg,bar_extra_labels,draw_bar_ticks Set of +#' parameters to control the visual aspect of the drawn colour bar +#' (1/3). See ?ColorBar for a full explanation. +#'@param draw_separators,triangle_ends_scale,bar_label_digits Set of +#' parameters to control the visual aspect of the drawn colour bar +#' (2/3). See ?ColorBar for a full explanation. +#'@param bar_label_scale,units_scale,bar_tick_scale,bar_extra_margin Set of +#' parameters to control the visual aspect of the drawn colour bar (3/3). +#' See ?ColorBar for a full explanation. +#'@param square Logical value to choose either to draw a coloured square for +#' each grid cell in 'var' (TRUE; default) or to draw contour lines and fill +#' the spaces in between with colours (FALSE). In the latter case, +#' 'filled.continents' will take the value FALSE if not specified. +#'@param filled.continents Colour to fill in drawn projected continents. +#' Takes the value gray(0.5) by default or, if 'square = FALSE', takes the +#' value FALSE. If set to FALSE, continents are not filled in. +#'@param filled.oceans A logical value or the color name to fill in drawn +#' projected oceans. The default value is FALSE. 
If it is TRUE, the default +#' colour is "light blue". +#'@param country.borders A logical value indicating if the country borders +#' should be plotted (TRUE) or not (FALSE). It only works when +#' 'filled.continents' is FALSE. The default value is FALSE. +#'@param coast_color Colour of the coast line of the drawn projected continents. +#' Takes the value gray(0.5) by default. +#'@param coast_width Line width of the coast line of the drawn projected +#' continents. Takes the value 1 by default. +#'@param lake_color Colour of the lake or other water body inside continents. +#' The default value is NULL. +#'@param shapefile A character string of the path to a .rds file or a list +#' object containinig shape file data. If it is a .rds file, it should contain +#' a list. The list should contains 'x' and 'y' at least, which indicate the +#' location of the shape. The default value is NULL. +#'@param shapefile_color Line color of the shapefile. +#'@param shapefile_lwd Line width of the shapefile. The default value is 1. +#'@param contours Array of same dimensions as 'var' to be added to the plot +#' and displayed with contours. Parameter 'brks2' is required to define the +#' magnitude breaks for each contour curve. Disregarded if 'square = FALSE'. +#' It is allowed that the positions of the longitudinal and latitudinal +#' coordinate dimensions are interchanged. +#'@param brks2 Vector of magnitude breaks where to draw contour curves for the +#' array provided in 'contours' or if 'square = FALSE'. +#'@param contour_lwd Line width of the contour curves provided via 'contours' +#' and 'brks2', or if 'square = FALSE'. +#'@param contour_color Line color of the contour curves provided via 'contours' +#' and 'brks2', or if 'square = FALSE'. +#'@param contour_lty Line type of the contour curves. Takes 1 (solid) by +#' default. See help on 'lty' in par() for other accepted values. +#'@param contour_draw_label A logical value indicating whether to draw the +#' contour labels or not. The default value is TRUE. +#'@param contour_label_scale Scale factor for the superimposed labels when +#' drawing contour levels. +#'@param dots Array of same dimensions as 'var' or with dimensions +#' c(n, dim(var)), where n is the number of dot/symbol layers to add to the +#' plot. A value of TRUE at a grid cell will draw a dot/symbol on the +#' corresponding square of the plot. By default all layers provided in 'dots' +#' are plotted with dots, but a symbol can be specified for each of the +#' layers via the parameter 'dot_symbol'. It is allowed that the positions of +#' the longitudinal and latitudinal coordinate dimensions are interchanged. +#'@param dot_symbol Single character/number or vector of characters/numbers +#' that correspond to each of the symbol layers specified in parameter 'dots'. +#' If a single value is specified, it will be applied to all the layers in +#' 'dots'. Takes 15 (centered square) by default. See 'pch' in par() for +#' additional accepted options. +#'@param dot_size Scale factor for the dots/symbols to be plotted, specified +#' in 'dots'. If a single value is specified, it will be applied to all +#' layers in 'dots'. Takes 1 by default. +#'@param arr_subsamp Subsampling factor to select a subset of arrows in +#' 'varu' and 'varv' to be drawn. Only one out of arr_subsamp arrows will +#' be drawn. Takes 1 by default. +#'@param arr_scale Scale factor for drawn arrows from 'varu' and 'varv'. +#' Takes 1 by default. 
+#'@param arr_ref_len Length of the refence arrow to be drawn as legend at the +#' bottom of the figure (in same units as 'varu' and 'varv', only affects the +#' legend for the wind or variable in these arrays). Defaults to 15. +#'@param arr_units Units of 'varu' and 'varv', to be drawn in the legend. +#' Takes 'm/s' by default. +#'@param arr_scale_shaft Parameter for the scale of the shaft of the arrows +#' (which also depend on the number of figures and the arr_scale parameter). +#' Defaults to 1. +#'@param arr_scale_shaft_angle Parameter for the scale of the angle of the +#' shaft of the arrows (which also depend on the number of figure and the +#' arr_scale parameter). Defaults to 1. +#'@param axelab Whether to draw longitude and latitude axes or not. +#' TRUE by default. +#'@param labW Whether to label the longitude axis with a 'W' instead of minus +#' for negative values. Defaults to FALSE. +#'@param lab_dist_x A numeric of the distance of the longitude labels to the +#' box borders. The default value is NULL and is automatically adjusted by +#' the function. +#'@param lab_dist_y A numeric of the distance of the latitude labels to the +#' box borders. The default value is NULL and is automatically adjusted by +#' the function. +#'@param degree_sym A logical indicating whether to include degree symbol +#' (30° N) or not (30N; default). +#'@param intylat Interval between latitude ticks on y-axis, in degrees. +#' Defaults to 20. +#'@param intxlon Interval between latitude ticks on x-axis, in degrees. +#' Defaults to 20. +#'@param xlonshft A numeric of the degrees to shift the latitude ticks. The +#' default value is 0. +#'@param ylatshft A numeric of the degrees to shift the longitude ticks. The +#' default value is 0. +#'@param xlabels A vector of character string of the custumized x-axis labels. +#' The values should correspond to each tick, which is decided by the longitude +#' and parameter 'intxlon'. The default value is NULL and the labels will be +#' automatically generated. +#'@param ylabels A vector of character string of the custumized y-axis labels. +#' The values should correspond to each tick, which is decided by the latitude +#' and parameter 'intylat'. The default value is NULL and the labels will be +#' automatically generated. +#'@param axes_tick_scale Scale factor for the tick lines along the longitude +#' and latitude axes. +#'@param axes_label_scale Scale factor for the labels along the longitude +#' and latitude axes. +#'@param drawleg Whether to plot a color bar (legend, key) or not. Defaults to +#' TRUE. It is not possible to plot the colour bar if 'add = TRUE'. Use +#' ColorBar() and the return values of PlotEquiMap() instead. +#'@param boxlim Limits of a box to be added to the plot, in degrees: +#' c(x1, y1, x2, y2). A list with multiple box specifications can also be +#' provided. +#'@param boxcol Colour of the box lines. A vector with a colour for each of +#' the boxes is also accepted. Defaults to 'purple2'. +#'@param boxlwd Line width of the box lines. A vector with a line width for +#' each of the boxes is also accepted. Defaults to 5. +#'@param margin_scale Scale factor for the margins around the map plot, with +#' the format c(y1, x1, y2, x2). Defaults to rep(1, 4). If drawleg = TRUE, +#' then margin_scale[1] is subtracted 1 unit. +#'@param title_scale Scale factor for the figure top title. Defaults to 1. +#'@param numbfig Number of figures in the layout the plot will be put into. 
+#' A higher numbfig will result in narrower margins and smaller labels, +#' axe labels, ticks, thinner lines, ... Defaults to 1. +#'@param fileout File where to save the plot. If not specified (default) a +#' graphics device will pop up. Extensions allowed: eps/ps, jpeg, png, pdf, +#' bmp and tiff. +#'@param width File width, in the units specified in the parameter size_units +#' (inches by default). Takes 8 by default. +#'@param height File height, in the units specified in the parameter +#' size_units (inches by default). Takes 5 by default. +#'@param size_units Units of the size of the device (file or window) to plot +#' in. Inches ('in') by default. See ?Devices and the creator function of +#' the corresponding device. +#'@param res Resolution of the device (file or window) to plot in. See +#' ?Devices and the creator function of the corresponding device. +#'@param \dots Arguments to be passed to the method. Only accepts the following +#' graphical parameters:\cr +#' adj ann ask bg bty cex.sub cin col.axis col.lab col.main col.sub cra crt +#' csi cxy err family fg font font.axis font.lab font.main font.sub lend +#' lheight ljoin lmitre mex mfcol mfrow mfg mkh omd omi page pch pin plt +#' pty smo srt tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog \cr +#' For more information about the parameters see `par`. +#' +#'@return +#'\item{brks}{ +#' Breaks used for colouring the map (and legend if drawleg = TRUE). +#'} +#'\item{cols}{ +#' Colours used for colouring the map (and legend if drawleg = TRUE). Always +#' of length length(brks) - 1. +#'} +#'\item{col_inf}{ +#' Colour used to draw the lower triangle end in the colour bar (NULL if not +#' drawn at all). +#' } +#'\item{col_sup}{ +#' Colour used to draw the upper triangle end in the colour bar (NULL if not +#' drawn at all). +#'} +#' +#'@examples +#'# See examples on Load() to understand the first lines in this example +#' \dontrun{ +#'data_path <- system.file('sample_data', package = 's2dv') +#'expA <- list(name = 'experiment', path = file.path(data_path, +#' 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', +#' '$VAR_NAME$_$START_DATE$.nc')) +#'obsX <- list(name = 'observation', path = file.path(data_path, +#' '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', +#' '$VAR_NAME$_$YEAR$$MONTH$.nc')) +#' +#'# Now we are ready to use Load(). 
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- Load('tos', list(expA), list(obsX), startDates, +#' leadtimemin = 1, leadtimemax = 4, output = 'lonlat', +#' latmin = 27, latmax = 48, lonmin = -12, lonmax = 40) +#' } +#' \dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#' } +#'PlotEquiMap(sampleData$mod[1, 1, 1, 1, , ], sampleData$lon, sampleData$lat, +#' toptitle = 'Predicted sea surface temperature for Nov 1960 from 1st Nov', +#' title_scale = 0.5) +#'@import graphics maps +#'@importFrom grDevices dev.cur dev.new dev.off gray +#'@importFrom stats cor +#'@export +PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, + toptitle = NULL, sizetit = NULL, units = NULL, + brks = NULL, cols = NULL, bar_limits = NULL, + triangle_ends = NULL, col_inf = NULL, col_sup = NULL, + colNA = NULL, color_fun = clim.palette(), + square = TRUE, filled.continents = NULL, + filled.oceans = FALSE, country.borders = FALSE, + coast_color = NULL, coast_width = 1, lake_color = NULL, + shapefile = NULL, shapefile_color = NULL, shapefile_lwd = 1, + contours = NULL, brks2 = NULL, contour_lwd = 0.5, + contour_color = 'black', contour_lty = 1, + contour_draw_label = TRUE, contour_label_scale = 1, + dots = NULL, dot_symbol = 4, dot_size = 1, + arr_subsamp = floor(length(lon) / 30), arr_scale = 1, + arr_ref_len = 15, arr_units = "m/s", + arr_scale_shaft = 1, arr_scale_shaft_angle = 1, + axelab = TRUE, labW = FALSE, + lab_dist_x = NULL, lab_dist_y = NULL, degree_sym = FALSE, + intylat = 20, intxlon = 20, + xlonshft = 0, ylatshft = 0, xlabels = NULL, ylabels = NULL, + axes_tick_scale = 1, axes_label_scale = 1, + drawleg = TRUE, subsampleg = NULL, + bar_extra_labels = NULL, draw_bar_ticks = TRUE, + draw_separators = FALSE, triangle_ends_scale = 1, + bar_label_digits = 4, bar_label_scale = 1, + units_scale = 1, bar_tick_scale = 1, + bar_extra_margin = rep(0, 4), + boxlim = NULL, boxcol = 'purple2', boxlwd = 5, + margin_scale = rep(1, 4), title_scale = 1, + numbfig = NULL, fileout = NULL, + width = 8, height = 5, size_units = 'in', + res = 100, ...) { + # Process the user graphical parameters that may be passed in the call + ## Graphical parameters to exclude + excludedArgs <- c("cex", "cex.axis", "cex.lab", "cex.main", "col", "din", "fig", "fin", "lab", "las", "lty", "lwd", "mai", "mar", "mgp", "new", "oma", "ps", "tck") + userArgs <- .FilterUserGraphicArgs(excludedArgs, ...) 
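+  # Only the graphical parameters that PlotEquiMap() does not manage itself are kept
+  # in 'userArgs'; they are applied later with par(userArgs) before the map is drawn.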
+ + # If there is any filenames to store the graphics, process them + # to select the right device + if (!is.null(fileout)) { + deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, units = size_units, res = res) + saveToFile <- deviceInfo$fun + fileout <- deviceInfo$files + } + + # Check lon, lat + if (!is.numeric(lon) || !is.numeric(lat)) { + stop("Parameters 'lon' and 'lat' must be numeric vectors.") + } + + # Check var + if (is.null(var)) { + stop("Parameter 'var' cannot be NULL.") + } + if (!is.array(var)) { + stop("Parameter 'var' must be a numeric array.") + } + + transpose <- FALSE + if (!is.null(names(dim(var)))) { + if (any(names(dim(var)) %in% .KnownLonNames()) && + any(names(dim(var)) %in% .KnownLatNames())) { + lon_dim <- names(dim(var))[names(dim(var)) %in% .KnownLonNames()] + lat_dim <- names(dim(var))[names(dim(var)) %in% .KnownLatNames()] + } else { + names(dim(var)) <- NULL + lat_dim <- NULL + lon_dim <- NULL + .warning("Dimension names of 'var' doesn't correspond to any coordinates names supported by s2dv package.") + } + } else { + lon_dim <- NULL + lat_dim <- NULL + .warning("Parameter 'var' should have dimension names. Coordinates 'lon' and 'lat' have been assigned into the corresponding coordinates dimensions.") + } + + if (length(dim(var)) > 2) { + if (!is.null(lon_dim) & !is.null(lat_dim)) { + dimnames <- names(dim(var)) + dim(var) <- dim(var)[which((dimnames == lon_dim | dimnames == lat_dim | dim(var) != 1))] + } else { + if (all(dim(var) == 1)) { + dim(var) <- c(1, 1) + } else if (length(dim(var)[which(dim(var) > 1)]) == 2) { + var <- drop(var) + } else if (length(dim(var)[which(dim(var) > 1)]) == 1) { + dim(var) <- c(dim(var)[which(dim(var) > 1)], 1) + } + } + } + + if (length(dim(var)) != 2) { + stop("Parameter 'var' must be a numeric array with two dimensions.") + } + + if ((dim(var)[1] == length(lon) && dim(var)[2] == length(lat)) || + (dim(var)[2] == length(lon) && dim(var)[1] == length(lat))) { + if (dim(var)[2] == length(lon) && dim(var)[1] == length(lat)) { + if (length(lon) == length(lat)) { + if (is.null(names(dim(var)))) { + .warning("Parameter 'var' should have dimension names. Coordinates 'lon' and 'lat' have been assigned into the first and second dimensions.") + } else { + if (names(dim(var)[1]) == lat_dim) { + transpose <- TRUE + } + } + } else { + transpose <- TRUE + } + } + } else { + stop("Parameters 'lon' and 'lat' must have as many elements as the number of cells along longitudes and latitudes in the input array 'var'.") + } + + if (!is.null(names(dim(var)))) { + if (names(dim(var)[1]) == lon_dim) { + if (transpose) { + stop("Coordinates dimensions of 'var' doesn't correspond to lat or lon.") + } + } else if (names(dim(var)[2]) == lon_dim) { + if (!transpose) { + stop("Coordinates dimensions of 'var' doesn't correspond to lat or lon.") + } + } + } + + # Transpose the input matrices because the base plot functions work directly + # with dimensions c(lon, lat). 
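+  # From this point on, dim(var) is c(lon, lat) regardless of the input orientation.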
+ + if (transpose) { + var <- t(var) + } + + transpose <- FALSE + + names(dim(var)) <- c(lon_dim, lat_dim) + dims <- dim(var) + + # Check varu and varv + if (!is.null(varu) && !is.null(varv)) { + if (!is.array(varu) || !(length(dim(varu)) == 2)) { + stop("Parameter 'varu' must be a numerical array with two dimensions.") + } + if (!is.array(varv) || !(length(dim(varv)) == 2)) { + stop("Parameter 'varv' must be a numerical array with two dimensions.") + } + } else if (!is.null(varu) || !is.null(varv)) { + stop("Only one of the components 'varu' or 'varv' has been provided. Both must be provided.") + } + + if (!is.null(varu) && !is.null(varv)) { + if (!all(dim(varu) %in% dim(varv)) || !all(names(dim(varv)) %in% names(dim(varu)))) { + stop("Parameter 'varu' and 'varv' must have equal dimensions and dimension names.") + } else if (any(dim(varu) != dim(varv)) || any(names(dim(varv)) != names(dim(varu)))) { + varv <- t(varv) + names(dim(varv)) <- names(dim(varu)) + } + + if (is.null(lon_dim)) { + names(dim(varu)) <- NULL + names(dim(varv)) <- NULL + } else { + if (!is.null(names(dim(varu)))) { + if (!(lon_dim %in% names(dim(varu)) && lat_dim %in% names(dim(varu)))) { + stop("Parameters 'varu' and 'varv' must have same dimension names as 'var'.") + } else if (dim(varu)[lon_dim] != dim(var)[lon_dim] || dim(varu)[lat_dim] != dim(var)[lat_dim]) { + stop("Parameters 'varu' and 'varv' must have same dimensions as 'var'.") + } + } else { + .warning("Parameters 'varu' and 'varv' should have dimension names. Coordinates 'lon' and 'lat' have been assigned into the corresponding coordinates dimensions.") + } + } + + + if ((dim(varu)[1] == dims[1] && dim(varu)[2] == dims[2]) || + (dim(varu)[2] == dims[1] && dim(varu)[1] == dims[2])) { + if (dim(varu)[2] == dims[1] && dim(varu)[1] == dims[2]) { + if (length(lon) == length(lat)) { + if (is.null(names(dim(varu)))) { + .warning("Parameters 'varu' and 'varv' should have dimension names. Coordinates 'lon' and 'lat' have been assigned into the first and second dimensions.") + } else { + if (names(dim(varu)[1]) == lat_dim) { + transpose <- TRUE + } + } + } else { + transpose <- TRUE + } + } + } else { + stop("Parameters 'lon' and 'lat' must have as many elements as the number of cells along longitudes and latitudes in the input array 'varu' and 'varv'.") + } + + if (transpose) { + varu <- t(varu) + varv <- t(varv) + } + + transpose <- FALSE + + } + + # Check contours + if (!is.null(contours)) { + if (!is.array(contours) || !(length(dim(contours)) == 2)) { + stop("Parameter 'contours' must be a numerical array with two dimensions.") + } + } + + + if (!is.null(contours)) { + + if (is.null(lon_dim)) { + names(dim(contours)) <- NULL + } else { + if (!is.null(names(dim(contours)))) { + if (!(lon_dim %in% names(dim(contours)) && lat_dim %in% names(dim(contours)))) { + stop("Parameters 'contours' must have same dimension names as 'var'.") + } else if (dim(contours)[lon_dim] != dim(var)[lon_dim] || dim(contours)[lat_dim] != dim(var)[lat_dim]) { + stop("Parameters 'contours' must have same dimensions as 'var'.") + } + } else { + .warning("Parameters 'contours' should have dimension names. 
Coordinates 'lon' and 'lat' have been assigned into the corresponding coordinates dimensions.") + } + } + + + transpose <- FALSE + if ((dim(contours)[1] == dims[1] && dim(contours)[2] == dims[2]) || + (dim(contours)[2] == dims[1] && dim(contours)[1] == dims[2])) { + if (dim(contours)[2] == dims[1] && dim(contours)[1] == dims[2]) { + if (length(lon) == length(lat)) { + if (is.null(names(dim(contours)))) { + .warning("Parameter 'contours' should have dimension names. Coordinates 'lon' and 'lat' have been assigned into the first and second dimensions.") + } else { + if (names(dim(contours)[1]) == lat_dim) { + transpose <- TRUE + } + } + } else { + transpose <- TRUE + } + } + } else { + stop("Parameters 'lon' and 'lat' must have as many elements as the number of cells along longitudes and latitudes in the input array 'contours'.") + } + + if (transpose) { + contours <- t(contours) + } + + transpose <- FALSE + + } + + # Check toptitle + if (is.null(toptitle) || is.na(toptitle)) { + toptitle <- '' + } + if (!is.character(toptitle)) { + stop("Parameter 'toptitle' must be a character string.") + } + + # Check sizetit + if (!is.null(sizetit)) { + .warning("Parameter 'sizetit' is obsolete. Use 'title_scale' instead.") + if (!is.numeric(sizetit) || length(sizetit) != 1) { + stop("Parameter 'sizetit' must be a single numeric value.") + } + title_scale <- sizetit + } + + var_limits <- c(min(var, na.rm = TRUE), max(var, na.rm = TRUE)) + # Check: brks, cols, subsampleg, bar_limits, color_fun, bar_extra_labels, draw_bar_ticks + # draw_separators, triangle_ends_scale, label_scale, units, units_scale, + # bar_label_digits + # Build: brks, cols, bar_limits, col_inf, col_sup + colorbar <- ColorBar(brks, cols, FALSE, subsampleg, bar_limits, var_limits, + triangle_ends, col_inf, col_sup, color_fun, FALSE, + extra_labels = bar_extra_labels, draw_ticks = draw_bar_ticks, + draw_separators = draw_separators, + triangle_ends_scale = triangle_ends_scale, + label_scale = bar_label_scale, title = units, + title_scale = units_scale, tick_scale = bar_tick_scale, + extra_margin = bar_extra_margin, label_digits = bar_label_digits) + brks <- colorbar$brks + cols <- colorbar$cols + col_inf <- colorbar$col_inf + col_sup <- colorbar$col_sup + bar_limits <- c(head(brks, 1), tail(brks, 1)) + + # Check colNA + if (is.null(colNA)) { + if ('na_color' %in% names(attributes(cols))) { + colNA <- attr(cols, 'na_color') + if (!.IsColor(colNA)) { + stop("The 'na_color' provided as attribute of the colour vector must be a valid colour identifier.") + } + } else { + colNA <- 'pink' + } + } else if (!.IsColor(colNA)) { + stop("Parameter 'colNA' must be a valid colour identifier.") + } + + # Check square + if (!is.logical(square)) { + stop("Parameter 'square' must be logical.") + } + + # Check filled.continents + if (is.null(filled.continents)) { + if (!square) { + filled.continents <- FALSE + } else { + filled.continents <- TRUE + } + } + if (!.IsColor(filled.continents) && !is.logical(filled.continents)) { + stop("Parameter 'filled.continents' must be logical or a colour identifier.") + } else if (!is.logical(filled.continents)) { + continent_color <- filled.continents + filled.continents <- TRUE + } else { + continent_color <- gray(0.5) + } + + # Check filled.oceans + if (!.IsColor(filled.oceans) & !is.logical(filled.oceans)) { + stop("Parameter 'filled.oceans' must be logical or a colour identifier.") + } else if (!is.logical(filled.oceans)) { + ocean_color <- filled.oceans + filled.oceans <- TRUE + } else if (filled.oceans) { + 
ocean_color <- "light blue" + } + + # Check country.borders + if (!is.logical(country.borders)) { + stop("Parameter 'country.borders' must be logical.") + } + + # Check coast_color + if (is.null(coast_color)) { + if (filled.continents) { + coast_color <- continent_color + } else { + coast_color <- 'black' + } + } + if (!.IsColor(coast_color)) { + stop("Parameter 'coast_color' must be a valid colour identifier.") + } + + # Check coast_width + if (!is.numeric(coast_width)) { + stop("Parameter 'coast_width' must be numeric.") + } + + # Check lake_color + if (!is.null(lake_color)) { + if (!.IsColor(lake_color)) { + stop("Parameter 'lake_color' must be a valid colour identifier.") + } + } + + # Check shapefile + if (!is.null(shapefile)) { + if (is.list(shapefile)) { + shape <- shapefile + if (any(!c('x', 'y') %in% names(shape))) { + stop("The list names of the object in 'shapefile' .rds file should ", + "have at least 'x' and 'y'.") + } + if (length(shape$x) != length(shape$y)) { + stop("The length of x and y in 'shapefile' list should be equal.") + } + } else if (!is.character(shapefile)) { + stop("Parameter 'shapefile' must be a .rds file or a list.") + } else { # .rds file + if (!file.exists(shapefile)) { + stop("Parameter 'shapefile' is not a valid file.") + } + if (!grepl("\\.rds$", shapefile)) { + stop("Parameter 'shapefile' must be a .rds file or a list.") + } + shape <- readRDS(file = shapefile) + if (!is.list(shape)) { + stop("Parameter 'shapefile' should be a .rds file of a list object.") + } + if (any(!c('x', 'y') %in% names(shape))) { + stop("The list names of the object in 'shapefile' .rds file should ", + "have at least 'x' and 'y'.") + } + if (length(shape$x) != length(shape$y)) { + stop("The length of x and y in 'shapefile' list should be equal.") + } + } + } + + # Check shapefile_col + if (is.null(shapefile_color)) { + if (filled.continents) { + shapefile_color <- continent_color + } else { + shapefile_color <- 'black' + } + } + if (!.IsColor(shapefile_color)) { + stop("Parameter 'shapefile_color' must be a valid colour identifier.") + } + + # Check brks2 + if (is.null(brks2)) { + if (is.null(contours)) { + if (!square) { + brks2 <- brks + contours <- var + } + } else { + ll <- signif(min(contours, na.rm = TRUE), 2) + ul <- signif(max(contours, na.rm = TRUE), 2) + brks2 <- signif(seq(ll, ul, length.out = length(brks)), 2) + } + } + + # Check contour_lwd + if (!is.numeric(contour_lwd)) { + stop("Parameter 'contour_lwd' must be numeric.") + } + + # Check contour_color + if (!.IsColor(contour_color)) { + stop("Parameter 'contour_color' must be a valid colour identifier.") + } + + # Check contour_lty + if (!is.numeric(contour_lty) && !is.character(contour_lty)) { + stop("Parameter 'contour_lty' must be either a number or a character string.") + } + + # Check contour_draw_label + if (!is.logical(contour_draw_label)) { + stop("Parameter 'contour_draw_label' must be logical.") + } + + # Check contour_label_scale + if (!is.numeric(contour_label_scale)) { + stop("Parameter 'contour_label_scale' must be numeric.") + } + + # Check dots + if (!is.null(dots)) { + if (!is.array(dots) || !(length(dim(dots)) %in% c(2, 3))) { + stop("Parameter 'dots' must be a logical array with two or three dimensions.") + } + if (length(dim(dots)) == 2) { + dim(dots) <- c(1, dim(dots)) + } + + if (is.null(lon_dim)) { + names(dim(dots)) <- NULL + } else { + if (!is.null(names(dim(dots)))) { + if (!(lon_dim %in% names(dim(dots)) && lat_dim %in% names(dim(dots)))) { + stop("Parameters 'dots' must have same 
dimension names as 'var'.") + } else if (dim(dots)[lon_dim] != dim(var)[lon_dim] || dim(dots)[lat_dim] != dim(var)[lat_dim]) { + stop("Parameters 'dots' must have same dimensions as 'var'.") + } + } else { + .warning("Parameters 'dots' should have dimension names. Coordinates 'lon' and 'lat' have been assigned into the corresponding coordinates dimensions.") + } + } + + transpose <- FALSE + if ((dim(dots)[2] == dims[1] && dim(dots)[3] == dims[2]) || + (dim(dots)[3] == dims[1] && dim(dots)[2] == dims[2])) { + if (dim(dots)[3] == dims[1] && dim(dots)[2] == dims[2]) { + if (length(lon) == length(lat)) { + if (is.null(names(dim(dots)))) { + .warning("Parameter 'dots' should have dimension names. Coordinates 'lon' and 'lat' have been assigned into the first and second dimensions.") + } else { + if (names(dim(dots)[2]) == lat_dim) { + transpose <- TRUE + } + } + } else { + transpose <- TRUE + } + } + } else { + stop("Parameter 'dots' must have same number of longitudes and latitudes as 'var'.") + } + + if (transpose) { + dots <- aperm(dots, c(1, 3, 2)) + } + + transpose <- FALSE + + } + + # Check dot_symbol and dot_size + if (!is.null(dots)) { + if (!is.numeric(dot_symbol) && !is.character(dot_symbol)) { + stop("Parameter 'dot_symbol' must be a numeric or character string vector.") + } + if (length(dot_symbol) == 1) { + dot_symbol <- rep(dot_symbol, dim(dots)[1]) + } else if (length(dot_symbol) < dim(dots)[1]) { + stop("Parameter 'dot_symbol' does not contain enough symbols.") + } + if (!is.numeric(dot_size)) { + stop("Parameter 'dot_size' must be numeric.") + } + if (length(dot_size) == 1) { + dot_size <- rep(dot_size, dim(dots)[1]) + } else if (length(dot_size) < dim(dots)[1]) { + stop("Parameter 'dot_size' does not contain enough sizes.") + } + } + + # Check arrow parameters + if (!is.numeric(arr_subsamp)) { + stop("Parameter 'arr_subsamp' must be numeric.") + } + if (!is.numeric(arr_scale)) { + stop("Parameter 'arr_scale' must be numeric.") + } + if (!is.numeric(arr_ref_len)) { + stop("Parameter 'arr_ref_len' must be numeric.") + } + if (!is.character(arr_units)) { + stop("Parameter 'arr_units' must be character.") + } + if (!is.numeric(arr_scale_shaft)) { + stop("Parameter 'arr_scale_shaft' must be numeric.") + } + if (!is.numeric(arr_scale_shaft_angle)) { + stop("Parameter 'arr_scale_shaft_angle' must be numeric.") + } + + # Check axis parameters + if (!is.logical(axelab)) { + stop("Parameter 'axelab' must be logical.") + } + if (!is.logical(labW)) { + stop("Parameter 'labW' must be logical.") + } + if (!is.null(lab_dist_x)) { + if (!is.numeric(lab_dist_x)) { + stop("Parameter 'lab_dist_x' must be numeric.") + } + } + if (!is.null(lab_dist_y)) { + if (!is.numeric(lab_dist_y)) { + stop("Parameter 'lab_dist_y' must be numeric.") + } + } + if (!is.numeric(intylat)) { + stop("Parameter 'intylat' must be numeric.") + } else { + intylat <- round(intylat) + } + if (!is.numeric(intxlon)) { + stop("Parameter 'intxlon' must be numeric.") + } else { + intxlon <- round(intxlon) + } + if (!is.numeric(xlonshft) | length(xlonshft) != 1) { + stop("Parameter 'xlonshft' must be a number.") + } + if (!is.numeric(ylatshft) | length(ylatshft) != 1) { + stop("Parameter 'ylatshft' must be a number.") + } + if (!is.null(xlabels)) { + if (!is.character(xlabels) | !is.vector(xlabels)) { + stop("Parameter 'xlabels' must be a vector of character string.") + } + } + if (!is.null(ylabels)) { + if (!is.character(ylabels) | !is.vector(ylabels)) { + stop("Parameter 'ylabels' must be a vector of character string.") + } + } 
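+  # Illustrative sketch: the labels must match the ticks generated from the rounded
+  # lon/lat range and 'intxlon'/'intylat'. For instance, with longitudes spanning
+  # 0..40 and intxlon = 20, the ticks fall at 0, 20 and 40, so 'xlabels' would need
+  # length 3, e.g. xlabels = c("0", "20E", "40E").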
+ + # Check legend parameters + if (!is.logical(drawleg)) { + stop("Parameter 'drawleg' must be logical.") + } + + # Check box parameters + if (!is.null(boxlim)) { + if (!is.list(boxlim)) { + boxlim <- list(boxlim) + } + for (i in 1:length(boxlim)) { + if (!is.numeric(boxlim[[i]]) || length(boxlim[[i]]) != 4) { + stop("Parameter 'boxlim' must be a a numeric vector or a list of numeric vectors of length 4 (with W, S, E, N box limits).") + } + } + if (!is.character(boxcol)) { + stop("Parameter 'boxcol' must be a character string or a vector of character strings.") + } else { + if (length(boxlim) != length(boxcol)) { + if (length(boxcol) == 1) { + boxcol <- rep(boxcol, length(boxlim)) + } else { + stop("Parameter 'boxcol' must have a colour for each box in 'boxlim' or a single colour for all boxes.") + } + } + } + if (!is.numeric(boxlwd)) { + stop("Parameter 'boxlwd' must be numeric.") + } else { + if (length(boxlim) != length(boxlwd)) { + if (length(boxlwd) == 1) { + boxlwd <- rep(boxlwd, length(boxlim)) + } else { + stop("Parameter 'boxlwd' must have a line width for each box in 'boxlim' or a single line width for all boxes.") + } + } + } + } + + # Check margin_scale + if (!is.numeric(margin_scale) || length(margin_scale) != 4) { + stop("Parameter 'margin_scale' must be a numeric vector of length 4.") + } + + # Check title_scale + if (!is.numeric(title_scale)) { + stop("Parameter 'title_scale' must be numeric.") + } + + # Check axes_tick_scale + if (!is.numeric(axes_tick_scale)) { + stop("Parameter 'axes_tick_scale' must be numeric.") + } + + # Check axes_label_scale + if (!is.numeric(axes_label_scale)) { + stop("Parameter 'axes_label_scale' must be numeric.") + } + + # Check numbfig + if (!is.null(numbfig)) { + if (!is.numeric(numbfig)) { + stop("Parameter 'numbfig' must be numeric.") + } else { + numbfig <- round(numbfig) + scale <- 1 / numbfig ** 0.3 + axes_tick_scale <- axes_tick_scale * scale + axes_label_scale <- axes_label_scale * scale + title_scale <- title_scale * scale + margin_scale <- margin_scale * scale + arr_scale <- arr_scale * scale + dot_size <- dot_size * scale + contour_label_scale <- contour_label_scale * scale + contour_lwd <- contour_lwd * scale + } + } + + # + # Input arguments + # ~~~~~~~~~~~~~~~~~ + # + latb <- sort(lat, index.return = TRUE) + dlon <- diff(lon) + wher <- which(dlon > (mean(dlon) + 1)) + if (length(wher) > 0) { + .warning("Detect gap in 'lon' vector, which is considered as crossing the border.") + lon[(wher + 1):dims[1]] <- lon[(wher + 1):dims[1]] - 360 + } + lonb <- sort(lon, index.return = TRUE) + latmin <- floor(min(lat) / 10) * 10 + latmax <- ceiling(max(lat) / 10) * 10 + lonmin <- floor(min(lon) / 10) * 10 + lonmax <- ceiling(max(lon) / 10) * 10 + + # + # Plotting the map + # ~~~~~~~~~~~~~~~~~~ + # + + # Open connection to graphical device + if (!is.null(fileout)) { + saveToFile(fileout) + } else if (names(dev.cur()) == 'null device') { + dev.new(units = size_units, res = res, width = width, height = height) + } + + # + # Defining the layout + # ~~~~~~~~~~~~~~~~~~~~~ + # + if (drawleg) { + margin_scale[1] <- margin_scale[1] - 1 + } + margins <- rep(0.4, 4) * margin_scale + margins[4] <- margins[4] + 1 + cex_title <- 2 * title_scale + cex_axes_labels <- 1.3 * axes_label_scale + cex_axes_ticks <- -0.5 * axes_tick_scale + spaceticklab <- 0 + if (axelab) { + # Y axis label + if (!is.null(ylabels)) { + ypos <- seq(latmin, latmax, intylat) + ylatshft + if (length(ypos) != length(ylabels)) { + stop(paste0("Parameter 'ylabels' must have the same 
length as the latitude ", + "vector spaced by 'intylat' (length = ", length(ypos), ").")) + } + ylabs <- ylabels + } else { + ypos <- seq(latmin, latmax, intylat) + ylatshft + letters <- array('', length(ypos)) + if (degree_sym == FALSE) { + letters[ypos < 0] <- 'S' + letters[ypos > 0] <- 'N' + } else { + letters[ypos < 0] <- paste(intToUtf8(176), 'S') + letters[ypos > 0] <- paste(intToUtf8(176), 'N') + } + ylabs <- paste(as.character(abs(ypos)), letters, sep = '') + } + + # X axis label + if (!is.null(xlabels)) { + xpos <- seq(lonmin, lonmax, intxlon) + xlonshft + if (length(xpos) != length(xlabels)) { + stop(paste0("Parameter 'xlabels' must have the same length as the longitude ", + "vector spaced by 'intxlon' (length = ", length(xpos), ").")) + } + xlabs <- xlabels + } else { + xpos <- seq(lonmin, lonmax, intxlon) + xlonshft + letters <- array('', length(xpos)) + if (labW) { + xpos2 <- xpos + xpos2[xpos2 > 180] <- 360 - xpos2[xpos2 > 180] + } + if (degree_sym == FALSE) { + letters[xpos < 0] <- 'W' + letters[xpos > 0] <- 'E' + } else { + letters[xpos < 0] <- paste(intToUtf8(176), 'W') + letters[xpos > 0] <- paste(intToUtf8(176), 'E') + } + if (labW) { + letters[xpos == 0] <- ' ' + letters[xpos == 180] <- ' ' + if (degree_sym == FALSE) { + letters[xpos > 180] <- 'W' + } else { + letters[xpos > 180] <- paste(intToUtf8(176), 'W') + } + xlabs <- paste(as.character(abs(xpos2)), letters, sep = '') + } else { + xlabs <- paste(as.character(abs(xpos)), letters, sep = '') + } + } + spaceticklab <- max(-cex_axes_ticks, 0) + margins[1] <- margins[1] + 1.2 * cex_axes_labels + spaceticklab + margins[2] <- margins[2] + 1.2 * cex_axes_labels + spaceticklab + } + bar_extra_margin[2] <- bar_extra_margin[2] + margins[2] + bar_extra_margin[4] <- bar_extra_margin[4] + margins[4] + if (toptitle != '') { + margins[3] <- margins[3] + cex_title + 1 + } + if (!is.null(varu)) { + margins[1] <- margins[1] + 2.2 * units_scale + } + + if (drawleg) { + layout(matrix(1:2, ncol = 1, nrow = 2), heights = c(5, 1)) + } + plot.new() + # Load the user parameters + par(userArgs) + par(mar = margins, cex.main = cex_title, cex.axis = cex_axes_labels, + mgp = c(0, spaceticklab, 0), las = 0) + + #NOTE: Here creates the window for later plot. If 'usr' for par() is not specified, + # use the lat/lon as the borders. If 'usr' is specified, use the assigned values. + if (is.null(userArgs$usr)) { + #NOTE: The grids are assumed to be equally spaced + xlim_cal <- c(lonb$x[1] - (lonb$x[2] - lonb$x[1]) / 2, + lonb$x[length(lonb$x)] + (lonb$x[2] - lonb$x[1]) / 2) + ylim_cal <- c(latb$x[1] - (latb$x[2] - latb$x[1]) / 2, + latb$x[length(latb$x)] + (latb$x[2] - latb$x[1]) / 2) + plot.window(xlim = xlim_cal, ylim = ylim_cal, xaxs = 'i', yaxs = 'i') +# Below is Old code. The border grids are only half plotted. 
+# plot.window(xlim = range(lonb$x, finite = TRUE), ylim = range(latb$x, finite = TRUE), +# xaxs = 'i', yaxs = 'i') + } else { + plot.window(xlim = par("usr")[1:2], ylim = par("usr")[3:4], xaxs = 'i', yaxs = 'i') + } + + if (axelab) { + lab_distance_y <- ifelse(is.null(lab_dist_y), spaceticklab + 0.2, lab_dist_y) + lab_distance_x <- ifelse(is.null(lab_dist_x), spaceticklab + cex_axes_labels / 2 - 0.3, lab_dist_x) + + axis(2, at = ypos, labels = ylabs, cex.axis = cex_axes_labels, tcl = cex_axes_ticks, + mgp = c(0, lab_distance_y, 0)) + axis(1, at = xpos, labels = xlabs, cex.axis = cex_axes_labels, tcl = cex_axes_ticks, + mgp = c(0, lab_distance_x, 0)) + } + title(toptitle, cex.main = cex_title) + rect(par("usr")[1], par("usr")[3], par("usr")[2], par("usr")[4], col = colNA) + col_inf_image <- ifelse(is.null(col_inf), colNA, col_inf) + col_sup_image <- ifelse(is.null(col_sup), colNA, col_sup) + if (square) { + # If lat and lon are both regular-spaced, "useRaster = TRUE" can avoid + # artifact white lines on the figure. If not, useRaster has to be FALSE (default) + tryCatch({ + image(lonb$x, latb$x, var[lonb$ix, latb$ix], + col = c(col_inf_image, cols, col_sup_image), + breaks = c(-.Machine$double.xmax, brks, .Machine$double.xmax), + axes = FALSE, xlab = "", ylab = "", add = TRUE, useRaster = TRUE) + }, error = function(x) { + image(lonb$x, latb$x, var[lonb$ix, latb$ix], + col = c(col_inf_image, cols, col_sup_image), + breaks = c(-.Machine$double.xmax, brks, .Machine$double.xmax), + axes = FALSE, xlab = "", ylab = "", add = TRUE) + }) + } else { + .filled.contour(lonb$x, latb$x, var[lonb$ix, latb$ix], + levels = c(.Machine$double.xmin, brks, .Machine$double.xmax), + col = c(col_inf_image, cols, col_sup_image)) + } + if (!is.null(contours)) { +#NOTE: 'labcex' is the absolute size of contour labels. Parameter 'contour_label_scale' +# is provided in PlotEquiMap() but it was not used. Here, 'cex_axes_labels' was used +# and it was calculated from 'axes_label_scale', the size of lat/lon axis label. +# It is changed to use contour_label_scale*par('cex'). + contour(lonb$x, latb$x, contours[lonb$ix, latb$ix], levels = brks2, + method = "edge", add = TRUE, +# labcex = cex_axes_labels, + labcex = contour_label_scale * par("cex"), + lwd = contour_lwd, lty = contour_lty, + col = contour_color, drawlabels = contour_draw_label) + } + + # + # Adding black dots or symbols + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (!is.null(dots)) { + data_avail <- !is.na(var) + for (counter in 1:(dim(dots)[1])) { + points <- which(dots[counter, , ] & data_avail, arr.ind = TRUE) + points(lon[points[, 1]], lat[points[, 2]], + pch = dot_symbol[counter], + cex = dot_size[counter] * 3 / sqrt(sqrt(length(var))), + lwd = dot_size[counter] * 3 / sqrt(sqrt(length(var)))) + } + } + # + # Plotting continents + # ~~~~~~~~~~~~~~~~~~~~~ + # + wrap_vec <- c(lonb$x[1], lonb$x[1] + 360) + old_lwd <- par('lwd') + par(lwd = coast_width) + # If [0, 360], use GEOmap; if [-180, 180], use maps::map + # UPDATE: Use maps::map for both cases. The difference between GEOmap and + # maps is trivial. The only thing we can see for now is that + # GEOmap has better lakes. 
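+  # Note (illustrative, not in the original sources): 'wrap_vec' spans one full
+  # revolution starting at the first longitude, e.g. c(-180, 180) for data on
+  # [-180, 180] or c(0, 360) for data on [0, 360], so maps::map() wraps the
+  # coastline polygons onto the plotted longitude range.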
+  coast <- maps::map(interior = country.borders, wrap = wrap_vec,
+                     fill = filled.continents, add = TRUE, plot = FALSE)
+
+  if (filled.continents) {
+    polygon(coast, col = continent_color, border = coast_color, lwd = coast_width)
+  } else {
+    lines(coast, col = coast_color, lwd = coast_width)
+  }
+  if (!is.null(lake_color)) {
+    maps::map('lakes', add = TRUE, wrap = wrap_vec, fill = filled.continents, col = lake_color)
+  }
+  par(lwd = old_lwd)
+
+  # filled.oceans
+  if (filled.oceans) {
+    old_lwd <- par('lwd')
+    par(lwd = coast_width)
+
+    outline <- maps::map(wrap = wrap_vec, fill = T, plot = FALSE) # must be fill = T
+    xbox <- wrap_vec + c(-2, 2)
+    ybox <- c(-92, 92)
+    outline$x <- c(outline$x, NA, c(xbox, rev(xbox), xbox[1]))
+    outline$y <- c(outline$y, NA, rep(ybox, each = 2), ybox[1])
+    polypath(outline, col = ocean_color, rule = 'evenodd', border = NA)
+
+    par(lwd = old_lwd)
+  }
+
+  # Plot shapefile
+  #NOTE: the longitude range cannot cut the shapefile range, or not all of the shapefile will be plotted.
+  if (!is.null(shapefile)) {
+    maps::map(shape, interior = country.borders, #wrap = wrap_vec,
+              fill = filled.continents, add = TRUE, plot = TRUE,
+              lwd = shapefile_lwd, col = shapefile_color)
+  }
+
+  box()
+  # Draw rectangles (boxes) on the map
+  if (!is.null(boxlim)) {
+    counter <- 1
+    for (box in boxlim) {
+      if (length(box) != 4) {
+        stop(paste0("Box ", counter, " defined in the parameter 'boxlim' is ill-defined."))
+      }
+      if (box[1] > box[3]) {
+        box[1] <- box[1] - 360
+      }
+      if (box[2] < latmin || box[4] > latmax ||
+          box[1] < lonmin || box[3] > lonmax) {
+        stop(paste0("The limits of box ", counter, " defined in the parameter 'boxlim' are invalid."))
+      } else if (box[1] < 0 && box[3] > 0) {
+        #segments south
+        segments(box[1], box[2], 0, box[2], col = boxcol[counter], lwd = boxlwd[counter])
+        segments(0, box[2], box[3], box[2], col = boxcol[counter], lwd = boxlwd[counter])
+        #segments north
+        segments(box[1], box[4], 0, box[4], col = boxcol[counter], lwd = boxlwd[counter])
+        segments(0, box[4], box[3], box[4], col = boxcol[counter], lwd = boxlwd[counter])
+        #segments west
+        segments(box[1], box[2], box[1], box[4], col = boxcol[counter],
+                 lwd = boxlwd[counter])
+        #segments east
+        segments(box[3], box[2], box[3], box[4], col = boxcol[counter],
+                 lwd = boxlwd[counter])
+      } else {
+        rect(box[1], box[2], box[3], box[4], border = boxcol[counter], col = NULL,
+             lwd = boxlwd[counter], lty = 'solid')
+      }
+      counter <- counter + 1
+    }
+  }
+  #
+  # PlotWind
+  # ~~~~~~~~~~
+  #
+  if (!is.null(varu) && !is.null(varv)) {
+    # Create two-dimensional arrays of longitude and latitude
+    lontab <- InsertDim(lonb$x, 2, length(latb$x), name = 'lat')
+    lattab <- InsertDim(latb$x, 1, length(lonb$x), name = 'lon')
+    varplotu <- varu[lonb$ix, latb$ix]
+    varplotv <- varv[lonb$ix, latb$ix]
+
+    # Select a subsample of the points, drawing one arrow
+    # every 'arr_subsamp' grid points
+    sublon <- seq(1, length(lon), arr_subsamp)
+    sublat <- seq(1, length(lat), arr_subsamp)
+
+    uaux <- lontab[sublon, sublat] + varplotu[sublon, sublat] * 0.5 * arr_scale
+    vaux <- lattab[sublon, sublat] + varplotv[sublon, sublat] * 0.5 * arr_scale
+
+    lenshaft <- 0.18 * arr_scale * arr_scale_shaft
+    angleshaft <- 12 * arr_scale_shaft_angle
+    # Plot wind
+    arrows(lontab[sublon, sublat], lattab[sublon, sublat],
+           uaux, vaux,
+           angle = angleshaft,
+           length = lenshaft)
+
+    # Plot a reference arrow at the bottom of the plot for the legend
+    posarlon <- lonb$x[1] + (lonmax - lonmin) * 0.1
+    posarlat <- latmin - ((latmax - latmin) + 1) / par('pin')[2] *
+    
(spaceticklab + 0.2 + cex_axes_labels + 0.6 * units_scale) * par('csi') + + arrows(posarlon, posarlat, + posarlon + 0.5 * arr_scale * arr_ref_len, posarlat, + length = lenshaft, angle = angleshaft, + xpd = TRUE) + #save the parameter value + xpdsave <- par('xpd') + #desactivate xpd to be able to plot in margen + par(xpd = NA) + #plot text + mtext(paste(as.character(arr_ref_len), arr_units, sep = ""), + line = spaceticklab + 0.2 + cex_axes_labels + 1.2 * units_scale, side = 1, + at = posarlon + (0.5 * arr_scale * arr_ref_len) / 2, + cex = units_scale) + #come back to the previous xpd value + par(xpd = xpdsave) + } + # + # Colorbar + # ~~~~~~~~~~ + # + if (drawleg) { + ColorBar(brks, cols, FALSE, subsampleg, bar_limits, var_limits, + triangle_ends, col_inf = col_inf, col_sup = col_sup, + extra_labels = bar_extra_labels, draw_ticks = draw_bar_ticks, + draw_separators = draw_separators, title = units, + title_scale = units_scale, triangle_ends_scale = triangle_ends_scale, + label_scale = bar_label_scale, tick_scale = bar_tick_scale, + extra_margin = bar_extra_margin, label_digits = bar_label_digits) + } + + # If the graphic was saved to file, close the connection with the device + if (!is.null(fileout)) dev.off() + + invisible(list(brks = brks, cols = cols, col_inf = col_inf, col_sup = col_sup)) +} + diff --git a/modules/Visualization/tmp/PlotMostLikelyQuantileMap.R b/modules/Visualization/R/tmp/PlotMostLikelyQuantileMap.R similarity index 74% rename from modules/Visualization/tmp/PlotMostLikelyQuantileMap.R rename to modules/Visualization/R/tmp/PlotMostLikelyQuantileMap.R index 9f9f1914d8de30f26adc1b285a3c60a41f264b6a..a81515e986cb78a0b52339f580ab9ff918053036 100644 --- a/modules/Visualization/tmp/PlotMostLikelyQuantileMap.R +++ b/modules/Visualization/R/tmp/PlotMostLikelyQuantileMap.R @@ -1,26 +1,46 @@ #'Plot Maps of Most Likely Quantiles #' -#'@author Veronica Torralba, \email{veronica.torralba@bsc.es}, Nicolau Manubens, \email{nicolau.manubens@bsc.es} -#'@description This function receives as main input (via the parameter \code{probs}) a collection of longitude-latitude maps, each containing the probabilities (from 0 to 1) of the different grid cells of belonging to a category. As many categories as maps provided as inputs are understood to exist. The maps of probabilities must be provided on a common rectangular regular grid, and a vector with the longitudes and a vector with the latitudes of the grid must be provided. The input maps can be provided in two forms, either as a list of multiple two-dimensional arrays (one for each category) or as a three-dimensional array, where one of the dimensions corresponds to the different categories. +#'@author Veronica Torralba, \email{veronica.torralba@bsc.es}, Nicolau Manubens, +#'\email{nicolau.manubens@bsc.es} +#'@description This function receives as main input (via the parameter +#'\code{probs}) a collection of longitude-latitude maps, each containing the +#'probabilities (from 0 to 1) of the different grid cells of belonging to a +#'category. As many categories as maps provided as inputs are understood to +#'exist. The maps of probabilities must be provided on a common rectangular +#'regular grid, and a vector with the longitudes and a vector with the latitudes +#'of the grid must be provided. The input maps can be provided in two forms, +#'either as a list of multiple two-dimensional arrays (one for each category) or +#'as a three-dimensional array, where one of the dimensions corresponds to the +#'different categories. 
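+#'For example, with three categories the input can be given either as a list of
+#'three lat-lon probability arrays (with illustrative names,
+#'\code{list(p_below, p_normal, p_above)}) or as a single array with an extra
+#'category dimension of length 3 (e.g. \code{dim = c(bin = 3, lat = ..., lon = ...)}).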
#' -#'@param probs a list of bi-dimensional arrays with the named dimensions 'latitude' (or 'lat') and 'longitude' (or 'lon'), with equal size and in the same order, or a single tri-dimensional array with an additional dimension (e.g. 'bin') for the different categories. The arrays must contain probability values between 0 and 1, and the probabilities for all categories of a grid cell should not exceed 1 when added. -#'@param lon a numeric vector with the longitudes of the map grid, in the same order as the values along the corresponding dimension in \code{probs}. -#'@param lat a numeric vector with the latitudes of the map grid, in the same order as the values along the corresponding dimension in \code{probs}. -#'@param cat_dim the name of the dimension along which the different categories are stored in \code{probs}. This only applies if \code{probs} is provided in the form of 3-dimensional array. The default expected name is 'bin'. -#'@param bar_titles vector of character strings with the names to be drawn on top of the color bar for each of the categories. As many titles as categories provided in \code{probs} must be provided. -#'@param col_unknown_cat character string with a colour representation of the colour to be used to paint the cells for which no category can be clearly assigned. Takes the value 'white' by default. +#'@param probs A list of bi-dimensional arrays with the named dimensions +#' 'latitude' (or 'lat') and 'longitude' (or 'lon'), with equal size and in the +#' same order, or a single tri-dimensional array with an additional dimension +#' (e.g. 'bin') for the different categories. The arrays must contain +#' probability values between 0 and 1, and the probabilities for all categories +#' of a grid cell should not exceed 1 when added. +#'@param lon A numeric vector with the longitudes of the map grid, in the same +#' order as the values along the corresponding dimension in \code{probs}. +#'@param lat A numeric vector with the latitudes of the map grid, in the same +#' order as the values along the corresponding dimension in \code{probs}. +#'@param cat_dim The name of the dimension along which the different categories +#' are stored in \code{probs}. This only applies if \code{probs} is provided in +#' the form of 3-dimensional array. The default expected name is 'bin'. +#'@param bar_titles Vector of character strings with the names to be drawn on +#' top of the color bar for each of the categories. As many titles as +#' categories provided in \code{probs} must be provided. +#'@param col_unknown_cat Character string with a colour representation of the +#' colour to be used to paint the cells for which no category can be clearly +#' assigned. Takes the value 'white' by default. #'@param drawleg Where to draw the common colour bar. Can take values TRUE, #' FALSE or:\cr #' 'up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N'\cr #' 'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr #' 'right', 'r', 'R', 'east', 'e', 'E'\cr #' 'left', 'l', 'L', 'west', 'w', 'W' -#'@param ... additional parameters to be sent to \code{PlotCombinedMap} and \code{PlotEquiMap}. +#'@param ... Additional parameters to be sent to \code{PlotCombinedMap} and +#' \code{PlotEquiMap}. 
#'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} -#' -#'@importFrom maps map -#'@importFrom graphics box image layout mtext par plot.new -#'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff #'@examples #'# Simple example #'x <- array(1:(20 * 10), dim = c(lat = 10, lon = 20)) / 200 @@ -29,10 +49,12 @@ #'c <- 1 - (a + b) #'lons <- seq(0, 359.5, length = 20) #'lats <- seq(-89.5, 89.5, length = 10) +#'\dontrun{ #'PlotMostLikelyQuantileMap(list(a, b, c), lons, lats, #' toptitle = 'Most likely tercile map', #' bar_titles = paste('% of belonging to', c('a', 'b', 'c')), #' brks = 20, width = 10, height = 8) +#'} #' #'# More complex example #'n_lons <- 40 @@ -99,12 +121,16 @@ #'bins <- multiApply::Apply(sample_data, 'time', binning, thresholds)$output1 #' #'# 3. Plotting most likely quantile/bin +#'\dontrun{ #'PlotMostLikelyQuantileMap(bins, lons, lats, #' toptitle = 'Most likely quantile map', #' bar_titles = paste('% of belonging to', letters[1:n_bins]), #' mask = 1 - (w1 + w2 / max(c(w1, w2))), #' brks = 20, width = 10, height = 8) -#' +#'} +#'@importFrom maps map +#'@importFrom graphics box image layout mtext par plot.new +#'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff #'@export PlotMostLikelyQuantileMap <- function(probs, lon, lat, cat_dim = 'bin', bar_titles = NULL, @@ -194,3 +220,4 @@ PlotMostLikelyQuantileMap <- function(probs, lon, lat, cat_dim = 'bin', col_unknown_map = col_unknown_cat, drawleg = drawleg, ...) } + diff --git a/modules/Visualization/R/tmp/PlotRobinson.R b/modules/Visualization/R/tmp/PlotRobinson.R new file mode 100644 index 0000000000000000000000000000000000000000..bd427448fad9bdc9482c3e13b161f05e2fd6c1a7 --- /dev/null +++ b/modules/Visualization/R/tmp/PlotRobinson.R @@ -0,0 +1,567 @@ +#'Plot map in Robinson or other projections +#' +#'Transform a regular grid longitude-latitude data to a different projection and +#'plot the map. The target projection must be a valid CRS string, preferrably be +#'EPSG or ESRI code; check \link[sf]{st_crs} for more explanation. This function +#'is mainly tested for Robinson projection (ESRI:54030), but it can work with +#'other projection types in theory.\n +#'The map can be plotted by points or polygon. A legend can be plotted as either +#'a color bar or a discrete ggplot legend. Dots can be drawn on top of the data, +#'which can be used for significance test. A mask can be added to not plot the +#'data specified. A number of options is provided to adjust aesthetics, like +#'position, size, colors, etc. +#' +#'@param data A numeric array with longitude and latitude dimensions. The grid +#' should be regular grid. It can contain NA values. +#'@param lon A numeric vector of longitude locations of the cell centers of the +#' grid of 'data'. Expected to be regularly spaced, within the range of either +#' [-180, 180] or [0, 360]. +#'@param lat A numeric vector of latitude locations of the cell centers of the +#' grid of 'data'. Expected to be regularly spaced, within the range [-90, 90] +#' of ascending or descending order. +#'@param lon_dim A character string indicating the longitude dimension name in +#' 'data'. If it is NULL, the function tries to find the name in +#' \code{s2dv:::.KnownLonNames}. The default value is NULL. +#'@param lat_dim A character string indicating the latitude dimension name in +#' 'data'. If it is NULL, the function tries to find the name in +#' \code{s2dv:::.KnownLatNames}. 
The default value is NULL. +#'@param target_proj A character string indicating the target projection. It +#' should be a valid crs string. The default projection is Robinson +#' (ESRI:54030). Note that the character string may work differently depending +#' on PROJ and GDAL module version. +#'@param legend A character string indicating the legend style. It can be 's2dv' +#' (color bar by \code{ColorBar()}), 'ggplot2' (discrete legend by ggplot2), or +#' NULL (no legend), +#'@param style A character string indicating the plotting style. It can be +#' 'point' or 'polygon'. The default value is 'point'. Note that 'polygon' may +#' be time- and memory-consuming for global or high-resolution data. +#'@param dots An array with the same dimensions as 'data' of [0, 1] or logical +#' indicating the grids to plot dots. The value 0 or FALSE is the point to be +#' dotted. +#'@param mask An array with the same dimensions as 'data' of [0, 1] or logical +#' indicating the grids to not plot data. The value 0 or FALSE is the point not +#' to be plotted. +#'@param brks,cols,bar_limits,triangle_ends Usually only providing 'brks' is +#' enough to generate the desired color bar. These parameters allow to +#' define n breaks that define n - 1 intervals to classify each of the values +#' in 'data'. The corresponding grid cell of a given value in 'data' will be +#' colored in function of the interval it belongs to. These parameters are +#' sent to \code{ColorBar()} to generate the breaks and colours. Additional +#' colours for values beyond the limits of the colour bar are also generated +#' and applied to the plot if 'bar_limits' or 'brks' and 'triangle_ends' are +#' properly provided to do so. See ?ColorBar for a full explanation. +#'@param col_inf,col_sup,colNA Colour identifiers to color the values that +#' excess the extremes of the color bar and to color NAs, respectively. 'colNA' +#' takes attr(cols, 'na_color') if available by default, where cols is the +#' parameter 'cols' if provided or the vector of colors returned by +#' 'color_fun'. 'col_inf' and 'col_sup' will take the value of 'colNA' if not +#' specified. See ?ColorBar for a full explanation. +#'@param color_fun,bar_extra_margin Set of +#' parameters to control the visual aspect of the drawn colour bar +#' (1/3). See ?ColorBar for a full explanation. +#'@param vertical A logical value indicating the direction of colorbar if +#' parameter 'legend' is 's2dv'. The default value is TRUE. +#'@param toptitle A character string of the top title of the figure, scalable +#' with parameter 'title_size'. +#'@param caption A character string of the caption located at left-bottom of the +#' plot. +#'@param units A character string of the data units, which is the title of the +#' legend. +#'@param crop_coastlines A named numeric vector [lonmin, lonmax, latmin, latmax] +#' indicating the region to plot coastlines. Note that the longitude range +#' cannot exceed 180 degrees. +#'@param point_size A number of the size of the data points if "style = 'point'". +#' The default is 'auto' and the function tries to find the appropriate size. +#'@param title_size A number of the size of the top title. The default is 16. +#'@param dots_size A number of the size of the dots. The default is 0.5. +#'@param dots_shape A number indicating the dot shape recognized by parameter +#' 'shape' in \code{geom_point()}. +#'@param coastlines_width A number indicating the width of the coastlines. +#'@param fileout A character string of the path to save the plot. 
If not
+#'  specified (default), a graphic device will pop up. The extension should be
+#'  accepted by \code{ggsave()}.
+#'@param width A number of the plot width, in the units specified in parameter
+#'  'size_units'. The default is 8.
+#'@param height A number of the plot height, in the units specified in parameter
+#'  'size_units'. The default is 4.
+#'@param size_units A character string of the units of the size of the device
+#'  (file or window) to plot in. The default is 'in' (inches). See ?ggsave and
+#'  ?Devices for details of the corresponding device.
+#'@param res Resolution of the device (file or window) to plot in. The default
+#'  value is 300. See ?ggsave 'dpi' and ?Devices for details of the
+#'  corresponding device.
+#'
+#'@return A map plot with the specified projection, either in a pop-up window
+#'  or in a saved file.
+#'
+#'@examples
+#'data <- array(rep(seq(-10, 10, length.out = 181), 360) + rnorm(360),
+#'              dim = c(lat = 181, lon = 360))
+#'dots <- data
+#'dots[which(dots < 4 & dots > -4)] <- 0
+#'dots[which(dots != 0)] <- 1
+#'PlotRobinson(data, lon = 0:359, lat = -90:90, dots = dots,
+#'             brks = seq(-10, 10, length.out = 11),
+#'             toptitle = 'synthetic example', vertical = F,
+#'             caption = 'Robinson Global\ns2dv::PlotRobinson example',
+#'             bar_extra_margin = c(0, 1, 0, 1), width = 8, height = 6)
+#'PlotRobinson(data, lon = 0:359, lat = -90:90, mask = dots, legend = 'ggplot2',
+#'             target_proj = "+proj=moll", brks = seq(-10, 10, length.out = 11),
+#'             color_fun = clim.palette("purpleorange"), colNA = 'green',
+#'             toptitle = 'synthetic example',
+#'             caption = 'Mollweide Global\ns2dv::PlotRobinson example',
+#'             width = 8, height = 6)
+#'
+#'@import sf ggplot2 rnaturalearth cowplot
+#'@export
+PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL,
+                         target_proj = 54030, legend = 's2dv', style = 'point',
+                         dots = NULL, mask = NULL, brks = NULL, cols = NULL, bar_limits = NULL,
+                         triangle_ends = NULL, col_inf = NULL, col_sup = NULL, colNA = NULL,
+                         color_fun = clim.palette(), bar_extra_margin = rep(0, 4), vertical = TRUE,
+                         toptitle = NULL, caption = NULL, units = NULL, crop_coastlines = NULL,
+                         point_size = "auto", title_size = 16, dots_size = 0.5,
+                         dots_shape = 47, coastlines_width = 0.3,
+                         fileout = NULL, width = 8, height = 4, size_units = "in",
+                         res = 300) {
+
+  # Sanity check
+  # data
+  data <- drop(data)
+  if (length(dim(data)) != 2) {
+    stop("Parameter 'data' must have two dimensions.")
+  }
+  dims <- dim(data)
+  # lon, lon_dim
+  if (is.null(lon_dim)) {
+    lon_dim <- names(dims)[names(dims) %in% .KnownLonNames()]
+    if (identical(lon_dim, character(0))) {
+      stop("Cannot find known longitude name in data dimension. Please define parameter 'lon_dim'.")
+    }
+  }
+  if (is.unsorted(lon)) {
+    .warning("Parameter 'lon' should be sorted to guarantee the correct result.")
+  }
+  # lat, lat_dim
+  if (is.null(lat_dim)) {
+    lat_dim <- names(dims)[names(dims) %in% .KnownLatNames()]
+    if (identical(lat_dim, character(0))) {
+      stop("Cannot find known latitude name in data dimension. Please define parameter 'lat_dim'.")
+    }
+  }
+  if (!all(names(dims) %in% c(lat_dim, lon_dim))) {
+    stop("Dimension names in parameter 'data' should match 'lat_dim' and 'lon_dim'.")
+  }
+  if (length(lon) != dims[lon_dim]) {
+    stop("Length of parameter 'lon' should match longitude dimension in 'data'.")
+  }
+  if (length(lat) != dims[lat_dim]) {
+    stop("Length of parameter 'lat' should match latitude dimension in 'data'.")
+  }
+  data <- s2dv::Reorder(data, c(lon_dim, lat_dim))
+  # Make lat always go from 90 to -90
+  sort_lat <- FALSE
+  if (!is.unsorted(lat)) {
+    lat <- rev(lat)
+    data <- ClimProjDiags::Subset(data, along = lat_dim, indices = seq(length(lat), 1, -1))
+    sort_lat <- TRUE
+  }
+
+  # original_proj: only a regular lon-lat grid is supported for now
+  original_proj <- st_crs(4326)
+  # target_proj
+  if (is.null(target_proj)) {
+    stop("Parameter 'target_proj' cannot be NULL.")
+  } else {
+    target_proj_tmp <- st_crs(target_proj)
+    if (is.na(target_proj_tmp)) {
+      .warning(paste0("Trying ESRI code: ESRI:", target_proj))
+      target_proj <- st_crs(paste0("ESRI:", target_proj))
+    } else {
+      target_proj <- target_proj_tmp
+    }
+  }
+
+  # legend
+  if (!is.null(legend) && (!legend %in% c('s2dv', 'ggplot2'))) {
+    stop("Parameter 'legend' must be NULL, 'ggplot2' or 's2dv'.")
+  }
+  # style
+  if (!style %in% c('point', 'polygon') || length(style) != 1) {
+    stop("Parameter 'style' must be 'point' or 'polygon'.")
+  }
+  if (style == 'polygon') {
+    # Polygon is slow (and may be wrong) for global maps. Confirm that the user wants to proceed.
+    if ((abs(diff(range(lon))) > 350 & abs(diff(range(lat))) > 175) |
+        (prod(dim(data)) >= (180 * 360))) {
+      if (!isTRUE(utils::askYesNo("The region seems to be global and style 'polygon' is chosen. It may be time- and memory-consuming to plot the map. Are you sure that you want to continue?"))) {
+        return(invisible())
+      }
+    }
+  }
+  # dots
+  if (!is.null(dots)) {
+    dots <- drop(dots)
+    if (any(!names(dim(dots)) %in% c(lon_dim, lat_dim))) {
+      stop("Parameter 'dots' must have two dimensions named as longitude and latitude dimensions in 'data'.")
+    } else {
+      dots <- Reorder(dots, c(lon_dim, lat_dim))
+    }
+    if (!identical(dim(dots), dim(data))) {
+      stop("Parameter 'dots' must have the same dimensions as 'data'.")
+    } else if (is.numeric(dots)) {
+      if (all(dots %in% c(0, 1))) {
+        dots <- array(as.logical(dots), dim = dim(dots))
+      } else {
+        stop("Parameter 'dots' must have only TRUE/FALSE or 0/1.")
+      }
+    } else if (is.logical(dots)) {
+      if (!all(dots %in% c(T, F))) {
+        stop("Parameter 'dots' must have only TRUE/FALSE or 0/1.")
+      }
+    } else {
+      stop("Parameter 'dots' must be a logical or numerical array.")
+    }
+  }
+  # mask
+  if (!is.null(mask)) {
+    mask <- drop(mask)
+    if (any(!names(dim(mask)) %in% c(lon_dim, lat_dim))) {
+      stop("Parameter 'mask' must have two dimensions named as longitude and latitude dimensions in 'data'.")
+    } else {
+      mask <- Reorder(mask, c(lon_dim, lat_dim))
+    }
+    if (!identical(dim(mask), dim(data))) {
+      stop("Parameter 'mask' must have the same dimensions as 'data'.")
+    } else if (is.numeric(mask)) {
+      if (all(mask %in% c(0, 1))) {
+        mask <- array(as.logical(mask), dim = dim(mask))
+      } else {
+        stop("Parameter 'mask' must have only TRUE/FALSE or 0/1.")
+      }
+    } else if (is.logical(mask)) {
+      if (!all(mask %in% c(T, F))) {
+        stop("Parameter 'mask' must have only TRUE/FALSE or 0/1.")
+      }
+    } else {
+      stop("Parameter 'mask' must be a logical or numerical array.")
+    }
+  }
+
+  # Color bar
+  ## Check: brks, cols, bar_limits, color_fun, bar_extra_margin, units
+  ## Build: brks, cols, 
bar_limits, col_inf, col_sup + if (!all(is.na(data))) { + var_limits <- c(min(data[!is.infinite(data)], na.rm = TRUE), + max(data[!is.infinite(data)], na.rm = TRUE)) + } else { + warning("All the data are NAs. The map will be filled with colNA.") + if (!is.null(brks) && length(brks) > 1) { + #NOTE: var_limits be like this to avoid warnings from ColorBar + var_limits <- c(min(brks, na.rm = TRUE) + diff(brks)[1], + max(brks, na.rm = TRUE)) + } else if (!is.null(bar_limits)) { + var_limits <- c(bar_limits[1] + 0.01, bar_limits[2]) + } else { + var_limits <- c(-0.5, 0.5) # random range since colorbar is not going to be plotted + if (!is.null(legend)) { + legend <- NULL + warning("All data are NAs. Color bar won't be drawn. If you want to have ", + "color bar still, define parameter 'brks' or 'bar_limits'.") + } + } + } + colorbar <- ColorBar(brks = brks, cols = cols, vertical = vertical, subsampleg = NULL, + bar_limits = bar_limits, var_limits = var_limits, triangle_ends = triangle_ends, + col_inf = col_inf, col_sup = col_sup, color_fun = color_fun, + plot = FALSE, draw_ticks = TRUE, draw_separators = FALSE, + triangle_ends_scale = 1, extra_labels = NULL, + title = units, title_scale = 1, # units_scale + label_scale = 1, tick_scale = 1, #bar_tick_scale + extra_margin = bar_extra_margin, label_digits = 4) + brks <- colorbar$brks + cols <- colorbar$cols + col_inf <- colorbar$col_inf + col_sup <- colorbar$col_sup + bar_limits <- c(head(brks, 1), tail(brks, 1)) + # colNA + if (is.null(colNA)) { + if ('na_color' %in% names(attributes(cols))) { + colNA <- attr(cols, 'na_color') + if (!.IsColor(colNA)) { + stop("The 'na_color' provided as attribute of the colour vector must be a valid colour identifier.") + } + } else { + colNA <- 'pink' + } + } else if (!.IsColor(colNA)) { + stop("Parameter 'colNA' must be a valid colour identifier.") + } + # toptitle + if (!is.null(toptitle) && !is.character(toptitle)) { + stop("Parameter 'toptitle' must be a character string.") + } + # caption + if (!is.null(caption) && !is.character(caption)) { + stop("Parameter 'caption' must be a character string.") + } + # crop_coastlines + if (!is.null(crop_coastlines)) { + # if crop_coastlines doesn't have name, [lonmin, lonmax, latmin, latmax] + if (is.null(names(crop_coastlines))) { + names(crop_coastlines) <- c("lonmin", "lonmax", "latmin", "latmax") + } else if (!identical(sort(names(crop_coastlines)), sort(c("latmax", "latmin", "lonmax", "lonmin")))) { + stop("Parameter 'crop_coastlines' needs to have names 'latmax', 'latmin', 'lonmax', 'lonmin'.") + } + } + + # point_size + if (point_size == 'auto') { + # 360x181 with default setting, 0.05 + point_size <- round(0.05 * (360 * 181) / (length(lon) * length(lat)), 2) + } else if (!is.numeric(point_size)) { + stop("Parameter 'point_size' must be a number.") + } + # + +#================================================================= + + # Adapt s2dv ColorBar parameters to ggplot plot + # If legend is NULL, still tune with s2dv legend way + if (is.null(legend) || legend == 's2dv') { + # the colorbar triangle color. 
If it is NULL (no triangle plotted), use colNA + col_inf_image <- ifelse(is.null(col_inf), colNA, col_inf) + col_sup_image <- ifelse(is.null(col_sup), colNA, col_sup) + cols_ggplot <- c(col_inf_image, cols, col_sup_image) + + # Add triangles to brks + brks_ggplot <- brks + if (var_limits[2] > tail(brks, 1)) { + brks_ggplot <- c(brks_ggplot, max(data, na.rm = T)) + } else { + brks_ggplot <- c(brks_ggplot, tail(brks, 1) + diff(tail(brks, 2))) + } + if (var_limits[1] < brks[1]) { + brks_ggplot <- c(min(data, na.rm = T), brks_ggplot) + } else { + brks_ggplot <- c(brks[1] - diff(brks[1:2]), brks_ggplot) + } + + } else { # ggplot2 legend + brks_ggplot <- brks + cols_ggplot <- cols + } + + # Build data dataframe + lonlat_df <- data.frame(lon = rep(as.vector(lon), length(lat)), + lat = sort(rep(as.vector(lat), length(lon)), decreasing = TRUE)) + data_df <- lonlat_df %>% + dplyr::mutate(dat = as.vector(data)) + + lonlat_df_ori <- NULL + # Remove the points where mask = FALSE + if (!is.null(mask)) { + # Save original lonlat_df to plot with expected region + lonlat_df_ori <- st_as_sf(lonlat_df, coords = c("lon", "lat"), crs = original_proj) + lonlat_df_ori <- st_transform(lonlat_df_ori, crs = target_proj) + lonlat_df_ori <- as.data.frame(st_coordinates(lonlat_df_ori)) + names(lonlat_df_ori) <- c('long', 'lat') + + if (sort_lat) { + mask <- ClimProjDiags::Subset(mask, along = lat_dim, indices = seq(length(lat), 1, -1)) + } + mask_df <- data.frame(lon = rep(as.vector(lon), length(lat)), + lat = sort(rep(as.vector(lat), length(lon)), decreasing = TRUE), + mask = as.vector(mask)) + data_df <- data_df[mask_df$mask == TRUE, ] + lonlat_df <- data_df[, 1:2] + } + + #NOTE: if target_proj = "ESRI:54030", Nord3v2 has different behavior from hub and ws!! + data_df <- st_as_sf(data_df, coords = c("lon", "lat"), crs = original_proj) + data_df <- st_transform(data_df, crs = target_proj) + data_df <- data_df %>% + dplyr::mutate(long = st_coordinates(data_df)[, 1], + lat = st_coordinates(data_df)[, 2]) + + # Re-project dots + if (!is.null(dots)) { + if (sort_lat) { + dots <- ClimProjDiags::Subset(dots, along = lat_dim, indices = seq(length(lat), 1, -1)) + } + dots_df <- data.frame(lon = rep(as.vector(lon), length(lat)), + lat = sort(rep(as.vector(lat), length(lon)), decreasing = TRUE), + dot = as.vector(dots)) + + dots_df <- st_as_sf(dots_df, coords = c("lon", "lat"), crs = original_proj) + dots_df <- st_transform(dots_df, crs = target_proj) + dots_df <- dots_df %>% + dplyr::mutate(long = st_coordinates(dots_df)[, 1], + lat = st_coordinates(dots_df)[, 2]) + dots_df <- subset(dots_df, dot == FALSE) + } + + # coastlines + coastlines <- rnaturalearth::ne_coastline(scale = "medium", returnclass = "sf") + ## crop the coastlines to the desired range + if (!is.null(crop_coastlines)) { + suppressWarnings({ + coastlines <- st_crop(coastlines, + xmin = as.numeric(crop_coastlines['lonmin']), + xmax = as.numeric(crop_coastlines['lonmax']), + ymin = as.numeric(crop_coastlines['latmin']), + ymax = as.numeric(crop_coastlines['latmax'])) + }) + } + coastlines <- st_transform(coastlines, crs = target_proj) + + if (style == 'polygon') { + # Calculate polygon points from regular lat/lon + #NOTE: The original grid must be regular grid with same space + d_lon <- abs(lon[2] - lon[1]) / 2 + d_lat <- abs(lat[2] - lat[1]) / 2 + lon_poly <- lat_poly <- rep(NA, 4 * dim(lonlat_df)[1]) + for (ii in 1:dim(lonlat_df)[1]) { + lon_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lon[ii] - d_lon, lonlat_df$lon[ii] + d_lon, + lonlat_df$lon[ii] + d_lon, 
lonlat_df$lon[ii] - d_lon) + lat_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lat[ii] - d_lat, lonlat_df$lat[ii] - d_lat, + lonlat_df$lat[ii] + d_lat, lonlat_df$lat[ii] + d_lat) + } + # # To prevent out-of-global lon + # lon_poly[which(lon_poly >= 180)] <- 179.9 + # lon_poly[which(lon_poly < -180)] <- -180 + # To prevent out-of-global lat + lat_poly[which(lat_poly > 90)] <- 90 + lat_poly[which(lat_poly < -90)] <- -90 + + lonlat_df <- data.frame(lon = lon_poly, lat = lat_poly) + # Transfer lon/lat to projection + proj_lonlat <- st_as_sf(lonlat_df, coords = c("lon", "lat"), crs = original_proj) + #NOTE: if target_proj = "ESRI:54030", on Nord3v2, st_transform has lon and lat swapped! + proj_lonlat <- st_transform(proj_lonlat, crs = target_proj) + lonlat_df_proj <- st_coordinates(proj_lonlat) + + # Use id to create groups for each polygon + ids <- factor(paste0("id_", 1:dim(data_df)[1])) + values <- data.frame(id = ids, + value = data_df$dat) + positions <- data.frame(id = rep(ids, each = 4), + x = lonlat_df_proj[, 1], + y = lonlat_df_proj[, 2]) + datapoly <- merge(values, positions, by = "id") + datapoly <- st_as_sf(datapoly, coords = c("x", "y"), crs = target_proj) + datapoly <- datapoly %>% + dplyr::group_by(id) %>% + dplyr::summarise() %>% #NOTE: VERY SLOW if plot global + dplyr::mutate(value = values[order(values$id), ]$value) %>% + st_cast("POLYGON") %>% + st_convex_hull() # maintain outer polygen (no bowtie shape) + } + + # Plots + if (style == 'polygon') { + res_p <- ggplot(data = data_df) + #NOTE: must be data_df? + geom_sf(data = datapoly, + aes(col = cut(value, breaks = brks_ggplot, include.lowest = T), + fill = cut(value, breaks = brks_ggplot, include.lowest = T))) + } else if (style == 'point') { + res_p <- ggplot(data = data_df) + + geom_point(aes(x = long, y = lat, + col = cut(dat, breaks = brks_ggplot, include.lowest = T)), + #NOTE: These two lines make point size vary with lat + #size = point_size / (data_df$lat / min(data_df$lat))) + + #size = (sort(rep(as.vector(lat), length(lon))) / max(lat)) * point_size) + + size = point_size) + } + + if (is.null(lonlat_df_ori)) { + coord_sf_lim <- c(range(data_df$long), range(data_df$lat)) + } else { + coord_sf_lim <- c(range(lonlat_df_ori$long), range(lonlat_df_ori$lat)) + } + res_p <- res_p + + geom_sf(data = coastlines, colour ='black', size = coastlines_width) + + # Remove background grid and lat/lon label; add white background + theme_void() + theme(plot.background = element_rect(fill = 'white', colour = 'white')) + + # crop the projection + coord_sf(xlim = coord_sf_lim[1:2], ylim = coord_sf_lim[3:4], + expand = TRUE, datum = target_proj) + + if (!is.null(dots)) { + res_p <- res_p + geom_point(data = dots_df, aes(x = long, y = lat), + shape = dots_shape, size = dots_size) + #NOTE: This line makes point size vary with lat + #size = dots_size / (dots_df$lat / min(dots_df$lat))) + } + + if (identical(legend, 'ggplot2')) { + if (style == 'polygon') { + res_p <- res_p + scale_colour_manual(values = cols_ggplot, + aesthetics = c("colour", "fill"), + drop = FALSE, na.value = colNA) + + guides(fill = guide_legend(title = units, override.aes = list(size = 1)), + color = "none") + } else if (style == 'point') { + res_p <- res_p + scale_colour_manual(values = cols_ggplot, + drop = FALSE, na.value = colNA) + + guides(colour = guide_legend(title = units, override.aes = list(size = 1))) + } + + } else { # s2dv or NULL + if (style == 'polygon') { + res_p <- res_p + scale_colour_manual(values = cols_ggplot, + aesthetics = c("colour", "fill"), + drop = 
FALSE, na.value = colNA) + } else if (style == 'point') { + res_p <- res_p + scale_colour_manual(values = cols_ggplot, + drop = FALSE, na.value = colNA) + } + # Remove ggplot legend + res_p <- res_p + theme(legend.position = "none", plot.margin = margin(0.5, 0, 0, 0, 'cm')) + } + + if (!is.null(toptitle)) { + res_p <- res_p + ggtitle(toptitle) + + theme(plot.title = element_text(size = title_size, hjust = 0.5, vjust = 3)) + } + if (!is.null(caption)) { + res_p <- res_p + labs(caption = caption) + + theme(plot.caption = element_text(hjust = 0, vjust = 1, margin = margin(0, 0, 0, 0, 'cm'))) + } + + # s2dv legend fun to put in cowplot::plot_grid + if (identical(legend, 's2dv')) { + fun_legend <- function() { + if (vertical) { + par(mar = c(7.1, 2.2, 7.1, 3.1), mgp = c(3, 1, 0)) + } else { + par(mar = c(1.1, 1.2, 0.1, 1.1), mgp = c(3, 1, 0)) + } + ColorBar(brks = brks, cols = cols, vertical = vertical, subsampleg = NULL, + bar_limits = bar_limits, var_limits = var_limits, triangle_ends = triangle_ends, + col_inf = col_inf, col_sup = col_sup, color_fun = color_fun, + plot = TRUE, draw_ticks = TRUE, draw_separators = FALSE, + triangle_ends_scale = 1, extra_labels = NULL, + title = units, title_scale = 1, # units_scale + label_scale = 1, tick_scale = 1, #bar_tick_scale + extra_margin = bar_extra_margin, label_digits = 4) + } + if (vertical) { + res_p <- cowplot::plot_grid(res_p, fun_legend, rel_widths = c(6, 1)) + } else { + res_p <- cowplot::plot_grid(res_p, fun_legend, rel_heights = c(5, 1), ncol = 1) + } + res_p <- res_p + theme(plot.background = element_rect(fill = "white", colour = "white")) + } + + if (!is.null(fileout)) { + ext <- regmatches(fileout, regexpr("[a-zA-Z0-9]*$", fileout)) + ggsave(fileout, res_p, width = width, height = height, dpi = res, units = size_units, + device = ext) + } else { # pop-up window + dev.new(units = size_units, res = res, width = width, height = height) + res_p + } + +} + diff --git a/modules/Visualization/R/tmp/Utils.R b/modules/Visualization/R/tmp/Utils.R new file mode 100644 index 0000000000000000000000000000000000000000..058782595acdda6222173278100e53963a8cd5bd --- /dev/null +++ b/modules/Visualization/R/tmp/Utils.R @@ -0,0 +1,1779 @@ +#'@importFrom abind abind +#'@import plyr ncdf4 +#'@importFrom grDevices png jpeg pdf svg bmp tiff +#'@importFrom easyVerification convert2prob + +## Function to tell if a regexpr() match is a complete match to a specified name +.IsFullMatch <- function(x, name) { + ifelse(x > 0 && attributes(x)$match.length == nchar(name), TRUE, FALSE) +} + +.ConfigReplaceVariablesInString <- function(string, replace_values, allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. 
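+  # Illustrative example (not part of the original sources): keys are delimited
+  # by '$' signs, so .ConfigReplaceVariablesInString("$VAR$_day.nc", list(VAR = "tas"))
+  # returns "tas_day.nc".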
+ if (length(strsplit(string, "\\$")[[1]]) > 1) { + parts <- strsplit(string, "\\$")[[1]] + output <- "" + i <- 0 + for (part in parts) { + if (i %% 2 == 0) { + output <- paste(output, part, sep = "") + } else { + if (part %in% names(replace_values)) { + output <- paste(output, .ConfigReplaceVariablesInString(replace_values[[part]], replace_values, allow_undefined_key_vars), sep = "") + } else if (allow_undefined_key_vars) { + output <- paste0(output, "$", part, "$") + } else { + stop(paste('Error: The variable $', part, '$ was not defined in the configuration file.', sep = '')) + } + } + i <- i + 1 + } + output + } else { + string + } +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') +} + +.t2nlatlon <- function(t) { + ## As seen in cdo's griddes.c: ntr2nlat() + nlats <- (t * 3 + 1) / 2 + if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) { + nlats <- ceiling(nlats) + } else { + nlats <- round(nlats) + } + if (nlats %% 2 > 0) { + nlats <- nlats + 1 + } + ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF + nlons <- 2 * nlats + keep_going <- TRUE + while (keep_going) { + n <- nlons + if (n %% 8 == 0) n <- trunc(n / 8) + while (n %% 6 == 0) n <- trunc(n / 6) + while (n %% 5 == 0) n <- trunc(n / 5) + while (n %% 4 == 0) n <- trunc(n / 4) + while (n %% 3 == 0) n <- trunc(n / 3) + if (n %% 2 == 0) n <- trunc(n / 2) + if (n <= 8) { + keep_going <- FALSE + } else { + nlons <- nlons + 2 + if (nlons > 9999) { + stop("Error: pick another gaussian grid truncation. It doesn't fulfill the standards to apply FFT.") + } + } + } + c(nlats, nlons) +} + +.nlat2t <- function(nlats) { + trunc((nlats * 2 - 1) / 3) +} + +.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) { + # The purpose, working modes, inputs and outputs of this function are + # explained in ?LoadDataFile + #suppressPackageStartupMessages({library(ncdf4)}) + #suppressPackageStartupMessages({library(bigmemory)}) + #suppressPackageStartupMessages({library(plyr)}) + # Auxiliar function to convert array indices to lineal indices + arrayIndex2VectorIndex <- function(indices, dims) { + if (length(indices) > length(dims)) { + stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.") + } + position <- 1 + dims <- rev(dims) + indices <- rev(indices) + for (i in 1:length(indices)) { + position <- position + (indices[i] - 1) * prod(dims[-c(1:i)]) + } + position + } + + found_file <- NULL + dims <- NULL + grid_name <- units <- var_long_name <- NULL + is_2d_var <- array_across_gw <- NULL + data_across_gw <- NULL + + filename <- work_piece[['filename']] + namevar <- work_piece[['namevar']] + output <- work_piece[['output']] + # The names of all data files in the directory of the repository that match + # the pattern are obtained. + if (length(grep("^http", filename)) > 0) { + is_url <- TRUE + files <- filename + ## TODO: Check that the user is not using shell globbing exps. + } else { + is_url <- FALSE + files <- Sys.glob(filename) + } + + # If we don't find any, we leave the flag 'found_file' with a NULL value. + if (length(files) > 0) { + # The first file that matches the pattern is chosen and read. 
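+    # For instance, a pattern like "/path/to/exp*/tas_*.nc" (hypothetical path) may
+    # match several files through Sys.glob(); only the first match is read here.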
+ filename <- head(files, 1) + filein <- filename + found_file <- filename + mask <- work_piece[['mask']] + + if (!silent) { + if (explore_dims) { + .message(paste("Exploring dimensions...", filename)) + } + ##} else { + ## cat(paste("* Reading & processing data...", filename, '\n')) + ##} + } + + # We will fill in 'expected_dims' with the names of the expected dimensions of + # the data array we'll retrieve from the file. + expected_dims <- NULL + remap_needed <- FALSE + # But first we open the file and work out whether the requested variable is 2d + fnc <- nc_open(filein) + if (!(namevar %in% names(fnc$var))) { + stop(paste("Error: The variable", namevar, "is not defined in the file", filename)) + } + var_long_name <- fnc$var[[namevar]]$longname + units <- fnc$var[[namevar]]$units + file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of + # 'longitude'/'latitude'. + if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) && + (work_piece[['dimnames']][['lon']] == 'longitude') && + ('lon' %in% file_dimnames)) { + work_piece[['dimnames']][['lon']] <- 'lon' + } + if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) && + (work_piece[['dimnames']][['lat']] == 'latitude') && + ('lat' %in% file_dimnames)) { + work_piece[['dimnames']][['lat']] <- 'lat' + } + if (is.null(work_piece[['is_2d_var']])) { + is_2d_var <- all(c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) %in% + unlist(lapply(fnc$var[[namevar]][['dim']], + '[[', 'name'))) + } else { + is_2d_var <- work_piece[['is_2d_var']] + } + if ((is_2d_var || work_piece[['is_file_per_dataset']])) { + if (Sys.which("cdo")[[1]] == "") { + stop("Error: CDO libraries not available") + } + + cdo_version <- strsplit(suppressWarnings(system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] + + cdo_version <- as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) + + } + # If the variable to load is 2-d, we need to determine whether: + # - interpolation is needed + # - subsetting is requested + if (is_2d_var) { + ## We read the longitudes and latitudes from the file. + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + first_lon_in_original_file <- lon[1] + # If a common grid is requested or we are exploring the file dimensions + # we need to read the grid type and size of the file to finally work out the + # CDO grid name. 
+ if (!is.null(work_piece[['grid']]) || explore_dims) { + # Here we read the grid type and its number of longitudes and latitudes + file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) + grids_positions <- grep('# gridID', file_info) + if (length(grids_positions) < 1) { + stop("The grid should be defined in the files.") + } + grids_first_lines <- grids_positions + 2 + grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) + grids_info <- as.list(1:length(grids_positions)) + grids_info <- lapply(grids_info, function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) + grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) + grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) + grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) + grids_matches <- unlist(lapply(grids_info, function (x) { + nlons <- if (length(grep('xsize', x)) > 0) { + as.numeric(x[grep('xsize', x) + 1]) + } else { + NA + } + nlats <- if (length(grep('ysize', x)) > 0) { + as.numeric(x[grep('ysize', x) + 1]) + } else { + NA + } + result <- FALSE + if (!anyNA(c(nlons, nlats))) { + if ((nlons == length(lon)) && + (nlats == length(lat))) { + result <- TRUE + } + } + result + })) + grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] + if (length(grids_matches) == 0) { + stop("Error: Only 'gaussian' and 'lonlat' grids supported. See e.g: cdo sinfo ", filename) + } + if (sum(grids_matches) > 1) { + if ((all(grids_types[which(grids_matches)] == 'gaussian') || + all(grids_types[which(grids_matches)] == 'lonlat')) && + all(unlist(lapply(grids_info[which(grids_matches)], identical, + grids_info[which(grids_matches)][[1]])))) { + grid_type <- grids_types[which(grids_matches)][1] + } else { + stop("Error: Load() can't disambiguate: More than one lonlat/gaussian grids with the same size as the requested variable defined in ", filename) + } + } else if (sum(grids_matches) == 1) { + grid_type <- grids_types[which(grids_matches)] + } else { + stop("Unexpected error.") + } + grid_lons <- length(lon) + grid_lats <- length(lat) + # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() + if (grid_type == 'lonlat') { + grid_name <- paste0('r', grid_lons, 'x', grid_lats) + } else { + grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') + } + if (is.null(work_piece[['grid']])) { + .warning(paste0("Detect the grid type to be '", grid_name, "'. ", + "If it is not expected, assign parameter 'grid' to avoid wrong result.")) + } + } + # If a common grid is requested, we will also calculate its size which we will use + # later on. 
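+      # Illustrative examples (not part of the original sources): work_piece[['grid']]
+      # can be e.g. 'r360x180' (lonlat, 360 longitudes x 180 latitudes) or 't159grid'
+      # (Gaussian; .t2nlatlon(159) gives 240 latitudes and 480 longitudes).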
+ if (!is.null(work_piece[['grid']])) { + # Now we calculate the common grid type and its lons and lats + if (length(grep('^t\\d{1,+}grid$', work_piece[['grid']])) > 0) { + common_grid_type <- 'gaussian' + common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + nlonlat <- .t2nlatlon(common_grid_res) + common_grid_lats <- nlonlat[1] + common_grid_lons <- nlonlat[2] + } else if (length(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']])) > 0) { + common_grid_type <- 'lonlat' + common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) + } else { + stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") + } + } else { + ## If no 'grid' is specified, there is no common grid. + ## But these variables are filled in for consistency in the code. + common_grid_lons <- length(lon) + common_grid_lats <- length(lat) + } + first_common_grid_lon <- 0 + last_common_grid_lon <- 360 - 360/common_grid_lons + ## This is not true for gaussian grids or for some regular grids, but + ## is a safe estimation + first_common_grid_lat <- -90 + last_common_grid_lat <- 90 + # And finally determine whether interpolation is needed or not + remove_shift <- FALSE + if (!is.null(work_piece[['grid']])) { + if ((grid_lons != common_grid_lons) || + (grid_lats != common_grid_lats) || + (grid_type != common_grid_type) || + (lon[1] != first_common_grid_lon)) { + if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { + remove_shift <- TRUE + } + remap_needed <- TRUE + common_grid_name <- work_piece[['grid']] + } + } else if ((lon[1] != first_common_grid_lon) && explore_dims && + !work_piece[['single_dataset']]) { + remap_needed <- TRUE + common_grid_name <- grid_name + remove_shift <- TRUE + } + if (remap_needed && (work_piece[['remap']] == 'con') && + (cdo_version >= as.numeric_version('1.7.0'))) { + work_piece[['remap']] <- 'ycon' + } + if (remove_shift && !explore_dims) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + .warning(paste0("The dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], "' doesn't start at longitude 0 and will be re-interpolated in order to align its longitudes with the standard CDO grids definable with the names 'tgrid' or 'rx', which are by definition starting at the longitude 0.\n")) + if (!is.null(mask)) { + .warning(paste0("A mask was provided for the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], "'. This dataset has been re-interpolated to align its longitudes to start at 0. You must re-interpolate the corresponding mask to align its longitudes to start at 0 as well, if you haven't done so yet. Running cdo remapcon,", common_grid_name, " original_mask_file.nc new_mask_file.nc will fix it.\n")) + } + } + if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + if (!explore_dims) { + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is originally on ", + "a grid coarser than the common grid and it has been ", + "extrapolated. Check the results carefully. 
It is ", + "recommended to specify as common grid the coarsest grid ", + "among all requested datasets via the parameter 'grid'.\n")) + } + } + # Now calculate if the user requests for a lonlat subset or for the + # entire field + lonmin <- work_piece[['lon_limits']][1] + lonmax <- work_piece[['lon_limits']][2] + latmin <- work_piece[['lat_limits']][1] + latmax <- work_piece[['lat_limits']][2] + lon_subsetting_requested <- FALSE + lonlat_subsetting_requested <- FALSE + if (lonmin <= lonmax) { + if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { + lon_subsetting_requested <- TRUE + } + } else { + if ((lonmin - lonmax) > 360/common_grid_lons) { + lon_subsetting_requested <- TRUE + } else { + gap_width <- floor(lonmin / (360/common_grid_lons)) - + floor(lonmax / (360/common_grid_lons)) + if (gap_width > 0) { + if (!(gap_width == 1 && (lonmin %% (360/common_grid_lons) == 0) && + (lonmax %% (360/common_grid_lons) == 0))) { + lon_subsetting_requested <- TRUE + } + } + } + } + if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) + || (lon_subsetting_requested)) { + lonlat_subsetting_requested <- TRUE + } + # Now that we know if subsetting was requested, we can say if final data + # will go across greenwich + if (lonmax < lonmin) { + data_across_gw <- TRUE + } else { + data_across_gw <- !lon_subsetting_requested + } + + # When remap is needed but no subsetting, the file is copied locally + # so that cdo works faster, and then interpolated. + # Otherwise the file is kept as is and the subset will have to be + # interpolated still. + if (!lonlat_subsetting_requested && remap_needed) { + nc_close(fnc) + filecopy <- tempfile(pattern = "load", fileext = ".nc") + file.copy(filein, filecopy) + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the + # underlying hdf5 library. + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " -selname,", namevar, " ", filecopy, " ", filein, + " 2>/dev/null", sep = "")) + file.remove(filecopy) + work_piece[['dimnames']][['lon']] <- 'lon' + work_piece[['dimnames']][['lat']] <- 'lat' + fnc <- nc_open(filein) + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + } + + # Read and check also the mask + if (!is.null(mask)) { + ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + if (is.list(mask)) { + if (!file.exists(mask[['path']])) { + stop(paste("Error: Couldn't find the mask file", mask[['path']])) + } + mask_file <- mask[['path']] + ###file.copy(work_piece[['mask']][['path']], mask_file) + fnc_mask <- nc_open(mask_file) + vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') + if ('nc_var_name' %in% names(mask)) { + if (!(mask[['nc_var_name']] %in% + vars_in_mask)) { + stop(paste("Error: couldn't find variable", mask[['nc_var_name']], + "in the mask file", mask[['path']])) + } + } else { + if (length(vars_in_mask) != 1) { + stop(paste("Error: one and only one non-coordinate variable should be defined in the mask file", + mask[['path']], "if the component 'nc_var_name' is not specified. 
Currently found: ", + paste(vars_in_mask, collapse = ', '), ".")) + } else { + mask[['nc_var_name']] <- vars_in_mask + } + } + if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { + stop(paste0("Error: the variable '", + mask[['nc_var_name']], + "' must be defined only over the dimensions '", + work_piece[['dimnames']][['lon']], "' and '", + work_piece[['dimnames']][['lat']], + "' in the mask file ", + mask[['path']])) + } + mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) + nc_close(fnc_mask) + ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) + ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) + ###} else { + ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) + ### nc_close(fnc_mask) + ### fnc_mask <- nc_open(mask_file) + ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') + ### mask_lon <- lon + ### mask_lat <- lat + ###} + ###} + ### Now ready to check that the mask is right + ##if (!(lonlat_subsetting_requested && remap_needed)) { + ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { + ### stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + ### } + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be identical to the ones in the corresponding data files if output = 'areave' or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in the mask file must start by 0 and the latitudes must be ordered from highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} + } + } + + lon_indices <- 1:length(lon) + if (!(lonlat_subsetting_requested && remap_needed)) { + lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 + } + if (lonmax >= lonmin) { + lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] + } else if (!remap_needed) { + lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] + } + lat_indices <- which(lat >= latmin & lat <= latmax) + ## In most of the cases the latitudes are ordered from -90 to 90. + ## We will reorder them to be in the order from 90 to -90, so mostly + ## always the latitudes are reordered. + ## TODO: This could be avoided in future. + if (lat[1] < lat[length(lat)]) { + lat_indices <- lat_indices[length(lat_indices):1] + } + if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { + if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { + stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. 
It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + } + mask <- mask[lon_indices, lat_indices] + } + ## If the user requests subsetting, we must extend the lon and lat limits if possible + ## so that the interpolation after is done properly + maximum_extra_points <- work_piece[['remapcells']] + if (lonlat_subsetting_requested && remap_needed) { + if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || + (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { + ## if the requested number of points goes beyond the left or right + ## sides of the map, we need to take the entire map so that the + ## interpolation works properly + lon_indices <- 1:length(lon) + } else { + extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) + if (extra_points > 0) { + lon_indices <- c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + } + extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) + if (extra_points > 0) { + lon_indices <- c(lon_indices, (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + } + } + min_lat_ind <- min(lat_indices) + max_lat_ind <- max(lat_indices) + extra_points <- min(maximum_extra_points, min_lat_ind - 1) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) + } else { + lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) + } + } + extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) + } else { + lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) + } + } + } + lon <- lon[lon_indices] + lat <- lat[lat_indices] + expected_dims <- c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) + } else { + lon <- 0 + lat <- 0 + } + # We keep on filling the expected dimensions + var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + nmemb <- nltime <- NULL + ## Sometimes CDO renames 'members' dimension to 'lev' + old_members_dimname <- NULL + if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { + old_members_dimname <- work_piece[['dimnames']][['member']] + work_piece[['dimnames']][['member']] <- 'lev' + } + if (work_piece[['dimnames']][['member']] %in% var_dimnames) { + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], var_dimnames)]]$len + expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) + } else { + nmemb <- 1 + } + if (length(expected_dims) > 0) { + dim_matches <- match(expected_dims, var_dimnames) + if (anyNA(dim_matches)) { + if (!is.null(old_members_dimname)) { + expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop(paste("Error: the expected dimension(s)", + paste(expected_dims[which(is.na(dim_matches))], collapse = ', '), + "were not found in", filename)) + } + time_dimname <- var_dimnames[-dim_matches] + } else { + time_dimname <- var_dimnames + } + if (length(time_dimname) > 0) { + if (length(time_dimname) == 1) { + nltime <- 
fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len + expected_dims <- c(expected_dims, time_dimname) + dim_matches <- match(expected_dims, var_dimnames) + } else { + if (!is.null(old_members_dimname)) { + expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop(paste("Error: the variable", namevar, + "is defined over more dimensions than the expected (", + paste(c(expected_dims, 'time'), collapse = ', '), + "). It could also be that the members, longitude or latitude dimensions are named incorrectly. In that case, either rename the dimensions in the file or adjust Load() to recognize the actual name with the parameter 'dimnames'. See file", filename)) + } + } else { + nltime <- 1 + } + + # Now we must retrieve the data from the file, but only the asked indices. + # So we build up the indices to retrieve. + # Longitudes or latitudes have been retrieved already. + if (explore_dims) { + # If we're exploring the file we only want one time step from one member, + # to regrid it and work out the number of longitudes and latitudes. + # We don't need more. + members <- 1 + ltimes_list <- list(c(1)) + } else { + # The data is arranged in the array 'tmp' with the dimensions in a + # common order: + # 1) Longitudes + # 2) Latitudes + # 3) Members (even if is not a file per member experiment) + # 4) Lead-times + if (work_piece[['is_file_per_dataset']]) { + time_indices <- 1:nltime + mons <- strsplit(system(paste('cdo showmon ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + years <- strsplit(system(paste('cdo showyear ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + mons <- as.numeric(mons[[1]][which(mons[[1]] != "")]) + years <- as.numeric(years[[1]][which(years[[1]] != "")]) + time_indices <- ts(time_indices, start = c(years[1], mons[1]), + end = c(years[length(years)], mons[length(mons)]), + frequency = 12) + ltimes_list <- list() + for (sdate in work_piece[['startdates']]) { + selected_time_indices <- window(time_indices, start = c(as.numeric( + substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))), + end = c(3000, 12), frequency = 12, extend = TRUE) + selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]] + ltimes_list <- c(ltimes_list, list(selected_time_indices)) + } + } else { + ltimes <- work_piece[['leadtimes']] + #if (work_piece[['dataset_type']] == 'exp') { + ltimes_list <- list(ltimes[which(ltimes <= nltime)]) + #} + } + ## TODO: Put, when reading matrices, this kind of warnings + # if (nmember < nmemb) { + # cat("Warning: + members <- 1:work_piece[['nmember']] + members <- members[which(members <= nmemb)] + } + + # Now, for each list of leadtimes to load (usually only one list with all leadtimes), + # we'll join the indices and retrieve data + found_disordered_dims <- FALSE + for (ltimes in ltimes_list) { + if (is_2d_var) { + start <- c(min(lon_indices), min(lat_indices)) + end <- c(max(lon_indices), max(lat_indices)) + if (lonlat_subsetting_requested && remap_needed) { + subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ncdf_dims <- list(dim_longitudes, dim_latitudes) + } else { + subset_indices <- list(lon_indices - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + ncdf_dims <- list() + } + final_dims <- c(length(subset_indices[[1]]), 
length(subset_indices[[2]]), 1, 1) + } else { + start <- end <- c() + subset_indices <- list() + ncdf_dims <- list() + final_dims <- c(1, 1, 1, 1) + } + + if (work_piece[['dimnames']][['member']] %in% expected_dims) { + start <- c(start, head(members, 1)) + end <- c(end, tail(members, 1)) + subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) + dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) + ncdf_dims <- c(ncdf_dims, list(dim_members)) + final_dims[3] <- length(members) + } + if (time_dimname %in% expected_dims) { + if (any(!is.na(ltimes))) { + start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) + end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) + subset_indices <- c(subset_indices, list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + } else { + start <- c(start, NA) + end <- c(end, NA) + subset_indices <- c(subset_indices, list(ltimes)) + } + dim_time <- ncdim_def(time_dimname, "", 1:length(ltimes), unlim = TRUE) + ncdf_dims <- c(ncdf_dims, list(dim_time)) + final_dims[4] <- length(ltimes) + } + count <- end - start + 1 + start <- start[dim_matches] + count <- count[dim_matches] + subset_indices <- subset_indices[dim_matches] + # Now that we have the indices to retrieve, we retrieve the data + if (prod(final_dims) > 0) { + tmp <- take(ncvar_get(fnc, namevar, start, count, + collapse_degen = FALSE), + 1:length(subset_indices), subset_indices) + # The data is regridded if it corresponds to an atmospheric variable. When + # the chosen output type is 'areave' the data is not regridded to not + # waste computing time unless the user specified a common grid. + if (is_2d_var) { + ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### start[dim_matches[1:2]], count[dim_matches[1:2]], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + if (lonlat_subsetting_requested && remap_needed) { + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") + ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], + fnc$var[[namevar]]$missval, + prec = if (fnc$var[[namevar]]$prec == 'int') { + 'integer' + } else { + fnc$var[[namevar]]$prec + }) + scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) + add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) + if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { + tmp <- (tmp - add_offset) / scale_factor + } + #nc_close(fnc) + fnc2 <- nc_create(filein2, list(ncdf_var)) + ncvar_put(fnc2, ncdf_var, tmp) + if (add_offset != 0) { + ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) + } + if (scale_factor != 1) { + ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) + } + nc_close(fnc2) + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { + "0,360," + } else { + paste0(lonmin, ",", lonmax, ",") + }, latmin, ",", latmax, + " -remap", work_piece[['remap']], ",", common_grid_name, + " ", filein2, " ", filein, " 2>/dev/null", sep = "")) + file.remove(filein2) + fnc2 <- nc_open(filein) + sub_lon <- ncvar_get(fnc2, 'lon') + sub_lat <- ncvar_get(fnc2, 'lat') + ## We read the longitudes and latitudes from the file. 
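+ ## The wrapping below maps negative longitudes into the 0:360 range
+ ## (e.g. -20 becomes 340), so that a Greenwich-crossing request such as
+ ## lonmin = 340, lonmax = 20 can be selected with the single test
+ ## "(sub_lon <= lonmax) | (sub_lon >= lonmin)".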
+ ## In principle cdo should put in order the longitudes + ## and slice them properly unless data is across greenwich + sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 + sub_lon_indices <- 1:length(sub_lon) + if (lonmax < lonmin) { + sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] + } + sub_lat_indices <- 1:length(sub_lat) + ## In principle cdo should put in order the latitudes + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + sub_lat_indices <- length(sub_lat):1 + } + final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) + subset_indices[[dim_matches[1]]] <- sub_lon_indices + subset_indices[[dim_matches[2]]] <- sub_lat_indices + + tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), + 1:length(subset_indices), subset_indices) + + if (!is.null(mask)) { + ## We create a very simple 2d netcdf file that is then interpolated to the common + ## grid to know what are the lons and lats of our slice of data + mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", c(-90, 90)) + ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) + nc_close(fnc_mask) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null", sep = "")) + fnc_mask <- nc_open(mask_file_remap) + mask_lons <- ncvar_get(fnc_mask, 'lon') + mask_lats <- ncvar_get(fnc_mask, 'lat') + nc_close(fnc_mask) + file.remove(mask_file, mask_file_remap) + if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) { + stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. 
For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + } + mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360 + if (lonmax >= lonmin) { + mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax)) + } else { + mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax)) + } + mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax)) + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + mask_lat_indices <- mask_lat_indices[length(mask_lat_indices):1] + } + mask <- mask[mask_lon_indices, mask_lat_indices] + } + sub_lon <- sub_lon[sub_lon_indices] + sub_lat <- sub_lat[sub_lat_indices] + ### nc_close(fnc_mask) + ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) { + ### "0,360," + ### } else { + ### paste0(lonmin, ",", lonmax, ",") + ### }, latmin, ",", latmax, + ### " -remap", work_piece[['remap']], ",", common_grid_name, + ###This is wrong: same files + ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = "")) + ### fnc_mask <- nc_open(mask_file) + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + } + } + if (!all(dim_matches == sort(dim_matches))) { + if (!found_disordered_dims && rev(work_piece[['indices']])[2] == 1 && rev(work_piece[['indices']])[3] == 1) { + found_disordered_dims <- TRUE + .warning(paste0("The dimensions for the variable ", namevar, " in the files of the experiment with index ", tail(work_piece[['indices']], 1), " are not in the optimal order for loading with Load(). The optimal order would be '", paste(expected_dims, collapse = ', '), "'. One of the files of the dataset is stored in ", filename)) + } + tmp <- aperm(tmp, dim_matches) + } + dim(tmp) <- final_dims + # If we are exploring the file we don't need to process and arrange + # the retrieved data. We only need to keep the dimension sizes. + if (is_2d_var && lonlat_subsetting_requested && remap_needed) { + final_lons <- sub_lon + final_lats <- sub_lat + } else { + final_lons <- lon + final_lats <- lat + } + if (explore_dims) { + if (work_piece[['is_file_per_member']]) { + ## TODO: When the exp_full_path contains asterisks and is file_per_member + ## members from different datasets may be accounted. + ## Also if one file member is missing the accounting will be wrong. + ## Should parse the file name and extract number of members. + if (is_url) { + nmemb <- NULL + } else { + nmemb <- length(files) + } + } + dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats) + } else { + # If we are not exploring, then we have to process the retrieved data + if (is_2d_var) { + tmp <- apply(tmp, c(3, 4), function(x) { + # Disable of large values. 
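+ # Values outside 'var_limits' are set to NA, the land-sea mask (if any)
+ # is applied, and each lon-lat slice is then reduced according to
+ # 'output': a cos(latitude)-weighted mean for 'areave', a weighted
+ # average over latitudes for 'lon', a plain mean over longitudes for
+ # 'lat', or the full field rounded to 5 significant figures for 'lonlat'.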
+ if (!is.na(work_piece[['var_limits']][2])) { + x[which(x > work_piece[['var_limits']][2])] <- NA + } + if (!is.na(work_piece[['var_limits']][1])) { + x[which(x < work_piece[['var_limits']][1])] <- NA + } + if (!is.null(mask)) { + x[which(mask < 0.5)] <- NA + } + + if (output == 'areave' || output == 'lon') { + weights <- InsertDim(cos(final_lats * pi / 180), 1, length(final_lons), name = 'lon') + weights[which(is.na(x))] <- NA + if (output == 'areave') { + weights <- weights / mean(weights, na.rm = TRUE) + mean(x * weights, na.rm = TRUE) + } else { + weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, length(final_lats), name = 'lat') + MeanDims(x * weights, 2, na.rm = TRUE) + } + } else if (output == 'lat') { + MeanDims(x, 1, na.rm = TRUE) + } else if (output == 'lonlat') { + signif(x, 5) + } + }) + if (output == 'areave') { + dim(tmp) <- c(1, 1, final_dims[3:4]) + } else if (output == 'lon') { + dim(tmp) <- c(final_dims[1], 1, final_dims[3:4]) + } else if (output == 'lat') { + dim(tmp) <- c(1, final_dims[c(2, 3, 4)]) + } else if (output == 'lonlat') { + dim(tmp) <- final_dims + } + } + var_data <- attach.big.matrix(work_piece[['out_pointer']]) + if (work_piece[['dims']][['member']] > 1 && nmemb > 1 && + work_piece[['dims']][['ftime']] > 1 && + nltime < work_piece[['dims']][['ftime']]) { + work_piece[['indices']][2] <- work_piece[['indices']][2] - 1 + for (jmemb in members) { + work_piece[['indices']][2] <- work_piece[['indices']][2] + 1 + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1) + var_data[out_indices] <- as.vector(tmp[, , jmemb, ]) + } + work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1 + } else { + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp) - 1) + a <- aperm(tmp, c(1, 2, 4, 3)) + as.vector(a) + var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3))) + } + work_piece[['indices']][3] <- work_piece[['indices']][3] + 1 + } + } + } + nc_close(fnc) + if (is_2d_var) { + if (remap_needed) { + array_across_gw <- FALSE + file.remove(filein) + ###if (!is.null(mask) && lonlat_subsetting_requested) { + ### file.remove(mask_file) + ###} + } else { + if (first_lon_in_original_file < 0) { + array_across_gw <- data_across_gw + } else { + array_across_gw <- FALSE + } + } + } + } + if (explore_dims) { + list(dims = dims, is_2d_var = is_2d_var, grid = grid_name, + units = units, var_long_name = var_long_name, + data_across_gw = data_across_gw, array_across_gw = array_across_gw) + } else { + ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) { + ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) + ###} + if (!silent && !is.null(work_piece[['progress_amount']])) { + message(paste0(work_piece[['progress_amount']]), appendLF = FALSE) + } + found_file + } +} + +.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates, + nmember = NULL, nmemberobs = NULL, + nleadtime = NULL, leadtimemin = 1, + leadtimemax = NULL, storefreq = 'monthly', + sampleperiod = 1, lonmin = 0, lonmax = 360, + latmin = -90, latmax = 90, output = 'areave', + method = 'conservative', grid = NULL, + maskmod = vector("list", 15), + maskobs = vector("list", 15), + configfile = NULL, suffixexp = NULL, + suffixobs = NULL, varmin = NULL, varmax = NULL, + silent = FALSE, nprocs = NULL) { + ## This 
function loads and selects sample data stored in sampleMap and + ## sampleTimeSeries and is used in the examples instead of Load() so as + ## to avoid nco and cdo system calls and computation time in the stage + ## of running examples in the CHECK process on CRAN. + selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', + '20001101', '20051101')) + start_dates_position <- 3 + lead_times_position <- 4 + + if (output == 'lonlat') { + sampleData <- s2dv::sampleMap + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] + } + else if (output == 'areave') { + sampleData <- s2dv::sampleTimeSeries + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] + } + + dims_out <- dim(sampleData$mod) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$mod) <- dims_out + + dims_out <- dim(sampleData$obs) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$obs) <- dims_out + + invisible(list(mod = dataOut$mod, obs = dataOut$obs, + lat = dataOut$lat, lon = dataOut$lon)) +} + +.ConfigGetDatasetInfo <- function(matching_entries, table_name) { + # This function obtains the information of a dataset and variable pair, + # applying all the entries that match in the configuration file. + if (table_name == 'experiments') { + id <- 'EXP' + } else { + id <- 'OBS' + } + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), paste0('$DEFAULT_', id, '_FILE_PATH$'), '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + info <- NULL + + for (entry in matching_entries) { + if (is.null(info)) { + info <- entry[-1:-2] + info[which(info == '*')] <- defaults[which(info == '*')] + } else { + info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] + } + } + + info <- as.list(info) + names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') + info +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. 
The path with expressions and tags, e.g.: + # /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc + # and a complete known path to one of the matching files, e.g.: + # /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc + # and it returns the path pattern but without shell globbing expressions: + # /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc + # + # To do that, it needs also as inputs the list of replace values (the + # association of each tag to their value). + # + # All the tags not present in the parameter tags_to_keep will be repalced. + # + # Not all cases can be resolved with the implemented algorithm. In an + # unsolvable case a warning is given and one possible guess is returned. + # + # In some cases it is interesting to replace only the expressions in the + # path to the file, but not the ones in the file name itself. To keep the + # expressions in the file name, the parameter permissive can be set to + # TRUE. To replace all the expressions it can be set to FALSE. + clean <- function(x) { + if (nchar(x) > 0) { + x <- gsub('\\\\', '', x) + x <- gsub('\\^', '', x) + x <- gsub('\\$', '', x) + x <- unname(sapply(strsplit(x, '[',fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y))) + do.call(paste0, as.list(x)) + } else { + x + } + } + + strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "") + + if (permissive) { + actual_path_chunks <- strsplit(actual_path, '/')[[1]] + actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/') + file_name <- tail(actual_path_chunks, 1) + if (length(actual_path_chunks) > 1) { + file_name <- paste0('/', file_name) + } + path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]] + path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)], + collapse = '/') + path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values) + file_name_with_globs <- tail(path_with_globs_chunks, 1) + if (length(path_with_globs_chunks) > 1) { + file_name_with_globs <- paste0('/', file_name_with_globs) + } + right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1) + right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values) + path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags)) + match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE), paste0(actual_path, file_name)) + if (match != 1) { + stop("Incorrect parameters to replace glob expressions. 
The path with expressions does not match the actual path.") + } + if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') + file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + } + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- c() + tags_to_replace_starts <- c() + tags_to_replace_ends <- c() + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in 1:length(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, matches[i] - 1), replace_values) + left_known <- strReverse(head(strsplit(strReverse(left), strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- .ConfigReplaceVariablesInString(substr(path_with_globs_rx, matches[i] + lengths[i], nchar(path_with_globs_rx)), replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + final_match <- NULL + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- c(left_match + match_len - 1 - nchar(clean(right_known)) - nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path, head(tags_to_replace_ends, 1) + 1, nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - (head(tags_to_replace_ends, 1) - head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path, file_name_with_globs) + } else { + actual_path + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + tag <- paste0('\\$', tag, '\\$') + path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') + parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] + parts <- as.list(parts[grep(tag, parts)]) + longest_couples <- c() + pos_longest_couples <- c() + found_value <- NULL + for (i in 1:length(parts)) { + parts[[i]] <- strsplit(parts[[i]], tag)[[1]] + if (length(parts[[i]]) == 1) { + parts[[i]] <- c(parts[[i]], '') + } + len_parts <- sapply(parts[[i]], nchar) + len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] + pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) + longest_couples <- c(longest_couples, max(len_couples)) + } + chosen_part <- which.max(longest_couples) + parts[[chosen_part]] <- parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + actual_path <- substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + found_value <- substr(actual_path, 0, match_right - 1) + } + } + } else { + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + actual_path <- substr(actual_path, 0, match_right - 1) + } + } + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + found_value <- substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + } + found_value +} + +.FilterUserGraphicArgs <- 
function(excludedArgs, ...) { + # This function filter the extra graphical parameters passed by the user in + # a plot function, excluding the ones that the plot function uses by default. + # Each plot function has a different set of arguments that are not allowed to + # be modified. + args <- list(...) + userArgs <- list() + for (name in names(args)) { + if ((name != "") & !is.element(name, excludedArgs)) { + # If the argument has a name and it is not in the list of excluded + # arguments, then it is added to the list that will be used + userArgs[[name]] <- args[[name]] + } else { + .warning(paste0("the argument '", name, "' can not be + modified and the new value will be ignored")) + } + } + userArgs +} + +.SelectDevice <- function(fileout, width, height, units, res) { + # This function is used in the plot functions to check the extension of the + # files where the graphics will be stored and select the right R device to + # save them. + # If the vector of filenames ('fileout') has files with different + # extensions, then it will only accept the first one, changing all the rest + # of the filenames to use that extension. + + # We extract the extension of the filenames: '.png', '.pdf', ... + ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout)) + + if (length(ext) != 0) { + # If there is an extension specified, select the correct device + ## units of width and height set to accept inches + if (ext[1] == ".png") { + saveToFile <- function(fileout) { + png(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] == ".jpeg") { + saveToFile <- function(fileout) { + jpeg(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] %in% c(".eps", ".ps")) { + saveToFile <- function(fileout) { + postscript(file = fileout, width = width, height = height) + } + } else if (ext[1] == ".pdf") { + saveToFile <- function(fileout) { + pdf(file = fileout, width = width, height = height) + } + } else if (ext[1] == ".svg") { + saveToFile <- function(fileout) { + svg(filename = fileout, width = width, height = height) + } + } else if (ext[1] == ".bmp") { + saveToFile <- function(fileout) { + bmp(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] == ".tiff") { + saveToFile <- function(fileout) { + tiff(filename = fileout, width = width, height = height, res = res, units = units) + } + } else { + .warning("file extension not supported, it will be used '.eps' by default.") + ## In case there is only one filename + fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1]) + ext[1] <- ".eps" + saveToFile <- function(fileout) { + postscript(file = fileout, width = width, height = height) + } + } + # Change filenames when necessary + if (any(ext != ext[1])) { + .warning(paste0("some extensions of the filenames provided in 'fileout' are not ", ext[1],". The extensions are being converted to ", ext[1], ".")) + fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout) + } + } else { + # Default filenames when there is no specification + .warning("there are no extensions specified in the filenames, default to '.eps'") + fileout <- paste0(fileout, ".eps") + saveToFile <- postscript + } + + # return the correct function with the graphical device, and the correct + # filenames + list(fun = saveToFile, files = fileout) +} + +.message <- function(...) 
{ + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + message(paste0(tag, paste(strwrap( + args, indent = indent, exdent = exdent + ), collapse = collapse)), appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + warning(paste0(tag, paste(strwrap( + args, indent = indent, exdent = exdent + ), collapse = collapse)), call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.IsColor <- function(x) { + res <- try(col2rgb(x), silent = TRUE) + return(!"try-error" %in% class(res)) +} + +# This function switches to a specified figure at position (row, col) in a layout. +# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is +# activated, i.e., all drawn elements will be superimposed. 
Additionally, after +# using this function, the automatical pointing to the next figure in the layout +# will be spoiled: once the last figure in the layout is drawn, the pointer won't +# move to the first figure in the layout. +# Only figures with numbers other than 0 (when creating the layout) will be +# accessible. +# Inputs: either row and col, or n and mat +.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) { + if (!is.null(n) && !is.null(mat)) { + if (!is.numeric(n) || length(n) != 1) { + stop("Parameter 'n' must be a single numeric value.") + } + n <- round(n) + if (!is.array(mat)) { + stop("Parameter 'mat' must be an array.") + } + target <- which(mat == n, arr.ind = TRUE)[1, ] + row <- target[1] + col <- target[2] + } else if (!is.null(row) && !is.null(col)) { + if (!is.numeric(row) || length(row) != 1) { + stop("Parameter 'row' must be a single numeric value.") + } + row <- round(row) + if (!is.numeric(col) || length(col) != 1) { + stop("Parameter 'col' must be a single numeric value.") + } + col <- round(col) + } else { + stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.") + } + next_attempt <- c(row, col) + par(mfg = next_attempt) + i <- 1 + layout_size <- par('mfrow') + layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2], + byrow = TRUE) + while (any((par('mfg')[1:2] != c(row, col)))) { + next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ] + par(mfg = next_attempt) + i <- i + 1 + if (i > prod(layout_size)) { + stop("Figure not accessible.") + } + } + plot(0, type = 'n', axes = FALSE, ann = FALSE) + par(mfg = next_attempt) +} + +# Function to permute arrays of non-atomic elements (e.g. POSIXct) +.aperm2 <- function(x, new_order) { + old_dims <- dim(x) + attr_bk <- attributes(x) + if ('dim' %in% names(attr_bk)) { + attr_bk[['dim']] <- NULL + } + if (is.numeric(x)) { + x <- aperm(x, new_order) + } else { + y <- array(1:length(x), dim = dim(x)) + y <- aperm(y, new_order) + x <- x[as.vector(y)] + } + dim(x) <- old_dims[new_order] + attributes(x) <- c(attributes(x), attr_bk) + x +} + +# This function is a helper for the function .MergeArrays. +# It expects as inputs two named numeric vectors, and it extends them +# with dimensions of length 1 until an ordered common dimension +# format is reached. +# The first output is dims1 extended with 1s. +# The second output is dims2 extended with 1s. +# The third output is a merged dimension vector. If dimensions with +# the same name are found in the two inputs, and they have a different +# length, the maximum is taken. +.MergeArrayDims <- function(dims1, dims2) { + new_dims1 <- c() + new_dims2 <- c() + while (length(dims1) > 0) { + if (names(dims1)[1] %in% names(dims2)) { + pos <- which(names(dims2) == names(dims1)[1]) + dims_to_add <- rep(1, pos - 1) + if (length(dims_to_add) > 0) { + names(dims_to_add) <- names(dims2[1:(pos - 1)]) + } + new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) + new_dims2 <- c(new_dims2, dims2[1:pos]) + dims1 <- dims1[-1] + dims2 <- dims2[-c(1:pos)] + } else { + new_dims1 <- c(new_dims1, dims1[1]) + new_dims2 <- c(new_dims2, 1) + names(new_dims2)[length(new_dims2)] <- names(dims1)[1] + dims1 <- dims1[-1] + } + } + if (length(dims2) > 0) { + dims_to_add <- rep(1, length(dims2)) + names(dims_to_add) <- names(dims2) + new_dims1 <- c(new_dims1, dims_to_add) + new_dims2 <- c(new_dims2, dims2) + } + list(new_dims1, new_dims2, pmax(new_dims1, new_dims2)) +} + +# This function takes two named arrays and merges them, filling with +# NA where needed. 
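+# In the example below, the 'b' dimension lengths (1 and 3) are joined
+# into 4 along the merge, while dimensions present in only one input
+# ('a', 'c', 'd', 'e', 'g') are added to the other input with length 1
+# and padded with NA up to the longer length.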
+# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in 1:length(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# only can be used in Trend(). Needs generalization or be replaced by other function. +.reorder <- function(output, time_dim, dim_names) { + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { + names(dim(output)) <- time_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { #regular case + names(dim(output))[1] <- time_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' + return(output) +} + +# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { + + if (type == 'dcpp') { + + fyear_dim <- 'fyear' + data <- Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data))==fmonth_dim)] <- fyear_dim + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + + anom <- data + + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) + + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- seq(from = indices_for_clim[length(indices_for_clim)], by = 
-1, length.out = n_fyears) + } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i,first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i,] <- data[i,] - clim + } + } + + } else if (type %in% c('obs','hist')) { + + data <- multiApply::Apply(data = data, target_dims = month_dim, fun = mean, na.rm = na.rm)$output1 + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } else { ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } + + anom <- data - clim + + } else {stop('type must be dcpp, hist or obs')} + + return(anom) +} + +#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. +# The function is temporarily here because PlotLayout() needs to draw the colorbars of +# PlotMostLikelyQuantileMap(). +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, label_scale = 1, extra_margin = rep(0, 4), + ...) { + # bar_limits + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(1:length(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in 1:length(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } else { + bar_titles <- 
paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { + s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, +# bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + #TODO: col_inf and col_sup + return(list(brks = brks, cols = cols)) + } + +} + + diff --git a/modules/Visualization/tmp/clim.palette.R b/modules/Visualization/R/tmp/clim.palette.R similarity index 99% rename from modules/Visualization/tmp/clim.palette.R rename to modules/Visualization/R/tmp/clim.palette.R index b23ff8428f201dbe4cb129e5f202ab2f1924532f..7f220d31bf2e5b4b6997da8f3f3b00a59ac4f732 100644 --- a/modules/Visualization/tmp/clim.palette.R +++ b/modules/Visualization/R/tmp/clim.palette.R @@ -67,3 +67,4 @@ clim.palette <- function(palette = "bluered") { clim.colors <- function(n, palette = "bluered") { clim.palette(palette)(n) } + diff --git a/modules/Visualization/R/tmp/zzz.R b/modules/Visualization/R/tmp/zzz.R new file mode 100644 index 0000000000000000000000000000000000000000..f2871dfd56a517e3ff5a4e8c93b46bdbaa9dd9f3 --- /dev/null +++ b/modules/Visualization/R/tmp/zzz.R @@ -0,0 +1,256 @@ +# Function to permute arrays of non-atomic elements (e.g. POSIXct) +.aperm2 <- function(x, new_order) { + old_dims <- dim(x) + attr_bk <- attributes(x) + if ('dim' %in% names(attr_bk)) { + attr_bk[['dim']] <- NULL + } + if (is.numeric(x)) { + x <- aperm(x, new_order) + } else { + y <- array(1:length(x), dim = dim(x)) + y <- aperm(y, new_order) + x <- x[as.vector(y)] + } + dim(x) <- old_dims[new_order] + attributes(x) <- c(attributes(x), attr_bk) + x +} + +# verbose-only printing function +.printv <- function(value, verbosity = TRUE) { + if (verbosity) { + print(value) + } +} + +# normalize a time series +.standardize <- function(timeseries) { + out <- (timeseries - mean(timeseries, na.rm = T)) / sd(timeseries, na.rm = T) + return(out) +} + +.selbox <- function(lon, lat, xlim = NULL, ylim = NULL) { + if (!is.null(xlim)) { + # This transforms c(-20, -10) to c(340, 350) but c(-20, 10) is unchanged + # Bring them all to the same units in the 0:360 range + xlim1 <- xlim[1] %% 360 + xlim2 <- xlim[2] %% 360 + lonm <- lon %% 360 + if (lonm[1] > tail(lonm, 1)) { + lonm <- lon + } + if (xlim1 > xlim2) { + # If box crosses 0 + ilonsel <- (lonm >= xlim1) | (lonm <= xlim2) + } else { + ilonsel <- (lonm >= xlim1) & (lonm <= xlim2) + } + if (!any(ilonsel)) { + stop("No intersection between longitude bounds and data domain.") + } + } else { + ilonsel <- 1:length(lon) + } + if (!is.null(ylim)) { + ilatsel <- (lat >= ylim[1]) & (lat <= ylim[2]) + } else { + ilatsel <- 1:length(lat) + } + return(list(ilon = ilonsel, ilat = ilatsel)) +} + +# produce a 2d matrix of area weights +.area.weight <- function(ics, ipsilon, root = T) { + field <- array(NA, dim = c(length(ics), length(ipsilon))) + if (root == T) { + for (j in 1:length(ipsilon)) { + field[, j] <- sqrt(cos(pi / 180 * ipsilon[j])) + } + } + + if (root == F) { + for (j in 1:length(ipsilon)) { + field[, j] <- cos(pi / 180 * ipsilon[j]) + } + } + + return(field) +} + +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. 
+GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, col_inf = NULL, col_sup = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, label_scale = 1, extra_margin = rep(0, 4), + ...) { + + # bar_limits: a vector of 2 or a list + if (!is.list(bar_limits)) { + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2 or a list containing that.") + } + # turn into list + bar_limits <- rep(list(bar_limits), nmap) + } else { + if (any(!sapply(bar_limits, is.numeric)) || any(sapply(bar_limits, length) != 2)) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2 or a list containing that.") + } + if (length(bar_limits) != nmap) { + stop("Parameter 'bar_limits' must have the length of 'nmap'.") + } + } + # Check brks + if (!is.list(brks)) { + if (is.null(brks)) { + brks <- 5 + } else if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Turn it into list + brks <- rep(list(brks), nmap) + } else { + if (length(brks) != nmap) { + stop("Parameter 'brks' must have the length of 'nmap'.") + } + } + for (i_map in 1:nmap) { + if (length(brks[[i_map]]) == 1) { + brks[[i_map]] <- seq(from = bar_limits[[i_map]][1], to = bar_limits[[i_map]][2], length.out = brks[[i_map]]) + } + } + + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(1:length(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + + # Set triangle_ends, col_sup, col_inf + #NOTE: The "col" input of ColorBar() later is not NULL (since we determine it here) + # so ColorBar() cannot decide these parameters for us. + #NOTE: Here, col_inf and col_sup are prior to triangle_ends, which is consistent with ColorBar(). 
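+ #NOTE: For instance, if 'var_limits' extends beyond 'bar_limits' at both
+ # ends, the block below is meant to activate both triangles and, when
+ # col_inf/col_sup are not supplied, peel off the first/last colour of
+ # each set into col_inf/col_sup.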
+ #TODO: Make triangle_ends a list + if (is.null(triangle_ends)) { + if (!is.null(var_limits)) { + triangle_ends <- c(FALSE, FALSE) + #TODO: bar_limits is a list + if (bar_limits[1] >= var_limits[1] | !is.null(col_inf)) { + triangle_ends[1] <- TRUE + if (is.null(col_inf)) { + col_inf <- lapply(cols, head, 1) + cols <- lapply(cols, '[', -1) + } + } + if (bar_limits[2] < var_limits[2] | !is.null(col_sup)) { + triangle_ends[2] <- TRUE + if (is.null(col_sup)) { + col_sup <- lapply(cols, tail, 1) + cols <- lapply(cols, '[', -length(cols[[1]])) + } + } + } else { + triangle_ends <- c(!is.null(col_inf), !is.null(col_sup)) + } + } else { # triangle_ends has values + if (triangle_ends[1] & is.null(col_inf)) { + col_inf <- lapply(cols, head, 1) + cols <- lapply(cols, '[', -1) + } + if (triangle_ends[2] & is.null(col_sup)) { + col_sup <- lapply(cols, tail, 1) + cols <- lapply(cols, '[', -length(cols[[1]])) + } + } + + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as 'nmap'.") + } + } + for (i_map in 1:length(cols)) { + if (length(cols[[i_map]]) != (length(brks[[i_map]]) - 1)) { + cols[[i_map]] <- grDevices::colorRampPalette(cols[[i_map]])(length(brks[[i_map]]) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } else { + bar_titles <- paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { +#TODO: Add s2dv:: + ColorBar(brks = brks[[k]], cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, + bar_limits = bar_limits[[k]], #var_limits = var_limits, + triangle_ends = triangle_ends, col_inf = col_inf[[k]], col_sup = col_sup[[k]], plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + return(list(brks = brks, cols = cols, col_inf = col_inf, col_sup = col_sup)) + } + +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'lons', 'longitude', 'x', 'i', 'nav_lon') +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'lats', 'latitude', 'y', 'j', 'nav_lat') +} + +.KnownTimeNames <- function() { + known_time_names <- c('time', 'ftime', 'sdate', 'sdates', 'syear', 'sweek', 'sday', 'leadtimes') +} + +.KnownForecastTimeNames <- function() { + known_time_names <- c('time', 'ftime', 'ltime', 'leadtimes') +} + +.KnownStartDateNames <- function() { + known_time_names <- c('sdate', 'sdates', 'syear', 'sweek', 'sday') +} + +.KnownMemberNames <- function() { + known_time_names <- c('memb', 'member', 'members', 'ensemble', 'ensembles') +} + +.isNullOb <- function(x) is.null(x) | all(sapply(x, is.null)) + +.rmNullObs <- function(x) { + x <- base::Filter(Negate(.isNullOb), x) + lapply(x, function(x) if (is.list(x)) .rmNullObs(x) else x) +} + +# Definition of a global variable to store the warning message used in Calibration +warning_shown <- FALSE + diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index a8664569047bb60185733e27a1250ec985cd481d..ae94ed3d01526f8cf98932a2adf6f6bce6d7359d 100644 --- 
a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,393 +1,213 @@ -#G# TODO: Remove once released in s2dv/CSTools -source("modules/Visualization/tmp/PlotMostLikelyQuantileMap.R") -source("modules/Visualization/tmp/PlotCombinedMap.R") -source("modules/Visualization/tmp/clim.palette.R") - ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Adapt to multi-model case +## TODO: Adapt to multi-variable case ## TODO: Add param 'raw'? - -plot_data <- function(recipe, - data, - skill_metrics = NULL, - probabilities = NULL, - archive = NULL, - significance = F) { +## TODO: Decadal plot names + +source("modules/Visualization/R/plot_skill_metrics.R") +source("modules/Visualization/R/get_proj_code.R") +## TODO: Remove after the next s2dv release +source("modules/Visualization/R/tmp/PlotRobinson.R") +source("modules/Visualization/R/plot_most_likely_terciles_map.R") +source("modules/Visualization/R/plot_ensemble_mean.R") +## TODO: Remove in the next release +source("modules/Visualization/plot_data.R") + +Visualization <- function(recipe, + data, + skill_metrics = NULL, + probabilities = NULL, + significance = F, + output_conf = NULL) { # Try to produce and save several basic plots. # recipe: the auto-s2s recipe as read by read_yaml() - # archive: the auto-s2s archive as read by read_yaml() # data: list containing the hcst, obs and (optional) fcst s2dv_cube objects # calibrated_data: list containing the calibrated hcst and (optional) fcst # s2dv_cube objects # skill_metrics: list of arrays containing the computed skill metrics # significance: Bool. Whether to include significance dots where applicable - outdir <- paste0(get_dir(recipe), "/plots/") - dir.create(outdir, showWarnings = FALSE, recursive = TRUE) + # Try to set default configuration if not specified by user + if (is.null(output_conf) && !is.null(recipe$Analysis$Region$name)) { + output_conf <- read_yaml("modules/Visualization/output_size.yml", + eval.exp = TRUE)$region + if (recipe$Analysis$Region$name %in% names(output_conf)) { + output_conf <- output_conf[[recipe$Analysis$Region$name]] + } else { + warn(recipe$Run$logger, + paste0("The region name is not found in the 'output_conf' file. ", + "The default plot settings will be used.")) + output_conf <- NULL + } + # If the user chooses to specify the configuration, warn them. + } else { + if (is.list(output_conf)) { + warning(paste0("Parameter 'output_conf' should be a list matching the ", + "parameters of the requested plotting function, i.e. ", + "PlotEquiMap, PlotRobinson or PlotLayout. 
There could be ", + "plotting erros if the list is incomplete.")) + } else if (!is.null(output_conf)) { + warning(paste("Parameter 'output_conf' should be a list.", + "Using default configuration.")) + output_conf <- NULL + } + } + + # Get plot types and create output directories + plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/plots/") + outdir <- get_dir(recipe = recipe, + variable = data$hcst$attrs$Variable$varName) + for (directory in outdir) { + dir.create(directory, showWarnings = FALSE, recursive = TRUE) + } if ((is.null(skill_metrics)) && (is.null(data$fcst))) { error(recipe$Run$logger, "The Visualization module has been called, - but there is no fcst in 'data', and 'skill_metrics' is NULL - so there is no data that can be plotted.") + but there is no fcst in 'data', and 'skill_metrics' is NULL + so there is no data that can be plotted.") stop() } - - if (is.null(archive)) { - if (tolower(recipe$Analysis$Horizon) == "seasonal") { - archive <- read_yaml(paste0(recipe$Run$code_dir, - "conf/archive.yml"))$archive - } else if (tolower(recipe$Analysis$Horizon) == "decadal") { - archive <- read_yaml(paste0(recipe$Run$code_dir, - "conf/archive_decadal.yml"))$archive - } + # Set default single-panel plots if not specified + if (is.null(recipe$Analysis$Workflow$Visualization$multi_panel)) { + recipe$Analysis$Workflow$Visualization$multi_panel <- FALSE } - # Plot skill metrics - if (!is.null(skill_metrics)) { - plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, - significance) + if ("skill_metrics" %in% plots) { + if (!is.null(skill_metrics)) { + plot_skill_metrics(recipe, data$hcst, skill_metrics, outdir, + significance, output_conf = output_conf) + } else { + error(recipe$Run$logger, + paste0("The skill metric plots have been requested, but the ", + "parameter 'skill_metrics' is NULL")) + } } # Plot forecast ensemble mean - if (!is.null(data$fcst)) { - plot_ensemble_mean(recipe, archive, data$fcst, outdir) - } - - # Plot Most Likely Terciles - if ((!is.null(probabilities)) && (!is.null(data$fcst))) { - plot_most_likely_terciles(recipe, archive, data$fcst, - probabilities, outdir) - } -} - -plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, - outdir, significance = F) { - # recipe: Auto-S2S recipe - # archive: Auto-S2S archive - # data_cube: s2dv_cube object with the corresponding hindcast data - # skill_metrics: list of named skill metrics arrays - # outdir: output directory - # significance: T/F, whether to display the significance dots in the plots - - ## TODO: OPTION for CERISE: Using PuOr - # Abort if frequency is daily - if (recipe$Analysis$Variables$freq == "daily_mean") { - error(recipe$Run$logger, "Visualization functions not yet implemented - for daily data.") - stop() - } - # Abort if skill_metrics is not list - if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { - stop("The element 'skill_metrics' must be a list of named arrays.") - } - - latitude <- data_cube$lat - longitude <- data_cube$lon - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", - recipe$Analysis$Time$hcst_end) - init_month <- lubridate::month(as.numeric(substr(recipe$Analysis$Time$sdate, - start = 1, stop = 2)), - label = T, abb = T) - # Define color palette and number of breaks according to output format - ## TODO: Make separate function - if (tolower(recipe$Analysis$Output_format) 
%in% c("scorecards", "cerise")) { - diverging_palette <- "purpleorange" - sequential_palette <- "Oranges" - } else { - diverging_palette <- "bluered" - sequential_palette <- "Reds" - } - - # Group different metrics by type - skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", - "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", - "enscorr_specs", "rmsss") - scores <- c("rps", "frps", "crps", "frps_specs") - # Assign colorbar to each metric type - ## TODO: Triangle ends - for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { - if (name %in% names(skill_metrics)) { - # Define plot characteristics and metric name to display in plot - if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90_specs", "bss10_specs", - "rmsss")) { - display_name <- toupper(strsplit(name, "_")[[1]][1]) - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name == "mean_bias_ss") { - display_name <- "Mean Bias Skill Score" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name %in% c("enscorr", "enscorr_specs")) { - display_name <- "Ensemble Mean Correlation" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - cols <- clim.colors(length(brks) - 1, diverging_palette) - col_inf <- NULL - col_sup <- NULL - } else if (name %in% scores) { - skill <- skill_metrics[[name]] - display_name <- toupper(strsplit(name, "_")[[1]][1]) - brks <- seq(0, 1, by = 0.1) - colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) - cols <- colorbar[1:(length(colorbar) - 1)] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "enssprerr") { - skill <- skill_metrics[[name]] - display_name <- "Spread-to-Error Ratio" - brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) - colorbar <- clim.colors(length(brks), diverging_palette) - cols <- colorbar[1:length(colorbar) - 1] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "mean_bias") { - skill <- skill_metrics[[name]] - display_name <- "Mean Bias" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) - brks <- max_value * seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- colorbar[length(colorbar)] + if ("forecast_ensemble_mean" %in% plots) { + if (!is.null(data$fcst)) { + if (is.null(recipe$Analysis$Workflow$Visualization$mask_ens)) { + recipe$Analysis$Workflow$Visualization$mask_ens <- FALSE } - options(bitmapType = "cairo") - # Reorder dimensions - skill <- Reorder(skill, c("time", "longitude", "latitude")) - # If the significance has been requested and the variable has it, - # retrieve it and reorder its dimensions. 
-      significance_name <- paste0(name, "_significance")
-      if ((significance) && (significance_name %in% names(skill_metrics))) {
-        significance_name <- paste0(name, "_significance")
-        skill_significance <- skill_metrics[[significance_name]]
-        skill_significance <- Reorder(skill_significance, c("time",
-                                                            "longitude",
-                                                            "latitude"))
-        # Split skill significance into list of lists, along the time dimension
-        # to avoid overlapping of significance dots.
-        skill_significance <- ClimProjDiags::ArrayToList(skill_significance,
-                                                         dim = 'time',
-                                                         level = "sublist",
-                                                         names = "dots")
-      } else {
-        skill_significance <- NULL
+      if (is.null(recipe$Analysis$Workflow$Visualization$dots)) {
+        recipe$Analysis$Workflow$Visualization$dots <- FALSE
+      }
+      # Plot without mask or dots
+      if ((recipe$Analysis$Workflow$Visualization$mask_ens
+           %in% c('both', FALSE)) ||
+          (recipe$Analysis$Workflow$Visualization$dots
+           %in% c('both', FALSE))) {
+        plot_ensemble_mean(recipe, data$fcst, outdir,
+                           mask = NULL, dots = NULL,
+                           output_conf = output_conf)
+      }
+      # Plots with masked ensemble mean
+      if (recipe$Analysis$Workflow$Visualization$mask_ens %in%
+          c('both', TRUE)) {
+        if (is.null(skill_metrics)) {
+          error(recipe$Run$logger,
+                paste0("For the forecast ensemble mean plot, skill_metrics ",
+                       "need to be provided to be masked."))
+        } else if (!('enscorr' %in% names(skill_metrics))) {
+          error(recipe$Run$logger,
+                paste0("For the forecast ensemble mean plot, the enscorr metric ",
+                       "needs to be provided to be masked."))
+        } else {
+          plot_ensemble_mean(recipe, data$fcst,
+                             mask = skill_metrics$enscorr,
+                             dots = NULL,
+                             outdir, output_conf = output_conf)
+        }
+      }
+      # Plots with dots where the ensemble mean correlation is negative
+      if (recipe$Analysis$Workflow$Visualization$dots %in% c('both', TRUE)) {
+        if (is.null(skill_metrics)) {
+          error(recipe$Run$logger,
+                paste0("For the forecast ensemble mean plot, skill_metrics ",
+                       "need to be provided for the dots"))
+        } else if (!('enscorr' %in% names(skill_metrics))) {
+          error(recipe$Run$logger,
+                paste0("For the forecast ensemble mean plot, the enscorr metric ",
+                       "needs to be provided for the dots"))
+        } else {
+          plot_ensemble_mean(recipe, data$fcst,
+                             mask = NULL,
+                             dots = skill_metrics$enscorr,
+                             outdir, output_conf = output_conf)
+        }
      }
-      # Define output file name and titles
-      outfile <- paste0(outdir, name, ".png")
-      toptitle <- paste(display_name, "-", data_cube$Variable$varName,
-                        "-", system_name, "-", init_month, hcst_period)
-      months <- unique(lubridate::month(data_cube$Dates$start,
-                                        label = T, abb = F))
-      titles <- as.vector(months)
-      # Plot
-      suppressWarnings(
-        PlotLayout(PlotEquiMap, c('longitude', 'latitude'),
-                   asplit(skill, MARGIN=1), # Splitting array into a list
-                   longitude, latitude,
-                   special_args = skill_significance,
-                   dot_symbol = 20,
-                   toptitle = toptitle,
-                   title_scale = 0.6,
-                   titles = titles,
-                   filled.continents=F,
-                   brks = brks,
-                   cols = cols,
-                   col_inf = col_inf,
-                   col_sup = col_sup,
-                   fileout = outfile,
-                   bar_label_digits = 3,
-                   bar_extra_margin = rep(0.9, 4),
-                   bar_label_scale = 1.5,
-                   axes_label_scale = 1.3)
-      )
-    }
-  }
-  info(recipe$Run$logger,
-       "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####")
-}
-
-plot_ensemble_mean <- function(recipe, archive, fcst, outdir) {
-
-  ## TODO: Add 'anomaly' to plot title
-  # Abort if frequency is daily
-  if (recipe$Analysis$Variables$freq == "daily_mean") {
-    stop("Visualization functions not yet implemented for daily data.")
-  }
-
-  latitude <- fcst$lat
-  longitude <- fcst$lon
-  system_name <-
archive$System[[recipe$Analysis$Datasets$System$name]]$name - variable <- recipe$Analysis$Variables$name - units <- attr(fcst$Variable, "variable")$units - start_date <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - # Compute ensemble mean - ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') - # Drop extra dims, add time dim if missing: - ensemble_mean <- drop(ensemble_mean) - - if (!("time" %in% names(dim(ensemble_mean)))) { - dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) - } - if (!'syear' %in% names(dim(ensemble_mean))) { - ensemble_mean <- Reorder(ensemble_mean, c("time", - "longitude", - "latitude")) - } else { - ensemble_mean <- Reorder(ensemble_mean, c("syear", - "time", - "longitude", - "latitude")) - } - ## TODO: Redefine column colors, possibly depending on variable - if (variable == 'prlr') { - palette = "BrBG" - rev = F - } else { - palette = "RdBu" - rev = T - } - # Define brks, centered on in the case of anomalies - ## - if (grepl("anomaly", - attr(fcst$Variable, "variable")$long_name)) { - variable <- paste(variable, "anomaly") - max_value <- max(abs(ensemble_mean)) - ugly_intervals <- seq(-max_value, max_value, max_value/20) - brks <- pretty(ugly_intervals, n = 12, min.n = 8) - } else { - brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) - } - cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) - options(bitmapType = "cairo") - for (i_syear in start_date) { - # Define name of output file and titles - if (length(start_date) == 1) { - i_ensemble_mean <- ensemble_mean - outfile <- paste0(outdir, "forecast_ensemble_mean.png") } else { - i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ] - outfile <- paste0(outdir, "forecast_ensemble_mean_", i_syear, ".png") + error(recipe$Run$logger, + paste0("The forecast ensemble mean plot has been requested, but ", + "there is no fcst element in 'data'")) } - toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, - "- Initialization:", i_syear) - months <- lubridate::month(fcst$Dates$start[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - titles <- as.vector(months) - # Plots - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - i_ensemble_mean, longitude, latitude, - filled.continents = F, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - units = units, - cols = cols, - brks = brks, - fileout = outfile, - bar_label_digits = 4, - bar_extra_margin = rep(0.7, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) - } - info(recipe$Run$logger, - "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") -} - -plot_most_likely_terciles <- function(recipe, archive, - fcst, - probabilities, - outdir) { - - ## TODO: Add 'anomaly' to plot title - # Abort if frequency is daily - if (recipe$Analysis$Variables$freq == "daily_mean") { - stop("Visualization functions not yet implemented for daily data.") - } - - latitude <- fcst$lat - longitude <- fcst$lon - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - variable <- recipe$Analysis$Variables$name - start_date <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - - # Retrieve and rearrange probability bins for the forecast - if (is.null(probabilities$probs_fcst$prob_b33) || - is.null(probabilities$probs_fcst$prob_33_to_66) || - is.null(probabilities$probs_fcst$prob_a66)) { - stop("The forecast tercile probability bins are not present inside ", - "'probabilities', the most likely tercile map cannot be plotted.") - 
} - - probs_fcst <- abind(probabilities$probs_fcst$prob_b33, - probabilities$probs_fcst$prob_33_to_66, - probabilities$probs_fcst$prob_a66, - along = 0) - names(dim(probs_fcst)) <- c("bin", - names(dim(probabilities$probs_fcst$prob_b33))) - - ## TODO: Improve this section - # Drop extra dims, add time dim if missing: - probs_fcst <- drop(probs_fcst) - if (!("time" %in% names(dim(probs_fcst)))) { - dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) - } - if (!'syear' %in% names(dim(probs_fcst))) { - probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) - } else { - probs_fcst <- Reorder(probs_fcst, - c("syear", "time", "bin", "longitude", "latitude")) } - for (i_syear in start_date) { - # Define name of output file and titles - if (length(start_date) == 1) { - i_probs_fcst <- probs_fcst - outfile <- paste0(outdir, "forecast_most_likely_tercile.png") + # Plot Most Likely Terciles + if ("most_likely_terciles" %in% plots) { + if ((!is.null(probabilities)) && (!is.null(data$fcst))) { + # Assign default parameters + if (is.null(recipe$Analysis$Workflow$Visualization$mask_terciles)) { + recipe$Analysis$Workflow$Visualization$mask_terciles <- FALSE + } + if (is.null(recipe$Analysis$Workflow$Visualization$dots)) { + recipe$Analysis$Workflow$Visualization$dots <- FALSE + } + # Plots without masked terciles/dots + if ((recipe$Analysis$Workflow$Visualization$mask_terciles + %in% c('both', FALSE)) || + (recipe$Analysis$Workflow$Visualization$dots + %in% c('both', FALSE))) { + plot_most_likely_terciles(recipe, data$fcst, + probabilities, + mask = NULL, + dots = NULL, + outdir, output_conf = output_conf) + } + # Plots with masked terciles + if (recipe$Analysis$Workflow$Visualization$mask_terciles %in% + c('both', TRUE)) { + if (is.null(skill_metrics)) { + error(recipe$Run$logger, + paste0("For the most likely terciles plot, skill_metrics ", + "need to be provided to be masked.")) + } else if (!('rpss' %in% names(skill_metrics))) { + error(recipe$Run$logger, + paste0("For the most likely terciles plot, rpss metric ", + "need to be provided to be masked")) + } else { + plot_most_likely_terciles(recipe, data$fcst, + probabilities, + mask = skill_metrics$rpss, + dots = NULL, + outdir, output_conf = output_conf) + } + } + # Plots with dotted terciles + if (recipe$Analysis$Workflow$Visualization$dots %in% c('both', TRUE)) { + if (is.null(skill_metrics)) { + error(recipe$Run$logger, + paste0("For the most likely terciles plot, skill_metrics ", + "need to be provided for the dots")) + } else if (!('rpss' %in% names(skill_metrics))) { + error(recipe$Run$logger, + paste0("For the most likely terciles plot, rpss metric ", + "needs to be provided for the dots")) + } else { + plot_most_likely_terciles(recipe, data$fcst, + probabilities, + mask = NULL, + dots = skill_metrics$rpss, + outdir, output_conf = output_conf) + } + } } else { - i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] - outfile <- paste0(outdir, "forecast_most_likely_tercile_", i_syear, ".png") + error(recipe$Run$logger, + paste0("For the most likely terciles plot, both the fcst and the ", + "probabilities must be provided.")) } - toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", - "Initialization:", i_syear) - months <- lubridate::month(fcst$Dates$start[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - ## TODO: Ensure this works for daily and sub-daily cases - titles <- as.vector(months) - - # Plots - ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being 
worked - ## on. - suppressWarnings( - PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), - cat_dim = 'bin', - i_probs_fcst, longitude, latitude, - coast_width = 1.5, - title_scale = 0.6, - legend_scale = 0.8, #cex_bar_titles = 0.6, - toptitle = toptitle, - titles = titles, - fileout = outfile, - bar_label_digits = 2, - bar_scale = rep(0.7, 4), - bar_label_scale = 1.2, - axes_label_scale = 1.3, - triangle_ends = c(F, F), width = 11, height = 8) - ) } - - info(recipe$Run$logger, - "##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") } + diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml new file mode 100644 index 0000000000000000000000000000000000000000..0cd945be4b2d9cf578df057e2f2dc63c8ff241a2 --- /dev/null +++ b/modules/Visualization/output_size.yml @@ -0,0 +1,33 @@ +region: #units inches + EU: #latmin: 20, latmax: 80, lonmin: -20, lonmax: 40 + PlotEquiMap: + skill_metrics: + width: 8.5 + height: 8.5 + axes_label_scale: 0.8 + bar_label_scale: 1.2 + bar_extra_margin: !expr c(2,1,0.5,1) + dot_size: 1.7 + dot_symbol: 4 + font.main: 1 + forecast_ensemble_mean: + width: 8.5 + height: 8.5 + axes_label_scale: 0.8 + bar_label_scale: 1.2 + bar_extra_margin: !expr c(2,1,0.5,1) + dot_symbol: 4 + dot_size: 1.7 + font.main: 1 + most_likely_terciles: + width: 8.5 + height: 8.5 + dot_size: 2 + plot_margin: !expr c(0, 4.1, 4.1, 2.1) + Multipanel: + Robinson: + skill_metrics: {width: 8, height: 5} + NA-EU: #Norht Atlantic European region + Mediterranean: + Global: +# Add other regions diff --git a/modules/Visualization/plot_data.R b/modules/Visualization/plot_data.R new file mode 100644 index 0000000000000000000000000000000000000000..910b40fa16c6c912649101c9358fbe1a1973a932 --- /dev/null +++ b/modules/Visualization/plot_data.R @@ -0,0 +1,11 @@ +plot_data <- function(recipe, + data, + skill_metrics = NULL, + probabilities = NULL, + significance = F) { + warning(paste0("The function plot_data() has been renamed to: ", + "'Visualization()'. The name 'plot_data()' will be ", + "deprecated in the next release. Please change your scripts ", + "accordingly.")) + return(Visualization(recipe, data, skill_metrics, probabilities, significance)) +} diff --git a/modules/Visualization/tmp/PlotCombinedMap.R b/modules/Visualization/tmp/PlotCombinedMap.R deleted file mode 100644 index a7b5fc9701765a9969a29ff27373405a9a89198e..0000000000000000000000000000000000000000 --- a/modules/Visualization/tmp/PlotCombinedMap.R +++ /dev/null @@ -1,608 +0,0 @@ -#'Plot Multiple Lon-Lat Variables In a Single Map According to a Decision Function -#'@description Plot a number a two dimensional matrices with (longitude, latitude) dimensions on a single map with the cylindrical equidistant latitude and longitude projection. -#'@author Nicolau Manubens, \email{nicolau.manubens@bsc.es} -#'@author Veronica Torralba, \email{veronica.torralba@bsc.es} -#' -#'@param maps List of matrices to plot, each with (longitude, latitude) dimensions, or 3-dimensional array with the dimensions (longitude, latitude, map). Dimension names are required. -#'@param lon Vector of longitudes. Must match the length of the corresponding dimension in 'maps'. -#'@param lat Vector of latitudes. Must match the length of the corresponding dimension in 'maps'. -#'@param map_select_fun Function that selects, for each grid point, which value to take among all the provided maps. 
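Editor's aside (not part of the patch): the output_size.yml added above is keyed by region name and plot type. As a hedged illustration of how those entries are consumed, the snippet below mirrors the read_yaml() lookup used in the refactored Visualization(); 'EU' is the region defined in the file, and the !expr entries are evaluated into numeric vectors when eval.exp = TRUE is passed, as in that call.

```r
# Sketch only: inspecting the per-region plot settings shipped in output_size.yml.
library(yaml)
output_conf <- read_yaml("modules/Visualization/output_size.yml",
                         eval.exp = TRUE)$region
eu_conf <- output_conf[["EU"]]
str(eu_conf$PlotEquiMap$skill_metrics)
# width/height in inches, axes_label_scale, bar_extra_margin = c(2, 1, 0.5, 1), ...
```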
This function receives as input a vector of values for a same grid point for all the provided maps, and must return a single selected value (not its index!) or NA. For example, the \code{min} and \code{max} functions are accepted. -#'@param display_range Range of values to be displayed for all the maps. This must be a numeric vector c(range min, range max). The values in the parameter 'maps' can go beyond the limits specified in this range. If the selected value for a given grid point (according to 'map_select_fun') falls outside the range, it will be coloured with 'col_unknown_map'. -#'@param map_dim Optional name for the dimension of 'maps' along which the multiple maps are arranged. Only applies when 'maps' is provided as a 3-dimensional array. Takes the value 'map' by default. -#'@param brks Colour levels to be sent to PlotEquiMap. This parameter is optional and adjusted automatically by the function. -#'@param cols List of vectors of colours to be sent to PlotEquiMap for the colour bar of each map. This parameter is optional and adjusted automatically by the function (up to 5 maps). The colours provided for each colour bar will be automatically interpolated to match the number of breaks. Each item in this list can be named, and the name will be used as title for the corresponding colour bar (equivalent to the parameter 'bar_titles'). -#'@param col_unknown_map Colour to use to paint the grid cells for which a map is not possible to be chosen according to 'map_select_fun' or for those values that go beyond 'display_range'. Takes the value 'white' by default. -#'@param mask Optional numeric array with dimensions (latitude, longitude), with values in the range [0, 1], indicating the opacity of the mask over each grid point. Cells with a 0 will result in no mask, whereas cells with a 1 will result in a totally opaque superimposed pixel coloured in 'col_mask'. -#'@param col_mask Colour to be used for the superimposed mask (if specified in 'mask'). Takes the value 'grey' by default. -#'@param dots Array of same dimensions as 'var' or with dimensions -#' c(n, dim(var)), where n is the number of dot/symbol layers to add to the -#' plot. A value of TRUE at a grid cell will draw a dot/symbol on the -#' corresponding square of the plot. By default all layers provided in 'dots' -#' are plotted with dots, but a symbol can be specified for each of the -#' layers via the parameter 'dot_symbol'. -#'@param bar_titles Optional vector of character strings providing the titles to be shown on top of each of the colour bars. -#'@param legend_scale Scale factor for the size of the colour bar labels. Takes 1 by default. -#'@param cex_bar_titles Scale factor for the sizes of the bar titles. Takes 1.5 by default. -#'@param fileout File where to save the plot. If not specified (default) a graphics device will pop up. Extensions allowed: eps/ps, jpeg, png, pdf, bmp and tiff -#'@param width File width, in the units specified in the parameter size_units (inches by default). Takes 8 by default. -#'@param height File height, in the units specified in the parameter size_units (inches by default). Takes 5 by default. -#'@param size_units Units of the size of the device (file or window) to plot in. Inches ('in') by default. See ?Devices and the creator function of the corresponding device. -#'@param res Resolution of the device (file or window) to plot in. See ?Devices and the creator function of the corresponding device. -#'@param drawleg Where to draw the common colour bar. 
Can take values TRUE, -#' FALSE or:\cr -#' 'up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N'\cr -#' 'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr -#' 'right', 'r', 'R', 'east', 'e', 'E'\cr -#' 'left', 'l', 'L', 'west', 'w', 'W' -#'@param ... Additional parameters to be passed on to \code{PlotEquiMap}. - -#'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} -#' -#'@importFrom s2dv PlotEquiMap ColorBar -#'@importFrom maps map -#'@importFrom graphics box image layout mtext par plot.new -#'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff -#'@examples -#'# Simple example -#'x <- array(1:(20 * 10), dim = c(lat = 10, lon = 20)) / 200 -#'a <- x * 0.6 -#'b <- (1 - x) * 0.6 -#'c <- 1 - (a + b) -#'lons <- seq(0, 359.5, length = 20) -#'lats <- seq(-89.5, 89.5, length = 10) -#'PlotCombinedMap(list(a, b, c), lons, lats, -#' toptitle = 'Maximum map', -#' map_select_fun = max, -#' display_range = c(0, 1), -#' bar_titles = paste('% of belonging to', c('a', 'b', 'c')), -#' brks = 20, width = 10, height = 8) -#' -#'Lon <- c(0:40, 350:359) -#'Lat <- 51:26 -#'data <- rnorm(51 * 26 * 3) -#'dim(data) <- c(map = 3, lon = 51, lat = 26) -#'mask <- sample(c(0,1), replace = TRUE, size = 51 * 26) -#'dim(mask) <- c(lat = 26, lon = 51) -#'PlotCombinedMap(data, lon = Lon, lat = Lat, map_select_fun = max, -#' display_range = range(data), mask = mask, -#' width = 12, height = 8) -#' -#'@export -PlotCombinedMap <- function(maps, lon, lat, - map_select_fun, display_range, - map_dim = 'map', - brks = NULL, cols = NULL, - col_unknown_map = 'white', - mask = NULL, col_mask = 'grey', - dots = NULL, - bar_titles = NULL, legend_scale = 1, - cex_bar_titles = 1.5, - plot_margin = NULL, bar_margin = rep(0, 4), - fileout = NULL, width = 8, height = 5, - size_units = 'in', res = 100, drawleg = T, - ...) { - args <- list(...) 
- - # If there is any filenames to store the graphics, process them - # to select the right device - if (!is.null(fileout)) { - deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, - units = size_units, res = res) - saveToFile <- deviceInfo$fun - fileout <- deviceInfo$files - } - - # Check probs - error <- FALSE - if (is.list(maps)) { - if (length(maps) < 1) { - stop("Parameter 'maps' must be of length >= 1 if provided as a list.") - } - check_fun <- function(x) { - is.numeric(x) && (length(dim(x)) == 2) - } - if (!all(sapply(maps, check_fun))) { - error <- TRUE - } - ref_dims <- dim(maps[[1]]) - equal_dims <- all(sapply(maps, function(x) identical(dim(x), ref_dims))) - if (!equal_dims) { - stop("All arrays in parameter 'maps' must have the same dimension ", - "sizes and names when 'maps' is provided as a list of arrays.") - } - num_maps <- length(maps) - maps <- unlist(maps) - dim(maps) <- c(ref_dims, map = num_maps) - map_dim <- 'map' - } - if (!is.numeric(maps)) { - error <- TRUE - } - if (is.null(dim(maps))) { - error <- TRUE - } - if (length(dim(maps)) != 3) { - error <- TRUE - } - if (error) { - stop("Parameter 'maps' must be either a numeric array with 3 dimensions ", - " or a list of numeric arrays of the same size with the 'lon' and ", - "'lat' dimensions.") - } - dimnames <- names(dim(maps)) - - # Check map_dim - if (is.character(map_dim)) { - if (is.null(dimnames)) { - stop("Specified a dimension name in 'map_dim' but no dimension names provided ", - "in 'maps'.") - } - map_dim <- which(dimnames == map_dim) - if (length(map_dim) < 1) { - stop("Dimension 'map_dim' not found in 'maps'.") - } else { - map_dim <- map_dim[1] - } - } else if (!is.numeric(map_dim)) { - stop("Parameter 'map_dim' must be either a numeric value or a ", - "dimension name.") - } - if (length(map_dim) != 1) { - stop("Parameter 'map_dim' must be of length 1.") - } - map_dim <- round(map_dim) - - # Work out lon_dim and lat_dim - lon_dim <- NULL - if (!is.null(dimnames)) { - lon_dim <- which(dimnames %in% c('lon', 'longitude'))[1] - } - if (length(lon_dim) < 1) { - lon_dim <- (1:3)[-map_dim][1] - } - lon_dim <- round(lon_dim) - - lat_dim <- NULL - if (!is.null(dimnames)) { - lat_dim <- which(dimnames %in% c('lat', 'latitude'))[1] - } - if (length(lat_dim) < 1) { - lat_dim <- (1:3)[-map_dim][2] - } - lat_dim <- round(lat_dim) - - # Check lon - if (!is.numeric(lon)) { - stop("Parameter 'lon' must be a numeric vector.") - } - if (length(lon) != dim(maps)[lon_dim]) { - stop("Parameter 'lon' does not match the longitude dimension in 'maps'.") - } - - # Check lat - if (!is.numeric(lat)) { - stop("Parameter 'lat' must be a numeric vector.") - } - if (length(lat) != dim(maps)[lat_dim]) { - stop("Parameter 'lat' does not match the longitude dimension in 'maps'.") - } - - # Check map_select_fun - if (is.numeric(map_select_fun)) { - if (length(dim(map_select_fun)) != 2) { - stop("Parameter 'map_select_fun' must be an array with dimensions ", - "'lon' and 'lat' if provided as an array.") - } - if (!identical(dim(map_select_fun), dim(maps)[-map_dim])) { - stop("The dimensions 'lon' and 'lat' in the 'map_select_fun' array must ", - "have the same size, name and order as in the 'maps' parameter.") - } - } - if (!is.function(map_select_fun)) { - stop("The parameter 'map_select_fun' must be a function or a numeric array.") - } - - # Check display_range - if (!is.numeric(display_range) || length(display_range) != 2) { - stop("Parameter 'display_range' must be a numeric vector of length 2.") - } - - # 
Check brks - if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { - num_brks <- 5 - if (is.numeric(brks)) { - num_brks <- brks - } - brks <- seq(from = display_range[1], to = display_range[2], length.out = num_brks) - } - if (!is.numeric(brks)) { - stop("Parameter 'brks' must be a numeric vector.") - } - - # Check cols - col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), - c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), - c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), - c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), - c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) - if (is.null(cols)) { - if (length(col_sets) >= dim(maps)[map_dim]) { - chosen_sets <- 1:(dim(maps)[map_dim]) - chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) - } else { - chosen_sets <- array(1:length(col_sets), dim(maps)[map_dim]) - } - cols <- col_sets[chosen_sets] - } else { - if (!is.list(cols)) { - stop("Parameter 'cols' must be a list of character vectors.") - } - if (!all(sapply(cols, is.character))) { - stop("Parameter 'cols' must be a list of character vectors.") - } - if (length(cols) != dim(maps)[map_dim]) { - stop("Parameter 'cols' must be a list of the same length as the number of ", - "maps in 'maps'.") - } - } - for (i in 1:length(cols)) { - if (length(cols[[i]]) != (length(brks) - 1)) { - cols[[i]] <- colorRampPalette(cols[[i]])(length(brks) - 1) - } - } - - # Check bar_titles - if (is.null(bar_titles)) { - if (!is.null(names(cols))) { - bar_titles <- names(cols) - } else { - bar_titles <- paste0("Map ", 1:length(cols)) - } - } else { - if (!is.character(bar_titles)) { - stop("Parameter 'bar_titles' must be a character vector.") - } - if (length(bar_titles) != length(cols)) { - stop("Parameter 'bar_titles' must be of the same length as the number of ", - "maps in 'maps'.") - } - } - - # Check legend_scale - if (!is.numeric(legend_scale)) { - stop("Parameter 'legend_scale' must be numeric.") - } - - # Check col_unknown_map - if (!is.character(col_unknown_map)) { - stop("Parameter 'col_unknown_map' must be a character string.") - } - - # Check col_mask - if (!is.character(col_mask)) { - stop("Parameter 'col_mask' must be a character string.") - } - - # Check mask - if (!is.null(mask)) { - if (!is.numeric(mask)) { - stop("Parameter 'mask' must be numeric.") - } - if (length(dim(mask)) != 2) { - stop("Parameter 'mask' must have two dimensions.") - } - if ((dim(mask)[1] != dim(maps)[lat_dim]) || - (dim(mask)[2] != dim(maps)[lon_dim])) { - stop("Parameter 'mask' must have dimensions c(lat, lon).") - } - } - # Check dots - if (!is.null(dots)) { - if (length(dim(dots)) != 2) { - stop("Parameter 'mask' must have two dimensions.") - } - if ((dim(dots)[1] != dim(maps)[lat_dim]) || - (dim(dots)[2] != dim(maps)[lon_dim])) { - stop("Parameter 'mask' must have dimensions c(lat, lon).") - } - } - - #---------------------- - # Identify the most likely map - #---------------------- - brks_norm <- seq(0, 1, length.out = length(brks)) - if (is.function(map_select_fun)) { - range_width <- display_range[2] - display_range[1] - ml_map <- apply(maps, c(lat_dim, lon_dim), function(x) { - if (any(is.na(x))) { - res <- NA - } else { - res <- which(x == map_select_fun(x)) - if (length(res) > 0) { - res <- res[1] - if (map_select_fun(x) < display_range[1] || - map_select_fun(x) > display_range[2]) { - res <- -0.5 - } else { - res <- res + (map_select_fun(x) - display_range[1]) / range_width - if (map_select_fun(x) == display_range[1]) { - res <- res + 
brks_norm[2] / (num_brks * 2) - } - } - } else { - res <- -0.5 - } - } - res - }) - } else { - stop("Providing 'map_select_fun' as array not implemented yet.") - ml_map <- map_select_fun - } - nmap <- dim(maps)[map_dim] - nlat <- length(lat) - nlon <- length(lon) - - #---------------------- - # Set latitudes from minimum to maximum - #---------------------- - if (lat[1] > lat[nlat]){ - lat <- lat[nlat:1] - indices <- list(nlat:1, TRUE) - ml_map <- do.call("[", c(list(x = ml_map), indices)) - if (!is.null(mask)){ - mask <- mask[nlat:1, ] - } - if (!is.null(dots)){ - dots <- dots[nlat:1,] - } - } - - #---------------------- - # Set layout and parameters - #---------------------- - # Open connection to graphical device - if (!is.null(fileout)) { - saveToFile(fileout) - } else if (names(dev.cur()) == 'null device') { - dev.new(units = size_units, res = res, width = width, height = height) - } - #NOTE: I think plot.new() is not necessary in any case. -# plot.new() - par(font.main = 1) - # If colorbars need to be plotted, re-define layout. - if (drawleg) { - layout(matrix(c(rep(1, nmap),2:(nmap + 1)), 2, nmap, byrow = TRUE), heights = c(6, 1.5)) - } - - #---------------------- - # Set colors and breaks and then PlotEquiMap - #---------------------- - tcols <- c(col_unknown_map, cols[[1]]) - for (k in 2:nmap) { - tcols <- append(tcols, c(col_unknown_map, cols[[k]])) - } - - tbrks <- c(-1, brks_norm + rep(1:nmap, each = length(brks))) - - if (is.null(plot_margin)) { - plot_margin <- c(5, 4, 4, 2) + 0.1 # default of par()$mar - } - - PlotEquiMap(var = ml_map, lon = lon, lat = lat, - brks = tbrks, cols = tcols, drawleg = FALSE, - filled.continents = FALSE, dots = dots, mar = plot_margin, ...) - - #---------------------- - # Add overplot on top - #---------------------- - if (!is.null(mask)) { - dims_mask <- dim(mask) - latb <- sort(lat, index.return = TRUE) - dlon <- lon[2:dims_mask[2]] - lon[1:(dims_mask[2] - 1)] - wher <- which(dlon > (mean(dlon) + 1)) - if (length(wher) > 0) { - lon[(wher + 1):dims_mask[2]] <- lon[(wher + 1):dims_mask[2]] - 360 - } - lonb <- sort(lon, index.return = TRUE) - - cols_mask <- sapply(seq(from = 0, to = 1, length.out = 10), - function(x) adjustcolor(col_mask, alpha.f = x)) - image(lonb$x, latb$x, t(mask)[lonb$ix, latb$ix], - axes = FALSE, col = cols_mask, - breaks = seq(from = 0, to = 1, by = 0.1), - xlab='', ylab='', add = TRUE, xpd = TRUE) - if (!exists('coast_color')) { - coast_color <- 'black' - } - if (min(lon) < 0) { - map('world', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon -180 to 180). - } else { - map('world2', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon 0 to 360). - } - box() - } - - #---------------------- - # Add colorbars - #---------------------- - if ('toptitle' %in% names(args)) { - size_title <- 1 - if ('title_scale' %in% names(args)) { - size_title <- args[['title_scale']] - } - old_mar <- par('mar') - old_mar[3] <- old_mar[3] - (2 * size_title + 1) - par(mar = old_mar) - } - - if (drawleg) { - for (k in 1:nmap) { - ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, - draw_separators = TRUE, extra_margin = c(2, 0, 2, 0), - label_scale = legend_scale * 1.5) - if (!is.null(bar_titles)) { - mtext(bar_titles[[k]], 3, line = -3, cex = cex_bar_titles) - } - #TODO: Change to below code. Plot title together. extra_margin needs to be adjusted. 
-# ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, -# draw_separators = TRUE, extra_margin = c(1, 0, 1, 0), -# label_scale = legend_scale * 1.5, title = bar_titles[[k]], title_scale = cex_bar_titles) - } - } - - # If the graphic was saved to file, close the connection with the device - if (!is.null(fileout)) dev.off() -} - -# Color bar for PlotMostLikelyQuantileMap -multi_ColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, - bar_limits = NULL, var_limits = NULL, - triangle_ends = NULL, plot = TRUE, - draw_separators = FALSE, - bar_titles = NULL, title_scale = 1, label_scale = 1, extra_margin = rep(0, 4), - ...) { - - minimum_value <- ceiling(1 / nmap * 10 * 1.1) * 10 - display_range = c(minimum_value, 100) - - # Check brks - if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { - num_brks <- 5 - if (is.numeric(brks)) { - num_brks <- brks - } - brks <- seq(from = display_range[1], to = display_range[2], length.out = num_brks) - } - if (!is.numeric(brks)) { - stop("Parameter 'brks' must be a numeric vector.") - } - # Check cols - col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), - c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), - c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), - c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), - c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) - if (is.null(cols)) { - if (length(col_sets) >= nmap) { - chosen_sets <- 1:nmap - chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) - } else { - chosen_sets <- array(1:length(col_sets), nmap) - } - cols <- col_sets[chosen_sets] - } else { - if (!is.list(cols)) { - stop("Parameter 'cols' must be a list of character vectors.") - } - if (!all(sapply(cols, is.character))) { - stop("Parameter 'cols' must be a list of character vectors.") - } - if (length(cols) != dim(maps)[map_dim]) { - stop("Parameter 'cols' must be a list of the same length as the number of ", - "maps in 'maps'.") - } - } - for (i in 1:length(cols)) { - if (length(cols[[i]]) != (length(brks) - 1)) { - cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) - } - } - - # Check bar_titles - if (is.null(bar_titles)) { - if (nmap == 3) { - bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") - } else if (nmap == 5) { - bar_titles <- c("Low (%)", "Below normal (%)", - "Normal (%)", "Above normal (%)", "High (%)") - } else { - bar_titles <- paste0("Cat. ", 1:nmap, " (%)") - } - } - - if (plot) { - for (k in 1:nmap) { - s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, - bar_limits = bar_limits, var_limits = var_limits, - triangle_ends = triangle_ends, plot = TRUE, - draw_separators = draw_separators, - title = bar_titles[[k]], title_scale = title_scale, - label_scale = label_scale, extra_margin = extra_margin) - } - } else { - #TODO: col_inf and col_sup - return(list(brks = brks, cols = cols)) - } - -} - -#TODO: use s2dv:::.SelectDevice and remove this function here? -.SelectDevice <- function(fileout, width, height, units, res) { - # This function is used in the plot functions to check the extension of the - # files where the graphics will be stored and select the right R device to - # save them. - # If the vector of filenames ('fileout') has files with different - # extensions, then it will only accept the first one, changing all the rest - # of the filenames to use that extension. - - # We extract the extension of the filenames: '.png', '.pdf', ... 
- ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout)) - - if (length(ext) != 0) { - # If there is an extension specified, select the correct device - ## units of width and height set to accept inches - if (ext[1] == ".png") { - saveToFile <- function(fileout) { - png(filename = fileout, width = width, height = height, res = res, units = units) - } - } else if (ext[1] == ".jpeg") { - saveToFile <- function(fileout) { - jpeg(filename = fileout, width = width, height = height, res = res, units = units) - } - } else if (ext[1] %in% c(".eps", ".ps")) { - saveToFile <- function(fileout) { - postscript(file = fileout, width = width, height = height) - } - } else if (ext[1] == ".pdf") { - saveToFile <- function(fileout) { - pdf(file = fileout, width = width, height = height) - } - } else if (ext[1] == ".svg") { - saveToFile <- function(fileout) { - svg(filename = fileout, width = width, height = height) - } - } else if (ext[1] == ".bmp") { - saveToFile <- function(fileout) { - bmp(filename = fileout, width = width, height = height, res = res, units = units) - } - } else if (ext[1] == ".tiff") { - saveToFile <- function(fileout) { - tiff(filename = fileout, width = width, height = height, res = res, units = units) - } - } else { - warning("file extension not supported, it will be used '.eps' by default.") - ## In case there is only one filename - fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1]) - ext[1] <- ".eps" - saveToFile <- function(fileout) { - postscript(file = fileout, width = width, height = height) - } - } - # Change filenames when necessary - if (any(ext != ext[1])) { - warning(paste0("some extensions of the filenames provided in 'fileout' are not ", ext[1],". The extensions are being converted to ", ext[1], ".")) - fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout) - } - } else { - # Default filenames when there is no specification - warning("there are no extensions specified in the filenames, default to '.eps'") - fileout <- paste0(fileout, ".eps") - saveToFile <- postscript - } - - # return the correct function with the graphical device, and the correct - # filenames - list(fun = saveToFile, files = fileout) -} - diff --git a/modules/test_decadal.R b/modules/test_decadal.R deleted file mode 100644 index 8998cfbe202e2d0a52cb69fe4e0ab59b9730d95f..0000000000000000000000000000000000000000 --- a/modules/test_decadal.R +++ /dev/null @@ -1,30 +0,0 @@ - -source("modules/Loading/Loading_decadal.R") -source("modules/Calibration/Calibration.R") -source("modules/Skill/Skill.R") -source("modules/Saving/Saving.R") -source("modules/Visualization/Visualization.R") - -recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml" -recipe <- prepare_outputs(recipe_file) -# archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive - -# Load datasets -data <- load_datasets(recipe) - -# Calibrate datasets -calibrated_data <- calibrate_datasets(recipe, data) - -# Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, calibrated_data) - -# Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, calibrated_data) - -# Export all data to netCDF -save_data(recipe, calibrated_data, skill_metrics, probabilities) - -# Plot data -plot_data(recipe, calibrated_data, skill_metrics, probabilities, - significance = T) - diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R deleted file mode 100644 index b8541488c8540e61c694c42a0f36be60595699a9..0000000000000000000000000000000000000000 --- 
a/modules/test_seasonal.R +++ /dev/null @@ -1,25 +0,0 @@ -source("modules/Loading/Loading.R") -source("modules/Calibration/Calibration.R") -source("modules/Anomalies/Anomalies.R") -source("modules/Skill/Skill.R") -source("modules/Saving/Saving.R") -source("modules/Visualization/Visualization.R") - -recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml" -recipe <- prepare_outputs(recipe_file) - -# Load datasets -data <- load_datasets(recipe) -# Calibrate datasets -calibrated_data <- calibrate_datasets(recipe, data) -# Compute anomalies -calibrated_data <- compute_anomalies(recipe, calibrated_data) -# Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, calibrated_data) -# Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, calibrated_data) -# Export all data to netCDF -save_data(recipe, calibrated_data, skill_metrics, probabilities) -# Plot data -plot_data(recipe, calibrated_data, skill_metrics, probabilities, - significance = T) diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml similarity index 78% rename from modules/Loading/testing_recipes/recipe_decadal.yml rename to recipes/atomic_recipes/recipe_decadal.yml index 986578f7cc7a74e44604c83fb6080ada63637406..26312b34d1127af7585fb51c9348763e47643a79 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/recipes/atomic_recipes/recipe_decadal.yml @@ -8,8 +8,8 @@ Analysis: freq: monthly_mean Datasets: System: - name: EC-Earth3-i4 #CanESM5 - member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' + name: HadGEM3-GC31-MM #EC-Earth3-i4 #CanESM5 + member: r1i1p1f2,r2i1p1f2,r3i1p1f2 #'all' Multimodel: no Reference: name: ERA5 #JRA-55 @@ -19,7 +19,7 @@ Analysis: hcst_end: 1993 # season: 'Annual' ftime_min: 2 - ftime_max: 14 + ftime_max: 24 Region: latmin: 10 #-90 latmax: 20 #90 @@ -32,14 +32,20 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: method: bias + save: 'all' Skill: metric: RPSS Corr + save: 'all' Probabilities: percentiles: [[1/3, 2/3]] + save: 'all' Indicators: index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E diff --git a/recipes/atomic_recipes/recipe_scorecards_atomic.yml b/recipes/atomic_recipes/recipe_scorecards_atomic.yml new file mode 100644 index 0000000000000000000000000000000000000000..a8adb6c12f6c388c05798734f53c023426995b02 --- /dev/null +++ b/recipes/atomic_recipes/recipe_scorecards_atomic.yml @@ -0,0 +1,70 @@ +Description: + Author: nmilders + Info: scorecards data + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas # Mandatory, str: tas prlr psl sfcWind + freq: monthly_mean # Mandatory, str: either monthly_mean or daily_mean + Datasets: + System: + name: ECMWF-SEAS5 # Mandatory ECMWF-SEAS5, CMCC-SPS3.5, DWD-GCFS2.1 + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. 
+ Time: + sdate: '0101' ## MMDD + fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -90 # Mandatory, int: minimum latitude + latmax: 90 # Mandatory, int: maximum latitude + lonmin: 0 # Mandatory, int: minimum longitude + lonmax: 359.9 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # conservative for prlr, bilinear for tas, psl, sfcWind + type: to_system + Workflow: + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + save: 'none' + Anomalies: + compute: yes + cross_validation: yes + save: 'none' + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # str: Skill metric or list of skill metrics. See docu. + cross_validation: yes + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: 'none' + Indicators: + index: no + Scorecards: + execute: yes # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: -90} + start_months: 'all' + metric: mean_bias enscorr rpss crpss enssprerr + metric_aggregation: 'score' + inf_to_na: TRUE + table_label: + fileout_label: + col1_width: + col2_width: + calculate_diff: FALSE + ncores: 7 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: Scorecards #S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nmilders/scorecards_data/new/to_system/cross_validation/all_cross_val/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/ diff --git a/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml b/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml new file mode 100644 index 0000000000000000000000000000000000000000..ae17f9cd1dbbcf2ada5b47a68219272d983aff6d --- /dev/null +++ b/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml @@ -0,0 +1,50 @@ +Description: + Author: nmilders + Info: scorecards data + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: prlr # Mandatory, str: tas prlr psl sfcWind + freq: monthly_mean # Mandatory, str: either monthly_mean or daily_mean + Datasets: + System: + name: ECMWF-SEAS5 # Mandatory, str: system5c3s system21_m1 system35c3s system3_m1-c3s system2_m1 system7c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. 
+ Time: + sdate: '0101' ## MMDD + fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -90 # Mandatory, int: minimum latitude + latmax: 90 # Mandatory, int: maximum latitude + lonmin: 0 # Mandatory, int: minimum longitude + lonmax: 359.9 # Mandatory, int: maximum longitude + Regrid: + method: conservative # conservative for prlr, bilinear for tas, psl, sfcWind + type: to_system + Workflow: + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + Anomalies: + compute: yes + cross_validation: no + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # str: Skill metric or list of skill metrics. See docu. + Probabilities: + percentiles: [[1/3, 2/3], [1/10], [9/10]] # frac: Quantile thresholds. + Indicators: + index: no + ncores: 15 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: Scorecards #S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nmilders/scorecards_data/to_system/cross_validation/tercile_cross_val/ECMWF-SEAS5/prlr/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/ diff --git a/recipes/atomic_recipes/recipe_seasonal_downscaling.yml b/recipes/atomic_recipes/recipe_seasonal_downscaling.yml new file mode 100644 index 0000000000000000000000000000000000000000..c526140d4cb91a3fd15a3e52a6d86b5178581fe6 --- /dev/null +++ b/recipes/atomic_recipes/recipe_seasonal_downscaling.yml @@ -0,0 +1,68 @@ +Description: + Author: V. Agudetse + Description: ECMWF-SEAS5 Downscaling +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '0501' + fcst_year: + hcst_start: '2000' + hcst_end: '2005' + ftime_min: 1 + ftime_max: 1 + Region: + latmin: 34.1 + latmax: 45.1 + lonmin: -12.5 + lonmax: 6.35 + Regrid: + method: + type: none + Workflow: + Anomalies: + compute: yes # yes/no, default yes + cross_validation: no # yes/no, default yes + save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' + Calibration: + method: mse_min + save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + int_method: conservative # regridding method accepted by CDO. + bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn' + log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + nanalogs: # If type analgs. 
Number of analogs to be searched + save: 'all' # 'all'/'none'/'exp_only' + Skill: + metric: BSS10 BSS90 + save: 'all' # 'all'/'none' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' + Visualization: + plots: skill_metrics + multi_panel: no + projection: cylindrical_equidistant + Indicators: + index: no + ncores: 10 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/recipes/atomic_recipes/recipe_seasonal_provenance.yml b/recipes/atomic_recipes/recipe_seasonal_provenance.yml new file mode 100644 index 0000000000000000000000000000000000000000..196b49c92bcbd52582bbf4573ae51591e3096ebc --- /dev/null +++ b/recipes/atomic_recipes/recipe_seasonal_provenance.yml @@ -0,0 +1,57 @@ +Description: + Author: V. Agudetse + '': split version +Analysis: + Horizon: seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5.1 + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: + hcst_start: '2000' + hcst_end: '2006' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 27 + latmax: 48 + lonmin: 0 + lonmax: 359 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + save: + Calibration: + method: bias + save: 'all' + Skill: + metric: RPSS + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /tmp/out-logs/ + code_dir: /home/kinow/Development/r/workspace/sunset/ + filesystem: sample + diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml b/recipes/atomic_recipes/recipe_system5c3s-tas-robinson.yml similarity index 66% rename from modules/Loading/testing_recipes/recipe_system5c3s-tas.yml rename to recipes/atomic_recipes/recipe_system5c3s-tas-robinson.yml index 31ae079d2dfe76c600ecef3083db5943e5f2ae20..f9f7d84e4809c084af1aee715c990006ae762990 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system5c3s-tas-robinson.yml @@ -1,6 +1,6 @@ Description: Author: V. 
Agudetse - + Description: Recipe to test forecast times + robinson projection with SEAS5 Analysis: Horizon: Seasonal Variables: @@ -13,11 +13,11 @@ Analysis: Reference: name: ERA5 Time: - sdate: '0601' + sdate: '0301' fcst_year: '2020' hcst_start: '1993' hcst_end: '2006' - ftime_min: 1 + ftime_min: 2 ftime_max: 3 Region: latmin: -10 @@ -31,17 +31,25 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: method: raw + save: fcst_only Skill: - metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + metric: FRPS + save: all Probabilities: percentiles: [[1/3, 2/3]] + save: all + Visualization: + plots: skill_metrics forecast_ensemble_mean + multi_panel: no + projection: robinson Indicators: index: no Output_format: S2S4E Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml b/recipes/atomic_recipes/recipe_system7c3s-prlr.yml similarity index 73% rename from modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml rename to recipes/atomic_recipes/recipe_system7c3s-prlr.yml index 58030bf3b0697a177d901bbb3b2cbbca0411c779..590d4499d07b1761737f4c0d2f89bec2bb5e30c7 100644 --- a/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-prlr.yml @@ -1,11 +1,12 @@ Description: Author: V. Agudetse - + Description: Analysis of MF System 7 with precipitation Analysis: Horizon: Seasonal Variables: - name: prlr + name: prlr freq: monthly_mean + units: mm Datasets: System: name: Meteo-France-System7 @@ -31,15 +32,21 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: method: mse_min + save: 'all' Skill: - metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + save: 'all' + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles Indicators: index: no - ncores: 1 + ncores: 12 remove_NAs: no Output_format: S2S4E Run: diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml b/recipes/atomic_recipes/recipe_system7c3s-tas.yml similarity index 53% rename from modules/Loading/testing_recipes/recipe_system7c3s-tas.yml rename to recipes/atomic_recipes/recipe_system7c3s-tas.yml index c8d3b5e891de09b5cc1236e1b9c85297fc27f1e3..0b1fef13b55a4b6042ea027609ae491bbd57ad59 100644 --- a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-tas.yml @@ -1,6 +1,6 @@ Description: Author: V. 
Agudetse - + Description: Analysis of MF System 7 with temperature Analysis: Horizon: Seasonal Variables: @@ -15,15 +15,15 @@ Analysis: Time: sdate: '1101' fcst_year: '2020' - hcst_start: '1993' + hcst_start: '2000' hcst_end: '2010' ftime_min: 1 ftime_max: 2 Region: - latmin: -10 - latmax: 10 - lonmin: 0 - lonmax: 20 + latmin: 30 + latmax: 70 + lonmin: -20 + lonmax: 40 Regrid: method: bilinear type: to_system @@ -31,19 +31,27 @@ Analysis: Anomalies: compute: yes # yes/no, default yes cross_validation: yes # yes/no, default yes + save: 'all' # 'all'/'none'/'exp_only'/'fcst_only' Calibration: method: mse_min + save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' Skill: - metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + metric: BSS10 BSS90 + save: 'all' # 'all'/'none' Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + multi_panel: no + projection: lambert_europe Indicators: index: no - ncores: 1 + ncores: 10 remove_NAs: yes Output_format: S2S4E Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/recipes/atomic_recipes/recipe_tas-tos_nadia.yml b/recipes/atomic_recipes/recipe_tas-tos_nadia.yml new file mode 100644 index 0000000000000000000000000000000000000000..466659a228e2a881f8a7418db8bbae099fefb724 --- /dev/null +++ b/recipes/atomic_recipes/recipe_tas-tos_nadia.yml @@ -0,0 +1,55 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas-tos + sic_threshold: 0.15 ## sea ice threshold for tas-tos blending, default = 0.15 + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 + Multimodel: False + Reference: + name: BEST + Time: + sdate: '0101' + #fcst_year: + hcst_start: '2014' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 1 + Region: + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: raw + save: 'none' + Anomalies: + compute: yes + cross_validation: no + save: 'none' + Skill: + metric: mean_bias EnsCorr RPS RPSS CRPS CRPSS enssprerr + cross_validation: no + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'none' + Indicators: + index: no + ncores: 15 + remove_NAs: yes + Output_format: scorecards +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nmilders/scorecards_data/test/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite_tas-tos/ diff --git a/modules/Loading/testing_recipes/recipe_test-logging.yml b/recipes/atomic_recipes/recipe_test-logging.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_test-logging.yml rename to recipes/atomic_recipes/recipe_test-logging.yml diff --git a/modules/Loading/testing_recipes/recipe_test_anomalies.yml b/recipes/atomic_recipes/recipe_test_multivar.yml similarity index 67% rename from modules/Loading/testing_recipes/recipe_test_anomalies.yml rename to recipes/atomic_recipes/recipe_test_multivar.yml index 287f9a98f38786bcc4db37a21eff196d23f7837c..01625d40c13a0a3c9cdc6879ff7736c015c36408 100644 --- a/modules/Loading/testing_recipes/recipe_test_anomalies.yml +++ b/recipes/atomic_recipes/recipe_test_multivar.yml @@ -1,11 
+1,12 @@ Description: Author: V. Agudetse - + Description: Multiple variables in the same atomic recipe Analysis: Horizon: Seasonal Variables: - name: tas + name: tas, prlr freq: monthly_mean + units: {tas: C, prlr: mm} Datasets: System: name: ECMWF-SEAS5 @@ -18,7 +19,7 @@ Analysis: hcst_start: '1999' hcst_end: '2010' ftime_min: 1 - ftime_max: 2 + ftime_max: 1 Region: latmin: -10 latmax: 10 @@ -30,13 +31,23 @@ Analysis: Workflow: Calibration: method: raw + save: 'exp_only' Anomalies: compute: yes cross_validation: yes + save: 'none' Skill: metric: RPS RPSS CRPS CRPSS BSS10 BSS90 EnsCorr mean_bias mean_bias_SS + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' + Visualization: + plots: most_likely_terciles + multi_panel: no + projection: cylindrical_equidistant + dots: both + mask_terciles: both Indicators: index: no ncores: 7 @@ -45,5 +56,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml b/recipes/atomic_recipes/recipe_test_multivar_decadal.yml similarity index 62% rename from modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml rename to recipes/atomic_recipes/recipe_test_multivar_decadal.yml index 38b25d42295e198d714956e11cdf8ffdf006ed12..00563fe2525844898975b308f89f6781ac0f730d 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml +++ b/recipes/atomic_recipes/recipe_test_multivar_decadal.yml @@ -4,27 +4,27 @@ Description: Analysis: Horizon: Decadal Variables: - name: tas - freq: daily_mean + name: tas tos + freq: monthly_mean Datasets: System: name: CanESM5 - member: r1i1p2f1,r2i1p2f1 #'all' + member: r1i1p2f1,r2i1p2f1,r3i1p2f1 #'all' Multimodel: no Reference: name: ERA5 #JRA-55 Time: - fcst_year: - hcst_start: 2014 - hcst_end: 2016 + fcst_year: [2020,2021] + hcst_start: 1990 + hcst_end: 1993 # season: 'Annual' - ftime_min: 0 - ftime_max: 2 + ftime_min: 2 + ftime_max: 14 Region: - latmin: 10 #-90 - latmax: 20 #90 - lonmin: 0 - lonmax: 15 #359.9 + latmin: 10 + latmax: 20 + lonmin: 150 + lonmax: 170 Regrid: method: bilinear type: to_system #to_reference @@ -32,14 +32,20 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: method: bias + save: 'all' Skill: - metric: RPSS + metric: RPSS Corr + save: 'all' Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] + percentiles: [[1/3, 2/3]] + save: 'all' Indicators: index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/recipes/atomic_recipes/recipe_test_multivar_decadal_multipath.yml similarity index 66% rename from modules/Loading/testing_recipes/recipe_decadal_daily.yml rename to recipes/atomic_recipes/recipe_test_multivar_decadal_multipath.yml index 9d404bfa45da3d70620b5f22920067d16f12afe5..a38f81b8dccb75fdee9d67de5f4b47b2c6a64596 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/recipes/atomic_recipes/recipe_test_multivar_decadal_multipath.yml @@ -4,22 +4,22 @@ Description: Analysis: Horizon: Decadal Variables: - name: tas - freq: daily_mean + name: tas pr + freq: monthly_mean Datasets: System: - name: EC-Earth3-i4 
#BCC-CSM2-MR #CanESM5 + name: EC-Earth3-i4 member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' Multimodel: no Reference: - name: ERA5 + name: ERA5 #JRA-55 Time: fcst_year: [2020,2021] - hcst_start: 2016 - hcst_end: 2019 - season: 'Annual' - ftime_min: 3 - ftime_max: 5 + hcst_start: 1990 + hcst_end: 1993 +# season: 'Annual' + ftime_min: 2 + ftime_max: 14 Region: latmin: 10 #-90 latmax: 20 #90 @@ -32,14 +32,20 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: - method: qmap + method: bias + save: 'all' Skill: - metric: RPSS FRPSS EnsCorr + metric: RPSS Corr + save: 'all' Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] + percentiles: [[1/3, 2/3]] + save: 'all' Indicators: index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_testing_nadia.yml b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml similarity index 57% rename from modules/Loading/testing_recipes/recipe_testing_nadia.yml rename to recipes/atomic_recipes/recipe_test_multivar_nadia.yml index e6b2bc02e6a511b114977c7238397d89a7ce5558..50cc62f7bae05e3af1e0af600186b13b183004d2 100644 --- a/modules/Loading/testing_recipes/recipe_testing_nadia.yml +++ b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml @@ -4,26 +4,26 @@ Description: Analysis: Horizon: Seasonal Variables: - name: tas + name: tas tos freq: monthly_mean Datasets: System: name: ECMWF-SEAS5 Multimodel: False Reference: - name: ERA5 + name: BEST Time: - sdate: '1101' + sdate: '0101' fcst_year: - hcst_start: '2010' - hcst_end: '2015' + hcst_start: '1993' + hcst_end: '2016' ftime_min: 1 ftime_max: 6 Region: - latmin: 30 - latmax: 50 - lonmin: -10 - lonmax: 30 + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 Regrid: method: bilinear type: to_system @@ -34,9 +34,9 @@ Analysis: compute: yes cross_validation: yes Skill: - metric: mean_bias EnsCorr RPSS CRPSS EnsSprErr + metric: mean_bias EnsCorr RPS RPSS CRPS CRPSS enssprerr Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + percentiles: [[1/3, 2/3]] Indicators: index: no ncores: 7 @@ -45,5 +45,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + output_dir: /esarchive/scratch/nmilders/scorecards_data/to_system/tas-tos/ECMWF-SEAS5/tas-tos/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/ diff --git a/modules/Loading/testing_recipes/wrong_recipe_example.yml b/recipes/atomic_recipes/wrong_recipe_example.yml similarity index 100% rename from modules/Loading/testing_recipes/wrong_recipe_example.yml rename to recipes/atomic_recipes/wrong_recipe_example.yml diff --git a/recipes/examples/NAO_recipe.yml b/recipes/examples/NAO_recipe.yml new file mode 100644 index 0000000000000000000000000000000000000000..5d4a0cd9159a7d7a84a9c578f3a589a41004b7b5 --- /dev/null +++ b/recipes/examples/NAO_recipe.yml @@ -0,0 +1,60 @@ +Description: + Author: nperez + Info: ECMWF SEAS5 Seasonal Forecast Example recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or 
no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0301' ## MMDD + # fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 2 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 # Mandatory, int: minimum latitude + latmax: 80 # Mandatory, int: maximum latitude + lonmin: -80 # Mandatory, int: minimum longitude + lonmax: 40 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Indices: + NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + save: none + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ + + diff --git a/recipes/examples/Nino_recipe.yml b/recipes/examples/Nino_recipe.yml new file mode 100644 index 0000000000000000000000000000000000000000..779fc5f1a642169ae9bce0a6a2b79927da7a83d7 --- /dev/null +++ b/recipes/examples/Nino_recipe.yml @@ -0,0 +1,61 @@ +Description: + Author: nperez + Info: ECMWF SEAS5 Seasonal Forecast Example recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tos + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0101' ## MMDD + # fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 2 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -90 # Mandatory, int: minimum latitude + latmax: 90 # Mandatory, int: maximum latitude + lonmin: 0 # Mandatory, int: minimum longitude + lonmax: 359.9 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. 
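The Workflow block that follows requests the Niño1+2, Niño3, Niño3.4 and Niño4 indices over this global tos domain. For orientation, the sketch below shows in plain R what a Niño3.4 series amounts to: the area-weighted average of SST anomalies over the region commonly defined as 5°S-5°N, 170°W-120°W. It is not the SUNSET implementation, and the array layout (lon x lat x time, longitudes on a 0-360 grid) and argument names are assumptions.

```r
# Minimal sketch (not the SUNSET Indices code): a Nino3.4 series is the
# area-weighted mean of SST anomalies over 5S-5N, 170W-120W.
# Assumed layout: 'anom' is a [lon, lat, time] array, 'lon' in 0-360 degrees.
nino34_index <- function(anom, lon, lat) {
  lon_sel <- which(lon >= 190 & lon <= 240)   # 170W-120W in the 0-360 convention
  lat_sel <- which(lat >= -5 & lat <= 5)
  w <- matrix(cos(lat[lat_sel] * pi / 180),   # latitude (area) weights
              nrow = length(lon_sel), ncol = length(lat_sel), byrow = TRUE)
  apply(anom[lon_sel, lat_sel, , drop = FALSE], 3,
        function(x) stats::weighted.mean(x, w, na.rm = TRUE))
}

# Toy usage on a random anomaly field (1-degree grid, 12 time steps):
lon <- 0:359; lat <- -90:90
anom <- array(rnorm(length(lon) * length(lat) * 12),
              dim = c(length(lon), length(lat), 12))
round(nino34_index(anom, lon, lat), 2)
```

In the recipe itself this computation is delegated to the Indices entries declared below; the sketch only clarifies what those indices represent.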
+ Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Indices: + Nino1+2: {save: 'all', plot_ts: TRUE, plot_sp: TRUE} + Nino3: {save: 'all', plot_ts: TRUE, plot_sp: TRUE} + Nino3.4: {save: 'all', plot_ts: TRUE, plot_sp: TRUE} + Nino4: {save: 'all', plot_ts: TRUE, plot_sp: TRUE} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + save: none + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: TRUE +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s/outputs + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ diff --git a/modules/Loading/testing_recipes/recipe_test-new-metrics.yml b/recipes/examples/recipe_GRIB_system5_era5.yml similarity index 53% rename from modules/Loading/testing_recipes/recipe_test-new-metrics.yml rename to recipes/examples/recipe_GRIB_system5_era5.yml index b5745292ace8c45258c4e975bc375385005f1fc1..71615afdd2364857fadab774acc6f8f08932cbfc 100644 --- a/modules/Loading/testing_recipes/recipe_test-new-metrics.yml +++ b/recipes/examples/recipe_GRIB_system5_era5.yml @@ -1,5 +1,5 @@ Description: - Author: V. Agudetse + Author: A. Ho Analysis: Horizon: Seasonal @@ -8,39 +8,44 @@ Analysis: freq: monthly_mean Datasets: System: - name: Meteo-France-System7 + name: ECMWF-SEAS5 Multimodel: False Reference: name: ERA5 Time: sdate: '1101' fcst_year: '2020' - hcst_start: '1998' - hcst_end: '2010' + hcst_start: '2000' + hcst_end: '2003' ftime_min: 1 - ftime_max: 2 - Region: - latmin: -10 - latmax: 10 + ftime_max: 3 + Region: #NOT USED + latmin: -90 + latmax: 90 lonmin: 0 - lonmax: 20 + lonmax: 360 Regrid: method: bilinear - type: to_system + type: 'r360x180' #to_system Workflow: Calibration: method: mse_min + Anomalies: + compute: yes + cross_validation: yes Skill: - metric: RMSSS + metric: mean_bias Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: no - ncores: 7 + ncores: 8 remove_NAs: yes - Output_format: S2S4E + Output_format: S2S4E #Scorecards Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + filesystem: mars + diff --git a/recipes/examples/recipe_Nino_decadal.yml b/recipes/examples/recipe_Nino_decadal.yml new file mode 100644 index 0000000000000000000000000000000000000000..6eb9619f86958361244241d1d6e0f7a77aef31f1 --- /dev/null +++ b/recipes/examples/recipe_Nino_decadal.yml @@ -0,0 +1,55 @@ +Description: + Author: Carlos Delgado + Info: Test for NAO indices in decadal prediction +Analysis: + Horizon: Decadal + Variables: + name: psl + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 + member: 'all' + Multimodel: no + Reference: + name: JRA-55 + Time: + fcst_year: + hcst_start: 1996 + hcst_end: 2016 + ftime_min: 3 + ftime_max: 4 + Region: + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 + Regrid: + method: bilinear + type: "r180x90"#to_system + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Indices: + Nino3.4: {save: 'all', plot_ts: TRUE, plot_sp: yes} + Calibration: + method: raw + save: none + Skill: + metric: 
EnsCorr RPSS + save: "all" + Probabilities: + percentiles: [[1/3, 2/3]] + save: none + Indicators: + index: FALSE + ncores: 8 # Optional, int: number of cores, defaults to 1 + remove_NAs: FALSE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nperez/scorecards_data/input_test/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ diff --git a/recipes/examples/recipe_decadal_split.yml b/recipes/examples/recipe_decadal_split.yml new file mode 100644 index 0000000000000000000000000000000000000000..70764027b20671618ac8f9df64eb5e90679adeeb --- /dev/null +++ b/recipes/examples/recipe_decadal_split.yml @@ -0,0 +1,68 @@ +Description: + Author: Carlos Delgado Torres + Info: Test for spliting a decadal recipe (two variables) +Analysis: + Horizon: Decadal + Variables: + - {name: tas, freq: monthly_mean} + - {name: pr, freq: monthly_mean} + Datasets: + System: + name: EC-Earth3-i4, + member: r1i4p1f1,r2i4p1f1 + Multimodel: no + Reference: + name: JRA-55 + Time: + fcst_year: + hcst_start: 2015 # 2015-2016 in dcppA, 2017-2018 in dcppB + hcst_end: 2018 +# season: 'Annual' + ftime_min: 6 # Apr + ftime_max: 8 + Region: + - {latmin: -5, latmax: 5, lonmin: -5, lonmax: 5} + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + save: + Calibration: + method: 'bias' + save: 'all' + Skill: + metric: EnsCorr RPSS + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Visualization: + plots: skill_metrics + multi_panel: yes + Indicators: + index: FALSE + ncores: 8 # Optional, int: number of cores, defaults to 1 + remove_NAs: FALSE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/vagudets/auto-s2s_logs/ + code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/ + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/cdelgado/gitlab/cdelgado_copernicus/ESS_evaluation_tool/main_decadal.R + expid: a5tx ## if left empty, create new exp? + hpc_user: bsc32924 # your hpc username + wallclock: 01:00 # hh:mm + processors_per_job: 8 # use ncores parameter? + platform: nord3v2 # make this constant? + email_notifications: yes # enable/disable email notifications + email_address: carlos.delgado@bsc.es # email address for notifications + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipes/examples/recipe_ecvs_seasonal_oper.yml b/recipes/examples/recipe_ecvs_seasonal_oper.yml new file mode 100644 index 0000000000000000000000000000000000000000..832f36d54b04c688019b75b066dc41360b302288 --- /dev/null +++ b/recipes/examples/recipe_ecvs_seasonal_oper.yml @@ -0,0 +1,73 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: monthly_mean, units: C} + - {name: prlr, freq: monthly_mean, units: mm, flux: no} + Datasets: + System: + - {name: ECMWF-SEAS5.1} # system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. 
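recipe_decadal_split.yml above and the operational ECVs recipe being defined here both rely on YAML lists (multiple variables, systems, references, start dates or regions), which the recipe-splitting feature expands into independent atomic recipes. The sketch below illustrates that idea; it is not the SUNSET splitter, and the field access assumes the list-of-dicts layout used in these example recipes.

```r
# Sketch only: enumerate the atomic combinations implied by a recipe that uses
# lists. The real splitting logic lives in the SUNSET code base; field names
# and the exact YAML layout are assumptions based on the recipes shown here.
library(yaml)

recipe <- read_yaml("recipes/examples/recipe_splitting_example.yml")
an <- recipe$Analysis

combos <- expand.grid(
  variable  = vapply(an$Variables,          function(x) x$name, character(1)),
  system    = vapply(an$Datasets$System,    function(x) x$name, character(1)),
  reference = vapply(an$Datasets$Reference, function(x) x$name, character(1)),
  sdate     = unlist(an$Time$sdate),
  stringsAsFactors = FALSE
)
nrow(combos)  # one atomic recipe per row
```

Each row then maps to one job that can be dispatched in parallel (for example through Autosubmit, as the Run sections of these recipes configure), which is why the comments in the split-style recipes distinguish fields that split from fields that do not.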
+ Time: + sdate: '0801' ## MMDD + fcst_year: '2023' # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss bss10 bss90 + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/nperez/cs_oper/seasonal/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/nperez/git/auto-s2s/ # replace with the directory where your code is + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git/auto-s2s/exec_ecvs_seasonal_oper.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipes/examples/recipe_model_decadal_NAO.yml b/recipes/examples/recipe_model_decadal_NAO.yml new file mode 100644 index 0000000000000000000000000000000000000000..cd1b2bdd1b7d797dbf62f5c9235362de4c2632b6 --- /dev/null +++ b/recipes/examples/recipe_model_decadal_NAO.yml @@ -0,0 +1,55 @@ +Description: + Author: Carlos Delgado + Info: Test for NAO indices in decadal prediction +Analysis: + Horizon: Decadal + Variables: + name: psl + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 + member: 'all' + Multimodel: no + Reference: + name: JRA-55 + Time: + fcst_year: + hcst_start: 1995 + hcst_end: 2016 + ftime_min: 3 + ftime_max: 5 + Region: + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 + Regrid: + method: bilinear + type: "r180x90"#to_system + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Indices: + NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes} + Calibration: + method: raw + save: none + Skill: + metric: EnsCorr RPSS + save: "all" + Probabilities: + percentiles: [[1/3, 2/3]] + save: none + Indicators: + index: FALSE + ncores: 8 # Optional, int: number of cores, defaults to 1 + remove_NAs: FALSE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nperez/scorecards_data/input_test/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ diff --git a/recipes/examples/recipe_prlr_seasonal_oper.yml b/recipes/examples/recipe_prlr_seasonal_oper.yml new file mode 100644 index 0000000000000000000000000000000000000000..fb6d1a1cd039df755abc45da9aae0d4486abdc5c --- /dev/null +++ b/recipes/examples/recipe_prlr_seasonal_oper.yml @@ -0,0 +1,62 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: prlr + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0701' ## MMDD + fcst_year: '2023' # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 # Mandatory, int: minimum latitude + latmax: 80 # Mandatory, int: maximum latitude + lonmin: -20 # Mandatory, int: minimum longitude + lonmax: 40 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. 
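Several operational recipes, including this one, calibrate with method: evmos. As a rough illustration of the kind of member-by-member adjustment such calibration performs (matching the hindcast mean and spread to the reference), here is a simplified base-R toy; it is not the implementation behind the Calibration module, and the real EVMOS formulation differs in detail.

```r
# Toy sketch of a bias-and-spread adjustment in the spirit of EVMOS:
# shift and rescale every hindcast member so the calibrated ensemble matches
# the reference mean and standard deviation. Not the SUNSET/CSTools code.
calibrate_toy <- function(hcst, obs) {   # hcst: members x years, obs: years
  stopifnot(ncol(hcst) == length(obs))
  (hcst - mean(hcst)) * (sd(obs) / sd(as.vector(hcst))) + mean(obs)
}

set.seed(1)
hcst <- matrix(rnorm(25 * 24, mean = 2.0, sd = 0.5), nrow = 25)  # 25 members, 24 years
obs  <- rnorm(24, mean = 1.0, sd = 1.0)
cal  <- calibrate_toy(hcst, obs)
c(mean(cal), sd(as.vector(cal)))  # now close to the reference statistics
```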
+ cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss bss10 bss90 + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/nperez/cs_oper/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ diff --git a/recipes/examples/recipe_prlr_seasonal_units.yml b/recipes/examples/recipe_prlr_seasonal_units.yml new file mode 100644 index 0000000000000000000000000000000000000000..e03428ac44147c68af42d3bdfa0c120fe07d8c75 --- /dev/null +++ b/recipes/examples/recipe_prlr_seasonal_units.yml @@ -0,0 +1,63 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: prlr + freq: monthly_mean + units: mm + Datasets: + System: + name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0601' ## MMDD + fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '2014' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 30 # Mandatory, int: minimum latitude + latmax: 50 # Mandatory, int: maximum latitude + lonmin: -10 # Mandatory, int: minimum longitude + lonmax: 10 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss bss10 bss90 + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
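As in the other recipes, the probability thresholds are written as fractions ('1/3', '9/10') rather than decimals; a YAML parser hands them over as character strings, so at some point they must be evaluated into numeric quantiles. The snippet below is one way to do that in R, shown as an illustration rather than as the parsing SUNSET actually performs.

```r
# Turn the fraction strings from the recipe into numeric quantile thresholds.
# The nested list mirrors the YAML: [[1/3, 2/3], [1/10, 9/10]].
percentiles <- list(c("1/3", "2/3"), c("1/10", "9/10"))

as_fraction <- function(s) {
  parts <- as.numeric(strsplit(s, "/", fixed = TRUE)[[1]])
  if (length(parts) == 2) parts[1] / parts[2] else parts[1]
}

lapply(percentiles, function(set) vapply(set, as_fraction, numeric(1)))
# approx. 0.333 0.667 (terciles) and 0.100 0.900 (extremes)
```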
+ save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/nperez/cs_oper/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ diff --git a/recipes/examples/recipe_scorecards.yml b/recipes/examples/recipe_scorecards.yml new file mode 100644 index 0000000000000000000000000000000000000000..434426d02d499db78c6884c6f6a9322390935526 --- /dev/null +++ b/recipes/examples/recipe_scorecards.yml @@ -0,0 +1,105 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. Agudetse + Info: Test for recipe splitting + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? + - {name: tas, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: ECMWF-SEAS5} + Multimodel: False # single option + Reference: + - {name: ERA5} # multiple references for single model? + Time: + sdate: # list, split + - '0101' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' + #fcst_year: '2020' # list, don't split, handled internally + hcst_start: '1993' # single option + hcst_end: '2016' # single option + ftime_min: 1 # single option + ftime_max: 6 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "global", latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + Regrid: + method: bilinear ## TODO: allow multiple methods? + type: to_system + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: 'none' + Calibration: + method: raw ## TODO: list, split? + save: 'none' + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # list, don't split + cross_validation: yes + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] # list, don't split + save: 'none' + Visualization: + plots: skill_metrics + Indicators: + index: no # ? 
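Many of the example recipes annotate fields as Mandatory or Optional in their comments. Before submitting a long Scorecards run over twelve start dates, a small pre-flight check can catch a recipe that is missing one of those mandatory entries. The function below is a hypothetical illustration of such a check, not the project's own recipe-checking code, and the field list is an assumption taken from the comments in these recipes.

```r
# Hypothetical pre-flight check: fail early if mandatory Time fields are
# missing from a recipe. Field names follow the "Mandatory" comments in the
# example recipes; this is not the SUNSET recipe checker.
library(yaml)

check_time_block <- function(recipe_path) {
  time_block <- read_yaml(recipe_path)$Analysis$Time
  mandatory  <- c("hcst_start", "hcst_end", "ftime_min", "ftime_max")
  missing    <- mandatory[vapply(mandatory,
                                 function(f) is.null(time_block[[f]]),
                                 logical(1))]
  if (length(missing) > 0) {
    stop("Missing mandatory Time fields: ", paste(missing, collapse = ", "))
  }
  invisible(TRUE)
}

# check_time_block("recipes/examples/recipe_scorecards.yml")
```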
+ Scorecards: + execute: yes # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: -90} + start_months: NULL + metric: mean_bias enscorr rpss crpss enssprerr + metric_aggregation: 'score' + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 7 + remove_NAs: no # bool, don't split + Output_format: Scorecards # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nmilders/scorecards_data/test/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/ + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/execute_scorecards_data_loading.R # replace with the path to your script + expid: a6a3 # replace with your EXPID + hpc_user: bsc32878 # replace with your hpc username + wallclock: 03:00 # hh:mm + processors_per_job: 8 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nadia.milders@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipes/recipe_splitting_example.yml b/recipes/examples/recipe_splitting_example.yml similarity index 90% rename from recipes/recipe_splitting_example.yml rename to recipes/examples/recipe_splitting_example.yml index 78a4d18c566bd492ba66f483ae1e80f96b0db1ec..93e5994e9885598fc7c8d4af495b476e7b301565 100644 --- a/recipes/recipe_splitting_example.yml +++ b/recipes/examples/recipe_splitting_example.yml @@ -38,17 +38,24 @@ Analysis: method: bilinear ## TODO: allow multiple methods? type: to_system Workflow: + Anomalies: + compute: no Calibration: method: mse_min ## TODO: list, split? + save: 'none' Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean Indicators: index: no # ? ncores: 7 remove_NAs: yes # bool, don't split - Output_format: S2S4E # string, don't split + Output_format: Scorecards # string, don't split ################################################################################ ## Run CONFIGURATION diff --git a/recipes/examples/recipe_tas_seasonal_oper.yml b/recipes/examples/recipe_tas_seasonal_oper.yml new file mode 100644 index 0000000000000000000000000000000000000000..75918265ba4d12a20226bd69511ef53eb1de1d4e --- /dev/null +++ b/recipes/examples/recipe_tas_seasonal_oper.yml @@ -0,0 +1,62 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. 
See docu. + Time: + sdate: '0601' ## MMDD + fcst_year: '2023' # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 # Mandatory, int: minimum latitude + latmax: 80 # Mandatory, int: maximum latitude + lonmin: -20 # Mandatory, int: minimum longitude + lonmax: 40 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss bss10 bss90 + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/nperez/cs_oper/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ diff --git a/recipes/examples/recipe_tas_seasonal_units.yml b/recipes/examples/recipe_tas_seasonal_units.yml new file mode 100644 index 0000000000000000000000000000000000000000..d2b25321b8b6c43a2842c577d265469ba5131281 --- /dev/null +++ b/recipes/examples/recipe_tas_seasonal_units.yml @@ -0,0 +1,63 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: K + Datasets: + System: + name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0601' ## MMDD + fcst_year: '2023' # Optional, int: Forecast year 'YYYY' + hcst_start: '2009' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 30 # Mandatory, int: minimum latitude + latmax: 50 # Mandatory, int: maximum latitude + lonmin: -10 # Mandatory, int: minimum longitude + lonmax: 10 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. 
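This recipe pins the requested temperature units to K, and the precipitation counterpart above (recipe_prlr_seasonal_units.yml) requests mm. The arithmetic behind that units field is simple; the sketch below spells out the two common conversions, assuming the stored inputs are temperature in kelvin and precipitation as a flux in kg m-2 s-1. It illustrates the conversions only and is not the Units module.

```r
# Sketch of the arithmetic behind the 'units' field (not the Units module).
# Assumed input units: tas in kelvin, prlr as a flux in kg m-2 s-1.
k_to_degc  <- function(tas_k)     tas_k - 273.15
flux_to_mm <- function(prlr_flux) prlr_flux * 86400   # kg m-2 s-1 -> mm per day

k_to_degc(300.15)     # 27 degC
flux_to_mm(2.5e-05)   # 2.16 mm/day
```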
+ cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss bss10 bss90 + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/nperez/cs_oper/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ diff --git a/recipes/seasonal_complex.yml-OUTDATED b/recipes/seasonal_complex.yml-OUTDATED deleted file mode 100644 index 2c27e0b7211a78b1b933336616f9d30197b72869..0000000000000000000000000000000000000000 --- a/recipes/seasonal_complex.yml-OUTDATED +++ /dev/null @@ -1,46 +0,0 @@ -Description: - Author: N.Pérez-Zanón - Info: This is a test to transform s2s4e data-analysis for SEAS5 - -Analysis: - Horizon: Seasonal - Variables: - ECVs: - - {name: tas, freq: monthly_mean} - - {name: tas, freq: daily_mean} - Indicators: - - {name: gdd} - Datasets: - System: - - name: system5c3s - - name: glosea5 - Multimodel: False - Reference: - - name: ERA5 - - name: ERAInterim - Time: - sdate: - fcst_year: 2021 - fcst_month: [07, 08] - fcst_day: 01 - hcst_start: 2000 - hcst_end: 2002 - leadtimemin: 2 - leadtimemax: 4 - Region: - - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - Regrid: - method: bicubic - type: system - Workflow: - Calibration: - method: SBC - Skill: - metric: RPSS - Indicators: - index: False - Output_format: S2S4E - -Run: - Loglevel: INFO - Terminal: True diff --git a/recipes/seasonal_oper.yml b/recipes/seasonal_oper.yml deleted file mode 100644 index a1351e37306778ffaf9ae8867de84b9daf6ec38b..0000000000000000000000000000000000000000 --- a/recipes/seasonal_oper.yml +++ /dev/null @@ -1,68 +0,0 @@ -# -# ___ ___ _ _ _ -# / __| / __| ___ _ __ ___ _ _ __ _ | |_ (_) ___ _ _ __ _ | | -# | (__ \__ \ / _ \ | '_ \ / -_) | '_| / _` | | _| | | / _ \ | ' \ / _` | | | -# \___| |___/ \___/ | .__/ \___| |_| \__,_| \__| |_| \___/ |_||_| \__,_| |_| -# |_| - -################################################################################# -# RECIPE DESCRIPTION -################################################################################# - -Description: - Author: N.Pérez-Zanón # [Optional?/str] -Info: This is a test to transform s2s4e data-analysis for SEAS5 # [Optional?/str] - -################################################################################# -# ANALYSIS CONF -################################################################################# - -Analysis: - Horizon: Seasonal # [Mandatory/str (either seasonal, subseasonal, decadal)] - Variables: - ECVs: # [Mandatory/list of dicts {name: , freq: } or None] - - {name: tas, freq: monthly_mean} - Indicators: # list of strs? - - None - Datasets: - System: - - name: ECMWF-SEAS5 # list of strs - Multimodel: False # boolean, if true system above are aggregated into single multi-model - Reference: # single dict? 
in the future multiple ref can be an asset - - {name: ERA5} # str - Time: - sdate: - fcst_syear: ["2017"] # list of ints or None (case where only hcst is verfied) - fcst_sday: ["0701", "0601"] # int or list of ints with MMDD format - hcst_start: "1993" # int mandatory - hcst_end: "2016" # int mandatory - leadtimemin: 2 # int mandatory - leadtimemax: 4 # int mandatory [units????] - Region: - - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} - Regrid: - method: bilinear # str mandatory - type: to_system # str either to_system, to_reference or CDO-compatible grid mandatory - Data_load: - module: "modules/data_load/seas5.load.R" - Workflow: - Calibration: - method: SBC # str - Skill: - metric: RPSS # str - Indicators: - index: FALSE # bool - Output_format: S2S4E # str - -################################################################################# -# Run CONF -################################################################################# - -Run: - Loglevel: INFO # str - Terminal: TRUE # bool - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ - - diff --git a/recipes/seasonal_oper_atomic.yml-OUTDATED b/recipes/seasonal_oper_atomic.yml-OUTDATED deleted file mode 100644 index 31b2f5d76bd961b423c82ab631c619c9686f18cf..0000000000000000000000000000000000000000 --- a/recipes/seasonal_oper_atomic.yml-OUTDATED +++ /dev/null @@ -1,73 +0,0 @@ -# -# ___ ___ _ _ _ -# / __| / __| ___ _ __ ___ _ _ __ _ | |_ (_) ___ _ _ __ _ | | -# | (__ \__ \ / _ \ | '_ \ / -_) | '_| / _` | | _| | | / _ \ | ' \ / _` | | | -# \___| |___/ \___/ | .__/ \___| |_| \__,_| \__| |_| \___/ |_||_| \__,_| |_| -# |_| - -################################################################################# -# RECIPE DESCRIPTION -################################################################################# - -Description: - Author: Ll. Palma # [Optional?/str] - Info: This is a test of an atomic recipe to calibrate and verify SEAS5 # [Optional?/str] - -################################################################################# -# ANALYSIS CONF -################################################################################# - -Analysis: - Horizon: Seasonal # [Mandatory/str (either seasonal, subseasonal, decadal)] - Variables: - ECVs: - name: tas - freq: monthly_mean - Indicators: - no - Datasets: - System: - name: system5c3s # list of strs - Multimodel: False # boolean, if true system above are aggregated into single multi-model - Reference: # single dict? in the future multiple ref can be an asset - name: era5 # str - Time: - sdate: - fcst_year: no # list of ints or no (case where only hcst is verfied) - fcst_month: ["07"] # int or list of ints with MMDD format - fcst_day: ["01"] # int or list of ints with MMDD format - hcst_start: "1993" # int mandatory - hcst_end: "2016" # int mandatory - leadtimemin: 1 # int mandatory - leadtimemax: 6 # int mandatory [units????] 
- Region: - # FOCUS SADC - latmin: -40 - latmax: 10 - lonmin: 0 - lonmax: 60 - Regrid: - method: conservative # str mandatory - type: to_system # str either system or reference mandatory - Data_load: - module: "modules/data_load/seas5.load.R" - Workflow: - Calibration: - method: SBC # str - Skill: - metric: RPSS # str - Indicators: - index: FALSE # bool - Output_format: S2S4E # str - -################################################################################# -# Run CONF -################################################################################# - -Run: - Loglevel: INFO # str - Terminal: TRUE # bool - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ - - diff --git a/recipes/tests/old_tests/execute_tests.R b/recipes/tests/old_tests/execute_tests.R deleted file mode 100644 index 2fa6a1373a22b95c5572e6e5aaf9372fe83188ef..0000000000000000000000000000000000000000 --- a/recipes/tests/old_tests/execute_tests.R +++ /dev/null @@ -1,44 +0,0 @@ -library(yaml) - -args <- NULL; - -# Function to run tests: -# source_lines("/esarchive/scratch/nperez/git/startR/inst/doc/usecase/ex2_1_timedim.R", -# start = 4, end = 14) -source_lines <- function(file, start, end, ...) { - file.lines <- scan(file, what = character(), skip = start - 1, - nlines = end - start + 1, sep = '\n') - file.lines.collapsed <- paste(file.lines, collapse = '\n') - source(textConnection(file.lines.collapsed), ...) -} - -# ------------------------------------------ -# Section to check recipes that should work: -args[1] <- "recipes/tests/seasonal_testWorkflow1.yml" -source_lines("OperationalCS.R", start = 14, end = 50) -# Calibration method None --> raw data verification -args[1] <- "recipes/tests/seasonal_testWorkflow4.yml" -source_lines("OperationalCS.R", start = 14, end = 50) -# Calibration: None --> raw data verification -args[1] <- "recipes/tests/seasonal_testWorkflow5.yml" -source_lines("OperationalCS.R", start = 14, end = 50) -# Case Skill_1 and Skill_2 when multiple times needed -args[1] <- "recipes/tests/seasonal_testWorkflow7.yml" -source_lines("OperationalCS.R", start = 14, end = 50) -# Indicator -args[1] <- "recipes/tests/seasonal_testWorkflow8.yml" -source_lines("OperationalCS.R", start = 14, end = 50) - -# ------------------------------------------ -# Section to check recipes that should fail: -## This should fail because there is no Horizon: -args[1] <- "recipes/tests/seasonal_testWorkflow2.yml" -source_lines("OperationalCS.R", start = 14, end = 50) - -## This should fail because there are 2 Calibration options: -args[1] <- "recipes/tests/seasonal_testWorkflow3.yml" -source_lines("OperationalCS.R", start = 14, end = 50) - -## This fails because it is not allow repeating the name Skill -args[1] <- "recipes/tests/seasonal_testWorkflow6.yml" -source_lines("OperationalCS.R", start = 14, end = 50) diff --git a/recipes/tests/old_tests/seasonal_testWorkflow1.yml b/recipes/tests/old_tests/seasonal_testWorkflow1.yml deleted file mode 100644 index 3c9e55f67a6e05e7ba971cf34972f728a0187a7e..0000000000000000000000000000000000000000 --- a/recipes/tests/old_tests/seasonal_testWorkflow1.yml +++ /dev/null @@ -1,53 +0,0 @@ -Description: - Author: N.Pérez-Zanón - Info: This is a test to transform s2s4e data-analysis for SEAS5 - -Analysis: - Horizon: Seasonal - Variables: - ECVs: - - {name: tas, freq: monthly_mean} - Indicators: - - None - Datasets: - System: - - name: system5c3s - Multimodel: False - Reference: - - {name: era5} - Time: - sdate: - fcst_year: 2021 - 
fcst_month: 07 - fcst_day: 01 - hcst_start: 2000 - hcst_end: 2002 - leadtimemin: 2 - leadtimemax: 4 - Region: - Global: TRUE - Aggregation: False - Regional: - - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} - Regrid: - method: bilinear - type: system - Workflow: - Calibration: - method: SBC - Skill: - - {metric: fRPSS, probs: [1/3, 2/3]} - - {metric: BSS10} - - {metric: BSS90} - - {metric: EnsCorr} - - {metric: Bias} - Indicators: - index: FALSE - Output_format: S2S4E - -Run: - Loglevel: INFO - Terminal: TRUE - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/old_tests/seasonal_testWorkflow2.yml b/recipes/tests/old_tests/seasonal_testWorkflow2.yml deleted file mode 100644 index 4b05eb8cf52eb8aa65763a5101a1bcc44f030f98..0000000000000000000000000000000000000000 --- a/recipes/tests/old_tests/seasonal_testWorkflow2.yml +++ /dev/null @@ -1,54 +0,0 @@ -Description: - Author: N.Pérez-Zanón - Info: This is a test to transform s2s4e data-analysis for SEAS5 - -Analysis: - Variables: - ECVs: - - {name: tas, freq: monthly_mean} - Indicators: - - None - Datasets: - System: - - name: system5c3s - Multimodel: False - Reference: - - {name: era5} - Time: - sdate: - fcst_year: 2021 - fcst_month: 07 - fcst_day: 01 - hcst_start: 2000 - hcst_end: 2002 - leadtimemin: 2 - leadtimemax: 4 - Region: - Global: TRUE - Aggregation: False - Regional: - - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} - Regrid: - method: bilinear - type: system - Workflow: - Calibration: - method: SBC - Skill: - - {metric: fRPSS, probs: [1/3, 2/3]} - - {metric: BSS10} - - {metric: BSS90} - - {metric: EnsCorr} - - {metric: Bias} - Indicators: - index: FALSE - Output_format: S2S4E - -Run: - Loglevel: INFO - Terminal: TRUE - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ - - diff --git a/recipes/tests/old_tests/seasonal_testWorkflow3.yml b/recipes/tests/old_tests/seasonal_testWorkflow3.yml deleted file mode 100644 index 2b544fa6b5df9d27e1cb3ed00c5a9c6682418757..0000000000000000000000000000000000000000 --- a/recipes/tests/old_tests/seasonal_testWorkflow3.yml +++ /dev/null @@ -1,52 +0,0 @@ -Description: - Author: N.Pérez-Zanón - Info: This is a test to transform s2s4e data-analysis for SEAS5 - -Analysis: - Horizon: Seasonal - Variables: - ECVs: - - {name: tas, freq: monthly_mean} - Indicators: - - None - Datasets: - System: - - name: system5c3s - Multimodel: False - Reference: - - {name: era5} - Time: - sdate: - fcst_year: 2021 - fcst_month: 07 - fcst_day: 01 - hcst_start: 2000 - hcst_end: 2002 - leadtimemin: 2 - leadtimemax: 4 - Region: - Global: TRUE - Aggregation: False - Regional: - - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} - Regrid: - method: bilinear - type: system - Workflow: - Calibration: - - {method: SBC} - - {method: VarianceInflation} - Skill: - - {metric: fRPSS, probs: [1/3, 2/3]} - - {metric: BSS10} - - {metric: BSS90} - - {metric: EnsCorr} - - {metric: Bias} - Output_format: S2S4E - -Run: - Loglevel: INFO - Terminal: TRUE - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/old_tests/seasonal_testWorkflow4.yml b/recipes/tests/old_tests/seasonal_testWorkflow4.yml deleted file mode 100644 index 
e3f9499c498336c50f760ead2d56795bda2d7346..0000000000000000000000000000000000000000 --- a/recipes/tests/old_tests/seasonal_testWorkflow4.yml +++ /dev/null @@ -1,53 +0,0 @@ -Description: - Author: N.Pérez-Zanón - Info: This is a test to transform s2s4e data-analysis for SEAS5 - -Analysis: - Horizon: Seasonal - Variables: - ECVs: - - {name: tas, freq: monthly_mean} - Indicators: - - None - Datasets: - System: - - name: system5c3s - Multimodel: False - Reference: - - {name: era5} - Time: - sdate: - fcst_year: 2021 - fcst_month: 07 - fcst_day: 01 - hcst_start: 2000 - hcst_end: 2002 - leadtimemin: 2 - leadtimemax: 4 - Region: - Global: TRUE - Aggregation: False - Regional: - - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} - Regrid: - method: bilinear - type: system - Workflow: - Calibration: - method: None - Skill: - - {metric: fRPSS, probs: [1/3, 2/3]} - - {metric: BSS10} - - {metric: BSS90} - - {metric: EnsCorr} - - {metric: Bias} - Indicators: - index: FALSE - Output_format: S2S4E - -Run: - Loglevel: INFO - Terminal: TRUE - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/old_tests/seasonal_testWorkflow5.yml b/recipes/tests/old_tests/seasonal_testWorkflow5.yml deleted file mode 100644 index 7029db3c77d4e704a65d2827fb951a8b6f326e52..0000000000000000000000000000000000000000 --- a/recipes/tests/old_tests/seasonal_testWorkflow5.yml +++ /dev/null @@ -1,51 +0,0 @@ -Description: - Author: N.Pérez-Zanón - Info: This is a test to transform s2s4e data-analysis for SEAS5 - -Analysis: - Horizon: Seasonal - Variables: - ECVs: - - {name: tas, freq: monthly_mean} - Indicators: - - None - Datasets: - System: - - name: system5c3s - Multimodel: False - Reference: - - {name: era5} - Time: - sdate: - fcst_year: 2021 - fcst_month: 07 - fcst_day: 01 - hcst_start: 2000 - hcst_end: 2002 - leadtimemin: 2 - leadtimemax: 4 - Region: - Global: TRUE - Aggregation: False - Regional: - - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} - Regrid: - method: bilinear - type: system - Workflow: - Calibration: FALSE - Skill: - - {metric: fRPSS, probs: [1/3, 2/3]} - - {metric: BSS10} - - {metric: BSS90} - - {metric: EnsCorr} - - {metric: Bias} - Indicators: FALSE - Output_format: S2S4E - -Run: - Loglevel: INFO - Terminal: TRUE - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/old_tests/seasonal_testWorkflow6.yml b/recipes/tests/old_tests/seasonal_testWorkflow6.yml deleted file mode 100644 index 9441758207b5903d7b5079f125bbcef6b68badf0..0000000000000000000000000000000000000000 --- a/recipes/tests/old_tests/seasonal_testWorkflow6.yml +++ /dev/null @@ -1,53 +0,0 @@ -Description: - Author: N.Pérez-Zanón - Info: This is a test to transform s2s4e data-analysis for SEAS5 - -Analysis: - Horizon: Seasonal - Variables: - ECVs: - - {name: tas, freq: monthly_mean} - Indicators: - - None - Datasets: - System: - - name: system5c3s - Multimodel: False - Reference: - - {name: era5} - Time: - sdate: - fcst_year: 2021 - fcst_month: 07 - fcst_day: 01 - hcst_start: 2000 - hcst_end: 2002 - leadtimemin: 2 - leadtimemax: 4 - Region: - Global: TRUE - Aggregation: False - Regional: - - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} - Regrid: - method: bilinear - type: system - Workflow: - Skill: - - {metric: 
EnsCorr} - - {metric: Bias} - Calibration: - method: SBC - Skill: - - {metric: EnsCorr} - - {metric: Bias} - Indicators: - index: FALSE - Output_format: S2S4E - -Run: - Loglevel: INFO - Terminal: TRUE - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/old_tests/seasonal_testWorkflow7.yml b/recipes/tests/old_tests/seasonal_testWorkflow7.yml deleted file mode 100644 index 595b677bb250555170ca6ccb1d400ade906a5042..0000000000000000000000000000000000000000 --- a/recipes/tests/old_tests/seasonal_testWorkflow7.yml +++ /dev/null @@ -1,53 +0,0 @@ -Description: - Author: N.Pérez-Zanón - Info: This is a test to transform s2s4e data-analysis for SEAS5 - -Analysis: - Horizon: Seasonal - Variables: - ECVs: - - {name: tas, freq: monthly_mean} - Indicators: - - None - Datasets: - System: - - name: system5c3s - Multimodel: False - Reference: - - {name: era5} - Time: - sdate: - fcst_year: 2021 - fcst_month: 07 - fcst_day: 01 - hcst_start: 2000 - hcst_end: 2002 - leadtimemin: 2 - leadtimemax: 4 - Region: - Global: TRUE - Aggregation: False - Regional: - - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} - Regrid: - method: bilinear - type: system - Workflow: - Skill_1: - - {metric: EnsCorr} - - {metric: Bias} - Calibration: - method: SBC - Skill_2: - - {metric: EnsCorr} - - {metric: Bias} - Indicators: - index: FALSE - Output_format: S2S4E - -Run: - Loglevel: INFO - Terminal: TRUE - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/old_tests/seasonal_testWorkflow8.yml b/recipes/tests/old_tests/seasonal_testWorkflow8.yml deleted file mode 100644 index b6d0c66247a01c955aefdae1adebb5c0f7364307..0000000000000000000000000000000000000000 --- a/recipes/tests/old_tests/seasonal_testWorkflow8.yml +++ /dev/null @@ -1,51 +0,0 @@ -Description: - Author: N.Pérez-Zanón - Info: This is a test to transform s2s4e data-analysis for SEAS5 - -Analysis: - Horizon: Seasonal - Variables: - Indicators: - - {name: gdd} - Datasets: - System: - - name: system5c3s - Multimodel: False - Reference: - - {name: era5} - Time: - sdate: - fcst_year: 2021 - fcst_month: 07 - fcst_day: 01 - hcst_start: 2000 - hcst_end: 2002 - leadtimemin: 2 - leadtimemax: 4 - Region: - Global: TRUE - Aggregation: False - Regional: - - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} - - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} - Regrid: - method: bilinear - type: system - Workflow: - Calibration: - method: SBC - Skill: - - {metric: fRPSS, probs: [1/3, 2/3]} - - {metric: BSS10} - - {metric: BSS90} - - {metric: EnsCorr} - - {metric: Bias} - Indicators: - index: TRUE - Output_format: S2S4E - -Run: - Loglevel: INFO - Terminal: TRUE - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ \ No newline at end of file diff --git a/recipes/tests/recipe_autosubmit_marstest.yml b/recipes/tests/recipe_autosubmit_marstest.yml new file mode 100644 index 0000000000000000000000000000000000000000..24b907977f3ac2fde7f0cea04bae35f91fb0d990 --- /dev/null +++ b/recipes/tests/recipe_autosubmit_marstest.yml @@ -0,0 +1,81 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. 
Agudetse + Info: Test Autosubmit WF for CERISE + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? + - {name: tas, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: ECMWF-SEAS5} + Multimodel: False # single option + Reference: + - {name: ERA5} # multiple references for single model? + Time: + sdate: # list, split + - '1101' + fcst_year: # list, don't split, handled internally + hcst_start: '2000' # single option + hcst_end: '2019' # single option + ftime_min: 1 # single option + ftime_max: 4 # single option + Region: # multiple lists, Add region name if there is more than 1 region + - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} + Regrid: + method: bilinear + type: r360x180 + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: 'all' + Calibration: + method: raw ## TODO: list, split? + save: 'none' + Skill: + metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics + multi_panel: no + projection: robinson + Indicators: + index: no # ? + ncores: 14 + remove_NAs: yes # bool, don't split + Output_format: S2S4E # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: DEBUG + Terminal: yes + filesystem: mars + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/vagudets/repos/auto-s2s/example_scripts/test_parallel_workflow.R # path to the script to run + expid: a5ta ## if left empty, create new exp? + hpc_user: bsc32762 # your hpc username + wallclock: 02:30 # hh:mm + processors_per_job: 14 # use ncores parameter? + platform: nord3v2 # make this constant? + email_notifications: yes # enable/disable email notifications + email_address: victoria.agudetse@bsc.es # email address for notifications + notify_completed: no # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipes/tests/recipe_multiregion.yml b/recipes/tests/recipe_multiregion.yml new file mode 100644 index 0000000000000000000000000000000000000000..91139523d261dbd9d0fe8363727c1b1385a8497f --- /dev/null +++ b/recipes/tests/recipe_multiregion.yml @@ -0,0 +1,82 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. 
Agudetse + Info: Test Independent verification of two variables, two sdates, two systems + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: + - {name: tas, freq: monthly_mean} + Datasets: + System: + - {name: ECMWF-SEAS5} + - {name: Meteo-France-System7} + Multimodel: no + Reference: + - {name: ERA5} + Time: + sdate: + - '0101' + - '0601' + fcst_year: + hcst_start: '2000' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 6 + Region: + - {name: "tropics", latmin: -5, latmax: 5, lonmin: -10, lonmax: 10} + - {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: conservative + type: to_system + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: none + Calibration: + method: raw + save: none + Skill: + metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS + save: all + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + save: all + Visualization: + plots: skill_metrics + Indicators: + index: no + ncores: 8 + remove_NAs: yes + Output_format: S2S4E + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/vagudets/repos/auto-s2s/example_scripts/test_parallel_workflow.R + expid: a5no # autosubmit experiment ID + hpc_user: bsc32762 # your hpc username + wallclock: 04:00 # hh:mm + processors_per_job: 8 # use ncores parameter? + platform: nord3v2 # make this constant? + email_notifications: yes # enable/disable email notifications + email_address: victoria.agudetse@bsc.es # email address for notifications + notify_completed: no # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipes/tests/recipe_seasonal_example.yml b/recipes/tests/recipe_seasonal_example.yml new file mode 100644 index 0000000000000000000000000000000000000000..5d283fb901d6bf8843b3a8b90183b10c91477ec6 --- /dev/null +++ b/recipes/tests/recipe_seasonal_example.yml @@ -0,0 +1,85 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. 
Agudetse + Info: Test Independent verification of two variables, two sdates, two systems + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: + - {name: tas prlr, freq: monthly_mean} + - {name: prlr, freq: monthly_mean} + Datasets: + System: + - {name: ECMWF-SEAS5} + - {name: Meteo-France-System7} + Multimodel: no + Reference: + - {name: ERA5} + Time: + sdate: + - '0101' + - '0601' + fcst_year: + hcst_start: '2000' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + - {latmin: -10, latmax: 10, lonmin: -10, lonmax: 10} + Regrid: + method: conservative + type: to_system + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: 'all' + Calibration: + method: raw + save: 'none' + Skill: + metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + save: 'all' + Visualization: + plots: skill_metrics + multi_panel: yes + projection: cylindrical_equidistant + Indicators: + index: no + ncores: 10 + remove_NAs: yes + Output_format: S2S4E + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/vagudets/repos/auto-s2s/example_scripts/test_parallel_workflow.R + expid: a6jr # autosubmit experiment ID + hpc_user: bsc32762 # your hpc username + wallclock: 01:00 # hh:mm + processors_per_job: 8 # use ncores parameter? + custom_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] + platform: nord3v2 # make this constant? + email_notifications: yes # enable/disable email notifications + email_address: victoria.agudetse@bsc.es # email address for notifications + notify_completed: no # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipes/tests/recipe_seasonal_two-variables.yml b/recipes/tests/recipe_seasonal_two-variables.yml index 0cef06b312dde288b04512b00e93f4a09724a8c9..ac13ea2827419427da335dad8d16ca2439f9dbad 100644 --- a/recipes/tests/recipe_seasonal_two-variables.yml +++ b/recipes/tests/recipe_seasonal_two-variables.yml @@ -30,7 +30,7 @@ Analysis: ftime_min: 1 # single option ftime_max: 3 # single option Region: # multiple lists, split? Add region name if length(Region) > 1 - - {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + - {name: "tropics", latmin: -5, latmax: 5, lonmin: -10, lonmax: 10} Regrid: method: bilinear ## TODO: allow multiple methods? type: to_system @@ -38,12 +38,19 @@ Analysis: Anomalies: compute: yes cross_validation: yes + save: 'all' Calibration: - method: mse_min ## TODO: list, split? + method: raw ## TODO: list, split? 
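A note on the "list, split" annotations carried through these test recipes: the fields marked for splitting (variables, systems, references, start dates and regions) are the ones divide_recipe() expands into separate atomic recipes. A rough, illustrative check only, not the actual implementation, using recipe_multiregion.yml above:
# Illustrative arithmetic; the real splitting logic lives in divide_recipe().
# recipe_multiregion.yml requests 1 variable, 2 systems, 1 reference,
# 2 start dates and 2 named regions, so one would expect
1 * 2 * 1 * 2 * 2
# = 8 atomic recipes, each runnable as an independent SLURM or Autosubmit job.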
+ save: 'none' Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + multi_panel: yes Indicators: index: no # ? ncores: 7 @@ -56,5 +63,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/recipes/tests/recipe_seasonal_vizNA.yml b/recipes/tests/recipe_seasonal_vizNA.yml new file mode 100644 index 0000000000000000000000000000000000000000..fac86933b4046c1f49fe79d45917cb250fd6e507 --- /dev/null +++ b/recipes/tests/recipe_seasonal_vizNA.yml @@ -0,0 +1,85 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: An-Chi Ho + Info: Test Visualization module to handle var is all NA + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: [ECMWF-SEAS5] + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: + - '1101' + fcst_year: '2020' + hcst_start: '2014' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: 0 + latmax: 3 + lonmin: 0 + lonmax: 2 + Regrid: + method: bilinear + type: 'r180x90' + Workflow: + Anomalies: + compute: no + cross_validation: no + save: 'none' + Calibration: + method: raw + save: 'none' + Skill: + metric: RPSS, CRPSS + save: 'none' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + save: 'none' + Visualization: + plots: forecast_ensemble_mean + multi_panel: no + projection: cylindrical_equidistant + Indicators: + index: no + ncores: 8 + remove_NAs: yes + Output_format: S2S4E + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/aho/tmp/auto-s2s_outputs + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/vagudets/repos/auto-s2s/example_scripts/test_parallel_workflow.R + expid: # autosubmit experiment ID + hpc_user: bsc32762 # your hpc username + wallclock: 04:00 # hh:mm + processors_per_job: 8 # use ncores parameter? + platform: nord3v2 # make this constant? + email_notifications: yes # enable/disable email notifications + email_address: victoria.agudetse@bsc.es # email address for notifications + notify_completed: no # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/split.R b/split.R new file mode 100755 index 0000000000000000000000000000000000000000..0d443c7c84163934f1e01c33e6ad738d3fb782c6 --- /dev/null +++ b/split.R @@ -0,0 +1,61 @@ +#! 
/usr/bin/env Rscript + +doc <- "Read an Auto-S2S recipe and prepare the output directory, check the +recipe for potential errors, and split it into atomic recipes to be run +individually. + +If the usage of Autosubmit is requested in the recipe, the Autosubmit +configuration is also written in the folder of the specified experiment. +Instructions on how to run the experiment will appear in the terminal. + +Usage: + split.R <recipe> [--disable_unique_ID] [--tmpfile=<file>] + +Options: + -h --help Show usage. + --disable_unique_ID Do not add a unique ID to the output folder name. + --tmpfile=<file> Temporary file to store output and code directories. Handled by launch_SUNSET.sh. + +Arguments: + <recipe> path to the recipe." + +library(docopt) +suppressMessages(source("tools/libs.R")) + +arguments <- docopt(doc = doc) +# Retrieve recipe file path + +# Check recipe and prepare output directories +recipe <- prepare_outputs(recipe_file = arguments$recipe, + uniqueID = !arguments$disable_unique_ID, + restructure = FALSE) + +# Split recipe into atomic recipes +run_parameters <- divide_recipe(recipe) + +if (!is.null(recipe$Run$autosubmit) && (recipe$Run$autosubmit)) { + write_autosubmit_conf(recipe, run_parameters$n_atomic_recipes) + sink(arguments$tmpfile, append = FALSE) + # Run with... + cat("autosubmit") + sink() +} else if (!is.null(arguments$tmpfile)) { + sink(arguments$tmpfile, append = FALSE) + # Run with... + cat(paste0("sbatch", "\n")) + # Code directory + cat(paste0(recipe$Run$code_dir, "\n")) + # Output directory + cat(paste0(run_parameters$outdir, "\n")) + # Scorecards + if (!("Scorecards" %in% names(recipe$Analysis$Workflow)) || + (!recipe$Analysis$Workflow$Scorecards$execute)) { + cat("FALSE") + } else { + cat("TRUE") + } + sink() +} else { + stop("Autosubmit is not selected but no temp file was provided by the + launcher. 
Please open an issue to report the problem.") +} diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index 7a2a575baefd1881ecf2616a654b944e32f11b9c..88b87622cc066e665d69e5d42bc06185ed2fbf19 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -31,13 +31,17 @@ Analysis: Workflow: Anomalies: compute: no - cross_validation: + cross_validation: + save: 'none' Calibration: method: qmap + save: 'none' Skill: metric: RPSS + save: 'none' Probabilities: percentiles: [[1/10, 9/10]] + save: 'none' Indicators: index: FALSE ncores: # Optional, int: number of cores, defaults to 1 diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index 35b55b1a1985142d0bb6833ccd925da5142cfd3e..ce44751d1589e5b8d016c532da20174b796adfd5 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -31,15 +31,23 @@ Analysis: Workflow: Anomalies: compute: no - cross-validation: + cross-validation: + save: Calibration: method: bias + save: 'all' Skill: metric: RPSS + save: 'all' Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' Indicators: index: FALSE + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean + multi_panel: yes + projection: cylindrical_equidistant ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_1b.yml b/tests/recipes/recipe-decadal_monthly_1b.yml index 5551d9c7c9611a79b73646bc846bbda629e9cbd8..5a1ce4fde98f0b79633ec5de810efeb66a0817ff 100644 --- a/tests/recipes/recipe-decadal_monthly_1b.yml +++ b/tests/recipes/recipe-decadal_monthly_1b.yml @@ -31,13 +31,17 @@ Analysis: Workflow: Anomalies: compute: no - cross_validation: + cross_validation: + save: 'none' Calibration: method: bias + save: 'none' Skill: metric: RPSS + save: 'none' Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'none' Indicators: index: FALSE ncores: # Optional, int: number of cores, defaults to 1 diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 45eb01dd44e20486eec242362f39df05f459603f..8f2e8111ff91588c0c38d75bfa26fa4ca87ec5f5 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -32,14 +32,22 @@ Analysis: Anomalies: compute: no cross_validation: + save: 'all' Calibration: method: raw + save: 'all' Skill: metric: RPSS_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + save: 'all' Probabilities: percentiles: [[1/3, 2/3]] + save: 'all' Indicators: index: FALSE + Visualization: + plots: most_likely_terciles skill_metrics forecast_ensemble_mean + multi_panel: yes + projection: cylindrical_equidistant ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_3.yml b/tests/recipes/recipe-decadal_monthly_3.yml index 94bdfebc7fb5d1b56552f0ce983fcd091466f95f..f7ab5fc1a565092d042a9535ed6c72020072f026 100644 --- a/tests/recipes/recipe-decadal_monthly_3.yml +++ b/tests/recipes/recipe-decadal_monthly_3.yml @@ -31,13 +31,17 @@ Analysis: Workflow: Anomalies: compute: no - cross_validation: + cross_validation: + 
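Coming back to split.R above: when Autosubmit is not requested, the script hands its results to launch_SUNSET.sh through the file given in --tmpfile, writing the run mode ("sbatch" or "autosubmit") followed, in the sbatch case, by the code directory, the atomic-recipe output directory and a TRUE/FALSE Scorecards flag. A minimal sketch of reading that hand-off file back, assuming a hypothetical path (the real parsing is done in shell by launch_SUNSET.sh):
# Hypothetical illustration only; the file name is an assumption.
tmpfile <- "/tmp/sunset_split_dirs.txt"       # the value passed as --tmpfile
handoff <- readLines(tmpfile, warn = FALSE)   # the last entry has no trailing newline
run_mode <- handoff[1]                        # "sbatch" or "autosubmit"
if (run_mode == "sbatch") {
  code_dir   <- handoff[2]                    # recipe$Run$code_dir
  output_dir <- handoff[3]                    # run_parameters$outdir
  scorecards <- identical(handoff[4], "TRUE") # whether a Scorecards step is needed
}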
save: 'none' Calibration: method: 'evmos' + save: 'none' Skill: - metric: BSS10 Corr + metric: BSS10 Corr_Individual_Members + save: 'none' Probabilities: percentiles: [[1/3, 2/3]] + save: 'none' Indicators: index: FALSE ncores: # Optional, int: number of cores, defaults to 1 diff --git a/tests/recipes/recipe-seasonal_NAO.yml b/tests/recipes/recipe-seasonal_NAO.yml new file mode 100644 index 0000000000000000000000000000000000000000..1cccf1771a1f2e31c93b80f7ba446a90010ce49f --- /dev/null +++ b/tests/recipes/recipe-seasonal_NAO.yml @@ -0,0 +1,62 @@ +Description: + Author: nperez + Info: ECMWF SEAS5 Seasonal Forecast Example recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0301' ## MMDD + # fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 2 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 # Mandatory, int: minimum latitude + latmax: 80 # Mandatory, int: maximum latitude + lonmin: -80 # Mandatory, int: minimum longitude + lonmax: 40 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "r180x90" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Indices: + NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + save: none + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
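On the "Quantile thresholds" comment above: each inner list gives the climatological quantiles that bound the probability categories, e.g. [1/3, 2/3] for terciles and [1/10, 9/10] for the 10th/90th percentiles. A hedged one-liner of the underlying idea, not the module's actual code:
# Illustration only: tercile thresholds for a stand-in sample of hindcast values.
x <- rnorm(100, mean = 288, sd = 2)
quantile(x, probs = c(1/3, 2/3), na.rm = TRUE)
# Probabilities() then reports, roughly speaking, the fraction of ensemble
# members falling below, between and above these thresholds.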
+ save: none + Indicators: + index: no + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + #output_dir: /esarchive/scratch/nperez/scorecards_data/input_test/ + #code_dir: /esarchive/scratch/nperez/git/s2s-suite/ + + diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml index afa0f4966bf6b242e40f1179f79882fbc60c1022..42603c2cdb0bd7d421f9d1c48e6a7070e7ed4bf4 100644 --- a/tests/recipes/recipe-seasonal_daily_1.yml +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -4,14 +4,13 @@ Description: Analysis: Horizon: Seasonal Variables: - name: tas - freq: daily_mean + - {name: tas, freq: daily_mean} Datasets: System: - name: ECMWF-SEAS5 + - name: ECMWF-SEAS5 Multimodel: False Reference: - name: ERA5 + - name: ERA5 Time: sdate: '1201' fcst_year: @@ -20,10 +19,7 @@ Analysis: ftime_min: 1 ftime_max: 1 Region: - latmin: 17 - latmax: 20 - lonmin: 12 - lonmax: 15 + - {latmin: 17, latmax: 20, lonmin: 12, lonmax: 15} Regrid: method: conservative type: to_system @@ -31,10 +27,13 @@ Analysis: Anomalies: compute: no cross_validation: + save: 'none' Calibration: method: qmap + save: 'none' Skill: metric: EnsCorr_specs + save: 'none' Indicators: index: no Output_format: S2S4E diff --git a/tests/recipes/recipe-seasonal_downscaling.yml b/tests/recipes/recipe-seasonal_downscaling.yml new file mode 100644 index 0000000000000000000000000000000000000000..d7a0466223f0e7416c2eb2602f80c49afac988a9 --- /dev/null +++ b/tests/recipes/recipe-seasonal_downscaling.yml @@ -0,0 +1,56 @@ +Description: + Author: E. 
Duzenli + +Analysis: + Horizon: Seasonal + Variables: + - {name: tas, freq: daily_mean} + Datasets: + System: + - name: ECMWF-SEAS5 + Multimodel: False + Reference: + - name: ERA5 + Time: + sdate: '1201' + fcst_year: + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 1 + Region: + - {latmin: 17, latmax: 20, lonmin: 12, lonmax: 15} + Regrid: + method: 'none' + type: 'none' + Workflow: + Anomalies: + compute: TRUE + cross_validation: FALSE + save: 'none' + Calibration: + method: 'raw' + save: 'none' + Skill: + metric: BSS10 CRPSS RPSS mean_bias + save: 'none' + Indicators: + index: no + Downscaling: + type: analogs + int_method: + bc_method: + lr_method: + log_reg_method: + nanalogs: 3 + target_grid: /esarchive/recon/ecmwf/era5/daily_mean/tas_f1h/tas_199301.nc + save: 'all' + Output_format: S2S4E + ncores: 1 + remove_NAs: FALSE +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index 68c58f83603baf157662d5c44f885a814cb914ec..99acb16cb8dc6d61dc0f388678db297c936ee98d 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -28,17 +28,25 @@ Analysis: method: bilinear type: to_system Workflow: - Anomalies: - compute: no - cross_validation: + # Anomalies: + # compute: no + # cross_validation: + # save: 'none' Calibration: method: mse_min + save: 'all' Skill: - metric: RPSS CRPSS EnsCorr Corr Enscorr_specs + metric: RPSS CRPSS EnsCorr Corr_individual_members Enscorr_specs + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' Indicators: index: no + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean + multi_panel: yes + projection: cylindrical_equidistant Output_format: S2S4E Run: Loglevel: INFO diff --git a/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml b/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml new file mode 100644 index 0000000000000000000000000000000000000000..41048db127759ba614115fdc77eeb9ab398a95f6 --- /dev/null +++ b/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml @@ -0,0 +1,56 @@ +Description: + Author: N. 
Milders + +Analysis: + Horizon: Seasonal + Variables: + name: tas-tos + sic_threshold: 0.15 + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '0101' + fcst_year: '2018' + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + save: 'none' + Calibration: + method: mse_min + save: 'all' + Skill: + metric: RPSS CRPSS EnsCorr Corr_individual_members Enscorr_specs + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean + multi_panel: yes + projection: cylindrical_equidistant + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/ ##/esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index c9833d2b34413422f49dcb13d24991ed624c0d80..c847fd10074ee8c457a32b83a2b96f5d44dc9749 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -2,7 +2,7 @@ context("Decadal daily data - 1") ########################################### -source("modules/Loading/Loading_decadal.R") +source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") @@ -13,18 +13,18 @@ archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$ar # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe) +data <- Loading(recipe) ))}) ## Calibrate datasets #suppressWarnings({invisible(capture.output( -# calibrated_data <- calibrate_datasets(data, recipe) +# calibrated_data <- Calibration(data, recipe) #))}) # ## Compute skill metrics #suppressWarnings({invisible(capture.output( -#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, -# recipe, na.rm = T, ncores = 4) +#skill_metrics <- Skill(calibrated_data$hcst, data$obs, +# recipe, na.rm = T, ncores = 4) #))}) #====================================== @@ -53,7 +53,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), @@ -72,7 +72,7 @@ dim(data$fcst$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 2, time = 90, latitude = 7, longitude = 11, ensemble = 3) ) expect_equal( -dim(data$hcst$Dates$start), +dim(data$hcst$attrs$Dates), c(sday = 1, sweek = 1, syear = 3, time = 90) ) # hcst data @@ -111,23 +111,23 @@ tolerance = 0.0001 # time value expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("1991-01-01 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], +(data$hcst$attrs$Dates)[2], as.POSIXct("1992-01-01 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("1992-01-02 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[1, 1, 3, 90], +(data$hcst$attrs$Dates)[1, 1, 3, 90], as.POSIXct("1993-03-31 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[1, 1, 2, 90], +(data$hcst$attrs$Dates)[1, 1, 2, 90], as.POSIXct("1992-03-30 
12:00:00", tz = 'UTC') ) @@ -219,4 +219,4 @@ as.POSIXct("1992-03-30 12:00:00", tz = 'UTC') # #}) - +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index b76a216c18d605436a730c92504be69379e991d8..3346e529ae76f9e58c08371f95cf0ded95fb6533 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -2,7 +2,7 @@ context("Decadal monthly data - 1") ########################################### -source("modules/Loading/Loading_decadal.R") +source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") @@ -14,37 +14,29 @@ archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$ar # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe) +data <- Loading(recipe) ))}) # Calibrate datasets suppressWarnings({invisible(capture.output( - calibrated_data <- calibrate_datasets(recipe, data) + calibrated_data <- Calibration(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +skill_metrics <- Skill(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data) -))}) - -# Saving -suppressWarnings({invisible(capture.output( -save_data(recipe = recipe, data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs, archive = archive) +probs <- Probabilities(recipe, calibrated_data) ))}) # Plotting suppressWarnings({invisible(capture.output( -plot_data(recipe = recipe, archive = archive, data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs, significance = T) +Visualization(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T) ))}) - -outdir <- get_dir(recipe) - #====================================== test_that("1. 
Loading", { @@ -71,7 +63,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), @@ -90,7 +82,7 @@ dim(data$fcst$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4, ensemble = 2) ) expect_equal( -dim(data$hcst$Dates$start), +dim(data$hcst$attr$Dates), c(sday = 1, sweek = 1, syear = 4, time = 3) ) expect_equal( @@ -109,19 +101,19 @@ c(281.7395, 294.2467), tolerance = 0.0001 ) expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("1991-11-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], +(data$hcst$attrs$Dates)[2], as.POSIXct("1992-11-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("1991-12-16 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[10], +(data$hcst$attrs$Dates)[10], as.POSIXct("1993-01-16 12:00:00", tz = 'UTC') ) @@ -195,7 +187,7 @@ class(skill_metrics$rpss), ) expect_equal( dim(skill_metrics$rpss), -c(time = 3, latitude = 5, longitude = 4) +c(var = 1, time = 3, latitude = 5, longitude = 4) ) expect_equal( dim(skill_metrics$rpss_significance), @@ -226,27 +218,27 @@ c('percentile_33', 'percentile_66', 'percentile_10', 'percentile_90') ) expect_equal( dim(probs$probs$prob_b33), -c(syear = 4, time = 3, latitude = 5, longitude = 4) +c(var = 1, syear = 4, time = 3, latitude = 5, longitude = 4) ) expect_equal( dim(probs$percentiles$percentile_33), -c(time = 3, latitude = 5, longitude = 4) +c(var = 1, time = 3, latitude = 5, longitude = 4) ) expect_equal( -as.vector(probs$probs$prob_b33[, 1, 2, 2]), +as.vector(probs$probs$prob_b33[, , 1, 2, 2]), c(0.0, 0.5, 0.0, 1.0) ) expect_equal( -as.vector(probs$probs$prob_10_to_90[, 1, 2, 2]), +as.vector(probs$probs$prob_10_to_90[, , 1, 2, 2]), c(1.0, 1.0, 0.5, 0.5) ) expect_equal( -as.vector(probs$percentiles$percentile_33[, 1, 2]), +as.vector(probs$percentiles$percentile_33[, , 1, 2]), c(293.7496, 287.4263, 285.8295), tolerance = 0.0001 ) expect_equal( -as.vector(probs$percentiles$percentile_10[, 1, 2]), +as.vector(probs$percentiles$percentile_10[, , 1, 2]), c(293.1772, 286.9533, 284.7887), tolerance = 0.0001 ) @@ -256,34 +248,44 @@ tolerance = 0.0001 #====================================== test_that("4. Saving", { - +outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( -list.files(outdir), -c("plots", "tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-percentiles_month11.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", - "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc") + "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc")), +TRUE ) # open the files and check values/attributes? #expect_equal( #) +expect_equal( +length(list.files(outputs, recursive = T)), +16 +) }) test_that("5. 
Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") +expect_equal( +all(basename(list.files(plots, recursive = T)) %in% +c("forecast_ensemble_mean-2021.png", "forecast_most_likely_tercile-2021.png", + "rpss.png")), +TRUE +) expect_equal( -list.files(paste0(outdir, "/plots/")), -c("forecast_ensemble_mean.png", "forecast_most_likely_tercile.png", - "rpss.png") +length(list.files(plots, recursive = T)), +3 ) }) # Delete files -unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) - +unlink(recipe$Run$output_dir, recursive = TRUE) #============================================================== @@ -295,20 +297,20 @@ archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$ar # Load datasets suppressWarnings({invisible(capture.output( -data_b <- load_datasets(recipe) +data_b <- Loading(recipe) ))}) # Calibrate datasets suppressWarnings({invisible(capture.output( - calibrated_data_b <- calibrate_datasets(recipe, data_b) + calibrated_data_b <- Calibration(recipe, data_b) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics_b <- compute_skill_metrics(recipe, calibrated_data_b) +skill_metrics_b <- Skill(recipe, calibrated_data_b) ))}) suppressWarnings({invisible(capture.output( -probs_b <- compute_probabilities(recipe, calibrated_data_b) +probs_b <- Probabilities(recipe, calibrated_data_b) ))}) @@ -335,4 +337,5 @@ lapply(probs_b$probs_fcst, ClimProjDiags::Subset, 'syear', 2), probs$probs_fcst ) +unlink(recipe$Run$output_dir, recursive = TRUE) }) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index cdab57f3174d710e31ad68eb17eab14304b81d84..9adc16b65deb3b84f9a825d93c175504f6304f2b 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -2,7 +2,7 @@ context("Decadal monthly data - 2") ########################################### -source("modules/Loading/Loading_decadal.R") +source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") @@ -13,36 +13,31 @@ recipe <- prepare_outputs(recipe_file) # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe) +data <- Loading(recipe) ))}) # Calibrate datasets suppressWarnings({invisible(capture.output( - calibrated_data <- calibrate_datasets(recipe, data) + calibrated_data <- Calibration(recipe, data) ))}) # Compute skill metrics suppressMessages({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +skill_metrics <- Skill(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data) -))}) - -# Saving -suppressWarnings({invisible(capture.output( -save_data(recipe, calibrated_data, skill_metrics, probs) +probs <- Probabilities(recipe, calibrated_data) ))}) # Plotting suppressWarnings({invisible(capture.output( -plot_data(recipe = recipe, data = calibrated_data, - skill_metrics = skill_metrics, - probabilities = probs, significance = T) +Visualization(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, + probabilities = probs, significance = T) ))}) -outdir <- get_dir(recipe) +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) #====================================== @@ -71,7 +66,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", 
"load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), @@ -90,7 +85,7 @@ dim(data$fcst$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 2, time = 14, latitude = 8, longitude = 5, ensemble = 3) ) expect_equal( -dim(data$hcst$Dates$start), +dim(data$hcst$attrs$Dates), c(sday = 1, sweek = 1, syear = 3, time = 14) ) #expect_equal( @@ -126,19 +121,19 @@ tolerance = 0.0001 ) expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("1990-11-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], +(data$hcst$attrs$Dates)[2], as.POSIXct("1991-11-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("1991-12-16 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[10], +(data$hcst$attrs$Dates)[10], as.POSIXct("1991-02-15", tz = 'UTC') ) @@ -169,18 +164,18 @@ TRUE ) expect_equal( names(skill_metrics), -c("rpss_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps") +c("rpss_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps", "frps_clim") ) expect_equal( class(skill_metrics$rpss_specs), "array" ) expect_equal( -all(unlist(lapply(lapply(skill_metrics, dim), all.equal, c(time = 14, latitude = 8, longitude = 5)))), +all(unlist(lapply(lapply(skill_metrics, dim), all.equal, c(var = 1, time = 14, latitude = 8, longitude = 5)))), TRUE ) expect_equal( -as.vector(skill_metrics$rpss_specs[6:8, 1, 2]), +as.vector(skill_metrics$rpss_specs[, 6:8, 1, 2]), c(-0.3333333, 0.1666667, -0.3333333), tolerance = 0.0001 ) @@ -189,26 +184,26 @@ tolerance = 0.0001 #TRUE #) expect_equal( -as.vector(skill_metrics$enscorr_specs[6:8, 1, 2]), +as.vector(skill_metrics$enscorr_specs[, 6:8, 1, 2]), c(0.4474382, 0.1026333, 0.4042823), tolerance = 0.0001 ) expect_equal( -as.vector(skill_metrics$frps_specs[6:8, 1, 2]), +as.vector(skill_metrics$frps_specs[, 6:8, 1, 2]), c(0.4444444, 0.2222222, 0.4444444), tolerance = 0.0001 ) expect_equal( -as.vector(skill_metrics$frpss_specs[4:7, 1, 5]), +as.vector(skill_metrics$frpss_specs[, 4:7, 1, 5]), c( 1.0, -0.5, -0.5, 0.5), tolerance = 0.0001 ) expect_equal( -as.vector(skill_metrics$bss10_specs[6:8, 1, 2]), +as.vector(skill_metrics$bss10_specs[, 6:8, 1, 2]), c(0.5, -0.5, -0.5), ) expect_equal( -as.vector(skill_metrics$frps[6:8, 1, 2]), +as.vector(skill_metrics$frps[, 6:8, 1, 2]), c(0.4444444, 0.2222222, 0.4444444), tolerance = 0.0001 ) @@ -228,19 +223,19 @@ c('percentile_33', 'percentile_66') ) expect_equal( dim(probs$probs$prob_b33), -c(syear = 3, time = 14, latitude = 8, longitude = 5) +c(var = 1, syear = 3, time = 14, latitude = 8, longitude = 5) ) expect_equal( dim(probs$percentiles$percentile_33), -c(time = 14, latitude = 8, longitude = 5) +c(var = 1,time = 14, latitude = 8, longitude = 5) ) expect_equal( -as.vector(probs$probs$prob_b33[, 1, 2, 2]), +as.vector(probs$probs$prob_b33[, , 1, 2, 2]), c(0.0, 0.3333333, 0.6666667), tolerance = 0.0001 ) expect_equal( -as.vector(probs$percentiles$percentile_33[1:3, 1, 2]), +as.vector(probs$percentiles$percentile_33[, 1:3, 1, 2]), c(271.7508, 273.1682, 274.1937), tolerance = 0.0001 ) @@ -250,26 +245,39 @@ tolerance = 0.0001 #====================================== test_that("4. 
Saving", { - +outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( -list.files(outdir), -c("plots", "tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_20201101.nc", "tas_20211101.nc", +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_20201101.nc", "tas_20211101.nc", "tas-obs_19901101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-percentiles_month11.nc", "tas-probs_19901101.nc", "tas-probs_19911101.nc", - "tas-probs_19921101.nc", "tas-probs_20201101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc") + "tas-probs_19921101.nc", "tas-probs_20201101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc")), +TRUE ) +expect_equal( +length(list.files(outputs, recursive = T)), +15 +) + }) #====================================== test_that("5. Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") +expect_equal( +all(basename(list.files(plots, recursive = T)) %in% +c("bss10_specs.png", "enscorr_specs.png", "forecast_ensemble_mean-2020.png", "forecast_ensemble_mean-2021.png", "forecast_most_likely_tercile-2020.png", "forecast_most_likely_tercile-2021.png", "frps_specs.png", "frps.png", "rpss_specs.png") +), +TRUE +) expect_equal( -list.files(paste0(outdir, "/plots/")), -c("bss10_specs.png", "enscorr_specs.png", "forecast_ensemble_mean_2020.png", "forecast_ensemble_mean_2021.png", "forecast_most_likely_tercile_2020.png", "forecast_most_likely_tercile_2021.png", "frps_specs.png", "frps.png", "rpss_specs.png") +length(list.files(plots, recursive = T)), +9 ) }) # Delete files -unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 22fd435391ab4ae1d5cc0f9e4a9ed0898d961e20..3cdc826aa4231a2a3dc51a68a1cc4068451b732a 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -2,7 +2,7 @@ context("Decadal monthly data - 3") ########################################### -source("modules/Loading/Loading_decadal.R") +source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") @@ -13,20 +13,20 @@ archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$ar # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe) +data <- Loading(recipe) ))}) # Calibrate datasets suppressWarnings({invisible(capture.output( - calibrated_data <- calibrate_datasets(recipe, data) + calibrated_data <- Calibration(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +skill_metrics <- Skill(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data) +probs <- Probabilities(recipe, calibrated_data) ))}) #====================================== @@ -55,7 +55,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), @@ -66,7 +66,7 @@ dim(data$hcst$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 25, longitude = 16, ensemble = 3) ) expect_equal( -dim(data$hcst$Dates$start), +dim(data$hcst$attrs$Dates), c(sday = 1, sweek = 1, syear 
= 4, time = 3) ) # hcst data @@ -87,19 +87,19 @@ tolerance = 0.0001 ) expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("2016-04-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], +(data$hcst$attrs$Dates)[2], as.POSIXct("2017-04-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("2016-05-16 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[12], +(data$hcst$attrs$Dates)[12], as.POSIXct("2019-06-16", tz = 'UTC') ) @@ -133,22 +133,23 @@ TRUE ) expect_equal( names(skill_metrics), -c("bss10", "bss10_significance", "corr", "corr_significance") +c("bss10", "bss10_significance", + "corr_individual_members", "corr_individual_members_significance") ) expect_equal( class(skill_metrics[[1]]), "array" ) expect_equal( -all(unlist(lapply(lapply(skill_metrics, dim)[1:2], all.equal, c(time = 3, latitude = 25, longitude = 16)))), +all(unlist(lapply(lapply(skill_metrics, dim)[1:2], all.equal, c(var = 1, time = 3, latitude = 25, longitude = 16)))), TRUE ) expect_equal( -all(unlist(lapply(lapply(skill_metrics, dim)[3:4], all.equal, c(ensemble = 3, time = 3, latitude = 25, longitude = 16)))), +all(unlist(lapply(lapply(skill_metrics, dim)[3:4], all.equal, c(ensemble = 3, var = 1, time = 3, latitude = 25, longitude = 16)))), TRUE ) expect_equal( -as.vector(skill_metrics$bss10[, 1, 2]), +as.vector(skill_metrics$bss10[, , 1, 2]), c(-0.1904762, -0.1904762, -0.1904762), tolerance = 0.0001 ) @@ -157,7 +158,7 @@ any(as.vector(skill_metrics$bss10_significance)), FALSE ) expect_equal( -as.vector(skill_metrics$corr[2, , 1, 2]), +as.vector(skill_metrics$corr_individual_members[2, , , 1, 2]), c(-0.2015265, 0.4635463, -0.1019575), tolerance = 0.0001 ) @@ -177,23 +178,23 @@ c('percentile_33', 'percentile_66') ) expect_equal( dim(probs$probs$prob_b33), -c(syear = 4, time = 3, latitude = 25, longitude = 16) +c(var = 1, syear = 4, time = 3, latitude = 25, longitude = 16) ) expect_equal( dim(probs$percentiles$percentile_33), -c(time = 3, latitude = 25, longitude = 16) +c(var = 1, time = 3, latitude = 25, longitude = 16) ) expect_equal( -as.vector(probs$probs$prob_b33[, 1, 2, 2]), +as.vector(probs$probs$prob_b33[, , 1, 2, 2]), c(0.0, 0.3333333, 0.3333333, 0.6666667), tolerance = 0.0001 ) expect_equal( -as.vector(probs$percentiles$percentile_33[1:3, 1, 2]), +as.vector(probs$percentiles$percentile_33[, 1:3, 1, 2]), c(278.1501, 279.5226, 282.0237), tolerance = 0.0001 ) }) - +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-seasonal_NAO.R b/tests/testthat/test-seasonal_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..9582bdfcfdb89ac11b4fa8f3c7dbb79f983fac04 --- /dev/null +++ b/tests/testthat/test-seasonal_NAO.R @@ -0,0 +1,261 @@ +context("Seasonal NAO monthly data") + +source("modules/Loading/Loading.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Indices/Indices.R") + +recipe_file <- "tests/recipes/recipe-seasonal_NAO.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +# Compute anomalies +suppressWarnings({invisible(capture.output( +ano_data <- Anomalies(recipe, data) +))}) + +# Compute index NAO +suppressWarnings({invisible(capture.output( +nao_data <- Indices(recipe = recipe, data = ano_data) +))}) + 
+# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- Skill(recipe, nao_data, agg = 'region') +))}) + + +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) + +# ------- TESTS -------- + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"NULL" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 8, time = 1, latitude = 30, longitude = 61, ensemble = 25) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 1, sweek = 1, syear = 8, time = 1) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1,2,3]), +c(101607.2, 101523.7), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +101450, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(98909.3, 103299.8), +tolerance = 0.0001 +) +expect_equal( +month((data$hcst$attrs$Dates)[1]), +month(as.POSIXct("1993-04-30 18:00:00", tz = 'UTC')) +) +expect_equal( +month((data$hcst$attrs$Dates)[2]), +month(as.POSIXct("1994-04-30 18:00:00", tz = 'UTC')) +) +expect_equal( +month((data$hcst$attrs$Dates)[5]), +month(as.POSIXct("1997-04-30 18:00:00", tz = 'UTC')) +) +expect_equal( +month((data$obs$attrs$Dates)[8]), +month(as.POSIXct("2000-04-16", tz = 'UTC')) +) + +}) + +test_that("2. Anomalies", { + +expect_equal( +is.list(ano_data), +TRUE +) +expect_equal( +names(ano_data), +c("hcst", "obs", "fcst", "hcst.full_val", "obs.full_val") +) +expect_equal( +class(ano_data$hcst), +"s2dv_cube" +) +expect_equal( +class(ano_data$fcst), +"NULL" +) +expect_equal( +dim(ano_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 8, time = 1, latitude = 30, longitude = 61, ensemble = 25) +) +expect_equal( +mean(ano_data$hcst$data), +-1.044053e-13, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(ano_data$hcst$data)[1:3, 3, 4, 1]), +c(-165.688477, -233.336914, 3.358398), +tolerance = 0.0001 +) +expect_equal( +range(ano_data$hcst$data), +c(-2045.577, 2027.675), +tolerance = 0.0001 +) + +}) + +#===================================== +test_that("3. Indices", { + +expect_equal( +is.list(nao_data), +TRUE +) +expect_equal( +names(nao_data), +c("hcst", "obs") +) +expect_equal( +class(nao_data$hcst), +"s2dv_cube" +) +expect_equal( +class(nao_data$fcst), +"NULL" +) +expect_equal( +dim(nao_data$hcst$data), +c(region = 1, syear = 8, ensemble = 25, dat = 1, var = 1, sday = 1, sweek = 1, time = 1) +) +expect_equal( +mean(nao_data$hcst$data), +8.239937e-18, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(nao_data$hcst$data)[1:3, 1]), +c(0.6778843, -1.6014398, 0.3849058), +tolerance = 0.0001 +) +expect_equal( +range(nao_data$hcst$data), +c(-3.038780, 2.193189), +tolerance = 0.0001 +) + +outputs <- paste0(recipe$Run$output_dir, "/plots/") +expect_equal( +all(list.files(outputs, recursive = T) %in% +paste0("Indices/", + c("NAO_ECMWF-SEAS5_ERA5_s0301_ftime02.png", + "NAO_correlation_psl_ERA5_s0301_ftime02.png", + "NAO_correlation_psl_ensmean_ECMWF-SEAS5_s0301_ftime02.png", + "NAO_correlation_psl_member_ECMWF-SEAS5_s0301_ftime02.png"))), +TRUE) + +expect_equal( +length(list.files(outputs, recursive = T)), +4 +) + + +}) + +#====================================== +test_that("3. 
Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("mean_bias", "enscorr", + "enscorr_significance", "rps", "rps_clim", "rpss", "rpss_significance", + "crps", "crps_clim", "crpss", "crpss_significance", "enssprerr") +) +expect_equal( +class(skill_metrics$rpss), +"array" +) +expect_equal( +dim(skill_metrics$rpss), +c(region = 1, var = 1, time = 1) +) +expect_equal( +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) +) +expect_equal( +skill_metrics$rpss[1], +-0.06814118, +tolerance = 0.0001 +) +expect_equal( +skill_metrics$rpss_significance[1], +FALSE +) + +}) + +test_that("4. Saving", { +outputs <- paste0(recipe$Run$output_dir, "/outputs/") +expect_equal( +all(list.files(outputs, recursive = T) %in% +c(paste0("Indices/ECMWF-SEAS5/nao/", paste0("nao_", 1993:2000, "0301.nc")), + paste0("Indices/ERA5/nao/", paste0("nao_", 1993:2000, "0301.nc")), + "Skill/ECMWF-SEAS5/nao/scorecards_ECMWF-SEAS5_ERA5_nao-skill_1993-2000_s03.nc") +), TRUE) + +expect_equal( +length(list.files(outputs, recursive = T)), +17 +) + +}) + +# Delete files +unlink(recipe$Run$output_dir, recursive = T) diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index ddcca22fd93750647b02ecfc5290591edfc5167d..6cfa4384ee93c907789046c1de9955bd554e012d 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -6,20 +6,20 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-seasonal_daily_1.yml" -recipe <- prepare_outputs(recipe_file) +recipe <- prepare_outputs(recipe_file, disable_checks = F) # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe) +data <- Loading(recipe) ))}) # Calibrate data suppressWarnings({invisible(capture.output( -calibrated_data <- calibrate_datasets(recipe, data) +calibrated_data <- Calibration(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +skill_metrics <- Skill(recipe, calibrated_data) ))}) test_that("1. 
Loading", { @@ -46,7 +46,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), @@ -61,7 +61,7 @@ dim(data$obs$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 1) ) expect_equal( -dim(data$obs$Dates$start), +dim(data$obs$attrs$Dates), c(sday = 1, sweek = 1, syear = 4, time = 31) ) expect_equal( @@ -80,19 +80,19 @@ c(280.1490, 298.2324), tolerance = 0.0001 ) expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("1993-12-01 18:00:00 UTC", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], +(data$hcst$attrs$Dates)[2], as.POSIXct("1994-12-01 18:00:00 UTC", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("1993-12-02 18:00:00 UTC", tz = 'UTC') ) expect_equal( -(data$obs$Dates$start)[10], +(data$obs$attrs$Dates)[10], as.POSIXct("1994-12-03 11:30:00 UTC", tz = 'UTC') ) @@ -155,13 +155,13 @@ class(skill_metrics$enscorr_specs), ) expect_equal( dim(skill_metrics$enscorr_specs), -c(time = 31, latitude = 4, longitude = 4) +c(var = 1, time = 31, latitude = 4, longitude = 4) ) expect_equal( -skill_metrics$enscorr_specs[1:3, 1, 1], +skill_metrics$enscorr_specs[, 1:3, 1, 1], c(0.7509920, 0.6514916, 0.5118371), tolerance=0.0001 ) }) -unlink(recipe$Run$output_dir) +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-seasonal_downscaling.R b/tests/testthat/test-seasonal_downscaling.R new file mode 100644 index 0000000000000000000000000000000000000000..8a52657a8b4dfd3665d82b27af46af6a60375b63 --- /dev/null +++ b/tests/testthat/test-seasonal_downscaling.R @@ -0,0 +1,257 @@ +library(testthat) +context("Seasonal daily data") + +source("modules/Loading/Loading.R") +source("modules/Skill/Skill.R") +source("modules/Downscaling/Downscaling.R") +source("modules/Saving/Saving.R") +source("modules/Anomalies/Anomalies.R") + +recipe_file <- "tests/recipes/recipe-seasonal_downscaling.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +# Compute anomalies +suppressWarnings({invisible(capture.output( +ano_data <- Anomalies(recipe, data) +))}) + +# Downscale the data +suppressWarnings({invisible(capture.output( +downscaled_data <- Downscaling(recipe, ano_data) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- Skill(recipe, downscaled_data) +))}) + +#====================================== +test_that("1. 
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +data$fcst, +NULL +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +dim(data$obs$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 13, longitude = 13, ensemble = 1) +) +expect_equal( +dim(data$obs$attrs$Dates), +c(sday = 1, sweek = 1, syear = 4, time = 31) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1:2,1,2,3]), +c(295.5691, 291.7752, 294.0874, 290.1173), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +288.3723, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(280.1490, 298.2324), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("1993-12-01 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("1994-12-01 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("1993-12-02 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$obs$attrs$Dates)[10], +as.POSIXct("1994-12-03 11:30:00 UTC", tz = 'UTC') +) + +}) + +#====================================== +test_that("2. Anomalies", { + +expect_equal( +is.list(ano_data), +TRUE +) +expect_equal( +names(ano_data), +c("hcst", "obs", "fcst", "hcst.full_val", "obs.full_val") +) +expect_equal( +class(ano_data$hcst), +"s2dv_cube" +) +expect_equal( +class(ano_data$fcst), +"NULL" +) +expect_equal( +dim(ano_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +mean(ano_data$hcst$data), +-3.466088e-16, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(ano_data$hcst$data)[3, 26:28, 3, 2, 12]), +c(-1.5654303, -0.6506540, -0.4674155), +tolerance = 0.0001 +) +expect_equal( +range(ano_data$hcst$data), +c(-6.265505, 9.440247), +tolerance = 0.0001 +) + +}) + +#====================================== +test_that("3. Downscaling", { + +expect_equal( +is.list(downscaled_data), +TRUE +) +expect_equal( +names(downscaled_data), +c("hcst", "obs", "fcst") +) +expect_equal( +class(downscaled_data$hcst), +"s2dv_cube" +) +expect_equal( +class(downscaled_data$obs), +"s2dv_cube" +) +expect_equal( +downscaled_data$fcst, +NULL +) +expect_equal( +names(downscaled_data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(downscaled_data$hcst), +names(downscaled_data$obs) +) +expect_equal( +dim(downscaled_data$hcst$data), +c(latitude = 13, longitude = 13, syear = 4, dat = 1, var = 1, sday = 1, sweek = 1, time = 31, ensemble = 25) +) +expect_equal( +dim(downscaled_data$obs$data), +c(ensemble = 1, dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 13, longitude = 13) +) +expect_equal( +as.vector(drop(downscaled_data$hcst$data)[1:2,1:2,1,2,3]), +c(1.482848, 1.503154, 1.599051, 1.591909), +tolerance = 0.0001 +) +expect_equal( +mean(downscaled_data$hcst$data), +-0.04462092, +tolerance = 0.0001 +) +expect_equal( +range(downscaled_data$hcst$data), +c(-4.611440, 5.771648), +tolerance = 0.0001 +) + +}) + +#====================================== +test_that("4. 
Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("bss10", "bss10_significance", "crpss", "crpss_significance","rpss", "rpss_significance", "mean_bias") +) +expect_equal( +class(skill_metrics$rpss), +"array" +) +expect_equal( +dim(skill_metrics$rpss), +c(latitude = 13, longitude = 13, var = 1, time = 31) +) +expect_equal( +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) +) +expect_equal( +skill_metrics$rpss[1], +-0.005942857, +tolerance = 0.0001 +) +expect_equal( +skill_metrics$rpss_significance[1], +FALSE +) + +}) + +test_that("5. Check saved data", { + +outputs <- paste0(recipe$Run$output_dir, "/outputs/Downscaling/") +expect_equal( +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19931201.nc", "tas_19941201.nc", "tas_19951201.nc", + "tas_19961201.nc", "tas-obs_19931201.nc", "tas-obs_19941201.nc", + "tas-obs_19951201.nc", "tas-obs_19961201.nc")), +TRUE +) + +}) + +unlink(recipe$Run$output_dir, recursive = TRUE) + diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index de03bf7340906650004fbdf5bcdf445992144c09..d9b7b3c3857f4d79df127f4be135c25c0b77818c 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -7,41 +7,41 @@ source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") recipe_file <- "tests/recipes/recipe-seasonal_monthly_1.yml" -recipe <- prepare_outputs(recipe_file) +recipe <- prepare_outputs(recipe_file, disable_checks = F) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe) +data <- Loading(recipe) ))}) # Calibrate data suppressWarnings({invisible(capture.output( -calibrated_data <- calibrate_datasets(recipe, data) +calibrated_data <- Calibration(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +skill_metrics <- Skill(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data) +probs <- Probabilities(recipe, calibrated_data) ))}) # Saving suppressWarnings({invisible(capture.output( -save_data(recipe = recipe, data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs) +Saving(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs) ))}) # Plotting suppressWarnings({invisible(capture.output( -plot_data(recipe = recipe, data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs, - significance = T) +Visualization(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T) ))}) -outdir <- get_dir(recipe) +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) # ------- TESTS -------- @@ -69,7 +69,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), @@ -88,7 +88,7 @@ dim(data$fcst$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, longitude = 3, ensemble = 51) ) expect_equal( -dim(data$hcst$Dates$start), +dim(data$hcst$attrs$Dates), c(sday = 1, sweek = 1, syear = 4, time = 3) ) expect_equal( @@ -107,19 +107,19 @@ c(284.7413, 299.6219), 
tolerance = 0.0001 ) expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("1993-11-30 23:59:59", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], +(data$hcst$attrs$Dates)[2], as.POSIXct("1994-11-30 23:59:59", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("1993-12-31 23:59:59", tz = 'UTC') ) expect_equal( -(data$obs$Dates$start)[10], +(data$obs$attrs$Dates)[10], as.POSIXct("1995-01-15 12:00:00", tz = 'UTC') ) @@ -185,7 +185,8 @@ TRUE expect_equal( names(skill_metrics), c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", - "enscorr_significance", "corr", "corr_significance", "enscorr_specs") + "enscorr_significance", "corr_individual_members", "corr_individual_members_significance", + "enscorr_specs") ) expect_equal( class(skill_metrics$rpss), @@ -193,46 +194,58 @@ class(skill_metrics$rpss), ) expect_equal( dim(skill_metrics$rpss), -c(time = 3, latitude = 3, longitude = 3) +c(var = 1, time = 3, latitude = 3, longitude = 3) ) expect_equal( dim(skill_metrics$rpss_significance), dim(skill_metrics$rpss) ) expect_equal( -as.vector(skill_metrics$rpss[, 2, 3]), +as.vector(skill_metrics$rpss[, , 2, 3]), c(-0.2918857, -1.4809143, -1.3842286), tolerance = 0.0001 ) expect_equal( -as.vector(skill_metrics$rpss_significance[, 2, 3]), +as.vector(skill_metrics$rpss_significance[, , 2, 3]), rep(FALSE, 3) ) }) test_that("4. Saving", { - +outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( -list.files(outdir), -c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", - "tas-probs_20201101.nc", "tas-skill_month11.nc") + "tas-probs_20201101.nc", "tas-skill_month11.nc")), +TRUE +) +expect_equal( +length(list.files(outputs, recursive = T)), +17 ) }) test_that("5. 
Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") +expect_equal( +all(basename(list.files(plots, recursive = T)) %in% +c("crpss-november.png", "enscorr_specs-november.png", "enscorr-november.png", + "forecast_ensemble_mean-20201101.png", "forecast_most_likely_tercile-20201101.png", + "rpss-november.png")), +TRUE +) expect_equal( -list.files(paste0(outdir, "/plots/")), -c("crpss.png", "enscorr_specs.png", "enscorr.png", "forecast_ensemble_mean.png", - "forecast_most_likely_tercile.png", "rpss.png") +length(list.files(plots, recursive = T)), +6 ) }) # Delete files -unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) +unlink(recipe$Run$output_dir, recursive = T) diff --git a/tests/testthat/test-seasonal_monthly_tas-tos.R b/tests/testthat/test-seasonal_monthly_tas-tos.R new file mode 100644 index 0000000000000000000000000000000000000000..1345f63dfb1190988413905221cbda99c2f37c74 --- /dev/null +++ b/tests/testthat/test-seasonal_monthly_tas-tos.R @@ -0,0 +1,98 @@ +context("Seasonal monthly data") + +source("./modules/Loading/Loading.R") + +recipe_file <- "tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + + +# ------- TESTS -------- + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 4, longitude = 4, ensemble = 51) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 1, sweek = 1, syear = 4, time = 3) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1:2,1,2,3]), +c(285.5869, 287.8836, 285.9362, 289.0483), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +290.1099, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(283.2845, 299.7845), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("1993-01-31 18:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("1994-01-31 18:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("1993-02-28 18:00:00", tz = 'UTC') +) +expect_equal( +(data$obs$attrs$Dates)[10], +as.POSIXct("1994-03-15 12:00:00", tz = 'UTC') +) + +}) + +# Delete files +unlink(recipe$Run$output_dir, recursive = T) diff --git a/tools/BSC_logo_95.jpg b/tools/BSC_logo_95.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f961c0409bdd5c973e8521570d96e4ff341e5fcc Binary files /dev/null and b/tools/BSC_logo_95.jpg differ diff --git a/tools/CST_ChangeDimName.R b/tools/CST_ChangeDimName.R new file mode 100644 index 0000000000000000000000000000000000000000..1ccbbb3d327df21f6582326e2e54fdf58ea68f09 --- /dev/null +++ b/tools/CST_ChangeDimName.R @@ -0,0 +1,39 @@ +## TODO: Documentation + +CST_ChangeDimName <- function(data, original_dimnames, 
final_dimnames) { + if (!inherits(data, "s2dv_cube")) { + stop("Parameter 'data' must be an object of class 's2dv_cube'") + } + if (!(length(original_dimnames) == length(final_dimnames))) { + stop("The number of dimension names in 'final_dimnames' must be the same + as in 'original_dimnames'") + } + ## TODO: Add check to verify that all original_dimnames are present in the array + for (index in 1:length(original_dimnames)) { + original_name <- original_dimnames[index] + final_name <- final_dimnames[index] + # Step 1: Change dims and data + names(data$dims)[which(names(data$dims) == original_name)] <- final_name + dim(data$data) <- data$dims + # Step 2: Change coords + names(data$coords)[which(names(data$coords) == original_name)] <- final_name + # Step 3: Change attrs + # 3.1 - Dates + if (original_name %in% names(dim(data$attrs$Dates))) { + names(dim(data$attrs$Dates))[which(names(dim(data$attrs$Dates)) + == original_name)] <- final_name + } + # 3.2 - Variable metadata + if (original_name %in% names(data$attrs$Variable$metadata)) { + names(data$attrs$Variable$metadata)[which(names(data$attrs$Variable$metadata) + == original_name)] <- final_name + } + # 3.3 - Source files + if (original_name %in% names(dim(data$attrs$source_files))) { + names(dim(data$attrs$source_files))[which(names(dim(data$attrs$source_files)) + == original_name)] <- final_name + } + } + return(data) +} + diff --git a/tools/Utils.R b/tools/Utils.R new file mode 100644 index 0000000000000000000000000000000000000000..c0acf3740a10d1d9da97d8363b52f5050260c97d --- /dev/null +++ b/tools/Utils.R @@ -0,0 +1,13 @@ +## TODO: Write header +## TODO: Add if 'DEBUG' +.log_memory_usage <- function(logger, when) { + debug(logger, paste0(when, ":")) + mem_info <- capture.output(memuse::Sys.meminfo()) + for (i in mem_info) { + debug(recipe$Run$logger, i) + } + proc_mem <- capture.output(memuse::Sys.procmem()) + for (i in proc_mem) { + debug(recipe$Run$logger, i) + } +} diff --git a/tools/add_logo.R b/tools/add_logo.R new file mode 100644 index 0000000000000000000000000000000000000000..42fb87c50b1bf1a0c20409cee43cc1703e8aeb3c --- /dev/null +++ b/tools/add_logo.R @@ -0,0 +1,15 @@ +add_logo <- function(recipe, logo) { + # recipe: SUNSET recipe + # logo: URL to the logo + system <- list.files(paste0(recipe$Run$output_dir, "/plots/")) + variable <- recipe$Analysis$Variable$name + files <- lapply(variable, function(x) { + f <- list.files(paste0(recipe$Run$output_dir, "/plots/", + system, "/", x)) + full_path <- paste0(recipe$Run$output_dir, "/plots/", + system, "/", x,"/", f)})[[1]] + dim(files) <- c(file = length(files)) + Apply(list(files), target_dims = NULL, function(x) { + system(paste("composite -gravity southeast -geometry +10+10", + logo, x, x))}, ncores = recipe$Analysis$ncores) +} diff --git a/tools/check_number_of_dependent_verifications.R b/tools/check_number_of_dependent_verifications.R new file mode 100644 index 0000000000000000000000000000000000000000..0c85d09f63c4d3c473c98f11a3e4ab26a6b82f56 --- /dev/null +++ b/tools/check_number_of_dependent_verifications.R @@ -0,0 +1,134 @@ +check_number_of_dependent_verifications <- function(recipe) { + # Number of verifications depends on the variables and indicators requested + # and the order of the workflow: + # workflow: correction + indicator --> only 1 variable is calibrated + # workflow: indicator + correction --> the indicator and the ecv are calibrated + independent_verifications <- NULL + dependent_verifications <- NULL + dep <- 1 + # check workflow order: + if 
(all(c('Calibration', 'Indicators') %in% names(recipe$Analysis$Workflow))) { + cal_pos <- which(names(recipe$Analysis$Workflow) == 'Calibration') + ind_pos <- which(names(recipe$Analysis$Workflow) == 'Indicators') + if (cal_pos < ind_pos) { + workflow_independent <- FALSE + } else { + workflow_independent <- TRUE + } + } + if (workflow_independent) { + independent_verifications <- append(recipe$Analysis$Variables$ECVs, + recipe$Analysis$Variables$Indicators) + } else { + if (is.null(recipe$Analysis$Variables$Indicators) || + (length(recipe$Analysis$Variables$Indicators) == 1 && + is.null(recipe$Analysis$Variables$ECVs))) { + independent_verifications <- append(recipe$Analysis$Variables$ECVs, + recipe$Analysis$Variables$Indicators) + } else { + ecvs <- recipe$Analysi$Variables$ECVs + inds <- recipe$Analysi$Variables$Indicators + ind_table <- read_yaml(paste0(recipe$Run$code_dir, + "conf/indicators_table.yml")) + # first, loop on ecvs if any and compare to indicators + done <- NULL # to gather the indicators reviewed + if (!is.null(ecvs)) { + for (i in 1:length(ecvs)) { + dependent <- list(ecvs[[i]]) + for (j in 1:length(inds)) { + if (ind_table[inds[[j]]$name][[1]]$ECVs == ecvs[[i]]$name) { + if (ind_table[inds[[j]]$name][[1]]$freq == ecvs[[i]]$freq) { + # they are dependent + dependent <- append(dependent, inds[[j]]) + done <- append(done, inds[[j]]) + } + } + } + if (length(dependent) == 1) { + dependent <- NULL + independent_verifications <- append(independent_verifications, + list(ecvs[[i]])) + } else { + dependent_verifications <- append(dependent_verifications, + list(dependent)) + } + } + # There are indicators not reviewed yet? + if (length(done) < length(inds)) { + if (length(inds) == 1) { + independent_verifications <- append(independent_verifications, + inds) + } else { + done <- NULL + for (i in 1:(length(inds) - 1)) { + dependent <- list(inds[[i]]$name) + if (is.na(match(unlist(dependent), unlist(done)))) { + for (j in (i+1):length(inds)) { + if (ind_table[inds[[i]]$name][[1]]$ECVs == + ind_table[inds[[j]]$name][[1]]$ECVs) { + if (ind_table[inds[[i]]$name][[1]]$freq == + ind_table[inds[[j]]$name][[1]]$freq) { + dependent <- append(dependent, inds[[j]]$name) + done <- dependent + } + } + } + } + if (length(dependent) == 1) { + independent_verifications <- dependent + dependent <- NULL + } else { + dependent_verifications <- dependent + } + } + } + } + } else { # there are only Indicators: + done <- NULL + for (i in 1:(length(inds) - 1)) { + dependent <- list(inds[[i]]$name) + if (is.na(match(unlist(dependent), unlist(done)))) { + for (j in (i+1):length(inds)) { + if (ind_table[inds[[i]]$name][[1]]$ECVs == + ind_table[inds[[j]]$name][[1]]$ECVs) { + if (ind_table[inds[[i]]$name][[1]]$freq == + ind_table[inds[[j]]$name][[1]]$freq) { + dependent <- append(dependent, inds[[j]]$name) + done <- dependent + } + } + } + } + if (length(dependent) == 1) { + independent_verifications <- dependent + dependent <- NULL + } else { + dependent_verifications <- dependent + } + } + } + } + } + if (!is.null(independent_verifications)) { + info(logger, paste("The variables for independent verification are ", + paste(independent_verifications, collapse = " "))) + } + if (!is.null(dependent_verifications)) { + info(logger, paste("The variables for dependent verification are: ", + paste(dependent_verifications, collapse = " "))) + } + # remove unnecessary names in objects to be removed + return(list(independent = independent_verifications, + dependent = dependent_verifications)) +} +#workflow <- 
list(Calibration = list(method = 'SBC'), +# Skill = list(metric = 'RPSS')) +#ApplyWorkflow <- function(workflow) { + +#res <- do.call('CST_BiasCorrection', +# args = list(exp = lonlat_data$exp, +# obs = lonlat_data$obs)) + + + + diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 559d049226fbd0902376fe85538fa032e01796e1..01b790b8255daacce1482357cd174ec26479c039 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,8 +1,6 @@ check_recipe <- function(recipe) { - # recipe: yaml recipe already read it - ## TODO: Adapt to decadal case - + ## TODO: set up logger-less case info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) # --------------------------------------------------------------------- @@ -10,10 +8,10 @@ check_recipe <- function(recipe) { # --------------------------------------------------------------------- TIME_SETTINGS_SEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", - "hcst_end") + "hcst_end") TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") PARAMS <- c("Horizon", "Time", "Variables", "Region", "Regrid", "Workflow", - "Datasets") + "Datasets") HORIZONS <- c("subseasonal", "seasonal", "decadal") ARCHIVE_SEASONAL <- "conf/archive.yml" ARCHIVE_DECADAL <- "conf/archive_decadal.yml" @@ -24,99 +22,117 @@ check_recipe <- function(recipe) { # Check basic elements in recipe:Analysis: if (!("Analysis" %in% names(recipe))) { error(recipe$Run$logger, - "The recipe must contain an element called 'Analysis'.") + "The recipe must contain an element called 'Analysis'.") error_status <- T } if (!all(PARAMS %in% names(recipe$Analysis))) { error(recipe$Run$logger, paste0("The element 'Analysis' in the recipe must contain all of ", - "the following: ", paste(PARAMS, collapse = ", "), ".")) + "the following: ", paste(PARAMS, collapse = ", "), ".")) error_status <- T } if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { error(recipe$Run$logger, paste0("The element 'Horizon' in the recipe must be one of the ", - "following: ", paste(HORIZONS, collapse = ", "), ".")) + "following: ", paste(HORIZONS, collapse = ", "), ".")) error_status <- T } # Check time settings if (tolower(recipe$Analysis$Horizon) == "seasonal") { ## TODO: Specify filesystem - archive <- read_yaml(ARCHIVE_SEASONAL)$archive + archive <- read_yaml(ARCHIVE_SEASONAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { error(recipe$Run$logger, - paste0("The element 'Time' in the recipe must contain all of the ", - "following: ", paste(TIME_SETTINGS_SEASONAL, - collapse = ", "), ".")) + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_SEASONAL, + collapse = ", "), ".")) error_status <- T } } else if (tolower(recipe$Analysis$Horizon) == "decadal") { - archive <- read_yaml(ARCHIVE_DECADAL)$archive + archive <- read_yaml(ARCHIVE_DECADAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { error(recipe$Run$logger, - paste0("The element 'Time' in the recipe must contain all of the ", - "following: ", paste(TIME_SETTINGS_DECADAL, - collapse = ", "), ".")) + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_DECADAL, + collapse = ", "), ".")) error_status <- T } + } else { + archive <- NULL } - # Check system names - if (!all(recipe$Analysis$Datasets$System$name %in% names(archive$System))) { - error(recipe$Run$logger, - "The specified System name was not found in 
the archive.") - error_status <- T + # Check variable parameters + if ("name" %in% names(recipe$Analysis$Variables)) { + recipe_variables <- recipe$Analysis$Variables$name + } else { + recipe_variables <- sapply(recipe$Analysis$Variables, get, x = "name") + } + recipe_variables <- unlist(strsplit(recipe_variables, ", | |,")) + # Sea-ice threshold check + if (("tas-tos" %in% recipe_variables) && + (!is.null(recipe$Analysis$Variables$sic_threshold))) { + if (!is.numeric(recipe$Analysis$Variables$sic_threshold) || + dplyr::between(recipe$Analysis$Variables$sic_threshold, 0, 1)) { + error(recipe$Run$logger, + paste("The element Analysis:Variables:sic_threshold must be a", + "numeric value between 0 and 1.")) + } } - # Check reference names - if (!all(recipe$Analysis$Datasets$Reference$name %in% - names(archive$Reference))) { - error(recipe$Run$logger, - "The specified Reference name was not found in the archive.") - error_status <- T + # Check system names + if (!is.null(archive)) { + if (!all(recipe$Analysis$Datasets$System$name %in% names(archive$System))) { + error(recipe$Run$logger, + "The specified System name was not found in the archive.") + error_status <- T + } + # Check reference names + if (!all(recipe$Analysis$Datasets$Reference$name %in% + names(archive$Reference))) { + error(recipe$Run$logger, + "The specified Reference name was not found in the archive.") + error_status <- T + } } # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || (!is.integer(recipe$Analysis$Time$ftime_min))) { error(recipe$Run$logger, - "The element 'ftime_min' must be an integer larger than 0.") + "The element 'ftime_min' must be an integer larger than 0.") error_status <- T } if ((!(recipe$Analysis$Time$ftime_max > 0)) || (!is.integer(recipe$Analysis$Time$ftime_max))) { error(recipe$Run$logger, - "The element 'ftime_max' must be an integer larger than 0.") + "The element 'ftime_max' must be an integer larger than 0.") error_status <- T } - if ((is.numeric(recipe$Analysis$Time$ftime_max)) && - (is.numeric(recipe$Analysis$Time$ftime_min))) { - if (recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { - error(recipe$Run$logger, - "'ftime_max' cannot be smaller than 'ftime_min'.") - error_status <- T - } + if (recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { + error(recipe$Run$logger, + "'ftime_max' cannot be smaller than 'ftime_min'.") + error_status <- T } # Check consistency of hindcast years if (!(as.numeric(recipe$Analysis$Time$hcst_start) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_start > 0))) { error(recipe$Run$logger, - "The element 'hcst_start' must be a valid year.") + "The element 'hcst_start' must be a valid year.") error_status <- T } if (!(as.numeric(recipe$Analysis$Time$hcst_end) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_end > 0))) { error(recipe$Run$logger, - "The element 'hcst_end' must be a valid year.") + "The element 'hcst_end' must be a valid year.") error_status <- T } if (recipe$Analysis$Time$hcst_end < recipe$Analysis$Time$hcst_start) { error(recipe$Run$logger, - "'hcst_end' cannot be smaller than 'hcst_start'.") + "'hcst_end' cannot be smaller than 'hcst_start'.") error_status <- T } ## TODO: Is this needed? 
if (is.null(recipe$Analysis$Time$fcst_year) || - tolower(recipe$Analysis$Time$fcst_year) == 'none') { + identical(tolower(recipe$Analysis$Time$fcst_year), 'none')) { stream <- "hindcast" # recipe$Analysis$Time$fcst_year <- 'YYYY' } else { @@ -140,7 +156,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Time$fcst_year)) { warn(recipe$Run$logger, paste("The element 'fcst_year' is not defined in the recipe.", - "No forecast year will be used.")) + "No forecast year will be used.")) } ## TODO: Adapt and move this inside 'if'? # fcst.sdate <- NULL @@ -164,7 +180,7 @@ check_recipe <- function(recipe) { # calculate number of workflows to create for each variable and if (length(recipe$Analysis$Horizon) > 1) { error(recipe$Run$logger, - "Only one single Horizon can be specified in the recipe") + "Only one single Horizon can be specified in the recipe") error_status <- T } @@ -190,10 +206,30 @@ check_recipe <- function(recipe) { # Region checks: LIMITS <- c('latmin', 'latmax', 'lonmin', 'lonmax') - if (!all(LIMITS %in% names(recipe$Analysis$Region))) { + # Ordinary recipe + if (is.null(names(recipe$Analysis$Region))) { + for (region in recipe$Analysis$Region) { + if (!all(LIMITS %in% names(region))) { + error(recipe$Run$logger, + paste0("There must be 4 elements in 'Region': ", + paste(LIMITS, collapse = ", "), ".")) + error_status <- T + } + } + if (length(recipe$Analysis$Region) > 1) { + for (region in recipe$Analysis$Region) { + if (!("name" %in% names(region)) || (is.null(region$name))) { + error(recipe$Run$logger, + paste("If more than one region has been defined, every region", + "must have a unique name.")) + } + } + } + # Atomic recipe + } else if (!all(LIMITS %in% names(recipe$Analysis$Region))) { error(recipe$Run$logger, paste0("There must be 4 elements in 'Region': ", - paste(LIMITS, collapse = ", "), ".")) + paste(LIMITS, collapse = ", "), ".")) error_status <- T } ## TODO: Implement multiple regions @@ -218,7 +254,9 @@ check_recipe <- function(recipe) { # WORKFLOW CHECKS # --------------------------------------------------------------------- - # Only one Calibration method allowed: + # Calibration + # If 'method' is FALSE/no/'none' or NULL, set to 'raw' + ## TODO: Review this check if ((is.logical(recipe$Analysis$Workflow$Calibration$method) && recipe$Analysis$Workflow$Calibration$method == FALSE) || tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || @@ -232,52 +270,336 @@ check_recipe <- function(recipe) { "The 'Calibration' element 'method' must be specified.") error_status <- T } + SAVING_OPTIONS_CALIB <- c("all", "none", "exp_only", "fcst_only") + if ((is.null(recipe$Analysis$Workflow$Calibration$save)) || + (!(recipe$Analysis$Workflow$Calibration$save %in% SAVING_OPTIONS_CALIB))) { + error(recipe$Run$logger, + paste0("Please specify which Calibration module outputs you want ", + "to save with the 'save' parameter. 
The options are: ", + paste(SAVING_OPTIONS_CALIB, collapse = ", "), ".")) + error_status <- T + } } # Anomalies if ("Anomalies" %in% names(recipe$Analysis$Workflow)) { + # Computation and cross-validation checks if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, - "Parameter 'compute' must be defined under 'Anomalies'.") + "Parameter 'compute' must be defined under 'Anomalies'.") error_status <- T } else if (!(is.logical(recipe$Analysis$Workflow$Anomalies$compute))) { error(recipe$Run$logger, - paste("Parameter 'Anomalies:compute' must be a logical value", - "(True/False or yes/no).")) + paste("Parameter 'Anomalies:compute' must be a logical value", + "(True/False or yes/no).")) + error_status <- T + } else if ((recipe$Analysis$Workflow$Anomalies$compute)) { + # Cross-validation check + if (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation)) { + error(recipe$Run$logger, + paste("If anomaly computation is requested, parameter", + "'cross_validation' must be defined under 'Anomalies', + and it must be a logical value (True/False or yes/no).")) + error_status <- T + } + # Saving checks + SAVING_OPTIONS_ANOM <- c("all", "none", "exp_only", "fcst_only") + if ((is.null(recipe$Analysis$Workflow$Anomalies$save)) || + (!(recipe$Analysis$Workflow$Anomalies$save %in% SAVING_OPTIONS_ANOM))) { + error(recipe$Run$logger, + paste0("Please specify which Anomalies module outputs you want ", + "to save with the 'save' parameter. The options are: ", + paste(SAVING_OPTIONS_ANOM, collapse = ", "), ".")) + error_status <- T + } + } + } + + # Downscaling + if ("Downscaling" %in% names(recipe$Analysis$Workflow)) { + downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower) + # Define accepted entries + DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") + BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", + "rpc-based", "qm") + LR_METHODS <- c("basic", "large-scale", "4nn") + LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + # Check downscaling type + if ("type" %in% names(downscal_params)) { + if (length(downscal_params$type) == 0) { + downscal_params$type <- "none" + warn(recipe$Run$logger, + paste("Downscaling 'type' is empty in the recipe, setting it to", + "'none'.")) + } + if (!(downscal_params$type %in% DOWNSCAL_TYPES)) { + error(recipe$Run$logger, + paste0("The type of Downscaling request in the recipe is not ", + "available. 
It must be one of the following: ", + paste(DOWNSCAL_TYPES, collapse = ", "), ".")) + error_status <- T + } + if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && + (is.null(downscal_params$target_grid))) { + error(recipe$Run$logger, + paste("A target grid is required for the downscaling method", + "requested in the recipe.")) + error_status <- T + } + if (downscal_params$type == "int") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested, but no", + "interpolation method is provided in the recipe.")) + error_status <- T + } + } else if (downscal_params$type %in% + c("int", "intbc", "intlr", "logreg")) { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type", downscal_params$type, + "was requested in the recipe, but no", + "interpolation method is provided.")) + error_status <- T + } + } else if (downscal_params$type == "intbc") { + if (is.null(downscal_params$bc_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'intbc' was requested in the recipe, but", + "no bias correction method is provided.")) + error_status <- T + } else if (!(downscal_params$bc_method %in% BC_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted Bias Correction methods for the downscaling", + " module are: ", paste(BC_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "intlr") { + if (length(downscal_params$lr_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no linear regression method was provided.")) + error_status <- T + } else if (!(downscal_params$lr_method %in% LR_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted linear regression methods for the", + " downscaling module are: ", + paste(LR_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "analogs") { + if (is.null(downscal_params$nanalogs)) { + warn(recipe$Run$logger, + paste("Downscaling type is 'analogs, but the number of analogs", + "has not been provided in the recipe. 
The default is 3.")) + } + } else if (downscal_params$type == "logreg") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no interpolation method was provided.")) + error_status <- T + } + if (is.null(downscal_params$log_reg_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe,", + "but no logistic regression method is provided.")) + error_status <- T + } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted logistic regression methods for the ", + "downscaling module are: ", + paste(LOGREG_METHODS, collapse = ", "), ".")) + error_status <- T + } + } + } + } + + # Indices + if ("Indices" %in% names(recipe$Analysis$Workflow)) { + nino_indices <- paste0("nino", c("1+2", "3", "3.4", "4")) + indices <- c("nao", nino_indices) + if (!("anomalies" %in% tolower(names(recipe$Analysis$Workflow)))) { + error(recipe$Run$logger, + paste0("Indices uses Anomalies as input, but Anomalies are missing", + "in the recipe.")) + error_status <- T + } else if (!(recipe$Analysis$Workflow$Anomalies$compute)) { + error(recipe$Run$logger, + paste0("Indices uses Anomalies as input, but the parameter", + "'Anomalies:compute' is set as no/False.")) error_status <- T - } else if ((recipe$Analysis$Workflow$Anomalies$compute) && - (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation))) { + } + recipe_indices <- tolower(names(recipe$Analysis$Workflow$Indices)) + if (!all(recipe_indices %in% indices)) { error(recipe$Run$logger, - paste("If anomaly computation is requested, parameter", - "'cross_validation' must be defined under 'Anomalies', - and it must be a logical value (True/False or yes/no).")) + paste0("Some of the indices under 'Indices' are not available.", + "The available Indices are: 'NAO', 'Nino1+2', 'Nino3', ", + "'Nino3.4' and 'Nino4'.")) + error_status <- T + } + # Check that variables correspond with indices requested + if (("nao" %in% recipe_indices) && + (!all(recipe_variables %in% c("psl", "z500")))) { + error(recipe$Run$logger, + paste0("It is not possible to compute the NAO with some of the ", + "variables requested. To compute the NAO, please make sure", + "your recipe requests only psl and/or z500.")) + error_status <- T + } + if ((any(nino_indices %in% recipe_indices)) && + (!all(recipe_variables %in% c("tos", "sst")))) { + error(recipe$Run$logger, + paste0("It is not possible to compute El Nino indices with some ", + "of the variables requested. 
To compute El Nino, please ", + "make sure your recipe requests only tos.")) error_status <- T } } + # Skill - if (("Skill" %in% names(recipe$Analysis$Workflow)) && - (is.null(recipe$Analysis$Workflow$Skill$metric))) { + AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", "rpss", + "frps", "frpss", "crps", "crpss", "bss10", "bss90", + "mean_bias", "mean_bias_ss", "enssprerr", "rps_clim", + "rpss_clim", "enscorr_specs", "frps_specs", "rpss_specs", + "frpss_specs", "bss10_specs", "bss90_specs") + if ("Skill" %in% names(recipe$Analysis$Workflow)) { + if (is.null(recipe$Analysis$Workflow$Skill$metric)) { error(recipe$Run$logger, - "Parameter 'metric' must be defined under 'Skill'.") + "Parameter 'metric' must be defined under 'Skill'.") error_status <- T + } else { + requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, + ", | |,")[[1]] + if (!all(tolower(requested_metrics) %in% AVAILABLE_METRICS)) { + error(recipe$Run$logger, + paste0("Some of the metrics requested under 'Skill' are not ", + "available in SUNSET. Check the documentation to see the ", + "full list of accepted skill metrics.")) + error_status <- T + } + } + # Saving checks + SAVING_OPTIONS_SKILL <- c("all", "none") + if ((is.null(recipe$Analysis$Workflow$Skill$save)) || + (!(recipe$Analysis$Workflow$Skill$save %in% SAVING_OPTIONS_SKILL))) { + error(recipe$Run$logger, + paste0("Please specify whether you want to save the Skill metrics ", + "with the 'save' parameter. The options are: ", + paste(SAVING_OPTIONS_SKILL, collapse = ", "), ".")) + error_status <- T + } } + # Probabilities if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, - "Parameter 'percentiles' must be defined under 'Probabilities'.") + "Parameter 'percentiles' must be defined under 'Probabilities'.") error_status <- T } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, - paste("Parameter 'Probabilities:percentiles' expects a list.", - "See documentation in the wiki for examples.")) + paste("Parameter 'Probabilities:percentiles' expects a list.", + "See documentation in the wiki for examples.")) + error_status <- T + } + # Saving checks + SAVING_OPTIONS_PROBS <- c("all", "none", "bins_only", "percentiles_only") + if ((is.null(recipe$Analysis$Workflow$Probabilities$save)) || + (!(recipe$Analysis$Workflow$Probabilities$save %in% SAVING_OPTIONS_PROBS))) { + error(recipe$Run$logger, + paste0("Please specify whether you want to save the percentiles ", + "and probability bins with the 'save' parameter. 
The ", + "options are: ", + paste(SAVING_OPTIONS_PROBS, collapse = ", "), ".")) error_status <- T } } + # Visualization + if ("Visualization" %in% names(recipe$Analysis$Workflow)) { + PLOT_OPTIONS <- c("skill_metrics", "forecast_ensemble_mean", + "most_likely_terciles") + # Separate plots parameter and check if all elements are in PLOT_OPTIONS + if (is.null(recipe$Analysis$Workflow$Visualization$plots)) { + error(recipe$Run$logger, + "The 'plots' element must be defined under 'Visualization'.") + error_status <- T + } else { + plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, + ", | |,")[[1]] + if (!all(plots %in% PLOT_OPTIONS)) { + error(recipe$Run$logger, + paste0("The options available for the plots are: ", + paste(PLOT_OPTIONS, collapse = ", "), ".")) + error_status <- T + } + } + # Check multi_panel option + ## TODO: Multi-panel + if (is.null(recipe$Analysis$Workflow$Visualization$multi_panel)) { + warn(recipe$Run$logger, + paste0("Visualization:multi_panel not specified for the plots, the", + " default is 'no/False'.")) + } else if (!is.logical(recipe$Analysis$Workflow$Visualization$multi_panel)) { + error(recipe$Run$logger, + paste0("Parameter 'Visualization:multi_panel' must be a logical ", + "value: either 'yes/True' or 'no/False'")) + error_status <- T + } + # Check projection + if (is.null(recipe$Analysis$Workflow$Visualization$projection)) { + warn(recipe$Run$logger, + paste0("Visualization:projection not specified for the plots, the ", + "default projection is cylindrical equidistant.")) + } + ## TODO: Add significance? + if ("most_likely_terciles" %in% plots) { + if (is.null(recipe$Analysis$Workflow$Visualization$mask_terciles)) { + warn(recipe$Run$logger, + paste0("Visualization:mask_terciles not set for tercile plots,", + " the default setting is: 'no/False'.")) + } else if (!(recipe$Analysis$Workflow$Visualization$mask_terciles %in% + c(TRUE, FALSE, "both"))) { + error(recipe$Run$logger, + paste0("Parameter Visualization:mask_terciles must be one of: ", + "yes/True, no/False, 'both'")) + error_status <- T + } + if (is.null(recipe$Analysis$Workflow$Visualization$dots)) { + warn(recipe$Run$logger, + paste0("Visualization:dots not set for tercile plots, the default", + " setting is: 'no/False'.")) + } else if (!(recipe$Analysis$Workflow$Visualization$dots %in% + c(TRUE, FALSE, "both"))) { + error(recipe$Run$logger, + paste0("Parameter Visualization:plots must be one of: ", + "yes/True, no/False, 'both'")) + } + } + } + # Scorecards + if ("Scorecards" %in% names(recipe$Analysis$Workflow)) { + if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { + error(recipe$Run$logger, + "Parameter 'metric' must be defined under 'Scorecards'.") + error_status <- T + } else { + sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, + ", | |,")[[1]] + if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) { + error(recipe$Run$logger, + paste0("All of the metrics requested under 'Scorecards' must ", + "be requested in the 'Skill' section.")) + error_status <- T + } + } + } # --------------------------------------------------------------------- # RUN CHECKS # --------------------------------------------------------------------- + ## TODO: These checks should probably go first RUN_FIELDS = c("Loglevel", "Terminal", "output_dir", "code_dir") LOG_LEVELS = c("INFO", "DEBUG", "WARN", "ERROR", "FATAL") @@ -286,7 +608,7 @@ check_recipe <- function(recipe) { } if (!all(RUN_FIELDS %in% names(recipe$Run))) { error(recipe$Run$logger, paste("Recipe element 'Run' must 
contain", - "all of the following fields:", + "all of the following fields:", paste(RUN_FIELDS, collapse=", "), ".")) error_status <- T } @@ -321,6 +643,79 @@ check_recipe <- function(recipe) { error_status <- T } + # --------------------------------------------------------------------- + # AUTOSUBMIT CHECKS + # --------------------------------------------------------------------- + + AUTO_PARAMS <- c("script", "expid", "hpc_user", "wallclock", + "processors_per_job", "platform", "email_notifications", + "email_address", "notify_completed", "notify_failed") + # Autosubmit false by default + if (is.null(recipe$Run$autosubmit)) { + recipe$Run$autosubmit <- F + } + # Autosubmit configuration checks + if (recipe$Run$autosubmit) { + # Read autosubmit info for the specific filesystem (e.g. esarchive) + auto_specs <- read_yaml("conf/autosubmit.yml")[[recipe$Run$filesystem]] + # Check that the autosubmit configuration parameters are present + if (!("auto_conf" %in% names(recipe$Run))) { + error(recipe$Run$logger, + "The 'auto_conf' is missing from the 'Run' section of the recipe.") + error_status <- T + } else if (!all(AUTO_PARAMS %in% names(recipe$Run$auto_conf))) { + error(recipe$Run$logger, + paste0("The element 'Run:auto_conf' must contain all of the ", + "following: ", paste(AUTO_PARAMS, collapse = ", "), ".")) + error_status <- T + } + # Check that the script is not NULL and exists + if (is.null(recipe$Run$auto_conf$script)) { + error(recipe$Run$logger, + "A script must be provided to run the recipe with autosubmit.") + error_status <- T + } else if (!file.exists(recipe$Run$auto_conf$script)) { + error(recipe$Run$logger, + "Could not find the file for the script in 'auto_conf'.") + error_status <- T + } + # Check that the experiment ID exists + if (is.null(recipe$Run$auto_conf$expid)) { + error(recipe$Run$logger, + paste("The Autosubmit EXPID is missing. You can create one by", + "running the following commands on the autosubmit machine:")) + error(recipe$Run$logger, + paste("module load", auto_specs$module_version)) + error(recipe$Run$logger, + paste("autosubmit expid -H", auto_specs$platform, + "-d ")) + error_status <- T + } else if (!dir.exists(paste0(auto_specs$experiment_dir, + recipe$Run$auto_conf$expid))) { + error(recipe$Run$logger, + paste0("No folder in ", auto_specs$experiment_dir, + " for the EXPID", recipe$Run$auto_conf$expid, + ". Please make sure it is correct.")) + error_status <- T + } + if ((recipe$Run$auto_conf$email_notifications) && + (is.null(recipe$Run$auto_conf$email_address))) { + error(recipe$Run$logger, + "Autosubmit notifications are enabled but email address is empty!") + error_status <- T + } + if (is.null(recipe$Run$auto_conf$hpc_user)) { + error(recipe$Run$logger, + "The 'Run:auto_conf:hpc_user' field can not be empty.") + error_status <- T + } else if ((recipe$Run$filesystem == "esarchive") && + (!substr(recipe$Run$auto_conf$hpc_user, 1, 5) == "bsc32")) { + error(recipe$Run$logger, + "Please check your hpc_user ID. 
It should look like: 'bsc32xxx'") + error_status <- T + } + } + # --------------------------------------------------------------------- # WORKFLOW CHECKS # --------------------------------------------------------------------- @@ -329,149 +724,14 @@ check_recipe <- function(recipe) { ## TODO: Implement number of dependent verifications #nverifications <- check_number_of_dependent_verifications(recipe) # info(recipe$Run$logger, paste("Start Dates:", - # paste(fcst.sdate, collapse = " "))) + # paste(fcst.sdate, collapse = " "))) # Return error if any check has failed if (error_status) { error(recipe$Run$logger, "RECIPE CHECK FAILED.") - stop("The recipe contains some errors. The full list is in the logs.") + stop("The recipe contains some errors. Find the full list in the", + " main.log file.") } else { info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFULL #####") - # return(append(nverifications, fcst.sdate)) - } -} - -check_number_of_dependent_verifications <- function(recipe) { - # Number of verifications depends on the variables and indicators requested - # and the order of the workflow: - # workflow: correction + indicator --> only 1 variable is calibrated - # workflow: indicator + correction --> the indicator and the ecv are calibrated - independent_verifications <- NULL - dependent_verifications <- NULL - dep <- 1 - # check workflow order: - if (all(c('Calibration', 'Indicators') %in% names(recipe$Analysis$Workflow))) { - cal_pos <- which(names(recipe$Analysis$Workflow) == 'Calibration') - ind_pos <- which(names(recipe$Analysis$Workflow) == 'Indicators') - if (cal_pos < ind_pos) { - workflow_independent <- FALSE - } else { - workflow_independent <- TRUE - } } - if (workflow_independent) { - independent_verifications <- append(recipe$Analysis$Variables$ECVs, - recipe$Analysis$Variables$Indicators) - } else { - if (is.null(recipe$Analysis$Variables$Indicators) || - (length(recipe$Analysis$Variables$Indicators) == 1 && - is.null(recipe$Analysis$Variables$ECVs))) { - independent_verifications <- append(recipe$Analysis$Variables$ECVs, - recipe$Analysis$Variables$Indicators) - } else { - ecvs <- recipe$Analysi$Variables$ECVs - inds <- recipe$Analysi$Variables$Indicators - ind_table <- read_yaml(paste0(recipe$Run$code_dir, - "conf/indicators_table.yml")) - # first, loop on ecvs if any and compare to indicators - done <- NULL # to gather the indicators reviewed - if (!is.null(ecvs)) { - for (i in 1:length(ecvs)) { - dependent <- list(ecvs[[i]]) - for (j in 1:length(inds)) { - if (ind_table[inds[[j]]$name][[1]]$ECVs == ecvs[[i]]$name) { - if (ind_table[inds[[j]]$name][[1]]$freq == ecvs[[i]]$freq) { - # they are dependent - dependent <- append(dependent, inds[[j]]) - done <- append(done, inds[[j]]) - } - } - } - if (length(dependent) == 1) { - dependent <- NULL - independent_verifications <- append(independent_verifications, - list(ecvs[[i]])) - } else { - dependent_verifications <- append(dependent_verifications, - list(dependent)) - } - } - # There are indicators not reviewed yet? 
- if (length(done) < length(inds)) { - if (length(inds) == 1) { - independent_verifications <- append(independent_verifications, - inds) - } else { - done <- NULL - for (i in 1:(length(inds) - 1)) { - dependent <- list(inds[[i]]$name) - if (is.na(match(unlist(dependent), unlist(done)))) { - for (j in (i+1):length(inds)) { - if (ind_table[inds[[i]]$name][[1]]$ECVs == - ind_table[inds[[j]]$name][[1]]$ECVs) { - if (ind_table[inds[[i]]$name][[1]]$freq == - ind_table[inds[[j]]$name][[1]]$freq) { - dependent <- append(dependent, inds[[j]]$name) - done <- dependent - } - } - } - } - if (length(dependent) == 1) { - independent_verifications <- dependent - dependent <- NULL - } else { - dependent_verifications <- dependent - } - } - } - } - } else { # there are only Indicators: - done <- NULL - for (i in 1:(length(inds) - 1)) { - dependent <- list(inds[[i]]$name) - if (is.na(match(unlist(dependent), unlist(done)))) { - for (j in (i+1):length(inds)) { - if (ind_table[inds[[i]]$name][[1]]$ECVs == - ind_table[inds[[j]]$name][[1]]$ECVs) { - if (ind_table[inds[[i]]$name][[1]]$freq == - ind_table[inds[[j]]$name][[1]]$freq) { - dependent <- append(dependent, inds[[j]]$name) - done <- dependent - } - } - } - } - if (length(dependent) == 1) { - independent_verifications <- dependent - dependent <- NULL - } else { - dependent_verifications <- dependent - } - } - } - } - } - if (!is.null(independent_verifications)) { - info(logger, paste("The variables for independent verification are ", - paste(independent_verifications, collapse = " "))) - } - if (!is.null(dependent_verifications)) { - info(logger, paste("The variables for dependent verification are: ", - paste(dependent_verifications, collapse = " "))) - } - # remove unnecessary names in objects to be removed - return(list(independent = independent_verifications, - dependent = dependent_verifications)) } -#workflow <- list(Calibration = list(method = 'SBC'), -# Skill = list(metric = 'RPSS')) -#ApplyWorkflow <- function(workflow) { - -#res <- do.call('CST_BiasCorrection', -# args = list(exp = lonlat_data$exp, -# obs = lonlat_data$obs)) - - - - diff --git a/tools/data_summary.R b/tools/data_summary.R index d8f2b1b63f1a732ef08e72f712a1d24bbd49fa73..b76101bac4bba40b1a26ff2cdea1c1c16bae9580 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -1,5 +1,7 @@ # Print a summary of the loaded data for the user, for each object. -# object: hindcast, forecast or reference data in s2dv_cube format. 
+# data_cube: An s2dv_cube returned by one of the functions in Auto-S2S +# recipe: The Auto-S2S recipe as returned by read_yaml() + ## TODO: Adapt to daily/subseasonal cases ## TODO: Add check for missing files/NAs by dimension @@ -8,30 +10,41 @@ data_summary <- function(data_cube, recipe) { object_name <- deparse(substitute(data_cube)) if (recipe$Analysis$Variables$freq == "monthly_mean") { date_format <- "%b %Y" - } else if (recipe$Analysis$Variables$freq == "daily_mean") { + } else if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { date_format <- "%b %d %Y" } - months <- unique(format(as.Date(data_cube$Dates[[1]]), format = "%B")) - months <- paste(as.character(months), collapse = ", ") - sdate_min <- format(min(as.Date(data_cube$Dates[[1]])), format = date_format) - sdate_max <- format(max(as.Date(data_cube$Dates[[1]])), format = date_format) - - # Create log instance and sink output to logfile and terminal + months <- unique(format(as.Date(data_cube$attrs$Dates), format = '%B')) + months <- paste(as.character(months), collapse=", ") + sdate_min <- format(min(as.Date(data_cube$attrs$Dates)), + format = date_format) + sdate_max <- format(max(as.Date(data_cube$attrs$Dates)), + format = date_format) + # Log the summary info(recipe$Run$logger, "DATA SUMMARY:") info(recipe$Run$logger, paste(object_name, "months:", months)) info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to", - sdate_max)) + sdate_max)) info(recipe$Run$logger, paste(object_name, "dimensions:")) # Use capture.output() and for loop to display results neatly output_string <- capture.output(dim(data_cube$data)) for (i in output_string) { info(recipe$Run$logger, i) } + # Print statistical summary of the data for every variable info(recipe$Run$logger, paste0("Statistical summary of the data in ", - object_name, ":")) - output_string <- capture.output(summary(data_cube$data)) - for (i in output_string) { - info(recipe$Run$logger, i) + object_name, ":")) + for (var_index in 1:data_cube$dims[['var']]) { + variable_name <- data_cube$attrs$Variable$varName[var_index] + variable_units <- data_cube$attrs$Variable$metadata[[variable_name]]$units + info(recipe$Run$logger, + paste0("Variable: ", variable_name, + " (units: ", variable_units, ")")) + output_string <- capture.output(summary(Subset(data_cube$data, + along = "var", + indices = var_index))) + for (i in output_string) { + info(recipe$Run$logger, i) + } } info(recipe$Run$logger, "---------------------------------------------") invisible(gc()) diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 18962f93010bae7fe6693beb48a5f485075954f5..b22274cc5c0e7e5081a3f48f492cdcbfe8fba026 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -2,11 +2,11 @@ divide_recipe <- function(recipe) { ## TODO: Implement dependent vs independent verifications? 
- info(recipe$Run$logger, "Spliting recipe in single verifications.") + info(recipe$Run$logger, "Splitting recipe in single verifications.") beta_recipe <- list(Description = append(recipe$Description, list(Origin = paste("Atomic recipe,", - "split from:", - recipe$name))), + "split from:", + recipe$name))), Analysis = list(Horizon = recipe$Analysis$Horizon, Variables = NULL, Datasets = NULL, @@ -14,16 +14,25 @@ divide_recipe <- function(recipe) { Region = NULL, Regrid = recipe$Analysis$Regrid, Workflow = recipe$Analysis$Workflow, + ncores = recipe$Analysis$ncores, + remove_NAs = recipe$Analysis$remove_NAs, Output_format = - recipe$Analysis$Output_format), - Run = recipe$Run[c("Loglevel", "output_dir", "Terminal", - "code_dir", "logfile")]) - - # duplicate recipe by independent variables: + recipe$Analysis$Output_format), + Run = recipe$Run[c("Loglevel", "output_dir", "Terminal", + "code_dir", "logfile", "filesystem")]) + + # duplicate recipe by independent variables:ç + # If a single variable is not given inside a list, rebuild structure + if (any(c("name", "freq", "units") %in% names(recipe$Analysis$Variables))) { + variables <- recipe$Analysis$Variables + recipe$Analysis$Variables <- NULL + recipe$Analysis$Variables[[1]] <- variables + } all_recipes <- rep(list(beta_recipe), length(recipe$Analysis$Variables)) for (var in 1:length(recipe$Analysis$Variables)) { all_recipes[[var]]$Analysis$Variables <- recipe$Analysis$Variables[[var]] } + # for (dep in verifications$dependent) { # all_recipes[[i]]$Analysis$Variables <- dep # i = i + 1 @@ -33,35 +42,49 @@ divide_recipe <- function(recipe) { # duplicate recipe by Datasets: # check Systems - if (recipe$Analysis$Datasets$Multimodel) { + # If a single system is not given inside a list, rebuild structure + if (any(c("name", "member") %in% names(recipe$Analysis$Datasets$System))) { + system <- recipe$Analysis$Datasets$System + recipe$Analysis$Datasets$System <- NULL + recipe$Analysis$Datasets$System[[1]] <- system + } + + if (recipe$Analysis$Datasets$Multimodel %in% c(TRUE, 'both')) { for (reci in 1:length(all_recipes)) { all_recipes[[reci]]$Analysis$Datasets <- - list(System = recipe$Analysis$Datasets$System, - Multimodel = recipe$Analysis$Datasets$Multimodel, - Reference = NULL) + list(System = recipe$Analysis$Datasets$System, + Multimodel = recipe$Analysis$Datasets$Multimodel, + Reference = NULL) } } else { for (sys in 1:length(recipe$Analysis$Datasets$System)) { for (reci in 1:length(all_recipes)) { all_recipes[[reci]]$Analysis$Datasets <- - list(System = recipe$Analysis$Datasets$System[[sys]], - Multimodel = recipe$Analysis$Datasets$Multimodel, - Reference = NULL) + list(System = recipe$Analysis$Datasets$System[[sys]], + Multimodel = recipe$Analysis$Datasets$Multimodel, + Reference = NULL) } if (sys == 1) { recipes <- all_recipes } else { recipes <- append(recipes, all_recipes) } - } + } # Rest of horizons all_recipes <- recipes rm(list = 'recipes') } - # check References + # Check references + # If a single reference is not given inside a list, rebuild structure + if (c("name") %in% names(recipe$Analysis$Datasets$Reference)) { + reference <- recipe$Analysis$Datasets$Reference + recipe$Analysis$Datasets$Reference <- NULL + recipe$Analysis$Datasets$Reference[[1]] <- reference + } + for (ref in 1:length(recipe$Analysis$Datasets$Reference)) { for (reci in 1:length(all_recipes)) { all_recipes[[reci]]$Analysis$Datasets$Reference <- - recipe$Analysis$Datasets$Reference[[ref]] + recipe$Analysis$Datasets$Reference[[ref]] } if (ref == 1) { recipes <- 
all_recipes @@ -70,16 +93,27 @@ divide_recipe <- function(recipe) { } } all_recipes <- recipes + rm(list = 'recipes') + # Duplicate recipe by Region recipes <- list() + if (any(c("latmin", "latmax", "lonmin", "lonmax") %in% + names(recipe$Analysis$Region))) { + region <- recipe$Analysis$Region + recipe$Analysis$Region <- NULL + recipe$Analysis$Region[[1]] <- region + } for (reg in 1:length(recipe$Analysis$Region)) { - # if (length(recipe$Analysis$Region[[reg]]) == 4) { ##TODO: THIS SHOULD BE ONLY CHECK IN THE RECIPE CHECKER? - for (reci in 1:length(all_recipes)) { - all_recipes[[reci]]$Analysis$Region <- recipe$Analysis$Region[[reg]] - } + for (reci in 1:length(all_recipes)) { + all_recipes[[reci]]$Analysis$Region <- recipe$Analysis$Region[[reg]] + } + if (reg == 1) { + recipes <- all_recipes + } else { recipes <- append(recipes, all_recipes) - # } + } } + all_recipes <- recipes rm(list = 'recipes') # Duplicate recipe by start date @@ -87,12 +121,12 @@ divide_recipe <- function(recipe) { for (sdate in 1:length(recipe$Analysis$Time$sdate)) { for (reci in 1:length(all_recipes)) { all_recipes[[reci]]$Analysis$Time <- - list(sdate = recipe$Analysis$Time$sdate[[sdate]], - fcst_year = recipe$Analysis$Time$fcst_year, - hcst_start = recipe$Analysis$Time$hcst_start, - hcst_end = recipe$Analysis$Time$hcst_end, - ftime_min = recipe$Analysis$Time$ftime_min, - ftime_max = recipe$Analysis$Time$ftime_max) + list(sdate = recipe$Analysis$Time$sdate[[sdate]], + fcst_year = recipe$Analysis$Time$fcst_year, + hcst_start = recipe$Analysis$Time$hcst_start, + hcst_end = recipe$Analysis$Time$hcst_end, + ftime_min = recipe$Analysis$Time$ftime_min, + ftime_max = recipe$Analysis$Time$ftime_max) } if (sdate == 1) { recipes <- all_recipes @@ -102,9 +136,17 @@ divide_recipe <- function(recipe) { } all_recipes <- recipes rm(list = 'recipes') + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + for (reci in 1:length(all_recipes)) { + all_recipes[[reci]]$Analysis$Time <- + list(fcst_year = recipe$Analysis$Time$fcst_year, + hcst_start = recipe$Analysis$Time$hcst_start, + hcst_end = recipe$Analysis$Time$hcst_end, + ftime_min = recipe$Analysis$Time$ftime_min, + ftime_max = recipe$Analysis$Time$ftime_max) + } } # Rest of horizons # Save all recipes in separate YAML files - ## TODO: Re-add recipe$Run$logger for (reci in 1:length(all_recipes)) { if (reci < 10) { recipe_number <- paste0("0", reci) @@ -112,15 +154,16 @@ divide_recipe <- function(recipe) { recipe_number <- reci } write_yaml(all_recipes[[reci]], - paste0(recipe$Run$output_dir, "/logs/recipes/recipe_", - recipe_number, ".yml")) - all_recipes[[reci]]$Run$logger <- recipe$Run$logger + paste0(recipe$Run$output_dir, "/logs/recipes/atomic_recipe_", + recipe_number, ".yml")) } info(recipe$Run$logger, paste("The main recipe has been divided into", length(all_recipes), - "atomic recipes.")) - text <- paste0("See output directory ", recipe$Run$output_dir, - "/logs/recipes/ to see all the individual atomic recipes.") + "atomic recipes.")) + text <- paste0("Check output directory ", recipe$Run$output_dir, + "/logs/recipes/ to see all the individual atomic recipes.") info(recipe$Run$logger, text) - return(all_recipes) + ## TODO: Change returns? 
+ return(list(n_atomic_recipes = length(all_recipes), + outdir = recipe$Run$output_dir)) } diff --git a/tools/get_archive.R b/tools/get_archive.R new file mode 100644 index 0000000000000000000000000000000000000000..11602698b6a07ad4d82acde880291906263fc201 --- /dev/null +++ b/tools/get_archive.R @@ -0,0 +1,12 @@ +get_archive <- function(recipe) { + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + archive <- + read_yaml(paste0("conf/archive.yml"))[[recipe$Run$filesystem]] + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + archive <- + read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] + } + ## TODO: Add dictionary filesystem dependency? + # dict <- read_yaml("conf/variable-dictionary.yml") + return(archive) +} diff --git a/tools/libs.R b/tools/libs.R index a0767f76c79105d9e60cc88cc76bde875136abc4..401467860ba602c0bd459439e973e3637adb7a4f 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -1,35 +1,45 @@ +# Libraries library(log4r) +library(docopt) library(startR) library(ClimProjDiags) library(multiApply) library(yaml) library(s2dv) library(abind) -# library(s2dverification) -# library(ncdf4) -# library(easyVerification) library(easyNCDF) library(CSTools) library(lubridate) library(PCICt) +library(clock) library(RColorBrewer) -library(grDevices) -# -# library(parallel) -# library(pryr) # To check mem usage. -#setwd("/esarchive/scratch/nperez/git/S2S4E-backend-BSC/data-analysis/") -# source('export_2_nc.R') -# source('S2S/s2s.filefmt.R') -# source('s2s.calib.R') -# -# source("s2s_tools.R") -# source("Calibration_fcst4.R") -# source("R_Reorder.R") -# source("R_CST_MergeDims.R") -#setwd(recipe$Run$code_dir) -# # To be removed when new package is done by library(CSOperational) +library(configr) +library(sf) +library(ggplot2) +library(rnaturalearth) +library(cowplot) +library(stringr) +library(pryr) +library(ncdf4) +library(formattable) ## to plot horizontal color bars - used ?? +library(kableExtra) +library(memuse) # To check mem usage. 
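The list handling introduced in divide_recipe() above, and undone by the new restructure_recipe() further down, is easier to follow on a toy object. The sketch below is illustrative only and is not part of the patch; the `toy_recipe` object and its values are invented for the example.

```r
# Illustrative sketch of the list-normalization pattern used by divide_recipe()
# and flattened back by restructure_recipe(). 'toy_recipe' is an invented object.
toy_recipe <- list(Analysis = list(Variables = list(name = "tas",
                                                    freq = "monthly_mean")))
# Wrap: a single entry given as a bare mapping is nested one level down,
# so the splitting loop can iterate over it like a multi-entry list.
if (any(c("name", "freq", "units") %in% names(toy_recipe$Analysis$Variables))) {
  toy_recipe$Analysis$Variables <- list(toy_recipe$Analysis$Variables)
}
length(toy_recipe$Analysis$Variables)  # 1: ready to be split into atomic recipes
# Flatten: a one-element list of lists is reduced back to a plain named list,
# so later code can keep using recipe$Analysis$Variables$name directly.
if (length(toy_recipe$Analysis$Variables) == 1 &&
    is.list(toy_recipe$Analysis$Variables[[1]])) {
  toy_recipe$Analysis$Variables <- toy_recipe$Analysis$Variables[[1]]
}
toy_recipe$Analysis$Variables$name  # "tas"
```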
+ +# Functions +## To be removed when new package is done by library(CSOperational) source("tools/check_recipe.R") source("tools/prepare_outputs.R") source("tools/divide_recipe.R") source("tools/data_summary.R") +source("tools/read_atomic_recipe.R") +source("tools/write_autosubmit_conf.R") +source("tools/get_archive.R") +source("tools/Utils.R") +source("tools/restructure_recipe.R") # source("tools/add_dims.R") # Not sure if necessary yet + +# Settings +options(bitmapType = 'cairo') +pdf(NULL) +Sys.setlocale("LC_ALL", 'en_GB.UTF-8') + diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 1972aef0cb38f7e845ebd0fdfc1ca19a28950939..f994252506a6b3edfe644100deb94b35c7a8a4b7 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -7,6 +7,7 @@ #' #'@param recipe_file path to a YAML file with Auto-S2S configuration recipe #'@param disable_checks whether to disable the recipe checks +#'@param unique_ID whether to add a unique ID to the output directory #'@return list contaning recipe with logger, log file name and log dir name #' #'@import log4r @@ -21,30 +22,41 @@ #'@export prepare_outputs <- function(recipe_file, - disable_checks = FALSE) { - -# recipe_file: path to recipe YAML file -# disable_checks: If TRUE, does not perform checks on recipe - + disable_checks = FALSE, + uniqueID = TRUE, + restructure = TRUE) { + # recipe_file: path to recipe YAML file + # disable_checks: If TRUE, does not perform checks on recipe + # disable_uniqueID: If TRUE, does not add a unique ID to output dir recipe <- read_yaml(recipe_file) recipe$recipe_path <- recipe_file recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) - + output_dir = recipe$Run$output_dir - # Create output folders: - folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)), "_", - gsub(" ", "", gsub(":", "", gsub("-", "", Sys.time())))) + # Create output folders + if (!uniqueID) { + folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name))) + } else { + folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)), "_", + gsub(" ", "", gsub(":", "", gsub("-", "", + Sys.time())))) + } + recipe$Run$output_dir <- file.path(output_dir, folder_name) print("Saving all outputs to:") - print(output_dir) - print(folder_name) - dir.create(file.path(output_dir, folder_name, 'outputs'), recursive = TRUE) - dir.create(file.path(output_dir, folder_name, 'logs')) - dir.create(file.path(output_dir, folder_name, 'logs', 'recipes')) + print(paste0(output_dir, folder_name)) + if (dir.exists(file.path(output_dir, folder_name))) { + warning("The output directory already exists and files may be overwritten.") + } else { + dir.create(file.path(output_dir, folder_name, 'outputs'), + recursive = T, showWarnings = F) + } + dir.create(file.path(output_dir, folder_name, 'logs', 'recipes'), + recursive = T, showWarnings = F) # Copy recipe to output folder file.copy(recipe$recipe_path, file.path(output_dir, folder_name, 'logs', - 'recipes')) + 'recipes')) # Create log output file - logfile <- file.path(output_dir, folder_name, 'logs', 'log.txt') + logfile <- file.path(output_dir, folder_name, 'logs', 'main.log') file.create(logfile) # Set default behaviour of logger @@ -68,13 +80,23 @@ prepare_outputs <- function(recipe_file, appenders = list(file_appender(logfile, append = TRUE, layout = default_log_layout()))) } - recipe$Run$output_dir <- file.path(output_dir, folder_name) recipe$Run$logger <- logger recipe$Run$logfile <- logfile + + # Set up default filesystem + if (is.null(recipe$Run$filesystem)) { + 
recipe$Run$filesystem <- "esarchive" + warn(recipe$Run$logger, + "Filesystem not specified in the recipe. Setting it to 'esarchive'.") + } + # Restructure the recipe to make the atomic recipe more readable + if (restructure) { + recipe <- restructure_recipe(recipe) + } # Run recipe checker if (disable_checks) { warn(recipe$Run$logger, - "Recipe checks disabled. The recipe will not be checked for errors.") + "Recipe checks disabled. The recipe will not be checked for errors.") } else { check_recipe(recipe) } diff --git a/tools/read_atomic_recipe.R b/tools/read_atomic_recipe.R new file mode 100644 index 0000000000000000000000000000000000000000..fb26cb11a4e71b4b7059a48acccd0ec4c85d235d --- /dev/null +++ b/tools/read_atomic_recipe.R @@ -0,0 +1,59 @@ +#'Read recipe YAML file and create and store logfile info +#' +#'The purpose of this function is to read the atomic recipe generated by +#'divide_recipe() and create its individual logfile in the output directory +#'specified in the recipe. It returns the recipe as a list with an object of +#'class logger added to it, that stores information on the workflow execution +#'and errors. +#' +#'@param recipe_file path to a YAML file with Auto-S2S configuration recipe +#' +#'@return list contaning recipe with logger, log file name and log dir name +#' +#'@import log4r +#'@import yaml +#' +#'@examples +#'setwd("/esarchive/scratch/vagudets/repos/auto-s2s/") +#'library(yaml) +#'recipe <- prepare_outputs("modules/data_load/recipe_1.yml") +#'info(recipe$Run$logger, "This is an info message") +#' +#'@export + +read_atomic_recipe <- function(recipe_file) { + # recipe_file: path to recipe YAML file + recipe <- read_yaml(recipe_file) + recipe$recipe_path <- recipe_file + recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) + # Create log file for atomic recipe + logfile <- file.path(recipe$Run$output_dir, 'logs', + paste0(recipe$name, '.log')) + file.create(logfile) + # Set default behaviour of logger + if (is.null(recipe$Run)) { + recipe$Run <- list(Loglevel = 'INFO', Terminal = TRUE) + } + if (is.null(recipe$Run$Loglevel)) { + recipe$Run$Loglevel <- 'INFO' + } + if (!is.logical(recipe$Run$Terminal)) { + recipe$Run$Terminal <- TRUE + } + # logger set-up + if (recipe$Run$Terminal) { + logger <- log4r::logger(threshold = recipe$Run$Loglevel, + appenders = list(console_appender(layout = default_log_layout()), + file_appender(logfile, append = TRUE, + layout = default_log_layout()))) + } else { + logger <- log4r::logger(threshold = recipe$Run$Loglevel, + appenders = list(file_appender(logfile, append = TRUE, + layout = default_log_layout()))) + } + recipe$Run$logger <- logger + recipe$Run$logfile <- logfile + # Restructure recipe to flatten redundant lists + recipe <- restructure_recipe(recipe) + return(recipe) +} diff --git a/tools/restructure_recipe.R b/tools/restructure_recipe.R new file mode 100644 index 0000000000000000000000000000000000000000..0a9f28f42f1e8b27cb2957242320b920ba925b61 --- /dev/null +++ b/tools/restructure_recipe.R @@ -0,0 +1,24 @@ +restructure_recipe <- function(recipe) { + # Flattens the structure of the recipe to improve the readability of the code + # System + if ((length(recipe$Analysis$Datasets$System) == 1) && + (is.list(recipe$Analysis$Datasets$System[[1]]))) { + recipe$Analysis$Datasets$System <- recipe$Analysis$Datasets$System[[1]] + } + # Reference + if ((length(recipe$Analysis$Datasets$Reference) == 1) && + (is.list(recipe$Analysis$Datasets$Reference[[1]]))) { + recipe$Analysis$Datasets$Reference <- 
recipe$Analysis$Datasets$Reference[[1]] + } + # Variable + if ((length(recipe$Analysis$Variables) == 1) && + (is.list(recipe$Analysis$Variables[[1]]))) { + recipe$Analysis$Variables <- recipe$Analysis$Variables[[1]] + } + # Region + if ((length(recipe$Analysis$Region) == 1) && + (is.list(recipe$Analysis$Region[[1]]))) { + recipe$Analysis$Region <- recipe$Analysis$Region[[1]] + } + return(recipe) +} diff --git a/tools/tmp/as.s2dv_cube.R b/tools/tmp/as.s2dv_cube.R deleted file mode 100644 index 019f69a223e8d62006bc70dad02b75f6ef2e2a53..0000000000000000000000000000000000000000 --- a/tools/tmp/as.s2dv_cube.R +++ /dev/null @@ -1,184 +0,0 @@ -#'Conversion of 'startR_array' or 'list' objects to 's2dv_cube' -#' -#'This function converts data loaded using startR package or s2dverification Load function into a 's2dv_cube' object. -#' -#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -#'@author Nicolau Manubens, \email{nicolau.manubens@bsc.es} -#' -#'@param object an object of class 'startR_array' generated from function \code{Start} from startR package (version 0.1.3 from earth.bsc.es/gitlab/es/startR) or a list output from function \code{Load} from s2dverification package. -#' -#'@return The function returns a 's2dv_cube' object to be easily used with functions \code{CST} from CSTools package. -#' -#'@seealso \code{\link{s2dv_cube}}, \code{\link[s2dverification]{Load}}, \code{\link[startR]{Start}} and \code{\link{CST_Load}} -#'@examples -#'\dontrun{ -#'library(startR) -#'repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' -#'data <- Start(dat = repos, -#' var = 'tas', -#' sdate = c('20170101', '20180101'), -#' ensemble = indices(1:20), -#' time = 'all', -#' latitude = 'all', -#' longitude = indices(1:40), -#' return_vars = list(latitude = 'dat', longitude = 'dat', time = 'sdate'), -#' retrieve = TRUE) -#'data <- as.s2dv_cube(data) -#'class(data) -#'startDates <- c('20001101', '20011101', '20021101', -#' '20031101', '20041101', '20051101') -#'data <- Load(var = 'tas', exp = 'system5c3s', -#' nmember = 15, sdates = startDates, -#' leadtimemax = 3, latmin = 27, latmax = 48, -#' lonmin = -12, lonmax = 40, output = 'lonlat') -#'data <- as.s2dv_cube(data) -#'class(data) -#'} -#'@export -as.s2dv_cube <- function(object) { - if (is.list(object)) { - if (is.null(object) || (is.null(object$mod) && is.null(object$obs))) { - stop("The s2dverification::Load call did not return any data.") - } - obs <- object - obs$mod <- NULL - object$obs <- NULL - names(object)[[1]] <- 'data' - names(obs)[[1]] <- 'data' - remove_matches <- function(v, patterns) { - if (length(v) > 0) { - matches <- c() - for (pattern in patterns) { - matches <- c(matches, which(grepl(pattern, v))) - } - if (length(matches) > 0) { - v <- v[-matches] - } - } - v - } - - harmonize_patterns <- function(v) { - matches <- grepl('.*\\.nc$', v) - if (sum(!matches) > 0) { - match_indices <- which(!matches) - v[match_indices] <- sapply(v[match_indices], function(x) paste0(x, '*')) - } - v <- glob2rx(v) - v <- gsub('\\$.*\\$', '*', v) - v - } - - if (!is.null(obs$data)) { - obs$Datasets$exp <- NULL - obs$Datasets <- obs$Datasets$obs - obs_path_patterns <- sapply(obs$Datasets, function(x) attr(x, 'source')) - obs_path_patterns <- harmonize_patterns(obs_path_patterns) - } - - if (!is.null(object$data)) { - object$Datasets$obs <- NULL - object$Datasets <- object$Datasets$exp - exp_path_patterns <- sapply(object$Datasets, function(x) attr(x, 'source')) - exp_path_patterns <- harmonize_patterns(exp_path_patterns) - } - - 
if (!is.null(obs$data) && !is.null(object$data)) { - obs$source_files <- remove_matches(obs$source_files, - exp_path_patterns) - obs$not_found_files <- remove_matches(obs$not_found_files, - exp_path_patterns) - - object$source_files <- remove_matches(object$source_files, - obs_path_patterns) - object$not_found_files <- remove_matches(object$not_found_files, - obs_path_patterns) - } - - result <- list() - if (!is.null(object$data)) { - class(object) <- 's2dv_cube' - result$exp <- object - } - if (!is.null(obs$data)) { - class(obs) <- 's2dv_cube' - result$obs <- obs - } - if (is.list(result)) { - if (is.null(result$exp)) { - result <- result$obs - } else if (is.null(result$obs)) { - result <- result$exp - } else { - warning("The output is a list of two 's2dv_cube' objects", - " corresponding to 'exp' and 'obs'.") - } - } - - } else if (class(object) == 'startR_array') { - result <- list() - result$data <- as.vector(object) - dim(result$data) <- dim(object) - - dat_attr_names <- names(attributes(object)$Variables$dat1) - common_attr_names <- names(attributes(object)$Variables$common) - # $lon - known_lon_names <- s2dv:::.KnownLonNames() - if (!is.null(dat_attr_names[which(dat_attr_names %in% known_lon_names)]) & - !identical(dat_attr_names[which(dat_attr_names %in% known_lon_names)], character(0))) { - result$lon <- attributes(object)$Variables$dat1[[dat_attr_names[which(dat_attr_names %in% known_lon_names)]]] - } else if (!is.null(common_attr_names[which(common_attr_names %in% known_lon_names)]) & - !identical(common_attr_names[which(common_attr_names %in% known_lon_names)], character(0))) { - result$lon <- attributes(object)$Variables$common[[common_attr_names[which(common_attr_names %in% known_lon_names)]]] - } else { - warning("'lon' is not found in this object.") - result$lon <- NULL - } - # $lat - known_lat_names <- s2dv:::.KnownLatNames() - if (!is.null(dat_attr_names[which(dat_attr_names %in% known_lat_names)]) & - !identical(dat_attr_names[which(dat_attr_names %in% known_lat_names)], character(0))) { - result$lat <- attributes(object)$Variables$dat1[[dat_attr_names[which(dat_attr_names %in% known_lat_names)]]] - } else if (!is.null(common_attr_names[which(common_attr_names %in% known_lat_names)]) & - !identical(common_attr_names[which(common_attr_names %in% known_lat_names)], character(0))) { - result$lat <- attributes(object)$Variables$common[[common_attr_names[which(common_attr_names %in% known_lat_names)]]] - } else { - warning("'lat' is not found in this object.") - result$lat <- NULL - } - - vars <- which(!common_attr_names %in% c("time", known_lon_names, known_lat_names)) - - if (length(vars) > 1) { - warning("More than one variable has been provided and ", - "only the first one '", common_attr_names[vars[1]],"' will be used.") - vars <- vars[1] - } - - Variable <- list() - Variable$varName <- names(attributes(object)$Variables$common)[vars] - attr(Variable, 'variable') <- attributes(object)$Variables$common[[vars]] - result$Variable <- Variable - dims <- dim(object) - if (any(c('sdate', 'sdates') %in% names(dims))) { - n_sdates <- dims[which(names(dims) == 'sdate' | names(dims) == 'sdates')] - sdates <- attributes(object)$Variables$common$time[1 : n_sdates] - } else { - sdates <- attributes(object)$Variables$common$time[1] - } - Dataset <- list(list(InitializationDates = list(Member_1 = sdates))) - names(Dataset) <- list(deparse(substitute(object))) - result$Datasets <- Dataset - result$Dates$start <- attributes(object)$Variables$common$time - result$when <- Sys.time() - 
result$source_files <- as.vector(attributes(object)$Files) - result$load_parameters <- attributes(object)$FileSelectors - class(result) <- 's2dv_cube' - } else { - stop("The class of parameter 'object' is not implemented", - " to be converted into 's2dv_cube' class yet.") - } - result - -} - diff --git a/modules/verifications.R b/tools/verifications.R similarity index 100% rename from modules/verifications.R rename to tools/verifications.R diff --git a/tools/write_autosubmit_conf.R b/tools/write_autosubmit_conf.R new file mode 100644 index 0000000000000000000000000000000000000000..95ca93f0f09f10f8b8b67a5076f24918c3d6db7e --- /dev/null +++ b/tools/write_autosubmit_conf.R @@ -0,0 +1,109 @@ +# Function to write autosubmit configuration from an Auto-S2S recipe +write_autosubmit_conf <- function(recipe, nchunks) { + # Experiment ID + expid <- recipe$Run$auto_conf$expid + # Directory with the experiment templates + template_dir <- paste0("autosubmit/conf_", recipe$Run$filesystem, "/") + # Read autosubmit info for the specific filesystem (e.g. esarchive) + auto_specs <- read_yaml("conf/autosubmit.yml")[[recipe$Run$filesystem]] + # Output directory + dest_dir <- paste0(auto_specs$experiment_dir, expid, "/conf/") + # Modify the configuration files according to the info in the recipe + for (file in list.files(template_dir)) { + conf_type <- strsplit(file, split = "[.]")[[1]][1] + extension <- strsplit(file, split = "[.]")[[1]][2] + dest_file <- paste0(conf_type, "_", expid, ".", extension) + # Read configuration file + conf <- read.config(file = paste0(template_dir, file)) + if (conf_type == "autosubmit") { + # Section 1: autosubmit.conf + ## expid, email notifications and address + conf$config$EXPID <- expid + if (recipe$Run$auto_conf$email_notifications) { + conf$mail$NOTIFICATIONS <- "True" + } else { + conf$mail$NOTIFICATIONS <- "False" + } + conf$mail$TO <- recipe$Run$auto_conf$email_address + } else if (conf_type == "expdef") { + # Section 2: expdef + ## expid, numchunks, project_type?, project_destination = auto-s2s + conf$DEFAULT$EXPID <- expid + conf$experiment$DATELIST <- format(Sys.Date(), "%Y%m%d") + conf$experiment$NUMCHUNKS <- nchunks + conf$local$PROJECT_PATH <- recipe$Run$code_dir + } else if (conf_type == "jobs") { + # Section 3: jobs + ## wallclock, notify_on, platform?, processors, + conf$JOBS$verification$WALLCLOCK <- recipe$Run$auto_conf$wallclock + if (recipe$Run$auto_conf$notify_completed) { + conf$JOBS$verification$NOTIFY_ON <- paste(conf$JOBS$verification$NOTIFY_ON, + "COMPLETED") + } + if (recipe$Run$auto_conf$notify_failed) { + conf$JOBS$verification$NOTIFY_ON <- paste(conf$JOBS$verification$NOTIFY_ON, + "FAILED") + } + conf$JOBS$verification$PROCESSORS <- recipe$Run$auto_conf$processors_per_job # ncores? + conf$JOBS$verification$CUSTOM_DIRECTIVES <- recipe$Run$auto_conf$custom_directives + # Only include Scorecards job if section exists in the recipe and + # is set to 'execute: True' + if (!("Scorecards" %in% names(recipe$Analysis$Workflow)) || + (!recipe$Analysis$Workflow$Scorecards$execute)) { + conf$JOBS$scorecards <- NULL + } else { + if (recipe$Run$auto_conf$notify_completed) { + conf$JOBS$scorecards$NOTIFY_ON <- paste(conf$JOBS$scorecards$NOTIFY_ON, + "COMPLETED") + } + if (recipe$Run$auto_conf$notify_failed) { + conf$JOBS$scorecards$NOTIFY_ON <- paste(conf$JOBS$scorecards$NOTIFY_ON, + "FAILED") + } + } + } else if (conf_type == "platforms") { + # Section 4: platform configuration + ## nord3v2 configuration... platform name? 
user, processors_per_node + conf$Platforms[[auto_specs$platform]]$USER <- + recipe$Run$auto_conf$hpc_user + } else if (conf_type == "proj") { + # Section 5: proj + ## modules? Info that goes on script, e.g. output directory + conf$common$OUTDIR <- recipe$Run$output_dir + conf$common$SCRIPT <- recipe$Run$auto_conf$script + conf$common$RECIPE <- paste0(recipe$name, ".yml") + } + # Write config file inside autosubmit dir + ## TODO: Change write.type depending on autosubmit version + write.config(conf, paste0(dest_dir, dest_file), + write.type = auto_specs$conf_format) + Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F) + } + info(recipe$Run$logger, + paste("##### AUTOSUBMIT CONFIGURATION WRITTEN FOR", expid, "#####")) + info(recipe$Run$logger, + paste0("You can check your experiment configuration at: ", + "/esarchive/autosubmit/", expid, "/conf/")) + # Print instructions/commands for user + if (recipe$Run$Terminal) { + ## TODO: Change SSH message for other environments (outside BSC) + info(recipe$Run$logger, + paste("Please SSH into bscesautosubmit01 or bscesautosubmit02 and run", + "the following commands:")) + info(recipe$Run$logger, + paste("module load", auto_specs$module_version)) + info(recipe$Run$logger, + paste("autosubmit create", expid)) + info(recipe$Run$logger, + paste("autosubmit refresh", expid)) + info(recipe$Run$logger, + paste("nohup autosubmit run", expid, "& disown")) + } else { + print(paste("Please SSH into bscesautosubmit01 or bscesautosubmit02 and run", + "the following commands:")) + print(paste("module load", auto_specs$module_version)) + print(paste("autosubmit create", expid)) + print(paste("autosubmit refresh", expid)) + print(paste("nohup autosubmit run", expid, "& disown")) + } +} diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-handson.md b/use_cases/ex0_1_sample_dataset/ex0_1-handson.md new file mode 100644 index 0000000000000000000000000000000000000000..c6d824ef3fa61603c7100ecb95915843d7c8ac28 --- /dev/null +++ b/use_cases/ex0_1_sample_dataset/ex0_1-handson.md @@ -0,0 +1,240 @@ +# Use case 0.1: Loading a sample dataset + +Author: Victòria Agudetse Roures + +## Goal +Load a sample dataset into SUNSET without the need configure an archive of netCDF or GRIB files. + +## 0. SUNSET software stack + +The first step to use SUNSET is to create a copy of the code in your local environment. Open a terminal and `cd` to the directory where you would like to store your local copy of SUNSET. For example: `/esarchive/scratch//git/`. If a directory does not exist yet, you can create it with the `mkdir` shell command. + +```shell +# Clone the GitLab repository to create a local copy of the code +git clone https://earth.bsc.es/gitlab/es/sunset.git +``` + +If you are using BSC infrastructure, all of the software dependencies are already installed within our common environment. However, if you are running SUNSET outside of the BSC facilities, you can install the dependencies by creating a conda environment. A script is provided in the SUNSET repository, and you can install the environment by running the following line from the main folder of the SUNSET repository: + +```shell +bash conda_installation/load_sunset.bash +``` + +To run the line above, you should replace `` with the path where you want the conda environment to be installed. For example, `/home//conda-sunset`. + +## 1. The recipe + +SUNSET uses YAML configuration files called 'recipes' to specify which data you want to load and the details of the different steps of the workflow. 
In this example, we are using a sample dataset which contains data of temperature-at-surface (tas) monthly means, from ECMWF SEAS5 as the experiment and ERA5 as the reference dataset, for the initialization month of November. + +There is a template file for this hands-on tutorial, which you can open with a text editor: + +```shell +# cd to the main SUNSET directory +# Open the recipe with a text editor such as vim or emacs +vim use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml +``` + +Once you have opened your recipe, it is time to edit the contents. Fill in the blank slots according to your preference, based on the options given in the description. + +NOTE: In this particular example, the sample data is limited, so the system and reference names, variable, forecast times and region cannot be changed. + +```yaml +Description: + Author: <___> + Description: Exercise 0.1: Skill assessment of a sample dataset +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + units: <___> # Choose your units: C or K + Datasets: + System: + name: ECMWF-SEAS5.1 + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: + hcst_start: '2000' + hcst_end: '2006' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 27 + latmax: 48 + lonmin: 0 + lonmax: 359 + Regrid: + method: bilinear + type: 'to_system' + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: 'none' + Calibration: + method: <___> + save: 'none' + Skill: + metric: RPSS BSS10 BSS90 + cross_validation: yes + save: 'none' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'none' + Visualization: + plots: skill_metrics + multi_panel: no + projection: cylindrical_equidistant + ncores: 10 + remove_NAs: yes + Output_format: S2S4E +Run: + filesystem: sample # This parameter specifies that we want to load the sample data, rather than data from a filesystem + Loglevel: INFO + Terminal: yes + output_dir: <______> # Path to the directory where you want your outputs to be saved + code_dir: <______> # Path to the directory where your code is +``` + +## 2. Load the required SUNSET modules and read the recipe + +If you are running SUNSET within BSC infrastructure, source the MODULES file to load the environment modules needed to run SUNSET. + +```shell +source MODULES +``` + +Otherwise, activate the conda environment: + +```shell +conda activate +``` + +Open an R session, by simply typing `R` on the terminal. + +To run SUNSET, we must run the R session from the directory where the code is. To check your working directory, you can run the shell command `pwd`. From the R session, you can use the commands `getwd()` and `setwd()` to see and change your working directory. + +```r +# Load required modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +# Read recipe +recipe_file <- "use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml" +recipe <- prepare_outputs(recipe_file) +``` + +The function `prepare_outputs()` creates a unique folder for the logs, data files and plots that result from the execution of your recipe, inside the directory you specified. It also runs a check over the recipe to detect any potential errors, misspellings or missing arguments. 
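`prepare_outputs()` also accepts a couple of optional arguments. The snippet below is only a sketch of how they can be used; the argument values are an example and not part of the tutorial steps.

```r
# uniqueID and restructure are optional arguments of prepare_outputs() (both
# default to TRUE). Setting uniqueID = FALSE reuses the same output folder name
# instead of appending a timestamp, so existing files may be overwritten.
recipe <- prepare_outputs(recipe_file = "use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml",
                          uniqueID = FALSE,
                          restructure = TRUE)
```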
At the end of the check, a message is displayed indicating whether or not the recipe passed the check, along with the list of errors and warnings. + +**Questions** + +Read the logs! + +(1) Did your recipe pass the check? Did you get any warnings? + +(2) Where will your outputs be saved? Copy and paste this directory somewhere, so that you can check it later! + +*Tip*: The recipe is now stored as a `list` containing all the information of the original YAML file, plus some extra things! If you want to see any particular element of the recipe from the R session, you can simply access that element in the list. For example: + +```r +# Checking the variable name +recipe$Analysis$Variables$name +# Checking the output directory +recipe$Run$output_dir +``` + +## 3. Load the data and change the units + +The **Loading** module retrieves the information from the recipe to load the data that has been requested in it. It loads the experiment data for the hindcast period, the reference data for the corresponding period, and the experiment forecast if a forecast year has been requested. In the case of the sample dataset, we have a hindcast and the corresponding observations, but no forecast. + +For certain variables like temperature, precipitation or sea level pressure, the user can request specific units to load the data in. The **Units** module will read the original units as stored in the netCDF files and perform any necessary unit conversions to match the request in the recipe. It also verifies that all of the loaded datasets share the same units, even if no specific unit has been requested. For this reason, users are strongly encouraged to run it even if they did not request any unit conversion. + +```r +# Load datasets +data <- Loading(recipe) +# Change units +data <- Units(recipe, data) +``` + +**Questions** + +(1) What is the structure of `data`? What is the class of the objects in `data`? *Tip*: you can use functions like `class()`, `names()` or `str()` to gain information about the structure of the object and its contents. + +```r +class(data) +names(data) +str(data, max.level = 2) +# You can access any of the three objects with the `$` operator: +class(data$hcst) +``` + +(2) Pay attention to the log messages: Did your units get converted? Are the new units what you expect? You can check the metadata of any of the objects in `data`. SUNSET also provides the `data_summary()` function, which lets you have a quick look at your objects: + +```r +# Check the new units and data of the hindcast (hcst) and/or observations (obs). Are they the same? +data$hcst$attrs$Variable$metadata$tas$units +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) +``` +(3) What are the dimensions of the datasets? Are they consistent with what is requested in the recipe? *Tip*: Check the data summary! + +## 4. Calibrate the data and compute the anomalies + +SUNSET has a few modules to perform post-processing on the experimental and the reference datasets. The **Calibration** module performs the bias correction method indicated in the recipe, using the `CSTools::CST_Calibration()` function. + +The **Anomalies** module removes the climatologies using functions like `CSTools::CST_Anomaly()` and `s2dv::Clim()`, and also returns the full fields in case they are needed for any future computations. + +```r +# Calibrate the data +data <- Calibration(recipe, data) +# Compute anomalies +data <- Anomalies(recipe, data) +``` +**Questions** + +(1) Verify that you now have anomaly values instead of the original full field. 
*Tip*: Use `data_summary()` like in the previous example and pay attention to the new values. + +## 5. Evaluate the model skill and compute the probability thresholds + +The **Skill** module returns a list of all the evaluation metrics requested in the recipe, in the shape of multi-dimensional arrays. In this case, we will compute three metrics: + +- **RPSS (Ranked Probability Skill Score)**: This skill score measures how well a forecast predicts the probability of the tercile categories (below normal, normal and above-normal), compared to the climatology. +- **BSS10 and BSS90 (Brier Skill Score):** This skill score measures how well a forecast predicts the probability of the 10th percentile and 90th percentile extreme events, compared to the climatology. + +The `Probabilities()` function returns the probability values for each requested category for the hindcast (and forecast) data, as well as the hindcast percentile values corresponding to each threshold. +``` +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Compute percentiles and probability bins +probabilities <- Probabilities(recipe, data) +``` +**Questions** + +(1) What is the structure of `skill_metrics`? Which metrics were computed? What dimensions do they have? *Tip*: use `str()` and `names()`. + +(2) What is the structure of `probabilities`? Can you identify the probability categories and the percentiles? *Tip*: use `str()`. + +## 6. Plotting the results + +Now, let's visualize the information that was computed! + +The **Visualization** module will generate the maps we requested in the recipe: Skill maps to visualize the skill distribution of the model, for each metric. + +With the significance option in the `Visualization()` function, you can choose whether or not to shade the grid points that are statistically significant in each skill metric plot. + +```r +# Plot data +Visualization(recipe, data, + skill_metrics = skill_metrics, + significance = TRUE) +``` + +Now, you can `cd` to the the output directory and inspect the contents of the `plots/` subdirectory. The plots are PNG files that can be visualized with the `display` command. They have a descriptive name including the content of the plot, the date and the forecast time. diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml new file mode 100644 index 0000000000000000000000000000000000000000..662e75cc77691b7e6670cf68f31af885c48225c5 --- /dev/null +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -0,0 +1,57 @@ +Description: + Author: V. 
Agudetse + '': split version +Analysis: + Horizon: seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5.1 + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: + hcst_start: '2000' + hcst_end: '2006' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 27 + latmax: 48 + lonmin: 0 + lonmax: 359 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: 'all' + Calibration: + method: bias + save: 'none' + Skill: + metric: RPSS BSS10 BSS90 + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /tmp/out-logs/ + code_dir: ./ + filesystem: sample + diff --git a/use_cases/ex1_1_single_analysis_terminal/ex1_1-handson.md b/use_cases/ex1_1_single_analysis_terminal/ex1_1-handson.md new file mode 100644 index 0000000000000000000000000000000000000000..ee86459a6f556b17c6776b86ef0021691a25478a --- /dev/null +++ b/use_cases/ex1_1_single_analysis_terminal/ex1_1-handson.md @@ -0,0 +1,237 @@ +# Hands-on 1.1: Single Verification Workflow on the Terminal + +Author: Victòria Agudetse Roures + +## Goal +Create a SUNSET recipe and use the functions in the suite to reproduce the verification workflow from the previous hands-on exercises. + +## 0. Cloning the SUNSET repository + +The first step to use SUNSET is to create a copy of the code in your local environment. Open a terminal and `cd` to the directory where you would like to store your local copy of SUNSET. For example: `/esarchive/scratch//git/`. If a directory does not exist yet, you can create it with the `mkdir` shell command. + +```shell +# Clone the GitLab repository to create a local copy of the code +git clone https://earth.bsc.es/gitlab/es/sunset.git +``` + +## 1. Modifying the recipe + +SUNSET uses YAML configuration files called 'recipes' to specify which data you want to load and the details of the different steps of the workflow. In this example, we want to evaluate the temperature-at-surface (tas) monthly means, using MeteoFrance System 7 data as our experiment and ERA5 as our reference dataset, for the initialization month of November. + +There is a template file for this hands-on tutorial, which you can open with a text editor: + +```shell +# cd to the main SUNSET directory +# Open the recipe with a text editor such as vim or emacs +vim use_cases/ex1_1_single_analysis_terminal/ex1_1-recipe.yml +``` + +Once you have opened your recipe, it is time to edit the contents. Fill in the blank slots according to your preference, based on the options given in the description. + +```yaml +Description: + Author: <___> + Description: Exercise 1.1: Calibration and skill assessment of MeteoFrance System 7 surface temperature +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + units: <___> # Choose your units: C or K + Datasets: + System: + name: Meteo-France-System7 + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: <___> # Choose the first time step! A number from 1 to 6 + ftime_max: <___> # Choose the last time step! 
A number from 1 to 6 + Region: + name: "EU" + latmin: 20 + latmax: 80 + lonmin: -20 + lonmax: 40 + Regrid: + method: bilinear + type: 'r360x181' # options: to_system, to_reference, self-defined grid + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: 'none' + Calibration: + method: <___> + save: 'none' + Skill: + metric: RPSS, BSS10, BSS90 + cross_validation: yes + save: 'none' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'none' + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + multi_panel: no + projection: cylindrical_equidistant + ncores: 10 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: <______> # Path to the directory where you want your outputs to be saved + code_dir: <______> # Path to the directory where your code is +``` + +## 2. Load the required SUNSET modules and read the recipe + +First of all, source the MODULES file to load the environment modules needed to run SUNSET. + +```shell +source MODULES +``` + +Open an R session, by simply typing `R` on the terminal. + +To run SUNSET, we must run the R session from the directory where the code is. To check your working directory, you can run the shell command `pwd`. From the R session, you can use the commands `getwd()` and `setwd()` to see and change your working directory. + +```r +# Load required modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +# Read recipe +recipe_file <- "use_cases/ex1_1_single_analysis_terminal/ex1_1-recipe.yml" +recipe <- prepare_outputs(recipe_file) +``` + +The function `prepare_outputs()` creates a unique folder for the logs, data files and plots that result from the execution of your recipe, inside the directory you specified. It also runs a check over the recipe to detect any potential errors, misspellings or missing arguments. At the end of the check, a message is displayed indicating whether or not the recipe passed the check, along with the list of errors and warnings. + +**Questions** + +Read the logs! + +(1) Did your recipe pass the check? Did you get any warnings? + +(2) Where will your outputs be saved? Copy and paste this directory somewhere, so that you can check it later! + +*Tip*: The recipe is now stored as a `list` containing all the information of the original YAML file, plus some extra things! If you want to see any particular element of the recipe from the R session, you can simply access that element in the list. For example: + +```r +# Checking the variable name +recipe$Analysis$Variables$name +# Checking the output directory +recipe$Run$output_dir +``` + +## 3. Load the data and change the units + +The **Loading** module retrieves the information from the recipe to load the data that has been requested it in. It loads the experiment data for the hindcast period, the reference data for the corresponding period, and the experiment forecast if a forecast year has been requested. + +For certain variables like temperature, precipitation or sea level pressure, the user can request for specific units to load the data in. The **Units** module will read the original units as stored in the netCDF files and perform any necessary unit converstions to match the request in the recipe. 
It also verifies that all of the loaded datasets share the same units, even if no specific unit has been requested. For this reason, users are strongly encouraged to run it even if they did not request any unit conversion. + +```r +# Load datasets +data <- Loading(recipe) +# Change units +data <- Units(recipe, data) +``` + +**Questions** + +(1) What is the structure of `data`? What is the class of the objects in `data`? *Tip*: you can use functions like `class()`, `names()` or `str()` to gain information about the structure of the object and its contents. + +```r +class(data) +names(data) +str(data, max.level = 2) +# You can access any of the three objects with the `$` operator: +class(data$hcst) +``` + +(2) Pay attention to the log messages: Did your units get converted? Are the new units what you expect? You can check the metadata of any of the objects in data. SUNSET also provides the `data_summary()` function, which lets you have a quick look at your objects: + +```r +# Check the new units and data of hcst, fcst and/or obs. Are they all the same? +data$hcst$attrs$Variable$metadata$tas$units +data_summary(data$hcst, recipe) +data_summary(data$fcst, recipe) +data_summary(data$obs, recipe) +``` +(3) What are the dimensions of the datasets? Are they consistent with what is requested in the recipe? *Tip*: Check the data summary! + +## 4. Calibrate the data and compute the anomalies + +SUNSET has a few modules to perform post-processing on the experimental and the reference datasets. The **Calibration** module performs the bias correction method indicated in the recipe, using the `CSTools::CST_Calibration()` function. + +The **Anomalies** module removes the climatologies using functions like `CSTools::CST_Anomaly()` and `s2dv::Clim()`, and also returns the full fields in case they are needed for any future computations. + +```r +# Calibrate the data +data <- Calibration(recipe, data) +# Compute anomalies +data <- Anomalies(recipe, data) +``` +**Questions** + +(1) Verify that you now have anomaly values instead of the original full field. *Tip*: Use `data_summary()` like in the previous example and pay attention to the new values. + +## 5. Evaluate the model skill and compute the probability thresholds + +The **Skill** module returns a list of all the evaluation metrics requested in the recipe, in the shape of multi-dimensional arrays. In this case, we will compute three metrics: + +- **RPSS (Ranked Probability Skill Score)**: This skill score measures how well a forecast predicts the probability of the tercile categories (below normal, normal and above-normal), compared to the climatology. +- **BSS10 and BSS90 (Brier Skill Score):** This skill score measures how well a forecast predicts the probability of the 10th percentile and 90th percentile extreme events, compared to the climatology. + +The `Probabilities()` function returns the probability values for each requested category for the hindcast and forecast data, as well as the hindcast percentile values corresponding to each threshold. +``` +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Compute percentiles and probability bins +probabilities <- Probabilities(recipe, data) +``` +**Questions** + +(1) What is the structure of `skill_metrics`? Which metrics were computed? What dimensions do they have? *Tip*: use `str()` and `names()`. + +(2) What is the structure of `probabilities`? Can you identify the probability categories and the percentiles? *Tip*: use `str()`. + +## 6. 
Plotting the results + +Now, let's visualize the information that was computed! + +The **Visualization** module will generate the three types of maps we requested in the recipe: +- Skill maps to visualize the skill distribution of the model, for each metric. +- The ensemble mean of the calibrated forecast anomalies. +- A map showing the most likely tercile category for each point in the grid. + +With the significance option in the `Visualization()` function, you can choose whether or not to shade the grid points that are statistically significant in each skill metric plot. + +```r +# Plot data +Visualization(recipe, data, + skill_metrics = skill_metrics, + probabilities = probabilities, + significance = TRUE) +``` + +Now, you can `cd` to the the output directory and inspect the contents of the `plots/` subdirectory. The plots are png files that can be visualized with the `display` command. They have a descriptive name including the content of the plot, the date and the forecast time. + +**Questions** + +(1) Let's take a look at the forecast ensemble mean. What is the sign of the anomalies over Spain? In what regions are the anomalous temperatures strongest? + +(2) Let's take a look at the skill metrics RPSS, BSS10 and BSS90. In what regions and for which metrics is the forecast most skillful? *Tip*: Positive values indicate that the model is a better predictor than the climatology, with 1 being the perfect score. + +(3) Let's take a look at the Most Likely Terciles plots. This plot indicates the probability of the temperature being below normal, near-normal or above normal. What is the most likely category for Spain? diff --git a/use_cases/ex1_1_single_analysis_terminal/ex1_1-recipe.yml b/use_cases/ex1_1_single_analysis_terminal/ex1_1-recipe.yml new file mode 100644 index 0000000000000000000000000000000000000000..7b35e9666b599ea7a41113acf1b14ef5a0415c63 --- /dev/null +++ b/use_cases/ex1_1_single_analysis_terminal/ex1_1-recipe.yml @@ -0,0 +1,61 @@ +Description: + Author: V. 
Agudetse + Description: Analysis of MF System 7 with temperature +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + units: K + Datasets: + System: + name: Meteo-France-System7 + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 2 + Region: + name: "EU" + latmin: 20 + latmax: 80 + lonmin: -20 + lonmax: 40 + Regrid: + method: bilinear + type: 'r360x181' + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: 'none' + Calibration: + method: mse_min + save: 'none' + Skill: + metric: RPSS, BSS10, BSS90 + cross_validation: yes + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'none' + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + multi_panel: no + projection: cylindrical_equidistant + mask_terciles: no # CHECK + dots: no # CHECK + ncores: 10 + remove_NAs: yes + Output_format: S2S4E +Run: + filesystem: esarchive + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/use_cases/ex1_2_autosubmit_scorecards/Figures/as_change_status.PNG b/use_cases/ex1_2_autosubmit_scorecards/Figures/as_change_status.PNG new file mode 100644 index 0000000000000000000000000000000000000000..5fc5cd8a3397a626824cbaa8fb9d3fecb090ce13 Binary files /dev/null and b/use_cases/ex1_2_autosubmit_scorecards/Figures/as_change_status.PNG differ diff --git a/use_cases/ex1_2_autosubmit_scorecards/Figures/as_tree.PNG b/use_cases/ex1_2_autosubmit_scorecards/Figures/as_tree.PNG new file mode 100644 index 0000000000000000000000000000000000000000..5194568e387d7251c478eed0cf071f7d7c7a4a60 Binary files /dev/null and b/use_cases/ex1_2_autosubmit_scorecards/Figures/as_tree.PNG differ diff --git a/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-1_ECMWF-SEAS5_ERA5_tas_1993-2003.png b/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-1_ECMWF-SEAS5_ERA5_tas_1993-2003.png new file mode 100644 index 0000000000000000000000000000000000000000..631e3e2f80955aaf2b5dbd8abb2e577a73746373 Binary files /dev/null and b/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-1_ECMWF-SEAS5_ERA5_tas_1993-2003.png differ diff --git a/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-2_ECMWF-SEAS5_ERA5_tas_1993-2003.png b/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-2_ECMWF-SEAS5_ERA5_tas_1993-2003.png new file mode 100644 index 0000000000000000000000000000000000000000..e311079536dc83ccb94c2cd786cebc44b46694ed Binary files /dev/null and b/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-2_ECMWF-SEAS5_ERA5_tas_1993-2003.png differ diff --git a/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-3_ECMWF-SEAS5_ERA5_tas_1993-2003.png b/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-3_ECMWF-SEAS5_ERA5_tas_1993-2003.png new file mode 100644 index 0000000000000000000000000000000000000000..356243497a01f392ce21575d9df17c1a1d2b1796 Binary files /dev/null and b/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-3_ECMWF-SEAS5_ERA5_tas_1993-2003.png differ diff --git a/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-4_ECMWF-SEAS5_ERA5_tas_1993-2003.png b/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-4_ECMWF-SEAS5_ERA5_tas_1993-2003.png new file mode 100644 index 0000000000000000000000000000000000000000..54e1a5b1825b1a00672360dd86f3c8c15a86380c Binary files /dev/null and 
b/use_cases/ex1_2_autosubmit_scorecards/Figures/scorecard-4_ECMWF-SEAS5_ERA5_tas_1993-2003.png differ diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md new file mode 100644 index 0000000000000000000000000000000000000000..c77fb2b164a631c51dac51dd2a0f2ad90479d6bc --- /dev/null +++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md @@ -0,0 +1,174 @@ +# Hands-on 1.2: Computation of Scorecards with Autosubmit + +Author: An-Chi Ho + +## Goal +Compute some skill metrics and plot scorecards with SUNSET, using Autosubmit to dispatch jobs in parallel. +In the recipe, we request 12 start dates (0101, 0201, ..., 1201). SUNSET will split the recipe into 12 atomic recipes, and Autosubmit will run 12 verification jobs, one for each atomic recipe, in parallel. +When all the verification jobs are finished, the scorecard job will be triggered and will produce the scorecards. + +We only use one variable, one model and one reference dataset in this example, but you can add more datasets and variables if needed, and SUNSET will split the recipes accordingly. + +Check the GitLab Wiki: +- Autosubmit page for a full explanation of using SUNSET with Autosubmit https://earth.bsc.es/gitlab/es/sunset/-/wikis/Autosubmit + +- Home page, Scorecards module section, to learn more about scorecards https://earth.bsc.es/gitlab/es/sunset/-/wikis/home#scorecards-module + + +## 0. Cloning the SUNSET repository + +If you're completely new to SUNSET, the first step is to create a copy of the tool in your local environment. +Open a terminal and `cd` to the directory where you would like to store your local copy of SUNSET. For example: `/esarchive/scratch//git/`. If a directory does not exist yet, you can create it with the `mkdir` shell command. + +```shell +# Clone the GitLab repository to create a local copy of the code +git clone https://earth.bsc.es/gitlab/es/sunset.git +``` +You should see a git folder "sunset" under the current directory. Now you have all the code, recipes, and scripts for running SUNSET. + + +## 1. Create Autosubmit experiment + +Since we're going to use Autosubmit to dispatch jobs, we need to have an Autosubmit experiment. Note that SUNSET uses Autosubmit >= 4.0.0. + +On the workstation or the Autosubmit machine, you can create an experiment with the following commands. + +```shell +module load autosubmit/4.0.0b-foss-2015a-Python-3.7.3 +autosubmit expid -H nord3v2 -d "SUNSET use case 1_2" +``` +You will see messages like the ones below: + +```shell +Autosubmit is running with 4.0.0b +The new experiment "a6pc" has been registered. +Generating folder structure... +Experiment folder: /esarchive/autosubmit/a6pc +Generating config files... +Experiment a6pc created +``` +Note down the experiment ID (in the snippet above, a6pc). We need it for the recipe later. + + +## 2. Modifying the recipe + +The template recipe for this use case can be found in [ex1_2-recipe.yml](use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml). +You should at least edit some items in the "Run" section (an optional way to double-check these fields is sketched after this list): +- `output_dir`: The directory where you want to save the outputs and logs +- `code_dir`: The directory where your SUNSET code is stored (i.e., the git folder) +- `auto_conf$script`: The path to the user-defined script ex1_2-script.R +- `auto_conf$expid`: The experiment ID "xxxx" you just created +- `auto_conf$hpc_user`: Your user ID on Nord3, which should be bsc32xxx +- `auto_conf$email_address`: Your email. You can also adjust the other email notification options to your preference. 
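A quick, optional way to confirm that these fields are filled in correctly is to read the edited recipe back in an R session. This is only a sketch, not a required step of the tutorial; the fields shown are the ones listed above.

```r
# Optional sanity check (not a required step): read the edited recipe and
# confirm the Autosubmit-related fields are filled in before launching.
library(yaml)
recipe <- read_yaml("use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml")
recipe$Run$auto_conf$expid      # your experiment ID, e.g. "a6pc"
recipe$Run$auto_conf$hpc_user   # your HPC user name, e.g. "bsc32xxx"
recipe$Run$auto_conf$script     # full path to ex1_2-script.R
recipe$Run$output_dir           # where outputs and logs will be written
```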
+ +In the recipe, we ask for anomaly calculation after loading the data, calculate the skill scores and save the result for scorecards. In the Scorecard section, three regions are requested. + +Feel free to also modify other aspects according to your particular needs. You can read more about the parameters and the available modules in the SUNSET GitLab wiki. + +## 3. The user-defined script + +We need to have a script to define the modules to use and the steps of the workflow. Note that the script is for data loading and verification parts. The Scorecards module doesn't need to be included in this script. + +The prepare_outputs() function is already incorporated into the launcher script (see the next section for details about launcher), so it does not need to be included in the user-defined script in this case. +In its place, we will use the function read_atomic_recipe(). The recipe path is passed as an argument onto the R script. The beginning of our script should look like this: + +```R +# Load modules +source("modules/Loading/Loading.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") + +# Read recipe +## (leave this part as-is! Autosubmit will automatically pass the atomic recipe as a parameter) +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) +``` + +The rest of the user-defined script can be written in the same way as any other SUNSET script. We load the data, calculate the anomalies, then compute the skill scores and save the result as netCDF files for Scorecards. + +```R +# Load data +data <- Loading(recipe) +# Compute tos anomalies +data <- Anomalies(recipe, data) +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +``` +Check the example script at [ex1_2-script.yml](use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R). +You can execute it as-is or copy it and modify it according to your needs. + + +## 4. Launch jobs and Use Autosubmit + +We will start the jobs with the launcher. The SUNSET Launcher is a bash script named launch_SUNSET.sh that can be found in the main directory of the SUNSET repository. It runs in two steps: + +1. Run the recipe checks, split the recipe into atomic recipes and create the directory for the outputs. +2. Modify the Autosubmit configuration of your experiment according to the parameters in the recipe. + +The bash script needs two inputs: (1) [recipe](#2-modifying-the-recipe) (2) [R script](#3-the-user-defined-script). + + On your workstation or Nord3 under the SUNSET code directory, run: + +```shell +bash launch_SUNSET.sh use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R +``` +You will see the messages similar to below: +```shell +[1] "Saving all outputs to:" +[1] "/esarchive/scratch/aho/auto-s2s-outputs/ex1_2-recipe_20231129003740" +INFO [2023-11-29 00:37:41] Checking recipe: use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml +WARN [2023-11-29 00:37:41] The element 'fcst_year' is not defined in the recipe. No forecast year will be used. +INFO [2023-11-29 00:37:41] ##### RECIPE CHECK SUCCESSFULL ##### +INFO [2023-11-29 00:37:41] Splitting recipe in single verifications. +INFO [2023-11-29 00:37:41] The main recipe has been divided into 12 atomic recipes. +INFO [2023-11-29 00:37:41] Check output directory /esarchive/scratch/aho/auto-s2s-outputs//ex1_2-recipe_20231129003740/logs/recipes/ to see all the individual atomic recipes. 
+INFO [2023-11-29 00:37:41] ##### AUTOSUBMIT CONFIGURATION WRITTEN FOR a6pc ##### +INFO [2023-11-29 00:37:41] You can check your experiment configuration at: /esarchive/autosubmit/a6pc/conf/ +INFO [2023-11-29 00:37:41] Please SSH into bscesautosubmit01 or bscesautosubmit02 and run the following commands: +INFO [2023-11-29 00:37:41] module load autosubmit/4.0.0b-foss-2015a-Python-3.7.3 +INFO [2023-11-29 00:37:41] autosubmit create a6pc +INFO [2023-11-29 00:37:41] autosubmit refresh a6pc +INFO [2023-11-29 00:37:41] nohup autosubmit run a6pc & disown +``` +This output contains some useful information, such as the path to the atomic recipes and the Autosubmit configuration files. Most importantly, follow the last lines to launch your experiment: + +```shell +ssh bscesautosubmit01.bsc.es +(enter Autosubmit machine) +module load autosubmit/4.0.0b-foss-2015a-Python-3.7.3 +autosubmit create a6pc +autosubmit refresh a6pc +nohup autosubmit run a6pc & disown +``` + +Then, you can go to the [Autosubmit GUI](https://earth.bsc.es/autosubmitapp/) to check the experiment status. + + + +As you can see, the Scorecards job is dependent on the Verification jobs. Once the 12 verification jobs are finished, the Scorecards job will start. + +## 5. Results and plots + +The scorecards are saved in `plots/Scorecards` under the output directory. There will be 4 files (_more explanation here_) + + + + + + + +## 6. Rerun Autosubmit + +If something goes wrong and makes the jobs fail, you can rerun only the failed jobs. + +1. Go to the Autosubmit GUI, select the failed job(s) and click "CHANGE STATUS". +2. Set "Set status to:" to "WAITING". Copy the suggested commands and run them on the Autosubmit machine or your workstation. +3. Fix the problem in your local SUNSET git directory. +4. Run `autosubmit refresh xxxx` and `nohup autosubmit run xxxx & disown`. + + + +If everything fails, you can also simply recreate the experiment by running `autosubmit create xxxx` --> `autosubmit refresh xxxx` --> `nohup autosubmit run xxxx & disown`. + diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml new file mode 100644 index 0000000000000000000000000000000000000000..73f16311f93cf69aa439945b00715b7d29f80afa --- /dev/null +++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml @@ -0,0 +1,96 @@ +Description: + Author: An-Chi Ho + Info: Compute Skills and Plot Scorecards with Autosubmit + +Analysis: + Horizon: seasonal + Variables: + - {name: tas, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: ECMWF-SEAS5} + Multimodel: False # single option + Reference: + - {name: ERA5} + Time: + sdate: # list, split + - '0101' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' + fcst_year: + hcst_start: '1993' # single option + hcst_end: '2003' # single option + ftime_min: 1 # single option + ftime_max: 6 # single option + Region: # multiple lists, split?
Add region name if length(Region) > 1 + - {name: "global", latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: 'none' + Calibration: + method: raw + save: 'none' + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # list, don't split + cross_validation: yes + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'none' + Scorecards: + execute: yes # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: -90} + start_months: 'all' + metric: mean_bias enscorr rpss crpss enssprerr + metric_aggregation: 'score' + inf_to_na: TRUE # Optional, bool: set inf values in data to NA, default is FALSE + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 8 + remove_NAs: no # bool, don't split + Output_format: Scorecards # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/aho/auto-s2s-outputs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/aho/git/auto-s2s/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R # replace with the path to your script + expid: a6pc # replace with your EXPID + hpc_user: bsc32734 # replace with your hpc username + wallclock: 03:00 # hh:mm + processors_per_job: 8 + platform: nord3v2 + custom_directives: ['#SBATCH --exclusive'] + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: an.ho@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + + diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R new file mode 100644 index 0000000000000000000000000000000000000000..1f60798736ae021e55face318444f62149c2aec2 --- /dev/null +++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R @@ -0,0 +1,25 @@ +############################################################################### +## Author: An-Chi Ho +## Description: Computes some skill metrics and plots scorecards with Autosubmit +## Instructions: Follow the steps described in: +## use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md +## This script should be called by the bash script launch_SUNSET.sh.
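+## The path to the atomic recipe is passed to it as the first command-line argument.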
+ +############################################################################### + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") + +# Read recipe +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) + +# Load data +data <- Loading(recipe) +# Compute tos anomalies +data <- Anomalies(recipe, data) +# Compute skill metrics +skill_metrics <- Skill(recipe, data) + diff --git a/use_cases/ex1_3_nino_indices_comparison/ex1_3-handson.md b/use_cases/ex1_3_nino_indices_comparison/ex1_3-handson.md new file mode 100644 index 0000000000000000000000000000000000000000..8417a4f08f367ab282a66932227a5f6cf1c3c90a --- /dev/null +++ b/use_cases/ex1_3_nino_indices_comparison/ex1_3-handson.md @@ -0,0 +1,108 @@ +# Hands-on 1.3: Computation of El Niño indices for two seasonal models + +Authors: Victòria Agudetse Roures and Núria Pérez Zanón + +## Goal +Create a SUNSET recipe to compute and evaluate the skill of several El Niño indices (Niño1+2, Niño3, Niño3.4 and Niño4) for two models: ECMWF-SEAS5 and MeteoFrance System 7. We include the information for both of the models in a single recipe, and the SUNSET Launcher will split the recipe into two 'atomic recipes': one for each model. The computations for the atomic recipes will be run in the cluster as two separate jobs. + +It is also possible to split a recipe along different Reference datasets, Variables and Start Dates. + +## 0. Cloning the SUNSET repository + +The first step to use SUNSET is to create a copy of the code in your local environment. Open a terminal and `cd` to the directory where you would like to store your local copy of SUNSET. For example: `/esarchive/scratch//git/`. If the directory does not exist yet, you can create it with the `mkdir` shell command. + +```shell +# Clone the GitLab repository to create a local copy of the code +git clone https://earth.bsc.es/gitlab/es/sunset.git +``` + +## 1. Modifying the recipe + +The template recipe for this use case can be found in `use_cases/ex1_3_nino_indices_comparison/ex1_3-recipe.yml`. You should open it with an editor such as emacs or vim: + +```shell +# cd to the main SUNSET directory +# Open the recipe with a text editor such as vim or emacs +vim use_cases/ex1_3_nino_indices_comparison/ex1_3-recipe.yml +``` + +Then, under the 'Run' section of the recipe, you should edit the parameters `output_dir` and `code_dir` to point to your desired output directory and to the directory where your SUNSET code is stored, respectively. + +Feel free to also modify other aspects of the recipe according to your particular needs. You can read more about the parameters and the available modules in the SUNSET wiki. + +## 2. The user-defined script + +The SUNSET Launcher is a bash script named launch_SUNSET.sh that can be found in the main directory of the SUNSET repository. When working without Autosubmit, it runs in two steps: + +1. Running the recipe checks, splitting the recipe into atomic recipes and creating the directory for the outputs. +2. Sending jobs to the cluster to run the user-defined script for each atomic recipe, using SLURM. + +The `prepare_outputs()` function is already incorporated into the first step. For that reason, it does not need to be included in the user-defined script in this particular case. In its place, we will use the function `read_atomic_recipe()`. The recipe path is passed as an argument to the R script.
The beginning of our script should look like this: + +```R +# Load the modules to be used +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Indices/Indices.R") +source("modules/Skill/Skill.R") + +# Define the recipe path as the first argument from the command line +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +# Read the atomic recipe +recipe <- read_atomic_recipe(recipe_file) +``` + +The rest of the user-defined script can be written in the same way as any other SUNSET script: + +```R +# Load data +data <- Loading(recipe) +# Check units and transform if needed +data <- Units(recipe, data) +# Compute tos anomalies +data <- Anomalies(recipe, data) +# Compute Niño Indices +nino_indices <- Indices(data = data, recipe = recipe) + +# We can compute the Skill metrics for each of the El Niño indices, +# specifying that the data is spatially aggregated, with the parameter +# agg = "region". +for (index in nino_indices) { + nino_skill_metrics <- Skill(recipe = recipe, data = index, agg = "region") +} +``` + +A complete, ready-to-use sample of this example script can be found in `use_cases/ex1_3_nino_indices_comparison/ex1_3-script.R`. You can execute it as-is or copy it and modify it according to your specific needs. + +## 3. Launching the jobs with the SUNSET Launcher + +The first step is to connect to the HPC machine through `ssh`. When working without Autosubmit, the SUNSET Launcher should be run directly from the HPC machine where the jobs will run (for example, Nord3v2). There is no need to request an interactive session; the launcher script can be called directly from the login node. You can obtain detailed usage information by running: + +```shell +bash launch_SUNSET.sh --help +``` + +The mandatory arguments are the paths to the recipe and the script. We can also include other optional arguments to be used by SLURM, such as the number of CPUs to request (--cpus), the wallclock time for each job (--wallclock) and other extra directives (--custom_directives). You can refer to the [Nord3v2 user guide](https://www.bsc.es/user-support/nord3v2.php#jobdirectives) and the [SLURM sbatch documentation](https://slurm.schedmd.com/sbatch.html) for more information on the available options for the parameters. + +In this case, we are giving each job a wallclock time of 1 hour and requesting exclusive usage of all the cores in one node. The shell command to run SUNSET will look like this: + +```shell +bash launch_SUNSET.sh use_cases/ex1_3_nino_indices_comparison/ex1_3-recipe.yml use_cases/ex1_3_nino_indices_comparison/ex1_3-script.R --wallclock=01:00:00 --custom_directives="--exclusive" +``` + +You can check the status of your running jobs with the `squeue` command. The SLURM logs will be inside of your code directory, in a subfolder named 'out-logs'. It can be useful to check them in case of errors. + +## 4. Results and plots + +The spatial pattern and time series plots that were requested are saved inside `plots/Indices/` in the output directory. There will be one set of plots for each El Niño index, with a descriptive file name providing information about the content of the plot, the system/reference datasets, the start date and the forecast time. 
Here are some examples of the results: + +Spatial correlation for the ensemble mean: + +![](./figures/nino34_correlation_tos_ensmean_ECMWF-SEAS5_s1101_ftime02.png) +![](./figures/nino34_correlation_tos_ensmean_Meteo-France-System7_s1101_ftime02.png) + +Time series comparison between the model and the reference dataset (ERA5): +![](./figures/nino34_ECMWF-SEAS5_ERA5_s1101_ftime02.png) +![](./figures/nino34_Meteo-France-System7_ERA5_s1101_ftime02.png) diff --git a/use_cases/ex1_3_nino_indices_comparison/ex1_3-recipe.yml b/use_cases/ex1_3_nino_indices_comparison/ex1_3-recipe.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4231e6a2403201b4e9b9d9963bd8ba99d6bd52c --- /dev/null +++ b/use_cases/ex1_3_nino_indices_comparison/ex1_3-recipe.yml @@ -0,0 +1,56 @@ +Description: + Author: V. Agudetse + Info: Computing El Nino indices for ECMWF SEAS5 and MeteoFrance System7 + +Analysis: + Horizon: seasonal + Variables: + - {name: tos, freq: monthly_mean, units: K} + Datasets: + System: + - {name: ECMWF-SEAS5} + - {name: Meteo-France-System7} + Multimodel: no + Reference: + - {name: ERA5} + Time: + sdate: '1101' + fcst_year: + hcst_start: '1993' + hcst_end: '2016' + ftime_min: 2 + ftime_max: 4 + Region: + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + save: none + Anomalies: + compute: yes + cross_validation: no + save: none + Indices: + Nino1+2: {save: all, plot_ts: yes, plot_sp: yes} + Nino3: {save: all, plot_ts: yes, plot_sp: yes} + Nino3.4: {save: all, plot_ts: yes, plot_sp: yes} + Nino4: {save: all, plot_ts: yes, plot_sp: yes} + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr + save: 'all' + ncores: 8 + remove_NAs: yes + Output_format: S2S4E + logo: TRUE +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ # ______ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ # _____ + autosubmit: no diff --git a/use_cases/ex1_3_nino_indices_comparison/ex1_3-script.R b/use_cases/ex1_3_nino_indices_comparison/ex1_3-script.R new file mode 100644 index 0000000000000000000000000000000000000000..c2b0ba341a015ef4440f653a8c2638cc0fe6619f --- /dev/null +++ b/use_cases/ex1_3_nino_indices_comparison/ex1_3-script.R @@ -0,0 +1,37 @@ +############################################################################### +## Author: Núria Pérez-Zanón and Victòria Agudetse Roures +## Description: Computes the Niño1+2, Niño3, Niño3.4 and Niño4 indices and some +## skill metrics for each index. 
+## Instructions: To run it, follow the steps described in: +## use_cases/ex1_3_nino_indices_comparison/ex1_3-handson.md +############################################################################### + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Indices/Indices.R") +source("modules/Skill/Skill.R") + +# Read recipe +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) + +# Load data +data <- Loading(recipe) +# Check units and transform if needed +data <- Units(recipe, data) +# Calibrate data +# data <- Calibration(recipe, data) +# Compute tos anomalies +data <- Anomalies(recipe, data) +# Compute Niño Indices +nino_indices <- Indices(data = data, recipe = recipe) + +# We can compute the Skill metrics for each of the El Niño indices, +# specifying that the data is spatially aggregated, with the parameter +# agg = "region". +for (index in nino_indices) { + skill_metrics <- Skill(recipe = recipe, data = index, agg = "region") +} diff --git a/use_cases/ex1_3_nino_indices_comparison/figures/nino34_ECMWF-SEAS5_ERA5_s1101_ftime02.png b/use_cases/ex1_3_nino_indices_comparison/figures/nino34_ECMWF-SEAS5_ERA5_s1101_ftime02.png new file mode 100644 index 0000000000000000000000000000000000000000..b14e72311586239292d447c238442a9b9f2957f5 Binary files /dev/null and b/use_cases/ex1_3_nino_indices_comparison/figures/nino34_ECMWF-SEAS5_ERA5_s1101_ftime02.png differ diff --git a/use_cases/ex1_3_nino_indices_comparison/figures/nino34_Meteo-France-System7_ERA5_s1101_ftime02.png b/use_cases/ex1_3_nino_indices_comparison/figures/nino34_Meteo-France-System7_ERA5_s1101_ftime02.png new file mode 100644 index 0000000000000000000000000000000000000000..696de30e0a7a883bce952897b47c82223cb0d186 Binary files /dev/null and b/use_cases/ex1_3_nino_indices_comparison/figures/nino34_Meteo-France-System7_ERA5_s1101_ftime02.png differ diff --git a/use_cases/ex1_3_nino_indices_comparison/figures/nino34_correlation_tos_ensmean_ECMWF-SEAS5_s1101_ftime02.png b/use_cases/ex1_3_nino_indices_comparison/figures/nino34_correlation_tos_ensmean_ECMWF-SEAS5_s1101_ftime02.png new file mode 100644 index 0000000000000000000000000000000000000000..5b8af9efa6878dc51e4fea01bab2a240bf1ec2b4 Binary files /dev/null and b/use_cases/ex1_3_nino_indices_comparison/figures/nino34_correlation_tos_ensmean_ECMWF-SEAS5_s1101_ftime02.png differ diff --git a/use_cases/ex1_3_nino_indices_comparison/figures/nino34_correlation_tos_ensmean_Meteo-France-System7_s1101_ftime02.png b/use_cases/ex1_3_nino_indices_comparison/figures/nino34_correlation_tos_ensmean_Meteo-France-System7_s1101_ftime02.png new file mode 100644 index 0000000000000000000000000000000000000000..39cb7dcde131f31f3169d5e25ed0f5bd972c2abf Binary files /dev/null and b/use_cases/ex1_3_nino_indices_comparison/figures/nino34_correlation_tos_ensmean_Meteo-France-System7_s1101_ftime02.png differ