diff --git a/.gitignore b/.gitignore index e11ba7d322dd439b07d98ef244a871c11ae75d9e..263c4e640a4ffe3bd13bf6a14f80c37553954d4d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ out-logs/ *.swp *.swo +ecsbatch.log* modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml conf/vitigeoss-vars-dict.yml diff --git a/autosubmit/auto-multimodel.sh b/autosubmit/auto-multimodel.sh new file mode 100644 index 0000000000000000000000000000000000000000..a9912666046bf3d8fc33643f764d864fa390963d --- /dev/null +++ b/autosubmit/auto-multimodel.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +############ AUTOSUBMIT INPUTS ############ +proj_dir=%PROJDIR% +outdir=%common.OUTDIR% +script=%common.SCRIPT% +SPLIT=%SPLIT% +############################### + +cd $proj_dir + +source split_to_recipe +# atomic_recipe_number=$(printf "%02d" $CHUNK) +atomic_recipe=${outdir}/logs/recipes/multimodel/atomic_recipe_sys-Multimodel${recipe}.yml + +source MODULES + +Rscript ${script} ${atomic_recipe} diff --git a/autosubmit/auto-verification-CERISE.sh b/autosubmit/auto-verification-CERISE.sh index caf2dd0ec8194b2868187eb26697a197003b5d1a..675b41d474064d53ca435d5681105a85f16d880a 100644 --- a/autosubmit/auto-verification-CERISE.sh +++ b/autosubmit/auto-verification-CERISE.sh @@ -9,8 +9,10 @@ CHUNK=%CHUNK% cd $proj_dir -atomic_recipe_number=$(printf "%02d" $CHUNK) -atomic_recipe=${outdir}/logs/recipes/atomic_recipe_${atomic_recipe_number}.yml +source chunk_to_recipe + +# atomic_recipe_number=$(printf "%02d" $CHUNK) +atomic_recipe=${outdir}/logs/recipes/atomic_recipe_${recipe}.yml ## Workaround to avoid bug in conda activate/source activate when running ## inside bash script diff --git a/autosubmit/auto-verification.sh b/autosubmit/auto-verification.sh index 0089e322d63e89a5578c98a6a64a1369b5b9b108..e909dbfb433ff6d59816734973547e918513bf76 100644 --- a/autosubmit/auto-verification.sh +++ b/autosubmit/auto-verification.sh @@ -9,8 +9,10 @@ CHUNK=%CHUNK% cd $proj_dir -atomic_recipe_number=$(printf "%02d" $CHUNK) -atomic_recipe=${outdir}/logs/recipes/atomic_recipe_${atomic_recipe_number}.yml +source chunk_to_recipe + +# atomic_recipe_number=$(printf "%02d" $CHUNK) +atomic_recipe=${outdir}/logs/recipes/atomic_recipe_${recipe}.yml source MODULES diff --git a/autosubmit/conf_esarchive/jobs.yml b/autosubmit/conf_esarchive/jobs.yml index a3c8934bf70f92d15a8e644a6f34947afcc28847..04d23ba0acedf56ab1edb813db4f2e1300168f59 100644 --- a/autosubmit/conf_esarchive/jobs.yml +++ b/autosubmit/conf_esarchive/jobs.yml @@ -5,12 +5,24 @@ JOBS: WALLCLOCK: NOTIFY_ON: PLATFORM: nord3v2 - PROCESSORS: + PROCESSORS: + # SPLITS: # n_atomic_recipes, number of atomic recipes + multimodel: + FILE: autosubmit/auto-multimodel.sh + RUNNING: once + WALLCLOCK: + NOTIFY_ON: + PLATFORM: nord3v2 + PROCESSORS: + DEPENDENCIES: + verification: + SPLITS_FROM: + SPLITS: # n_atomic_recipes/n_models = n_multimodels scorecards: FILE: autosubmit/auto-scorecards.sh WALLCLOCK: 00:10 PLATFORM: nord3v2 NOTIFY_ON: PROCESSORS: 1 - DEPENDENCIES: verification + DEPENDENCIES: diff --git a/conda_installation/environment-cerise-localgribR-ecmwf.yml b/conda_installation/environment-cerise-localgribR-ecmwf.yml index 983347f0d2d91190e5165e86c66c3694cf28fcfb..c70811e81e06dd823fe0400339f58fb9182f33a5 100644 --- a/conda_installation/environment-cerise-localgribR-ecmwf.yml +++ b/conda_installation/environment-cerise-localgribR-ecmwf.yml @@ -208,6 +208,7 @@ dependencies: - r-lattice=0.21_8=r42h57805ef_1 - 
r-lifecycle=1.0.3=r42hc72bb7e_2 - r-listenv=0.9.0=r42hc72bb7e_1 + - r-lobstr=1.1.2=r42ha503ecb_3 - r-log4r=0.4.3=r42h57805ef_1 - r-lubridate=1.9.2=r42h57805ef_2 - r-magick=2.7.3=r42h7525677_1 @@ -242,6 +243,7 @@ dependencies: - r-proj4=1.0_12=r42h4db2be8_0 - r-promises=1.2.1=r42ha503ecb_0 - r-proxy=0.4_27=r42h57805ef_2 + - r-pryr=0.1.6=r42ha503ecb_1 - r-ps=1.7.5=r42h57805ef_1 - r-r6=2.5.1=r42hc72bb7e_2 - r-rappdirs=0.3.3=r42h57805ef_2 @@ -328,4 +330,46 @@ dependencies: - xz=5.2.6=h166bdaf_0 - zlib=1.2.13=hd590300_5 - zstd=1.5.5=hfc55251_0 + - pip: + - argparse==1.4.0 + - autosubmit==4.0.98 + - autosubmitconfigparser==1.0.49 + - bcrypt==4.0.1 + - bscearth-utils==0.5.2 + - cdo==1.6.0 + - certifi==2023.7.22 + - cffi==1.16.0 + - charset-normalizer==3.3.1 + - configobj==5.0.8 + - coverage==7.3.2 + - cryptography==41.0.5 + - cycler==0.12.1 + - cython==3.0.4 + - fonttools==4.43.1 + - idna==3.4 + - iniconfig==2.0.0 + - kiwisolver==1.4.5 + - matplotlib==3.5.3 + - mock==5.1.0 + - networkx==2.6.3 + - nose==1.3.7 + - packaging==23.2 + - paramiko==3.3.1 + - pillow==10.1.0 + - pluggy==1.3.0 + - portalocker==2.7.0 + - psutil==5.9.6 + - py3dotplus==1.1.0 + - pycparser==2.21 + - pygments==2.16.1 + - pynacl==1.5.0 + - pyparsing==3.1.1 + - pytest==7.4.3 + - python-dateutil==2.8.2 + - pythondialog==3.5.3 + - requests==2.31.0 + - ruamel-yaml==0.17.21 + - six==1.16.0 + - urllib3==2.0.7 + - xlib==0.21 prefix: /perm/cyce/conda/envs/condaCerise diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index c25d8d3a86fb08670e1556db0b9281adf5e43d4c..c7c6f3a43716a745be0b77d0f44bc39ca26f3e09 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -19,6 +19,7 @@ esarchive: calendar: "proleptic_gregorian" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20190713/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i1p1f1_gr_196011-196110.nc" #'r512x256' # ---- @@ -41,6 +42,7 @@ esarchive: #NOTE:There are many members but not all of them are available on ESGF (only r6-10 available). Then, we might have some variables for the rest of the members (r1-5 and r11-15), but not for all the variables. 
That's why i'm only using r6-10 member: r6i2p1f1,r7i2p1f1,r8i2p1f1,r9i2p1f1,r10i2p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r6i2p1f1/Amon/tas/gr/v20200730/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r6i2p1f1_gr_196011-196012.nc" #'r512x256' # ---- @@ -79,27 +81,29 @@ esarchive: calendar: "proleptic_gregorian" member: r1i4p1f1,r2i4p1f1,r3i4p1f1,r4i4p1f1,r5i4p1f1,r6i4p1f1,r7i4p1f1,r8i4p1f1,r9i4p1f1,r10i4p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" # ---- HadGEM3-GC31-MM: name: "HadGEM3-GC31-MM" - institution: + institution: "Met Office Hadley Centre" src: hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast/" first_dcppB_syear: 2019 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"} - grid: {"tas":"gn", "psl":"gr", "pr":"gr", "ts":"gr", "tos":"gr"} + grid: {"tas":"gn", "psl":"gr", "pr":"gn", "ts":"gr", "tos":"gr"} #version depends on member and variable - version: {"tas":"v20200417", "psl":"v20200316", "pr":"v20200316", "ts":"v20200316", "tos":"v20200417"} + version: {"tas":"v20200417", "psl":"v20200316", "pr":"v20200417", "ts":"v20200316", "tos":"v20200417"} daily_mean: grid: {"tasmin":"gn", "tasmax":"gn", "pr":"gn"} version: {"tasmin":"v20200417", "tasmax":"v20200417", "pr":"v20200417"} calendar: "360-day" member: r1i1p1f2,r2i1p1f2,r3i1p1f2,r4i1p1f2,r5i1p1f2,r6i1p1f2,r7i1p1f2,r8i1p1f2,r9i1p1f2,r10i1p1f2 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gn/v20200417/tas_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s1960-r1i1p1f2_gn_196011-196012.nc" #'r432x324' # ---- @@ -121,6 +125,7 @@ esarchive: calendar: "365_day" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1 initial_month: 1 + sdate_add: 1 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r8i1p1f1/Amon/tas/gn/v20200101/tas_Amon_BCC-CSM2-MR_dcppA-hindcast_s2008-r8i1p1f1_gn_200801-201712.nc" # ---- @@ -142,13 +147,14 @@ esarchive: calendar: "365_day" member: r1i1p2f1,r2i1p2f1,r3i1p2f1,r4i1p2f1,r5i1p2f1,r6i1p2f1,r7i1p2f1,r8i1p2f1, r9i1p2f1, r10i1p2f1, r11i1p2f1,r12i1p2f1,r13i1p2f1,r14i1p2f1,r15i1p2f1,r16i1p2f1,r17i1p2f1,r18i1p2f1, r19i1p2f1, r20i1p2f1,r21i1p2f1,r22i1p2f1,r23i1p2f1,r24i1p2f1,r25i1p2f1,r26i1p2f1,r27i1p2f1,r28i1p2f1, r29i1p2f1, r30i1p2f1, r31i1p2f1,r32i1p2f1,r33i1p2f1,r34i1p2f1,r35i1p2f1,r36i1p2f1,r37i1p2f1,r38i1p2f1, r39i1p2f1, r40i1p2f1 initial_month: 1 #next year Jan + sdate_add: 0 reference_grid: "/esarchive/exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_dcppA-hindcast_s2008-r1i1p2f1_gn_200901-201812.nc" # ---- #NOTE: no data there CESM1-1-CAM5-CMIP5: name: "CESM1-1-CAM5-CMIP5" - institution: + institution: "National Center for Atmospheric Research" src: hcst: "exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast" fcst: @@ -163,13 +169,14 @@ esarchive: calendar: "365_day" member: 
r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1, r9i1p1f1, r10i1p1f1, r11i1p1f1,r12i1p1f1,r13i1p1f1,r14i1p1f1,r15i1p1f1,r16i1p1f1,r17i1p1f1,r18i1p1f1, r19i1p1f1, r20i1p1f1,r21i1p1f1,r22i1p1f1,r23i1p1f1,r24i1p1f1,r25i1p1f1,r26i1p1f1,r27i1p1f1,r28i1p1f1, r29i1p1f1, r30i1p1f1, r31i1p1f1,r32i1p1f1,r33i1p1f1,r34i1p1f1,r35i1p1f1,r36i1p1f1,r37i1p1f1,r38i1p1f1, r39i1p1f1, r40i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- #NOTE: in tapes CMCC-CM2-SR5: name: "CMCC-CM2-SR5" - institution: + institution: "Euro-Mediterranean Center on Climate Change" src: hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppB-forecast/" @@ -184,6 +191,7 @@ esarchive: calendar: "365_day" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20210312/tas_Amon_CMCC-CM2-SR5_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- @@ -205,12 +213,13 @@ esarchive: calendar: "365_day" member: r1i1p1f1,r2i1p1f1,r3i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20220212/tas_Amon_FGOALS-f3-L_dcppA-hindcast_s1960-r1i1p1f1_gr_196011-197012.nc" # ---- IPSL-CM6A-LR: name: "IPSL-CM6A-LR" - institution: "IPSL" + institution: "Institut Pierre-Simon Laplace" src: hcst: "exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/" fcst: @@ -224,31 +233,33 @@ esarchive: calendar: "gregorian" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 1 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20200504/tas_Amon_IPSL-CM6A-LR_dcppA-hindcast_s2008-r1i1p1f1_gr_200901-201812.nc" # ---- MIROC6: - name: - institution: + name: "MIROC6" + institution: "Model for Interdisciplinary Research on Climate" src: hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" - fcst: + fcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} - version: {"tas":"v20200417", "pr":["v20200416","v20200504"], "psl":"v20200504", "tasmin":"v20200417", "tasmax":"v20200504"} + version: {"tas":"v20200417", "pr":"v20200504", "psl":"v20200504", "tasmin":"v20200417", "tasmax":"v20200504"} daily_mean: grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} version: {"pr":"v20191217", "tas":"v20200416", "tasmax":"v20200416", "tasmin":"v20200416"} calendar: "standard" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200417/tas_Amon_MIROC6_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- MPI-ESM1.2-HR: name: "MPI-ESM1.2-HR" - institution: 
"MIROC" + institution: "Max-Planck-Institute for Meteorology" src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/" fcst: @@ -262,6 +273,7 @@ esarchive: calendar: "standard" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200320/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- @@ -281,6 +293,7 @@ esarchive: calendar: "proleptic_gregorian" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1,r11i1p1f1,r12i1p1f1,r13i1p1f1,r14i1p1f1,r15i1p1f1,r16i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_MPI-ESM1-2-LR_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- @@ -300,13 +313,14 @@ esarchive: calendar: "proleptic_gregorian" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_MRI-ESM2-0_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201312.nc" # ---- #NOTE: NorCPM1-i1 and i2 are under the same directory NorCPM1-i1: name: "NorCPM1-i1" - institution: "NCC" + institution: "NorESM Climate modeling Consortium" src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: @@ -320,12 +334,13 @@ esarchive: calendar: "noleap" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 10 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20191029/tas_Amon_NorCPM1_dcppA-hindcast_s2008-r1i1p1f1_gn_200810-201812.nc" # ---- NorCPM1-i2: name: "NorCPM1-i2" - institution: "NCC" + institution: "NorESM Climate modeling Consortium" src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: @@ -339,6 +354,7 @@ esarchive: calendar: "noleap" member: r1i2p1f1,r2i2p1f1,r3i2p1f1,r4i2p1f1,r5i2p1f1,r6i2p1f1,r7i2p1f1,r8i2p1f1,r9i2p1f1,r10i2p1f1 initial_month: 10 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i2p1f1/Amon/pr/gn/v20200101/pr_Amon_NorCPM1_dcppA-hindcast_s2008-r1i2p1f1_gn_200810-201812.nc" diff --git a/conf/autosubmit.yml b/conf/autosubmit.yml index 4ff15ffd24e2bac63543cc792e7004f22953a6ab..3e3f1220624a8e89767ed00caf5556ed5ba39003 100644 --- a/conf/autosubmit.yml +++ b/conf/autosubmit.yml @@ -1,7 +1,7 @@ esarchive: platform: nord3v2 - module_version: autosubmit/4.0.0b-foss-2015a-Python-3.7.3 - auto_version: 4.0.0 + module_version: autosubmit/4.0.98-foss-2015a-Python-3.7.3 + auto_version: 4.0.98 conf_format: yaml experiment_dir: /esarchive/autosubmit/ userID: bsc32 diff --git a/conf/slurm_templates/run_parallel_workflow.sh b/conf/slurm_templates/run_parallel_workflow.sh index 461ee7e2335f4e830e4e72d95319d88415d3d98c..e9ef6964e74919c809931a206d25bc731384a86b 100644 --- a/conf/slurm_templates/run_parallel_workflow.sh +++ b/conf/slurm_templates/run_parallel_workflow.sh @@ -1,7 +1,5 @@ #!/bin/bash -#SBATCH -J SUNSET_verification - # Slurm directive description: # -J: job name @@ -11,5 +9,8 @@ 
script=$1 atomic_recipe=$2 source MODULES +# module load conda/22.11.1-2 +# conda activate condaCerise +# export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib Rscript ${script} ${atomic_recipe} diff --git a/conf/slurm_templates/run_scorecards.sh b/conf/slurm_templates/run_scorecards.sh index 5ebf65281985d4ddac408609497eb76408f2eb32..9abcac172bb0a25c078d62ea9d7ddd136811b201 100644 --- a/conf/slurm_templates/run_scorecards.sh +++ b/conf/slurm_templates/run_scorecards.sh @@ -15,6 +15,7 @@ recipe=$1 outdir=$2 source MODULES + # Execute scorecards Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${outdir} diff --git a/example_scripts/example_multimodel.R b/example_scripts/example_multimodel.R new file mode 100644 index 0000000000000000000000000000000000000000..c07ee5512bcb487b40bf8645a0b678475ce43438 --- /dev/null +++ b/example_scripts/example_multimodel.R @@ -0,0 +1,56 @@ + +################################ +### SEASONAL MULTIMODEL TEST ### +################################ + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("modules/Multimodel/Multimodel.R") + +horizon <- 'decadal' # seasonal + +## Cleaning output directory and splitting recipe +sunset_outputs_folder <- '/esarchive/scratch/cdelgado/sunset_outputs/' +recipe_file <- paste0("recipes/recipe_multimodel_",horizon,".yml") +# system(paste0('rm -r ',sunset_outputs_folder,'recipe_multimodel_',horizon,'_*')) +system(paste0('Rscript split.R ',recipe_file)) + +## Finding atomic recipes +atomic_recipe_folder <- paste0(sunset_outputs_folder,list.files(sunset_outputs_folder, pattern = horizon),'/logs/recipes/') +atomic_recipe_folder <- atomic_recipe_folder[length(atomic_recipe_folder)] +atomic_recipes <- list.files(paste0(atomic_recipe_folder), pattern = '.yml') +atomic_recipes <- atomic_recipes[-length(atomic_recipes)] +atomic_recipes_multimodel <- list.files(paste0(atomic_recipe_folder,'multimodel')) +atomic_recipes <- paste0(atomic_recipe_folder,atomic_recipes) +atomic_recipes_multimodel <- paste0(atomic_recipe_folder,'multimodel/',atomic_recipes_multimodel) + +## Running atomic recipes +for (recipe_file in c(atomic_recipes,atomic_recipes_multimodel)){ + # Read recipe + recipe <- read_atomic_recipe(recipe_file) + if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ + # Load datasets and create multimodel + mm <- Multimodel(recipe) + data <- mm$data + probabilities <- mm$prob + } else { + # Load datasets + data <- Loading(recipe) + # Change units + data <- Units(recipe, data) + # Compute anomalies + data <- Anomalies(recipe, data) + # Compute percentiles and probability bins + probabilities <- Probabilities(recipe, data) + } + # Compute skill metrics + skill_metrics <- Skill(recipe, data) + # Plot data + Visualization(recipe, data, skill_metrics, probabilities, significance = T) +} diff --git a/example_scripts/multimodel_seasonal.R b/example_scripts/multimodel_seasonal.R new file mode 100644 index 0000000000000000000000000000000000000000..d4e20e44982e77d262c06ad27fff15edcf63bf17 --- /dev/null +++ b/example_scripts/multimodel_seasonal.R @@ -0,0 +1,39 @@ + +########################################### +### SEASONAL MULTIMODEL TEST - LAUNCHER ### +########################################### + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") 
+source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("modules/Multimodel/Multimodel.R") + +# Read recipe +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) + +if (recipe$Analysis$Datasets$System$name == 'Multimodel') { + # Load datasets and create multimodel + mm <- Multimodel(recipe) + data <- mm$data + probabilities <- mm$prob +} else { + # Load datasets + data <- Loading(recipe) + # Change units + data <- Units(recipe, data) + # Compute anomalies + data <- Anomalies(recipe, data) + # Compute percentiles and probability bins + probabilities <- Probabilities(recipe, data) +} +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Plot data +Visualization(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index 153d64b3cee49c24066ad298464615f984a2ce35..6149a9639604942f58897363dd0c854f010b79a2 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -104,18 +104,24 @@ tmpfile=$(mktemp ${TMPDIR-/tmp}/SUNSET.XXXXXX) # Create outdir and split recipes source MODULES +# module load conda/22.11.1-2 +# conda activate condaCerise +# export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + Rscript split.R ${recipe} $disable_unique_ID --tmpfile $tmpfile # Run with Autosubmit or directly with Slurm's sbatch? run_method=$( head -1 $tmpfile | tail -1 ) # If run method is 'sbatch', launch jobs with dependencies -if [ $run_method == "sbatch" ]; then +if [[ $run_method == "sbatch" ]]; then # Retrieve working directory codedir=$( head -2 $tmpfile | tail -1 ) # Retrieve output directory outdir=$( head -3 $tmpfile | tail -1 ) + # Multimodel TRUE/FALSE + multimodel=$( head -4 $tmpfile | tail -1) # Scorecards TRUE/FALSE - scorecards=$( head -4 $tmpfile | tail -1) + scorecards=$( head -5 $tmpfile | tail -1) # Create directory for slurm output logdir=${outdir}/logs/slurm/ @@ -129,17 +135,33 @@ if [ $run_method == "sbatch" ]; then verification_job_list=() echo "Submitting verification jobs..." 
# Loop over atomic recipes - for atomic_recipe in ${outdir}/logs/recipes/atomic_recipe_??.yml; do + for atomic_recipe in ${outdir}/logs/recipes/atomic_recipe_*.yml; do job_number=$(($job_number + 1)) job_name=$(basename $outdir)_$(printf %02d $job_number) outfile=${logdir}/run-${job_name}.out errfile=${logdir}/run-${job_name}.err # Send batch job and capture job ID - job_ID=$(sbatch --parsable --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) + job_ID=$(sbatch --parsable --job-name="SUNSET_verification" --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) # Add job ID to array verification_job_list+=($job_ID) echo "Submitted batch job $job_ID" done + + multimodel_job_list=() + job_number=0 + if [[ $multimodel == "TRUE" ]]; then + for atomic_recipe in ${outdir}/logs/recipes/multimodel/atomic_recipe_*.yml; do + job_number=$(($job_number + 1)) + job_name=$(basename $outdir)_$(printf %02d $job_number) + outfile=${logdir}/run-multimodel-${job_name}.out + errfile=${logdir}/run-multimodel-${job_name}.err + # Send batch job and capture job ID + job_ID=$(sbatch --parsable --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]}") --kill-on-invalid-dep=yes --job-name="SUNSET_multimodel" --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) + # Add job ID to array + multimodel_job_list+=($job_ID) + echo "Submitted batch job $job_ID" + done + fi # Submit scorecards job with dependency on verification jobs, passed as a # comma-separated string. The scorecards job will not run until all the @@ -149,7 +171,7 @@ if [ $run_method == "sbatch" ]; then echo "Submitting scorecards jobs..." 
outfile=${logdir}/run-scorecards.out errfile=${logdir}/run-scorecards.err - sbatch --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]}") --output=$outfile --error=$errfile --time=01:00:00 conf/slurm_templates/run_scorecards.sh ${recipe} ${outdir} + sbatch --dependency=afterok:$(IFS=,; all_jobs=("${verification_job_list[@]}" "${multimodel_job_list[@]}"); echo "${all_jobs[*]}") --output=$outfile --error=$errfile --time=01:00:00 conf/slurm_templates/run_scorecards.sh ${recipe} ${outdir} fi fi diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 989d9b94519ee298dc6d76a3ecaf254f71927aaa..16d7b96b190fad0f2e80f48c6195ec61416447b3 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -28,7 +28,8 @@ Calibration <- function(recipe, data) { } else { ## TODO: Calibrate full fields when present # Calibration function params - mm <- recipe$Analysis$Datasets$Multimodel + mm <- !is.null(recipe$Analysis$Datasets$Multimodel) && + !tolower(recipe$Analysis$Datasets$Multimodel) %in% c('no','false') if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 63fee97bede51b72128b917af9b5171d83061d4f..6c4002ee9f10d291acdd36f9dfe6996eea15be26 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -18,10 +18,10 @@ Loading <- function(recipe) { if(recipe$Analysis$Variables$name == 'tas-tos') { source("modules/Loading/R/load_tas_tos.R") data <- load_tas_tos(recipe) - } else { + } else { source("modules/Loading/R/load_seasonal.R") data <- load_seasonal(recipe) - } + } } else if (time_horizon == "decadal") { source("modules/Loading/R/load_decadal.R") data <- load_decadal(recipe) diff --git a/modules/Loading/R/GRIB/GrbLoad.R b/modules/Loading/R/GRIB/GrbLoad.R index ef1df0cb13b6eb46ac4b55de3fabee4dd204213a..7a3f441410c34d0b667ef0785d33d8c727670ceb 100644 --- a/modules/Loading/R/GRIB/GrbLoad.R +++ b/modules/Loading/R/GRIB/GrbLoad.R @@ -7,21 +7,21 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, regrid = NULL) { library(gribr) - + result <- vector('list', length = length(dat)) times <- vector('list', length = length(dat)) times <- lapply(times, '[<-', rep(NA, length(time_step))) #NOTE: length is 0 (slower in loop?) -# times <- lapply(times, '[<-', .POSIXct(rep(NA, length(time_step)), tz = 'UTC')) - + # times <- lapply(times, '[<-', .POSIXct(rep(NA, length(time_step)), tz = 'UTC')) + for (dat_i in 1:length(dat)) { - + file_to_load <- grib_open(dat[[dat_i]]) - + #---------------------------------------- # HOW TO FIND THE VALUE OF EACH FTIME STEP? #---------------------------------------- #NOTE: ValidityTime is not considered now. So if the time frequency is less than daily, it has problem. - + # METHOD 1: Get first message to figure out the validityDate/Time of each message #NOTE: gm1$validityDate should be "s", "m", "h", etc. according to document. But our files have "1". gm1 <- grib_get_message(file_to_load, 1) @@ -30,168 +30,168 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, # For monthly data #NOTE: may not be correct because it is calculated by the first message cdo_time_attr <- clock::add_months(as.POSIXct(paste0(first_ftime, ' ', first_ftime_hour), - format = "%Y%m%d %H", tz = 'UTC'), time_step - 1) + format = "%Y%m%d %H", tz = 'UTC'), time_step - 1) cdo_time <- format(cdo_time_attr, "%Y%m%d") - -# # METHOD 2: Use cdo showtimestamp (DEPENDENCY!)
-# #TODO: Change to method 1 because can't predict what cdo will produce -# cdo_time <- system(paste0("cdo showtimestamp ", dat[[dat_i]]), intern = T) -# cdo_time <- strsplit(cdo_time, " ")[[length(cdo_time)]] -# cdo_time <- cdo_time[which(cdo_time != "")] -## # Check if there is member dim or not -## has_memb <- ifelse((length(unique(cdo_time)) == length(cdo_time)), FALSE, TRUE) -# if (has.memb) memb_dim_length <- length(cdo_time)/length(unique(cdo_time)) -# cdo_time <- unique(cdo_time)[time_step] #"2000-12-01T00:00:00" -# cdo_time_attr <- as.POSIXct(gsub('T', ' ', cdo_time), tz = 'UTC') -# cdo_time <- sapply(sapply(cdo_time, strsplit, "T"), '[[', 1) -# cdo_time <- gsub('-', '', cdo_time) - + + # # METHOD 2: Use cdo showtimestamp (DEPENDENCY!) + # #TODO: Change to method 1 because can't predict what cdo will produce + # cdo_time <- system(paste0("cdo showtimestamp ", dat[[dat_i]]), intern = T) + # cdo_time <- strsplit(cdo_time, " ")[[length(cdo_time)]] + # cdo_time <- cdo_time[which(cdo_time != "")] + ## # Check if there is member dim or not + ## has_memb <- ifelse((length(unique(cdo_time)) == length(cdo_time)), FALSE, TRUE) + # if (has.memb) memb_dim_length <- length(cdo_time)/length(unique(cdo_time)) + # cdo_time <- unique(cdo_time)[time_step] #"2000-12-01T00:00:00" + # cdo_time_attr <- as.POSIXct(gsub('T', ' ', cdo_time), tz = 'UTC') + # cdo_time <- sapply(sapply(cdo_time, strsplit, "T"), '[[', 1) + # cdo_time <- gsub('-', '', cdo_time) + #---------------------------------------- - + # all members + ftimes: length should be memb*ftime (e.g., 51*7) ## Method 1: use grib_select and real values to filter memb_ftime <- grib_select(file_to_load, list(validityDate = cdo_time)) if (inherits(memb_ftime, 'gribMessage')) memb_ftime <- list(memb_ftime) - -# ## Method 2: Calculate which messages are the desired ones -# gm <- grib_get_message(file_to_load, time_step) -# if (length(time_step) == 1) { -# gm <- list(gm) -# } - - ################################################################## - # Get data as an array [longitude, latitude, (memb*)time] - ################################################################## - if (grepl("reduced", gm1$gridType)) { - #NOTE: Need to call gribr::grib_expand_grids because I don't know how to make .Call("gribr_redtoreg") work outside that function - # https://github.com/nawendt/gribr/blob/main/src/redtoreg.c - values_l <- vector('list', length = length(memb_ftime)) - for (gm_i in 1:length(memb_ftime)) { - values_l[[gm_i]] <- grib_expand_grids(memb_ftime[[gm_i]]) + + # ## Method 2: Calculate which messages are the desired ones + # gm <- grib_get_message(file_to_load, time_step) + # if (length(time_step) == 1) { + # gm <- list(gm) + # } + + ################################################################## + # Get data as an array [longitude, latitude, (memb*)time] + ################################################################## + if (grepl("reduced", gm1$gridType)) { + #NOTE: Need to call gribr::grib_expand_grids because I don't know how to make .Call("gribr_redtoreg") work outside that function + # https://github.com/nawendt/gribr/blob/main/src/redtoreg.c + values_l <- vector('list', length = length(memb_ftime)) + for (gm_i in 1:length(memb_ftime)) { + values_l[[gm_i]] <- grib_expand_grids(memb_ftime[[gm_i]]) + } + result[[dat_i]] <- array(unlist(values_l), dim = c(longitude = gm1$Nj * 2, latitude = gm1$Nj, time = length(values_l))) + # Save memory + rm(values_l); gc() + + } else { + result[[dat_i]] <- .grib_expand_grids(memb_ftime) } - result[[dat_i]] <- 
array(unlist(values_l), dim = c(longitude = gm1$Nj * 2, latitude = gm1$Nj, time = length(values_l))) - # Save memory - rm(values_l); gc() - - } else { - result[[dat_i]] <- .grib_expand_grids(memb_ftime) - } - - ################################################################## - # Get metadata - ################################################################## - ## (1-1) Everything from the first message of first file - if (dat_i == 1) { + + ################################################################## + # Get metadata + ################################################################## ## (1-1) Everything from the first message of first file -# dims <- dim(result[[dat_i]]) -# attributes(result) <- gm1 -# # turn result into array again -# dim(result[[dat_i]]) <- dims - - ## (1-2) Only save the necessary attributes - attr(result, 'edition') <- gm1$edition - attr(result, 'shortName') <- gm1$shortName - #NOTE: Tune varaible name!! - if (gm1$shortName == '2t') attr(result, 'shortName') <- 'tas' - attr(result, 'name') <- gm1$name - attr(result, 'units') <- gm1$units -# attr(result, 'validityDate') <- gm1$validityDate -# attr(result, 'validityTime') <- gm1$validityTime - - ## (2) Lat and lon - latlon <- grib_latlons(gm1, expand = TRUE) - attr(result, 'latitude') <- unique(as.vector(c(latlon$lats))) - attr(result, 'longitude') <- unique(as.vector(c(latlon$lons))) - # Save memory (though it's small) - rm(latlon); gc() - - #NOTE: Find another way to check regular grid; Ni/Nj not always exist -# if (has.key(gm1, "Nx") && has.key(gm1, "Ny")) { -# nx <- gm1$Nx -# ny <- gm1$Ny -# } else { -# nx <- gm1$Ni -# ny <- gm1$Nj -# } -# if (length(lats) != ny | length(lons) != nx) { -# stop("Latitude and Longitude seem to be non-regular grid.") -# } - - } - -#-------------------------------- -#NOTE: Just use cdo_time -# ## (3) Date and time: Need to get from each massage -# for (time_i in 1:length(time_step)) { -# gm1 <- gm[[time_i]] -# #NOTE: What's the correct time? -## dates <- gm1$validityDate #Date of validity of the forecast -## times <- gm1$validityTime -## dates <- gm1$dataDate # Reference date -# times[[dat_i]][time_i] <- as.POSIXct( -# lubridate::ymd_hms(paste0(paste(gm1$year,gm1$month,gm1$day, '-'), ' ', -# paste(gm1$hour, gm1$minute, gm1$second, ':'))) -# ) -# } - times[[dat_i]] <- cdo_time_attr -#-------------------------------- - - ################################################################## - # regrid - ################################################################## - if (!is.null(regrid)) { - # result[[dat_i]]: [longitude, latitude, time] - res_data <- s2dv::CDORemap(result[[dat_i]], lons = attr(result, 'longitude'), lats = attr(result, 'latitude'), - grid = regrid$type, method = regrid$method, force_remap = TRUE) - if (dat_i == length(dat)) { - attr(result, 'longitude') <- res_data$lons - attr(result, 'latitude') <- res_data$lats + if (dat_i == 1) { + ## (1-1) Everything from the first message of first file + # dims <- dim(result[[dat_i]]) + # attributes(result) <- gm1 + # # turn result into array again + # dim(result[[dat_i]]) <- dims + + ## (1-2) Only save the necessary attributes + attr(result, 'edition') <- gm1$edition + attr(result, 'shortName') <- gm1$shortName + #NOTE: Tune variable name!!
+ if (gm1$shortName == '2t') attr(result, 'shortName') <- 'tas' + attr(result, 'name') <- gm1$name + attr(result, 'units') <- gm1$units + # attr(result, 'validityDate') <- gm1$validityDate + # attr(result, 'validityTime') <- gm1$validityTime + + ## (2) Lat and lon + latlon <- grib_latlons(gm1, expand = TRUE) + attr(result, 'latitude') <- unique(as.vector(c(latlon$lats))) + attr(result, 'longitude') <- unique(as.vector(c(latlon$lons))) + # Save memory (though it's small) + rm(latlon); gc() + + #NOTE: Find another way to check regular grid; Ni/Nj not always exist + # if (has.key(gm1, "Nx") && has.key(gm1, "Ny")) { + # nx <- gm1$Nx + # ny <- gm1$Ny + # } else { + # nx <- gm1$Ni + # ny <- gm1$Nj + # } + # if (length(lats) != ny | length(lons) != nx) { + # stop("Latitude and Longitude seem to be non-regular grid.") + # } + } + + #-------------------------------- + #NOTE: Just use cdo_time + # ## (3) Date and time: Need to get from each message + # for (time_i in 1:length(time_step)) { + # gm1 <- gm[[time_i]] + # #NOTE: What's the correct time? + ## dates <- gm1$validityDate #Date of validity of the forecast + ## times <- gm1$validityTime + ## dates <- gm1$dataDate # Reference date + # times[[dat_i]][time_i] <- as.POSIXct( + # lubridate::ymd_hms(paste0(paste(gm1$year,gm1$month,gm1$day, '-'), ' ', + # paste(gm1$hour, gm1$minute, gm1$second, ':'))) + # ) + # } + times[[dat_i]] <- cdo_time_attr + #-------------------------------- + + ################################################################## + # regrid + ################################################################## + if (!is.null(regrid)) { + # result[[dat_i]]: [longitude, latitude, time] + res_data <- s2dv::CDORemap(result[[dat_i]], lons = attr(result, 'longitude'), lats = attr(result, 'latitude'), + grid = regrid$type, method = regrid$method, force_remap = TRUE) + if (dat_i == length(dat)) { + attr(result, 'longitude') <- res_data$lons + attr(result, 'latitude') <- res_data$lats + } + result[[dat_i]] <- res_data$data_array + } + + + ################################################################## + # Save memory + rm(memb_ftime); rm(gm1); gc() + grib_close(file_to_load) # Doesn't impact memory + ################################################################## + } #for loop dat + # Turn result list into array attr <- attributes(result) res_dim <- c(dim(result[[1]]), syear = length(result)) #[longitude, latitude, (memb*)time, syear] result <- unlist(result) dim(result) <- res_dim - + # Generate date/time attributes times <- array(unlist(times), dim = c(time = length(time_step), syear = length(dat), sday = 1, sweek = 1)) times <- s2dv::Reorder(times, c('sday', 'sweek', 'syear', 'time')) if (!is.null(syear_time_dim)) dim(times) <- syear_time_dim times <- as.POSIXct(times, origin = '1970-01-01', tz = 'UTC') - + # Reshape and reorder array if (is.null(has.memb)) { # obs doesn't have memb; reshape syear/time dim result <- s2dv::Reorder(result, c("syear", "time", "latitude", "longitude")) result <- array(result, dim = c(dat = 1, var = 1, syear_time_dim, dim(result)[3:4], - ensemble = 1)) + ensemble = 1)) } else { result <- array(result, dim = c(dim(result)[1:2], ensemble = has.memb, time = length(time_step), dim(result)[4])) result <- 
s2dv::Reorder(result, c("syear", "time", "latitude", "longitude", "ensemble")) dim(result) <- c(dat = 1, var = 1, sday = 1, sweek = 1, dim(result)) } - + # Add attributes back attr$dim <- dim(result) attributes(result) <- attr attr(result, 'time') <- times - + # Save memory rm(times); rm(attr); gc() - + return(result) } @@ -201,7 +201,7 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, .grib_expand_grids <- function(gribMessages, vector = FALSE) { # gribMessages is a list of multiple messages gribMessage <- gribMessages[[1]] - + if (gribr::has.key(gribMessage, "Nx") && gribr::has.key(gribMessage, "Ny")) { nx <- gribMessage$Nx ny <- gribMessage$Ny @@ -209,11 +209,11 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, nx <- gribMessage$Ni ny <- gribMessage$Nj } - + if (is.null(nx) || is.null(ny)) { stop("Unsupported grid type: ", gribMessage$gridType) } - + if (grepl("reduced", gribMessage$gridType)) { #TODO: This part is not used now. nx <- ny * 2 @@ -221,28 +221,28 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, gribMessage$values) values <- matrix(values, nx, ny, byrow = gribMessage$jPointsAreConsecutive) - -# values_l <- vector('list', length = length(gribMessages)) -# for (gm_i in 1:length(gribMessages)) { -# values <- .Call("gribr_redtoreg", nx, gribMessages[[gm_i]]$pl, -# gribMessages[[gm_i]]$values) -# values <- matrix(values, nx, ny, -# byrow = gribMessage$jPointsAreConsecutive) -# values_l[[gm_i]] <- values -# } - + + # values_l <- vector('list', length = length(gribMessages)) + # for (gm_i in 1:length(gribMessages)) { + # values <- .Call("gribr_redtoreg", nx, gribMessages[[gm_i]]$pl, + # gribMessages[[gm_i]]$values) + # values <- matrix(values, nx, ny, + # byrow = gribMessage$jPointsAreConsecutive) + # values_l[[gm_i]] <- values + # } + } else { -# values <- matrix(gribMessage$values, nx, ny, -# byrow = gribMessage$jPointsAreConsecutive) + # values <- matrix(gribMessage$values, nx, ny, + # byrow = gribMessage$jPointsAreConsecutive) values_l <- lapply(gribMessages, '[[', 'values') values_l <- lapply(values_l, matrix, nx, ny, byrow = gribMessage$jPointsAreConsecutive) values <- array(unlist(values_l), dim = c(longitude = nx, latitude = ny, time = length(values_l))) } - + if (vector) { values <- as.numeric(values) } - + values } diff --git a/modules/Loading/R/load_GRIB.R b/modules/Loading/R/load_GRIB.R index 8ae2a74d2efb2c3e5f82034ee1f647b6548620d7..0dd5f9191c4275b66c5525c248e03b9c6e0a0e8b 100644 --- a/modules/Loading/R/load_GRIB.R +++ b/modules/Loading/R/load_GRIB.R @@ -6,10 +6,10 @@ source('modules/Loading/R/GRIB/GrbLoad.R') source('tools/libs.R') load_GRIB <- function(recipe) { - + # Set params #------------------------------------------------------------------- - + # get recipe info hcst.inityear <- recipe$Analysis$Time$hcst_start hcst.endyear <- recipe$Analysis$Time$hcst_end @@ -24,10 +24,10 @@ load_GRIB <- function(recipe) { exp.name <- recipe$Analysis$Datasets$System$name variable <- recipe$Analysis$Variables$name #'tas' store.freq <- recipe$Analysis$Variables$freq - + regrid.method <- recipe$Analysis$Regrid$method regrid.type <- recipe$Analysis$Regrid$type - + # get MARS datasets dict: archive <- read_yaml("conf/archive.yml")[[recipe$Run$filesystem]] exp_descrip <- archive$System[[exp.name]] @@ -40,24 +40,24 @@ load_GRIB <- function(recipe) { #NOTE: We can use this info in GrbLoad() to substitute param 'has.memb' fcst.nmember <- exp_descrip$nmember$fcst hcst.nmember <- 
exp_descrip$nmember$hcst - + info(recipe$Run$logger, "========== PARAMETERS RETRIEVED. ==========") - + # Load hindcast #------------------------------------------------------------------- - -## original file dir -#exp_path <- "/esarchive/exp/ecmwf/system5_m1/original_files/fcmean_od_sfc_msmm_ecmf/" -## soft link to original file dir -#exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas/" #files are not correct -# The correct files -#exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/" - + + ## original file dir + #exp_path <- "/esarchive/exp/ecmwf/system5_m1/original_files/fcmean_od_sfc_msmm_ecmf/" + ## soft link to original file dir + #exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas/" #files are not correct + # The correct files + #exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/" + hcst.path <- paste0(archive$src, hcst.dir) hcst.year <- paste0(as.numeric(hcst.inityear):as.numeric(hcst.endyear)) hcst.files <- paste0(hcst.path, variable, '_', hcst.year, hcst.sdate, '.grb') - + if (!regrid.type %in% c('none', 'to_system')) { if (regrid.type == 'to_reference') { regrid_list <- c(method = regrid.method, type = reference_descrip$reference_grid) @@ -67,15 +67,15 @@ load_GRIB <- function(recipe) { } else { regrid_list <- NULL } - + .log_memory_usage(recipe$Run$logger, when = "Before loading the data") hcst <- GrbLoad(dat = as.list(hcst.files), time_step = hcst.ftime, has.memb = hcst.nmember, - syear_time_dim = NULL, regrid = regrid_list) + syear_time_dim = NULL, regrid = regrid_list) gc() - + info(recipe$Run$logger, "========== HCST LOADED. ==========") - + # Load forecast #------------------------------------------------------------------- if (!is.null(fcst.year)) { @@ -87,20 +87,20 @@ load_GRIB <- function(recipe) { } else { fcst <- NULL } - + info(recipe$Run$logger, "========== FCST LOADED. ==========") - + # Load reference #------------------------------------------------------------------- -#obs_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_era5_tas/" + #obs_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_era5_tas/" obs.path <- paste0(archive$src, obs.dir) # Use hcst time attr to load obs hcst_times <- attr(hcst, 'time') hcst_times_strings <- format(hcst_times, '%Y%m') - + obs.files <- paste0(obs.path, variable, '_', hcst_times_strings, '.grb') - + if (!regrid.type %in% c('none', 'to_reference')) { if (regrid.type == 'to_system') { regrid_list <- c(method = regrid.method, type = exp_descrip$reference_grid) @@ -110,51 +110,51 @@ load_GRIB <- function(recipe) { } else { regrid_list <- NULL } - + #NOTE: only 1 time step in each obs file obs <- GrbLoad(dat = as.list(obs.files), time_step = 1, has.memb = NULL, syear_time_dim = dim(hcst_times), regrid = regrid_list) gc() - + .log_memory_usage(recipe$Run$logger, when = "After loading the data") info(recipe$Run$logger, "========== OBS LOADED. ==========") - - -################################################################################# - -#dim(hcst) -# syear time latitude longitude ensemble -# 4 3 640 1280 51 - -##BEFORE TRANSFER TO S2DV_CUBE -#str(hcst) -# num [1:4, 1:3, 1:640, 1:1280, 1:51] 252 252 252 252 251 ... -# - attr(*, "edition")= num 1 -# - attr(*, "shortName")= chr "2t" -# - attr(*, "longitude")= num [1:1280] 0 0.281 0.563 0.844 1.125 ... -# - attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... -# - attr(*, "time")= POSIXct[1:12], format: "2000-12-01" "2001-12-01" ... 
- -#dim(attr(hcst, 'time')) -#syear time -# 4 3 - -##BEFORE TRANSFER TO S2DV_CUBE -#str(obs) -# num [1:4, 1:3, 1:640, 1:1280] 251 251 251 251 251 ... -# - attr(*, "edition")= num 1 -# - attr(*, "shortName")= chr "2t" -# - attr(*, "longitude")= num [1:1280] 0 0.281 0.562 0.844 1.125 ... -# - attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... -# - attr(*, "time")= POSIXct[1:12], format: "2000-12-01" "2001-12-01" ... - -################################################################################# - + + + ################################################################################# + + #dim(hcst) + # syear time latitude longitude ensemble + # 4 3 640 1280 51 + + ##BEFORE TRANSFER TO S2DV_CUBE + #str(hcst) + # num [1:4, 1:3, 1:640, 1:1280, 1:51] 252 252 252 252 251 ... + # - attr(*, "edition")= num 1 + # - attr(*, "shortName")= chr "2t" + # - attr(*, "longitude")= num [1:1280] 0 0.281 0.563 0.844 1.125 ... + # - attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... + # - attr(*, "time")= POSIXct[1:12], format: "2000-12-01" "2001-12-01" ... + + #dim(attr(hcst, 'time')) + #syear time + # 4 3 + + ##BEFORE TRANSFER TO S2DV_CUBE + #str(obs) + # num [1:4, 1:3, 1:640, 1:1280] 251 251 251 251 251 ... + # - attr(*, "edition")= num 1 + # - attr(*, "shortName")= chr "2t" + # - attr(*, "longitude")= num [1:1280] 0 0.281 0.562 0.844 1.125 ... + # - attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... + # - attr(*, "time")= POSIXct[1:12], format: "2000-12-01" "2001-12-01" ... + + ################################################################################# + info(recipe$Run$logger, "========== REGRID DONE. ==========") - - + + # Turn into s2dv_cube #------------------------------------------------------------------- # hcst @@ -163,7 +163,7 @@ load_GRIB <- function(recipe) { metadata_list[[variable]] <- list(long_name = attr(hcst, 'name'), units = attr(hcst, 'units')) load_parameters_list <- list(dat1 = list(file_date = list(paste0(hcst.year, hcst.sdate)))) - + hcst <- s2dv_cube(data = array(hcst, dim = dim(hcst)), coords = list(dat = 'dat1', var = variable, @@ -181,15 +181,15 @@ load_GRIB <- function(recipe) { load_parameters = load_parameters_list, # extra attrs gribEdition = attr(hcst, 'edition')) - + # fcst if (!is.null(fcst)) { metadata_list <- vector("list", length = 1) names(metadata_list) <- variable metadata_list[[variable]] <- list(long_name = attr(fcst, 'name'), - units = attr(fcst, 'units')) + units = attr(fcst, 'units')) load_parameters_list <- list(dat1 = list(file_date = list(paste0(fcst.year, hcst.sdate)))) - + fcst <- s2dv_cube(data = array(fcst, dim = dim(fcst)), coords = list(dat = 'dat1', var = variable, @@ -207,14 +207,14 @@ load_GRIB <- function(recipe) { load_parameters = load_parameters_list, gribEdition = attr(fcst, 'edition')) } - + # obs metadata_list <- vector("list", length = 1) names(metadata_list) <- variable metadata_list[[variable]] <- list(long_name = attr(obs, 'name'), units = attr(obs, 'units')) load_parameters_list <- list(dat1 = list(file_date = list(hcst_times_strings))) - + obs <- s2dv_cube(data = array(obs, dim = dim(obs)), coords = list(dat = 'dat1', var = variable, @@ -225,55 +225,55 @@ load_GRIB <- function(recipe) { time = hcst.ftime, latitude = attr(obs, 'latitude'), longitude = attr(obs, 'longitude'), - ensemble = 1), + ensemble = 1), varName = attr(obs, 'shortName'), metadata = metadata_list, Dates = attributes(obs)$time, source_files = obs.files, load_parameters = load_parameters_list, gribEdition = attr(obs, 'edition')) 
- - -#str(hcst) -#List of 4 -# $ data : num [1, 1, 1, 1, 1:2, 1:2, 1:640, 1:1280, 1:51] 252 253 248 251 251 ... -# ..- attr(*, "edition")= num 1 -# ..- attr(*, "shortName")= chr "2t" -# ..- attr(*, "longitude")= num [1:1280] 0 0.281 0.563 0.844 1.125 ... -# ..- attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... -# ..- attr(*, "time")= POSIXct[1:4], format: "2000-12-01" "2001-12-01" ... -# $ dims : Named int [1:9] 1 1 1 1 2 2 640 1280 51 -# ..- attr(*, "names")= chr [1:9] "dat" "var" "sday" "sweek" ... -# $ coords:List of 9 -# ..$ dat : chr "dat1" -# .. ..- attr(*, "indices")= logi FALSE -# ..$ var : chr "tas" -# .. ..- attr(*, "indices")= logi FALSE -# ..$ sday : num 1 -# .. ..- attr(*, "indices")= logi FALSE -# ..$ sweek : num 1 -# .. ..- attr(*, "indices")= logi FALSE -# ..$ syear : chr [1:2] "2000" "2001" -# .. ..- attr(*, "indices")= logi FALSE -# ..$ time : int [1:2] 1 2 -# .. ..- attr(*, "indices")= logi FALSE -# ..$ latitude : num [1:640] 89.8 89.5 89.2 88.9 88.7 ... -# .. ..- attr(*, "indices")= logi FALSE -# ..$ longitude: num [1:1280] 0 0.281 0.563 0.844 1.125 ... -# .. ..- attr(*, "indices")= logi FALSE -# ..$ ensemble : int [1:51] 1 2 3 4 5 6 7 8 9 10 ... -# .. ..- attr(*, "indices")= logi FALSE -# $ attrs :List of 4 -# ..$ Dates : POSIXct[1:4], format: "2000-12-01" "2001-12-01" ... -# ..$ Variable :List of 1 -# .. ..$ varName: chr "2t" -# ..$ source_files: chr [1:2] "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/tas_20001101.grb" "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/tas_20011101.grb" -# ..$ gribEdition : num 1 -# - attr(*, "class")= chr "s2dv_cube" + + + #str(hcst) + #List of 4 + # $ data : num [1, 1, 1, 1, 1:2, 1:2, 1:640, 1:1280, 1:51] 252 253 248 251 251 ... + # ..- attr(*, "edition")= num 1 + # ..- attr(*, "shortName")= chr "2t" + # ..- attr(*, "longitude")= num [1:1280] 0 0.281 0.563 0.844 1.125 ... + # ..- attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... + # ..- attr(*, "time")= POSIXct[1:4], format: "2000-12-01" "2001-12-01" ... + # $ dims : Named int [1:9] 1 1 1 1 2 2 640 1280 51 + # ..- attr(*, "names")= chr [1:9] "dat" "var" "sday" "sweek" ... + # $ coords:List of 9 + # ..$ dat : chr "dat1" + # .. ..- attr(*, "indices")= logi FALSE + # ..$ var : chr "tas" + # .. ..- attr(*, "indices")= logi FALSE + # ..$ sday : num 1 + # .. ..- attr(*, "indices")= logi FALSE + # ..$ sweek : num 1 + # .. ..- attr(*, "indices")= logi FALSE + # ..$ syear : chr [1:2] "2000" "2001" + # .. ..- attr(*, "indices")= logi FALSE + # ..$ time : int [1:2] 1 2 + # .. ..- attr(*, "indices")= logi FALSE + # ..$ latitude : num [1:640] 89.8 89.5 89.2 88.9 88.7 ... + # .. ..- attr(*, "indices")= logi FALSE + # ..$ longitude: num [1:1280] 0 0.281 0.563 0.844 1.125 ... + # .. ..- attr(*, "indices")= logi FALSE + # ..$ ensemble : int [1:51] 1 2 3 4 5 6 7 8 9 10 ... + # .. ..- attr(*, "indices")= logi FALSE + # $ attrs :List of 4 + # ..$ Dates : POSIXct[1:4], format: "2000-12-01" "2001-12-01" ... + # ..$ Variable :List of 1 + # .. 
..$ varName: chr "2t" + # ..$ source_files: chr [1:2] "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/tas_20001101.grb" "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/tas_20011101.grb" + # ..$ gribEdition : num 1 + # - attr(*, "class")= chr "s2dv_cube" .log_memory_usage(recipe$Run$logger, when = "After regridding") info(recipe$Run$logger, "##### GRIB DATA LOADED SUCCESSFULLY #####") - + return(list(hcst = hcst, fcst = fcst, obs = obs)) - + } diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 9268b090fc70db506279c87cbc3a697dd763fe67..0170e9cd67b343b1ae5dc325e20a199fa643911b 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -14,10 +14,10 @@ source("modules/Loading/R/compare_exp_obs_grids.R") load_decadal <- function(recipe) { ## archive <- read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] - + # Print Start() info or not DEBUG <- FALSE - + ## TODO: this should come from the main script # Create output folder and log: @@ -27,21 +27,21 @@ load_decadal <- function(recipe) { exp.name <- recipe$Analysis$Datasets$System$name #'HadGEM3' ref.name <- recipe$Analysis$Datasets$Reference$name #'era5' member <- strsplit(recipe$Analysis$Datasets$System$member, ', | |,')[[1]] #c("r1i1p1f2", "r2i1p1f2") -# variable <- recipe$Analysis$Variables$name #'tas' + # variable <- recipe$Analysis$Variables$name #'tas' variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] store.freq <- recipe$Analysis$Variables$freq #monthly_mean lats.min <- as.numeric(recipe$Analysis$Region$latmin) #0 lats.max <- as.numeric(recipe$Analysis$Region$latmax) #10 lons.min <- as.numeric(recipe$Analysis$Region$lonmin) #0 lons.max <- as.numeric(recipe$Analysis$Region$lonmax) #10 - + # change to: sdates <- dates2load(recipe, logger) sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015 sdates_fcst <- recipe$Analysis$Time$fcst - + if (store.freq == "monthly_mean") { time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) - + } else if (store.freq == "daily_mean") { time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), @@ -49,10 +49,10 @@ load_decadal <- function(recipe) { sdates = sdates_hcst, calendar = archive$System[[exp.name]]$calendar) } - -#NOTE: May be used in the future -# season <- recipe$Analysis$Time$season - + + #NOTE: May be used in the future + # season <- recipe$Analysis$Time$season + #------------------------- # Read from archive: #------------------------- @@ -66,20 +66,20 @@ load_decadal <- function(recipe) { if (identical(member, 'all')) { member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]] } - + #------------------------- # derived from above: #------------------------- # Check lat and lon and decide CircularSort - circularsort <- check_latlon(latmin = lats.min, latmax = lats.max, lonmin = lons.min, lonmax = lons.max) - + circularsort <- check_latlon(latmin = lats.min, latmax = lats.max, lonmin = lons.min, lonmax = lons.max) + # generate transform params for system and ref regrid_params <- get_regrid_params(recipe, archive) - + # Only if the time length in each chunk may differ that we need largest_dims_length to be TRUE. Otherwise, set FALSE to increase efficiency. 
need_largest_dims_length <- ifelse(exp.name %in% c('HadGEM3-GC31-MM', 'EC-Earth3-i2'), TRUE, FALSE) - - + + #------------------------------------------- # Step 1: Load the hcst #------------------------------------------- @@ -88,12 +88,12 @@ load_decadal <- function(recipe) { version = version, sdates = sdates_hcst) path_list <- tmp$path_list multi_path <- tmp$multi_path - + #TODO: to make this case work; enhance Start() if it's possible if (multi_path & length(variable) > 1) { stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") } - + Start_default_arg_list <- list( dat = path_list, var = variable, @@ -114,39 +114,39 @@ load_decadal <- function(recipe) { transform_params = list(grid = regrid_params$fcst.gridtype, method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), -# path_glob_permissive = 2, # for version + # path_glob_permissive = 2, # for version synonims = list(longitude = c('lon', 'longitude'), latitude = c('lat', 'latitude')), return_vars = list(latitude = NULL, longitude = NULL, time = c('syear', 'chunk')), silent = !DEBUG, retrieve = T) - + if (length(variable) > 1) { Start_default_arg_list <- c(Start_default_arg_list, - list(table = table, grid = grid, version = version, - table_depends = 'var', grid_depends = 'var', version_depends = 'var', - metadata_dims = 'var')) + list(table = table, grid = grid, version = version, + table_depends = 'var', grid_depends = 'var', version_depends = 'var', + metadata_dims = 'var')) } - + if (!multi_path) { Start_hcst_arg_list <- Start_default_arg_list hcst <- do.call(Start, Start_hcst_arg_list) - + } else { Start_hcst_arg_list <- Start_default_arg_list Start_hcst_arg_list[['syear']] <- NULL Start_hcst_arg_list[['chunk_depends']] <- NULL remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear') Start_hcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] - + hcst <- do.call(Start, Start_hcst_arg_list) - + # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) - + # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only tmp <- array(dim = c(dim(hcst)[c('syear', 'time')])) @@ -156,302 +156,302 @@ load_decadal <- function(recipe) { tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) } attr(hcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') - + } - + tmp_time_attr <- attr(hcst, 'Variables')$common$time - + # change syear to c(sday, sweek, syear) # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { error(recipe$Run$logger, - "hcst has problem in matching data and time attr dimension.") + "hcst has problem in matching data and time attr dimension.") stop() } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) - + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { attributes(hcst)$Variables$common[[variable]] <- 
attributes(hcst)$Variables$dat1[[variable]] } - + # Change class from startR_array to s2dv_cube suppressWarnings( - hcst <- as.s2dv_cube(hcst) + hcst <- as.s2dv_cube(hcst) ) - -#------------------------------------------- -# Step 2: Load the fcst -#------------------------------------------- + + #------------------------------------------- + # Step 2: Load the fcst + #------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst)) { - - tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, - version = version, sdates = sdates_fcst) - path_list <- tmp$path_list - multi_path <- tmp$multi_path - - #TODO: to make this case work; enhance Start() if it's possible - if (multi_path & length(variable) > 1) { - stop("The recipe requests multiple variables and start dates from both dcppA-hindcast and dcppB-forecast. This case is not available for now.") - } - - # monthly & daily - if (!multi_path) { - #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst - Start_fcst_arg_list <- Start_default_arg_list - Start_fcst_arg_list[['dat']] <- path_list - Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) - fcst <- do.call(Start, Start_fcst_arg_list) - - - } else { # multi_path - - #TODO: time attribute is not correct. Improve Start(). - Start_fcst_arg_list <- Start_default_arg_list - Start_fcst_arg_list[['dat']] <- path_list - Start_fcst_arg_list[['syear']] <- NULL - Start_fcst_arg_list[['chunk_depends']] <- NULL - remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') - Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] - fcst <- do.call(Start, Start_fcst_arg_list) - - # Reshape and reorder dimensions - ## dat should be 1, syear should be length of dat; reorder dimensions - ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) - fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) - - # Manipulate time attr because Start() cannot read it correctly - wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only - tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) - tmp[1, ] <- wrong_time_attr - yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) - for (i_syear in 1:length(yr_diff)) { - tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) + + tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, + version = version, sdates = sdates_fcst) + path_list <- tmp$path_list + multi_path <- tmp$multi_path + + #TODO: to make this case work; enhance Start() if it's possible + if (multi_path & length(variable) > 1) { + stop("The recipe requests multiple variables and start dates from both dcppA-hindcast and dcppB-forecast. 
This case is not available for now.") } - attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') - - } - - tmp_time_attr <- attr(fcst, 'Variables')$common$time - - # change syear to c(sday, sweek, syear) - # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) - if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { - error(recipe$Run$logger, - "fcst has problem in matching data and time attr dimension.") - stop() - } - dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) - - #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 - if (multi_path) { - attributes(fcst)$Variables$common[[variable]] <- attributes(fcst)$Variables$dat1[[variable]] - } - - # Change class from startR_array to s2dv_cube - suppressWarnings( - fcst <- as.s2dv_cube(fcst) - ) - - # Only syear could be different - if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { - error(recipe$Run$logger, - "hcst and fcst do not share the same dimension structure.") - stop() - } - + + # monthly & daily + if (!multi_path) { + #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst + Start_fcst_arg_list <- Start_default_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) + fcst <- do.call(Start, Start_fcst_arg_list) + + + } else { # multi_path + + #TODO: time attribute is not correct. Improve Start(). + Start_fcst_arg_list <- Start_default_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- NULL + Start_fcst_arg_list[['chunk_depends']] <- NULL + remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') + Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] + fcst <- do.call(Start, Start_fcst_arg_list) + + # Reshape and reorder dimensions + ## dat should be 1, syear should be length of dat; reorder dimensions + ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] + dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + + # Manipulate time attr because Start() cannot read it correctly + wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only + tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) + tmp[1, ] <- wrong_time_attr + yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) + for (i_syear in 1:length(yr_diff)) { + tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) + } + attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') + + } + + tmp_time_attr <- attr(fcst, 'Variables')$common$time + + # change syear to c(sday, sweek, syear) + # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] + dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { + error(recipe$Run$logger, + "fcst has problem in matching data and time attr dimension.") + stop() + } + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) + + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 + if (multi_path) { + 
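# (With multiple dat paths, Start() stores the variable metadata under $dat1
+ # instead of under $common, where as.s2dv_cube() looks for it; the line below
+ # copies it across, for the single-variable case guarded against above.)
+ 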
attributes(fcst)$Variables$common[[variable]] <- attributes(fcst)$Variables$dat1[[variable]] + } + + # Change class from startR_array to s2dv_cube + suppressWarnings( + fcst <- as.s2dv_cube(fcst) + ) + + # Only syear could be different + if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { + error(recipe$Run$logger, + "hcst and fcst do not share the same dimension structure.") + stop() + } + } else { fcst <- NULL } - -#------------------------------------------- -# Step 3. Load the reference -#------------------------------------------- + + #------------------------------------------- + # Step 3. Load the reference + #------------------------------------------- obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, "$var$$var_dir$", "$var$_$file_date$.nc") var_dir_obs <- archive$Reference[[ref.name]][[store.freq]][variable] # list(tas = "_f1h-r1440x721cds", tos = "_f1h-r1440x721cds") - -# obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, -# paste0(variable, archive$Reference[[ref.name]][[store.freq]][[variable]])) -# obs.files <- paste0('$var$_$file_date$.nc') - + + # obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, + # paste0(variable, archive$Reference[[ref.name]][[store.freq]][[variable]])) + # obs.files <- paste0('$var$_$file_date$.nc') + # Get from startR_cube -# dates <- attr(hcst, 'Variables')$common$time + # dates <- attr(hcst, 'Variables')$common$time # Get from s2dv_cube dates <- hcst$attrs$Dates dates_file <- sapply(dates, format, '%Y%m') dim(dates_file) <- dim(dates) - + if (store.freq == "daily_mean") { -#//////////////// -# Method 1: use hcst time attr as obs time selector -#//////////////// - - # Set hour to 12:00 to ensure correct date retrieval for daily data - lubridate::hour(dates) <- 12 - lubridate::minute(dates) <- 00 - # Restore correct dimensions - dim(dates) <- dim(dates_file) - - obs <- Start(dat = obs.path, - var = variable, - var_dir = var_dir_obs, - var_dir_depends = 'var', - file_date = unique(format(dates, '%Y%m')), - time = dates, # [sday, sweek, syear, time] - time_across = 'file_date', - merge_across_dims = TRUE, - split_multiselected_dims = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_extra_cells = 2, - transform_params = list(grid = regrid_params$obs.gridtype, #nc file - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = 'file_date'), - silent = !DEBUG, - retrieve = TRUE) - + #//////////////// + # Method 1: use hcst time attr as obs time selector + #//////////////// + + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = unique(format(dates, '%Y%m')), + time = dates, # [sday, sweek, syear, time] + time_across = 'file_date', + merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + 
longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$obs.gridtype, #nc file + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + silent = !DEBUG, + retrieve = TRUE) + } else if (store.freq == "monthly_mean") { -#//////////////// -# Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector -#//////////////// - - obs <- Start(dat = obs.path, - var = variable, - var_dir = var_dir_obs, - var_dir_depends = 'var', - file_date = dates_file, #dates_arr, # [sday, sweek, syear, time] - split_multiselected_dims = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_extra_cells = 2, - transform_params = list(grid = regrid_params$obs.gridtype, #nc file - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = 'file_date'), - metadata_dims = 'var', - silent = !DEBUG, - retrieve = TRUE) + #//////////////// + # Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector + #//////////////// + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = dates_file, #dates_arr, # [sday, sweek, syear, time] + split_multiselected_dims = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$obs.gridtype, #nc file + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + metadata_dims = 'var', + silent = !DEBUG, + retrieve = TRUE) } - - -#dim(attr(obs, 'Variables')$common$time) -# sday sweek syear time -# 1 1 2 14 - + + + #dim(attr(obs, 'Variables')$common$time) + # sday sweek syear time + # 1 1 2 14 + # Remove var_dir dimension obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") - + # Only ensemble dim could be different if (!identical(dim(obs), dim(hcst$data)[-9])) { error(recipe$Run$logger, - "obs and hcst dimensions do not match.") + "obs and hcst dimensions do not match.") stop() } # Add ensemble dim to obs dim(obs) <- c(dim(obs), ensemble = 1) - + # Change class from startR_array to s2dv_cube suppressWarnings( - obs <- as.s2dv_cube(obs) + obs <- as.s2dv_cube(obs) ) - -#------------------------------------------- -# Step 4. Verify the consistance between data -#------------------------------------------- + + #------------------------------------------- + # Step 4. 
Verify the consistency between data + #------------------------------------------- # dimension if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { error(recipe$Run$logger, - "hcst and obs don't share the same dimension names.") + "hcst and obs don't share the same dimension names.") stop() } else { ens_ind <- which(names(dim(obs$data)) == 'ensemble') match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data))) if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) { error(recipe$Run$logger, - "hcst and obs don't share the same dimension length.") + "hcst and obs don't share the same dimension length.") stop() } } - + # time attribute if (!identical(format(hcst$attrs$Dates, '%Y%m'), format(obs$attrs$Dates, '%Y%m'))) { error(recipe$Run$logger, - "hcst and obs don't share the same time.") + "hcst and obs don't share the same time.") stop() } - + # lat and lon attributes if (!(recipe$Analysis$Regrid$type == 'none')) { if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") + "Please check the original grids and the", + "regrid parameters in your recipe.") error(recipe$Run$logger, lat_error_msg) hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) info(recipe$Run$logger, hcst_lat_msg) obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], - "; Last obs lat: ", obs$lat[length(obs$lat)]) + "; Last obs lat: ", obs$lat[length(obs$lat)]) info(recipe$Run$logger, obs_lat_msg) stop("hcst and obs don't share the same latitudes.") } - + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") + "Please check the original grids and the", + "regrid parameters in your recipe.") error(recipe$Run$logger, lon_error_msg) hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) info(recipe$Run$logger, hcst_lon_msg) obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], - "; Last obs lon: ", obs$lon[length(obs$lon)]) + "; Last obs lon: ", obs$lon[length(obs$lon)]) info(recipe$Run$logger, obs_lon_msg) stop("hcst and obs don't share the same longitudes.") } - } - + } + # Check fcst if (!is.null(fcst)) { # dimension if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { error(recipe$Run$logger, - "hcst and fcst don't share the same dimension names.") + "hcst and fcst don't share the same dimension names.") stop() } else { ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) { error(recipe$Run$logger, - "hcst and fcst don't share the same dimension length.") + "hcst and fcst don't share the same dimension length.") stop() } } - + # lat and lon attributes if (!(recipe$Analysis$Regrid$type == 'none')) { compare_exp_obs_grids(hcst, obs) } }
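# (For reference, the dimension layout expected at this point, illustratively:
# hcst$data [dat = 1, var, sday = 1, sweek = 1, syear, time, latitude, longitude, ensemble];
# obs$data the same with ensemble = 1; fcst$data the same except for its own
# syear and ensemble lengths, as checked above.)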
- + #------------------------------------------- # Step 5. Tune data #------------------------------------------- @@ -472,14 +472,14 @@ load_decadal <- function(recipe) { # fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 # } # } - + #------------------------------------------- # Step 6. Print summary #------------------------------------------- - + info(recipe$Run$logger, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") - + .log_memory_usage(recipe$Run$logger, when = "After loading") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index 2caa34a9ebe10315f94280cfbae44901045cd306..7eb357574c58d32b98f4a605af8508f27cc00ab7 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -6,7 +6,7 @@ source("modules/Loading/R/check_latlon.R") source("modules/Loading/R/compare_exp_obs_grids.R") load_seasonal <- function(recipe) { - + # ------------------------------------------- # Set params ----------------------------------------- @@ -66,7 +66,7 @@ load_seasonal <- function(recipe) { ##} else { ## accum <- FALSE ##} - + if (store.freq %in% c("daily", "daily_mean")) { frequency <- "daily_mean" } else if (store.freq == "monthly_mean") { @@ -313,7 +313,7 @@ load_seasonal <- function(recipe) { # Convert obs to s2dv_cube obs <- as.s2dv_cube(obs) - + # Check for consistency between hcst and obs grid if (!(recipe$Analysis$Regrid$type == 'none')) { compare_exp_obs_grids(hcst, obs) diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R new file mode 100644 index 0000000000000000000000000000000000000000..192e92928974906e6698d2e708cd4c0193559c66 --- /dev/null +++ b/modules/Multimodel/Multimodel.R @@ -0,0 +1,74 @@ +# This module loads the outputs saved for each individual forecast system +# and creates the multimodel ensemble + +## TO DO: +## Remove empty folders after cleaning files +## Return probabilities + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source('modules/Multimodel/load_multimodel.R') +source('modules/Multimodel/load_multimodel_splitted.R') +source('modules/Multimodel/dims_multimodel.R') +source('modules/Multimodel/build_multimodel.R') +source('modules/Multimodel/clean_multimodel.R') + +Multimodel <- function(recipe) { + + # recipe: auto-s2s recipe as provided by read_yaml + + # Loading data saved in the jobs for individual models + if (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')){ + # Load data splitting by system + data_aux <- load_multimodel_splitted(recipe) + + data <- data_aux$data + + files_hcst <- data_aux$files_hcst + files_fcst <- data_aux$files_fcst + files_obs <- data_aux$files_obs + + rm(data_aux) + + } else { + # Load data without splitting + data <- load_multimodel(recipe) + files_hcst <- data$hcst$attrs$source_files + files_fcst <- data$fcst$attrs$source_files + files_obs <- data$obs$attrs$source_files + } + + # Building the multi-model + multimodel_aux <- build_multimodel(data, recipe) + data <- multimodel_aux$data + prob <- multimodel_aux$prob + rm(multimodel_aux)
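+
+ # The Multimodel section of the recipe is assumed to look roughly like this
+ # (values are illustrative; these are the keys read here, in build_multimodel()
+ # and in clean_multimodel()):
+ # Analysis:
+ #   Datasets:
+ #     Multimodel:
+ #       approach: pooled         # 'pooled', 'mean' or 'median'
+ #       execute: both            # 'true'/'yes' or 'both'
+ #       createFrom: Calibration  # workflow step; value illustrative
+ #       split_loading: no
+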
+ # Saving multimodel + if (recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save != 'none') { + save_forecast(recipe = recipe, + data_cube = data$hcst, + outdir = NULL, + type = 'hcst') + if (!is.null(data$fcst)) { + save_forecast(recipe = recipe, + data_cube = data$fcst, + outdir = NULL, + type = 'fcst') + } + save_observations(recipe = recipe, + data_cube = data$obs, + outdir = NULL) + } + + # Remove the temporary per-model data in case the user only requests the multimodel + clean_multimodel(recipe, + files_hcst, + files_fcst, + files_obs) + + return(list(data = data, + prob = prob)) + +} diff --git a/modules/Multimodel/build_multimodel.R b/modules/Multimodel/build_multimodel.R new file mode 100644 index 0000000000000000000000000000000000000000..ae0eff56040cff3c0333fed8191cd3ade266e9c2 --- /dev/null +++ b/modules/Multimodel/build_multimodel.R @@ -0,0 +1,69 @@ + +#TODO: return also the probabilities + +build_multimodel <- function(data, recipe) { + + if (tolower(recipe$Analysis$Datasets$Multimodel$approach) == 'pooled') { + + # Deterministic hindcast + data$hcst$data <- CSTools::MergeDims(data = data$hcst$data, + merge_dims = c('model','ensemble'), + rename_dim = 'ensemble', na.rm = TRUE) + data$hcst$dims <- dim(data$hcst$data) + + # Deterministic forecast + if (!is.null(recipe$Analysis$Time$fcst_year)) { + data$fcst$data <- CSTools::MergeDims(data = data$fcst$data, + merge_dims = c('model','ensemble'), + rename_dim = 'ensemble', na.rm = TRUE) + data$fcst$dims <- dim(data$fcst$data) + } + + # Probabilistic hindcast and forecast + prob <- Probabilities(recipe, data) + + } else if (tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% c('mean','median')) { + + # Probabilistic hindcast and forecast + stop('Probabilities for the multi-model mean are still under development. + If Skill is used, the results are not correct for the probabilistic metrics. + Probabilities cannot be computed because data has an extra dimension (model). + Also, the function should return the observed probabilities. + Maybe it is better to use GetProbs.') + prob <- NULL #Probabilities(recipe, data) + + # Deterministic hindcast + data$hcst$data <- multiApply::Apply(data = data$hcst$data, + target_dims = 'ensemble', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = TRUE, + ncores = recipe$Analysis$ncores)$output1 + data$hcst$data <- multiApply::Apply(data = data$hcst$data, + target_dims = 'model', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = FALSE, + ncores = recipe$Analysis$ncores)$output1 + data$hcst$dims <- s2dv::InsertDim(data = data$hcst$dims, posdim = 6, lendim = 1, name = 'ensemble') + data$hcst$dims <- dim(data$hcst$data) + + # Deterministic forecast + if (!is.null(recipe$Analysis$Time$fcst_year)) { + data$fcst$data <- multiApply::Apply(data = data$fcst$data, + target_dims = 'ensemble', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = TRUE, + ncores = recipe$Analysis$ncores)$output1 + data$fcst$data <- multiApply::Apply(data = data$fcst$data, + target_dims = 'model', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = FALSE, + ncores = recipe$Analysis$ncores)$output1 + data$fcst$dims <- s2dv::InsertDim(data = data$fcst$dims, posdim = 6, lendim = 1, name = 'ensemble') + data$fcst$dims <- dim(data$fcst$data) + } + + } else {stop('Incorrect multi-model approach')} + + return(list(data = data, + prob = prob)) +} diff --git a/modules/Multimodel/clean_multimodel.R b/modules/Multimodel/clean_multimodel.R new file mode 100644 index 0000000000000000000000000000000000000000..816ef0c23b85e63a9f67bb3f8ed8de377345c57b --- /dev/null +++ b/modules/Multimodel/clean_multimodel.R @@ -0,0 +1,42 @@ + +clean_multimodel <- function(recipe, files_hcst, files_fcst, files_obs) { + + # Names of the models + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + exp.name <- 
recipe$Analysis$Datasets$System$models + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') + } else {stop('Multimodel not implemented for this horizon.')} + exp.name <- gsub('\\.','',exp.name) + + # To remove the observations from all models' folders + for (m in exp.name[-1]){ + files_obs <- c(files_obs, gsub(exp.name[1],m,files_obs)) + } + + if (tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('true','yes')) { + + # Keep only multimodel data + + # Remove createFrom files + unlink(files_hcst) + unlink(files_fcst) + unlink(files_obs) + + # Remove Skill and plots file of systems + unlink(file.path(recipe$Run$output_dir,"outputs/*",exp.name), + recursive = T) + unlink(file.path(recipe$Run$output_dir,"plots/*",exp.name), + recursive = T) + + } else if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'both' && + recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save == 'none') { + + # Remove createFrom files only + unlink(files_hcst) + unlink(files_fcst) + unlink(files_obs) + + } + +} diff --git a/modules/Multimodel/dims_multimodel.R b/modules/Multimodel/dims_multimodel.R new file mode 100644 index 0000000000000000000000000000000000000000..f9076153081bf34d421161e2178e0c93480accd7 --- /dev/null +++ b/modules/Multimodel/dims_multimodel.R @@ -0,0 +1,107 @@ + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") + +dims_multimodel <- function(recipe) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + exp.name <- recipe$Analysis$Datasets$System$models + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') + } else {stop('Multimodel not implemented for this horizon.')} + store.freq <- recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_$var$_$file_date$_", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + filename <- "$var$_$file_date$.nc" + } + + hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), filename) + hcst.path <- gsub(variable[1], "$var$", hcst.path) + hcst.path <- gsub('Multimodel', "$model$", hcst.path) + fcst.path <- obs.path <- hcst.path + + # Load hindcast + #------------------------------------------------------------------- + hcst <- 
Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = FALSE) + + dim.hcst <- attr(hcst,'Dimensions') + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- Start(dat = fcst.path, + var = variable, + file_date = sdates$fcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = FALSE) + + dim.fcst <- attr(fcst,'Dimensions') + + } else { + dim.fcst <- NULL + } + + return(list(dim.hcst = dim.hcst, dim.fcst = dim.fcst)) +} diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R new file mode 100644 index 0000000000000000000000000000000000000000..c6bd4585a57367b97eee66fd4ff5d2a46237380b --- /dev/null +++ b/modules/Multimodel/load_multimodel.R @@ -0,0 +1,277 @@ + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") + +load_multimodel <- function(recipe) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + exp.name <- recipe$Analysis$Datasets$System$models + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') + } else {stop('Multimodel not implemented for this horizon.')} + store.freq <- recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_$var$_$file_date$_", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + 
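# Other output formats keep the default pattern; $var$ and $file_date$ are
+ # resolved by Start() below (illustrative result: tas_1960.nc).
+ 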
filename <- "$var$_$file_date$.nc" + } + + hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), filename) + hcst.path <- gsub(variable[1], "$var$", hcst.path) + hcst.path <- gsub('Multimodel', "$model$", hcst.path) + fcst.path <- obs.path <- hcst.path + obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) + obs.path <- gsub("$model$", gsub('\\.','',exp.name[1]), obs.path, fixed = T) + + # Load hindcast + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + ############################### + + # Convert hcst to s2dv_cube object + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- Start(dat = fcst.path, + var = variable, + file_date = sdates$fcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + 
default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + ############################# + + # Convert fcst to s2dv_cube + fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$attrs$Dates[] <- + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + if (store.freq == "monthly_mean") { + + obs <- Start(dat = obs.path, + var = variable, + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq %in% c("daily_mean", "daily")) { + + ############################# + #NOTE: NOT TESTED YET + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. + dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + ############################# + + } + + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!isTRUE(all.equal(as.vector(hcst$coords$latitude), as.vector(obs$coords$latitude)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't 
share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst$coords$longitude), as.vector(obs$coords$longitude)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + + # Print a summary of the loaded data for the user, for each object + if (recipe$Run$logger$threshold <= 2) { + data_summary(hcst, recipe) + data_summary(obs, recipe) + if (!is.null(fcst)) { + data_summary(fcst, recipe) + } + } + + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/modules/Multimodel/load_multimodel_splitted.R b/modules/Multimodel/load_multimodel_splitted.R new file mode 100644 index 0000000000000000000000000000000000000000..5b9a642f5599602a2318f71b4766dffcd13ef930 --- /dev/null +++ b/modules/Multimodel/load_multimodel_splitted.R @@ -0,0 +1,58 @@ + +load_multimodel_splitted <- function(recipe){ + + # Retrieve data dimension only without loading data + dims <- dims_multimodel(recipe) + data_order <- names(dims$dim.hcst) + + # Create empty array with desired dimensions + data <- list(hcst = NULL, fcst = NULL, obs = NULL) + data$hcst$data <- array(data = NA, dim = dims$dim.hcst) + if (!is.null(recipe$Analysis$Time$fcst_year)) { + data$fcst$data <- array(data = NA, dim = dims$dim.fcst) + } + files_hcst <- list() + files_fcst <- list() + files_obs <- list() + + + # Loop over system to load hindcast and forecast data + for (sys in 1:length(recipe$Analysis$Datasets$System$models)){ + + recipe_aux <- recipe + system_load <- recipe$Analysis$Datasets$System$models[sys] + recipe_aux$Analysis$Datasets$System$models <- system_load + + data_aux <- load_multimodel(recipe_aux) + + data$hcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$hcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order) + + if(!is.null(recipe$Analysis$Time$fcst_year)){ + data$fcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$fcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$fcst$data, order = data_order) + } + + files_hcst <- append(files_hcst, data_aux$hcst$attrs$source_files, after = length(files_hcst)) + files_fcst <- append(files_fcst, data_aux$fcst$attrs$source_files, after = length(files_fcst)) + files_obs <- append(files_obs, data_aux$obs$attrs$source_files, after = length(files_obs)) + + } # close loop on sys + + # Define obs data + data$obs$data <- data_aux$obs$data + + # Include data attributes + data$hcst$attrs <- data_aux$hcst$attrs + data$fcst$attrs <- data_aux$fcst$attrs + data$obs$attrs <- data_aux$obs$attrs + data$hcst$coords <- data_aux$hcst$coords + data$fcst$coords <- data_aux$fcst$coords + data$obs$coords <- data_aux$obs$coords + + # Remove temporary data_aux + rm(data_aux) + + return(list(data = data, + files_hcst = files_hcst, + files_fcst = files_fcst, + files_obs = files_obs)) +} diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index 
54bea81cf6d3af9797daae015fd6432df6ca0fb2..a179379975080ea566dfd6f3237b42d4152d62a1 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -7,7 +7,7 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { # variable, forecast date, startdate, aggregation, forecast horizon and # type of metric/forecast/probability. - if (recipe$Analysis$Horizon == "subseasonal") { + if (tolower(recipe$Analysis$Horizon) == "subseasonal") { shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%V") dd <- "week" } else { @@ -15,6 +15,17 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { dd <- "month" } + if (tolower(recipe$Analysis$Horizon) == "decadal") { + # to not save the month and day in the filename (needed for the multimodel) + date <- substr(date,1,4) + # for the models initialised in January - it may be better to do this in save_* functions + archive <- read_yaml(paste0("conf/archive_decadal.yml"))$esarchive + exp.name <- recipe$Analysis$Datasets$System$name + if (exp.name != 'Multimodel' && archive$System[[exp.name]]$initial_month == 1){ + date <- as.character(as.numeric(date)-1) + } + } + switch(tolower(agg), "region" = {gg <- "-region"}, "global" = {gg <- ""}) @@ -42,13 +53,19 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { file <- paste0("scorecards_", system, "_", reference, "_", var, type_info, hcst_start, "-", hcst_end, "_s", shortdate) } else { + + if (tolower(recipe$Analysis$Horizon) == "decadal") { + shortdate_aux <- '' + } else { + shortdate_aux <- paste0("_", dd, shortdate) + } + switch(file.type, - "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, - "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, + "skill" = {file <- paste0(var, gg, "-skill", shortdate_aux)}, + "corr" = {file <- paste0(var, gg, "-corr", shortdate_aux)}, "exp" = {file <- paste0(var, gg, "_", date)}, "obs" = {file <- paste0(var, gg, "-obs_", date)}, - "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, - shortdate)}, + "percentiles" = {file <- paste0(var, gg, "-percentiles",shortdate_aux)}, "probs" = {file <- paste0(var, gg, "-probs_", date)}, "bias" = {file <- paste0(var, gg, "-bias_", date)}) } diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R index 050fe68db9e2974d9568a611f2e69300ec76cb48..1eb58b937b966909957f365c60f8666feaa57cc3 100644 --- a/modules/Saving/R/save_corr.R +++ b/modules/Saving/R/save_corr.R @@ -25,7 +25,11 @@ save_corr <- function(recipe, # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar + if (global_attributes$system == 'Multimodel'){ + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } else { + calendar <- archive$System[[global_attributes$system]]$calendar + } # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R index 00a22850dfa13e5a3eadd73e55d24be5c10b5585..f7afa73cbbb4a8b1b47c086a8ba7c38e3c2a32fb 100644 --- a/modules/Saving/R/save_forecast.R +++ b/modules/Saving/R/save_forecast.R @@ -16,7 +16,15 @@ save_forecast <- function(recipe, global_attributes <- .get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - calendar <- 
archive$System[[global_attributes$system]]$calendar + if (global_attributes$system == 'Multimodel'){ + if (fcst.horizon == 'decadal'){ + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar + } else { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } + } else { + calendar <- archive$System[[global_attributes$system]]$calendar + } # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), @@ -28,13 +36,17 @@ save_forecast <- function(recipe, ## Method 2: use initial month init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month if (type == 'hcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + '-01-01'), cal = calendar) } else if (type == 'fcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$fcst_year)+1, + '-01-01'), cal = calendar) } } else { if (type == 'hcst') { @@ -122,7 +134,7 @@ save_forecast <- function(recipe, # Get time dimension values and metadata times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) time <- times$time - + # Generate name of output file outfile <- get_filename(outdir, recipe, variable, fcst.sdate, agg, "exp") diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index cd4252ab7365d038b9333f506604e2ad4d6b9afb..2a59f2c2acdc4845fa20c702bc0585d04a7f43ae 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -24,17 +24,27 @@ save_metrics <- function(recipe, # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar + if (global_attributes$system == 'Multimodel'){ + if (fcst.horizon == 'decadal'){ + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar + } else { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } + } else { + calendar <- archive$System[[global_attributes$system]]$calendar + } # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) + # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + '-01-01'), cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -49,8 +59,9 @@ save_metrics <- function(recipe, if (fcst.horizon == 'decadal') { if 
(!is.null(recipe$Analysis$Time$fcst_year)) { #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') + # fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + # sprintf('%02d', init_month), '01') + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1]) } else { fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') } diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R index 127e98909d3e067c3516bc79aef8474e33cc7d17..0f4d70a17a6eaf4e56070f1484dddbbb9f6d274c 100644 --- a/modules/Saving/R/save_observations.R +++ b/modules/Saving/R/save_observations.R @@ -26,11 +26,12 @@ save_observations <- function(recipe, ## the real initialized date (ask users) # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - + # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + '-01-01'), cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index 862ed5fff9fc4501050dacd51349949534309c8a..7b68b2c27e14b4181f3af6b031ad72d51aa83e83 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -23,15 +23,30 @@ save_percentiles <- function(recipe, # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar + if (global_attributes$system == 'Multimodel') { + if (fcst.horizon == 'decadal') { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar + } else { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } + } else { + calendar <- archive$System[[global_attributes$system]]$calendar + } + # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) + # if (global_attributes$system == 'Multimodel') { + # init_month <- 11 #TODO: put as if init_month is January + # } else { + # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + # } + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + '-01-01'), cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -46,8 +61,9 @@ save_percentiles <- function(recipe, if (fcst.horizon == 'decadal') { if (!is.null(recipe$Analysis$Time$fcst_year)) { #PROBLEM: May be more than one fcst_year - fcst.sdate <- 
paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') + # fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + # sprintf('%02d', init_month), '01') + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1]) } else { fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') } diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index b7da0449d489ae6f6ccb8e3737842195e5e1227a..3253e7b71f74222879bf5a8874604a6815631094 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -29,17 +29,31 @@ save_probabilities <- function(recipe, } fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar + if (global_attributes$system == 'Multimodel'){ + if (fcst.horizon == 'decadal') { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar + } else { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } + } else { + calendar <- archive$System[[global_attributes$system]]$calendar + } # Generate vector containing leadtimes ## TODO: Move to a separate function? dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) + # if (global_attributes$system == 'Multimodel') { + # init_month <- 11 #TODO: put as if init_month is January + # } else { + # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + # } + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + '-01-01'), cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index e5e154213377be7ec82100507a414d93e091d0b6..fa12d61080a598cb981f90d8a28100f4dbd841d9 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -115,10 +115,12 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, ## Define empty list to saved data all_metrics <- sapply(system, function(x) NULL) + names(all_metrics) <- system ## Load data for each system for (sys in 1:length(system)) { ## Define empty list to saved data by_reference <- sapply(reference, function(x) NULL) + names(by_reference) <- reference ## Load data for each reference for (ref in 1:length(reference)) { ## Call function to load metrics data @@ -157,7 +159,6 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, var, "-skill_", period, "_s", m, # mod.pressure, ".nc")}) allfiles_exist <- sapply(allfiles, file.exists) - # Check dims files_exist_by_month <- seq(1:length(allfiles))[allfiles_exist] allfiledims <- sapply(allfiles[allfiles_exist], easyNCDF::NcReadDims) diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 89f1df44d8302759caa6d3529e32a6fbda26dbca..99909f095072b6c6d170439b31da812decc858a9 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -82,9 
+82,11 @@ ScorecardsMulti <- function(data, attributes(input_data)$metrics <- metrics ## Transform data for scorecards by forecast month (types 11 & 12) - transformed_data <- SCTransform(data = input_data, - sdate_dim = 'sdate', - ftime_dim = 'time') + if(length(start.months) >= length(forecast.months)){ + transformed_data <- SCTransform(data = input_data, + sdate_dim = 'sdate', + ftime_dim = 'time') + } ## Load configuration files sys_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/conf/archive.yml")$esarchive @@ -106,6 +108,10 @@ ScorecardsMulti <- function(data, reference.name <- c(reference.name, reference.name1) } + if("Multimodel" %in% system ){ + system.name <- c(system.name, "Multimodel") + } + ## Get metric long names metric.names.list <- .met_names(metrics, var.units) @@ -159,7 +165,7 @@ ScorecardsMulti <- function(data, font.size = 1.1 legend.white.space <- col1.width <- col2.width <- NULL ## Use default values of function - + ## Loop over region for(reg in 1:length(region.names)){ @@ -270,87 +276,91 @@ ScorecardsMulti <- function(data, #### Scorecard_type 11 #### ## (transformation only) - fileout <- .Filename(model = model, eval.name = eval.filename, var = var, - start.year = start.year, end.year = end.year, scorecard.type = 11, - region = sub(" ", "-", region.names[reg]), - fileout.label = fileout.label, output.path = output.path) - if(model == 'system'){ - data_sc_11 <- Subset(transformed_data, c('reference','region'), list(1, reg), drop = 'selected') - } else if(model == 'reference'){ - data_sc_11 <- Subset(transformed_data, c('system','region'), list(1, reg), drop = 'selected') + if(length(start.months) >= length(forecast.months)){ + fileout <- .Filename(model = model, eval.name = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 11, + region = sub(" ", "-", region.names[reg]), + fileout.label = fileout.label, output.path = output.path) + if(model == 'system'){ + data_sc_11 <- Subset(transformed_data, c('reference','region'), list(1, reg), drop = 'selected') + } else if(model == 'reference'){ + data_sc_11 <- Subset(transformed_data, c('system','region'), list(1, reg), drop = 'selected') + } + SCPlotScorecard(data = data_sc_11, + row.dim = model, + subrow.dim = 'time', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = model.name, + subrow.names = forecast.months, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = table.model.name, + subrow.title = 'Forecast Month', + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = 4, + col2.width = col2.width, + fileout = fileout) } - SCPlotScorecard(data = data_sc_11, - row.dim = model, - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = model.name, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = table.model.name, - subrow.title = 'Forecast Month', - col.title = 'Target month', - legend.breaks = 
legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = 4, - col2.width = col2.width, - fileout = fileout) #### Scorecard_type 12 #### ## (transformation and reorder) - fileout <- .Filename(model = model, eval.name = eval.filename, var = var, - start.year = start.year, end.year = end.year, scorecard.type = 12, - region = sub(" ", "-", region.names[reg]), - fileout.label = fileout.label, output.path = output.path) - new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') - if(model == 'system'){ - data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') - } else if(model == 'reference'){ - data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('system','region'), list(1, reg), drop = 'selected') + if(length(start.months) >= length(forecast.months)){ + fileout <- .Filename(model = model, eval.name = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 12, + region = sub(" ", "-", region.names[reg]), + fileout.label = fileout.label, output.path = output.path) + new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + if(model == 'system'){ + data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') + } else if(model == 'reference'){ + data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('system','region'), list(1, reg), drop = 'selected') + } + SCPlotScorecard(data = data_sc_12, + row.dim = 'time', + subrow.dim = model, + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = forecast.months, + subrow.names = model.name, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Forecast Month', + subrow.title = table.model.name, + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = 4, + fileout = fileout) } - SCPlotScorecard(data = data_sc_12, - row.dim = 'time', - subrow.dim = model, - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = forecast.months, - subrow.names = model.name, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Forecast Month', - subrow.title = table.model.name, - col.title = 'Target month', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = 4, - fileout = fileout) } ## close loop on region diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R 
b/modules/Scorecards/R/tmp/ScorecardsSingle.R index 56f08204ad5443b94bbc281a31f3c75cf5cb7614..0e1d7cbc9738111a13f0b760372c6f96712ec183 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -89,13 +89,15 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, attributes(input_data)$metrics <- metrics ## Transform data for scorecards by forecast month (types 3 & 4) - transformed_data <- SCTransform(data = input_data, - sdate_dim = 'sdate', - ftime_dim = 'time') + if(length(start.months) >= length(forecast.months)){ + transformed_data <- SCTransform(data = input_data, + sdate_dim = 'sdate', + ftime_dim = 'time') + } ## Load configuration files - sys_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/conf/archive.yml")$esarchive - var_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/csscorecards/inst/config/variable-dictionary.yml")$vars + sys_dict <- read_yaml("conf/archive.yml")$esarchive + var_dict <- read_yaml("conf/variable-dictionary.yml")$vars ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name @@ -256,46 +258,48 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, #### Scorecard_type 3 #### ## (transformation only) - fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, - start.year = start.year, end.year = end.year, scorecard.type = 3, - fileout.label = fileout.label, output.path = output.path) - data_sc_3 <- Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected') - SCPlotScorecard(data = data_sc_3, - row.dim = 'region', - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = region.names, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Region', - subrow.title = 'Forecast Month', - col.title = 'Target month', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - fileout = fileout) + if(length(start.months) >= length(forecast.months)){ + fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, + start.year = start.year, end.year = end.year, scorecard.type = 3, + fileout.label = fileout.label, output.path = output.path) + data_sc_3 <- Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected') + SCPlotScorecard(data = data_sc_3, + row.dim = 'region', + subrow.dim = 'time', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = region.names, + subrow.names = forecast.months, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Region', + subrow.title = 'Forecast Month', + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = 
legend.col.sup,
+                        round.decimal = round.decimal,
+                        font.size = font.size,
+                        legend.white.space = legend.white.space,
+                        col1.width = col1.width,
+                        col2.width = col2.width,
+                        fileout = fileout)
+      }
 
       #### Scorecard_type 4 ####
       ## (transformation and reorder)
       ## Scorecard type 4 is same as type 3 for only one region, therefore is
       ## only plotted if more that one region is requested
-      if(dim(input_data)['region'] > 1) {
+      if(dim(data)['region'] > 1 & length(start.months) >= length(forecast.months)){
         fileout <- .Filename(system = system[sys], reference = reference[ref], var = var,
                              start.year = start.year, end.year = end.year, scorecard.type = 4,
                              fileout.label = fileout.label, output.path = output.path)
diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R
index 0dbcd9210c9227200c872204526ea4e6df05adcb..e0b55641c5b0cbc12e4ec273c1bd54c08665ab48 100644
--- a/modules/Scorecards/Scorecards.R
+++ b/modules/Scorecards/Scorecards.R
@@ -22,20 +22,23 @@ Scorecards <- function(recipe) {
   output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/")
   dir.create(output.path, recursive = T, showWarnings = F)
-  system <- recipe$Analysis$Datasets$System$name
-  reference <- recipe$Analysis$Datasets$Reference$name
+  system <- recipe$Analysis$Datasets$System
+  reference <- recipe$Analysis$Datasets$Reference
   var <- recipe$Analysis$Variables$name
   start.year <- as.numeric(recipe$Analysis$Time$hcst_start)
   end.year <- as.numeric(recipe$Analysis$Time$hcst_end)
   forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max
-
-  if (recipe$Analysis$Workflow$Scorecards$start_months == 'all') {
-    start.months <- 1:12
+
+  if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' ||
+      is.null(recipe$Analysis$Workflow$Scorecards$start_months)) {
+    start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2))
   } else {
     start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months,
                                         split = ", | |,")[[1]])
+    if (!all(start.months %in% as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)))) {
+      error(recipe$Run$logger,
+            "Requested start dates for scorecards must be loaded")
+    }
   }
-
+
   regions <- recipe$Analysis$Workflow$Scorecards$regions
   for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])}
@@ -74,7 +77,7 @@ Scorecards <- function(recipe) {
   col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width
   calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff
   ncores <- 1 # recipe$Analysis$ncores
-
+
   ## Load data files
   loaded_metrics <- LoadMetrics(system = system,
                                 reference = reference,
@@ -126,7 +129,7 @@ Scorecards <- function(recipe) {
                            metric.aggregation = metric.aggregation,
                            ncores = ncores)
   }## close if
-
+
   ## Create simple scorecard tables
   ## (one system only)
   ## Metrics input must be in the same order as function SC_spatial_aggregation
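The membership check corrected above is easy to get wrong because of operator precedence: the original `!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2))) %in% start.months` negates `any(...)` first and then tests that single logical against `start.months`, so it never expresses "every requested month was loaded". A minimal sketch of the intended validation, using illustrative names that are not part of the codebase:

# Hedged sketch of the start-month validation, assuming sdate entries are
# 'MMDD' strings (e.g. '0101') and start_months is 'all', NULL, or a string
# such as "1, 2,3"; the function name is hypothetical.
validate_start_months <- function(sdate, start_months) {
  loaded <- as.numeric(substr(sdate, 1, 2))    # months actually loaded
  if (is.null(start_months) || start_months == 'all') {
    return(loaded)                             # default to the loaded months
  }
  requested <- as.numeric(strsplit(start_months, ", | |,")[[1]])
  # Fail if any requested month was never loaded: all(requested %in% loaded),
  # not any(loaded) %in% requested.
  if (!all(requested %in% loaded)) {
    stop("Requested start dates for scorecards must be loaded")
  }
  requested
}

# validate_start_months(c('0101', '0201'), "1,2")  # returns c(1, 2)
# validate_start_months(c('0101'), "1,3")          # error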
diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R
index 2c54c48f45ca0f70ecb26370e396d624948b7c0b..6c2daa5ca9bef73704628004f6d872280ad2d582 100644
--- a/modules/Scorecards/execute_scorecards.R
+++ b/modules/Scorecards/execute_scorecards.R
@@ -5,6 +5,12 @@
 args = commandArgs(trailingOnly = TRUE)
 recipe_file <- args[1]
 output_dir <- args[2]
+## for testing (commented out; the command-line arguments take precedence)
+
+# recipe_file <- '/esarchive/scratch/nmilders/multimodel/recipe_multimodel_seasonal_nadia_test.yml'
+# output_dir <- '/esarchive/scratch/nmilders/scorecards_data/multimodel/test/recipe_multimodel_seasonal_nadia_test_20240223114714'
+
+
 ## TODO: Replace with function
 # Read recipe and set outdir
 recipe <- read_yaml(recipe_file)
@@ -13,19 +19,25 @@ recipe$Run$output_dir <- output_dir
 ## Loop over variables
 datasets <- recipe$Analysis$Datasets
 ## TODO: Improve dependency system?
-for (system in 1:length(datasets$System)) {
-  for (reference in 1:length(datasets$Reference)) {
-    for (variable in 1:length(recipe$Analysis$Variables)) {
+for (variable in 1:length(recipe$Analysis$Variables)) {
   scorecard_recipe <- recipe
+  scorecard_recipe$Analysis$Datasets$System <-
-    recipe$Analysis$Datasets$System[[system]]
+    as.vector(unlist(recipe$Analysis$Datasets$System))
+
+  ## Include multimodel in systems
+  if(isTRUE(scorecard_recipe$Analysis$Datasets$Multimodel$execute) ||
+     scorecard_recipe$Analysis$Datasets$Multimodel$execute == 'both' ||
+     scorecard_recipe$Analysis$Datasets$Multimodel$execute == 'yes'){
+    scorecard_recipe$Analysis$Datasets$System <-
+      c(scorecard_recipe$Analysis$Datasets$System, 'Multimodel')
+  }
+
   scorecard_recipe$Analysis$Datasets$Reference <-
-    recipe$Analysis$Datasets$Reference[[reference]]
+    as.vector(unlist(recipe$Analysis$Datasets$Reference))
   scorecard_recipe$Analysis$Variables <-
     recipe$Analysis$Variables[[variable]]
   # Plot Scorecards
   Scorecards(scorecard_recipe)
-    }
-  }
 }
 print("##### SCORECARDS SAVED TO THE OUTPUT DIRECTORY #####")
diff --git a/modules/Skill/R/RPS_clim.R b/modules/Skill/R/RPS_clim.R
index c93c67476ffcdcbf450f3f2efb37d9f6a2c69b7e..601a10c3245ab8d4ea5103dbd5fe4aad2ce589a0 100644
--- a/modules/Skill/R/RPS_clim.R
+++ b/modules/Skill/R/RPS_clim.R
@@ -6,7 +6,7 @@ RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3)
   }
   obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv:::
-                        prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
+                         prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
   # clim_probs: [bin, sdate]
   clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)])
   clim_probs <- array(clim_probs, dim = dim(obs_probs))
@@ -15,6 +15,6 @@ RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3)
   probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
   probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
   rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum)
-
+
   return(mean(rps_ref))
 }
diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R
index 3d00742dd4de5443c9cafddd13a7990ffafc1dd4..10561d10e35a875ef27fe957831b1f437a84819b 100644
--- a/modules/Visualization/R/plot_ensemble_mean.R
+++ b/modules/Visualization/R/plot_ensemble_mean.R
@@ -7,7 +7,12 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
   latitude <- fcst$coords$lat
   longitude <- fcst$coords$lon
   archive <- get_archive(recipe)
-  system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name
+  if (recipe$Analysis$Datasets$System$name == 'Multimodel'){
+    system_name <- paste0('Multimodel-',
+                          recipe$Analysis$Datasets$Multimodel$approach)
+  } else {
+    system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name
+  }
   start_date <- paste0(recipe$Analysis$Time$fcst_year,
                        recipe$Analysis$Time$sdate)
   if (tolower(recipe$Analysis$Horizon) == "seasonal") {
@@ -51,7 +56,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
   # Define brks, centered around zero in the case of anomalies
   if (grepl("anomaly", var_long_name)) {
     variable <- paste(variable, "anomaly")
-    max_value <- max(abs(var_ens_mean))
+
max_value <- max(abs(var_ens_mean), na.rm = TRUE) ugly_intervals <- seq(-max_value, max_value, max_value/20) brks <- pretty(ugly_intervals, n = 12, min.n = 8) cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index 5d60f8c1747c8ec8b5435902a3c44107d9a4873f..65b587fca65e1b41cfcd5fe4290ab113a99f6e32 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -27,7 +27,12 @@ plot_most_likely_terciles <- function(recipe, latitude <- fcst$coords$lat longitude <- fcst$coords$lon archive <- get_archive(recipe) - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ + system_name <- paste0('Multimodel-', + recipe$Analysis$Datasets$Multimodel$approach) + } else { + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + } start_date <- paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate) if (tolower(recipe$Analysis$Horizon) == "seasonal") { diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index b4c2b273dffb70f3d413b33b9575b0c7e1662bc6..8ba679eff8d482a840311657f066d07bd5ceee4e 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -23,7 +23,12 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, latitude <- data_cube$coords$lat longitude <- data_cube$coords$lon archive <- get_archive(recipe) - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ + system_name <- paste0('Multimodel-', + recipe$Analysis$Datasets$Multimodel$approach) + } else { + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + } hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) if (tolower(recipe$Analysis$Horizon) == "seasonal") { @@ -243,6 +248,8 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, } fileout <- paste0(outfile, "_ft", forecast_time, ".png") # Plot + info(recipe$Run$logger, + paste("Plotting", display_name)) do.call(fun, args = c(base_args, diff --git a/recipes/examples/recipe_multimodel_splitting.yml b/recipes/examples/recipe_multimodel_splitting.yml new file mode 100644 index 0000000000000000000000000000000000000000..091da3e31da9805d318c83159562c69343d5b11a --- /dev/null +++ b/recipes/examples/recipe_multimodel_splitting.yml @@ -0,0 +1,67 @@ +Description: + Author: Carlos Delgado Torres + Info: Test for seasonal multi-model + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: monthly_mean, units: C} + Datasets: + System: + - {name: ECMWF-SEAS5} + - {name: CMCC-SPS3.5} + Multimodel: + execute: both # Mandatory: Either both, yes/true or no/false + approach: pooled #mean, median + createFrom: Anomalies + split_loading: TRUE + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. 
+ Time: + sdate: + - '0101' ## MMDD + - '0201' + # fcst_year: '2023' # Optional, int: Forecast year 'YYYY' + hcst_start: '2014' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months + Region: + - {name: "Spain", latmin: 34, latmax: 44, lonmin: -10, lonmax: 6} + # - {name: "Germany", latmin: 45, latmax: 56, lonmin: 4, lonmax: 17} + Regrid: + method: conservative + type: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Calibration: + method: raw + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss enssprerr + save: 'all' + cross_validation: no + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + Visualization: + plots: skill_metrics + multi_panel: no + dots: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/nmilders/scorecards_data/multimodel/test/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/ + autosubmit: no diff --git a/recipes/recipe_multimodel_decadal.yml b/recipes/recipe_multimodel_decadal.yml new file mode 100644 index 0000000000000000000000000000000000000000..ce2530e82794a0aeb00509f0d4c7ac1951d5d4a8 --- /dev/null +++ b/recipes/recipe_multimodel_decadal.yml @@ -0,0 +1,74 @@ +Description: + Author: Carlos Delgado Torres + Info: Test for decadal multi-model + +Analysis: + Horizon: decadal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: monthly_mean, units: C} + Datasets: + System: + - {name: EC-Earth3-i4, member: r1i4p1f1 r2i4p1f1 r3i4p1f1} + - {name: CanESM5, member: r1i1p2f1 r2i1p2f1} + Multimodel: + execute: both # Mandatory: Either both, yes/true or no/false + approach: pooled #mean, median + createFrom: Anomalies + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. + Time: + fcst_year: '2020' # Optional, int: Forecast year 'YYYY' + hcst_start: '2007' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months + Region: + - {name: "Spain", latmin: 34, latmax: 44, lonmin: -10, lonmax: 6} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "r360x180" + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: EnsCorr rpss + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
+ save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: s2s4e # scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/cdelgado/sunset_outputs/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/cdelgado/gitlat/SUNSET/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/cdelgado/gitlat/SUNSET/main_multimodel_seasonal.R # replace with the path to your script + expid: XXXX # replace with your EXPID + hpc_user: bsc32924 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: carlos.delgado@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipes/recipe_multimodel_seasonal.yml b/recipes/recipe_multimodel_seasonal.yml new file mode 100644 index 0000000000000000000000000000000000000000..f974fc48777fd4b420b9a3f162419ebe4ec881ca --- /dev/null +++ b/recipes/recipe_multimodel_seasonal.yml @@ -0,0 +1,82 @@ +Description: + Author: Carlos Delgado Torres + Info: Test for seasonal multi-model + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: monthly_mean, units: C} + Datasets: + System: + - {name: ECMWF-SEAS5.1} + - {name: CMCC-SPS3.5} + - {name: DWD-GCFS2.1} + Multimodel: + execute: both # Mandatory: Either both, yes/true or no/false + approach: pooled #mean, median + createFrom: Anomalies + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. + Time: + sdate: + - '0101' ## MMDD + fcst_year: '2023' # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months + Region: + - {name: "Spain", latmin: 34, latmax: 44, lonmin: -10, lonmax: 6} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "r360x180" + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: EnsCorr rpss + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
+ save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + ncores: 16 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards # scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ # replace with the directory where your code is + script: + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc32762 # replace with your hpc username + wallclock: 01:00 # hh:mm + wallclock_multimodel: 02:00 + processors_per_job: 4 + processors_multimodel: 16 + custom_directives: ['#SBATCH --exclusive'] + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: victoria.agudetse@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/split.R b/split.R index 0d443c7c84163934f1e01c33e6ad738d3fb782c6..faadafd68051b934cafcc2b8f2c2617dee3f849b 100755 --- a/split.R +++ b/split.R @@ -34,7 +34,10 @@ recipe <- prepare_outputs(recipe_file = arguments$recipe, run_parameters <- divide_recipe(recipe) if (!is.null(recipe$Run$autosubmit) && (recipe$Run$autosubmit)) { - write_autosubmit_conf(recipe, run_parameters$n_atomic_recipes) + write_autosubmit_conf(recipe = recipe, + nchunks = run_parameters$n_atomic_recipes, + chunk_to_recipe = run_parameters$chunk_to_recipe, + split_to_recipe = run_parameters$split_to_recipe) sink(arguments$tmpfile, append = FALSE) # Run with... cat("autosubmit") @@ -47,6 +50,13 @@ if (!is.null(recipe$Run$autosubmit) && (recipe$Run$autosubmit)) { cat(paste0(recipe$Run$code_dir, "\n")) # Output directory cat(paste0(run_parameters$outdir, "\n")) + # Multimodel + if (!is.null(recipe$Analysis$Datasets$Multimodel$execute) && + (recipe$Analysis$Datasets$Multimodel$execute %in% c(TRUE, 'both'))) { + cat("TRUE\n") + } else { + cat("FALSE\n") + } # Scorecards if (!("Scorecards" %in% names(recipe$Analysis$Workflow)) || (!recipe$Analysis$Workflow$Scorecards$execute)) { diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 3346e529ae76f9e58c08371f95cf0ded95fb6533..7586d3d6e4143e9ed8be066bd6ffb031d01eb8fe 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -251,10 +251,10 @@ test_that("4. 
Saving", { outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( all(basename(list.files(outputs, recursive = T)) %in% -c("tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", - "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", - "tas-percentiles_month11.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", - "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc")), +c("tas_1991.nc", "tas_1992.nc", "tas_1993.nc", "tas_1994.nc", "tas_2021.nc", + "tas-obs_1991.nc", "tas-obs_1992.nc", "tas-obs_1993.nc", "tas-obs_1994.nc", + "tas-percentiles.nc", "tas-probs_1991.nc", "tas-probs_1992.nc", + "tas-probs_1993.nc", "tas-probs_1994.nc", "tas-probs_2021.nc", "tas-skill.nc")), TRUE ) # open the files and check values/attributes? diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 9adc16b65deb3b84f9a825d93c175504f6304f2b..23821dbb3df08b5f84da2df992de2b4edbbc18e6 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -248,10 +248,10 @@ test_that("4. Saving", { outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( all(basename(list.files(outputs, recursive = T)) %in% -c("tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_20201101.nc", "tas_20211101.nc", - "tas-obs_19901101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", - "tas-percentiles_month11.nc", "tas-probs_19901101.nc", "tas-probs_19911101.nc", - "tas-probs_19921101.nc", "tas-probs_20201101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc")), +c("tas_1990.nc", "tas_1991.nc", "tas_1992.nc", "tas_2020.nc", "tas_2021.nc", + "tas-obs_1990.nc", "tas-obs_1991.nc", "tas-obs_1992.nc", + "tas-percentiles.nc", "tas-probs_1990.nc", "tas-probs_1991.nc", + "tas-probs_1992.nc", "tas-probs_2020.nc", "tas-probs_2021.nc", "tas-skill.nc")), TRUE ) expect_equal( diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 01b790b8255daacce1482357cd174ec26479c039..1f1c7dc8c1bc766305cea49f4c3dae83c814d9c7 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -23,21 +23,21 @@ check_recipe <- function(recipe) { if (!("Analysis" %in% names(recipe))) { error(recipe$Run$logger, "The recipe must contain an element called 'Analysis'.") - error_status <- T + error_status <- TRUE } if (!all(PARAMS %in% names(recipe$Analysis))) { error(recipe$Run$logger, paste0("The element 'Analysis' in the recipe must contain all of ", "the following: ", paste(PARAMS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { error(recipe$Run$logger, paste0("The element 'Horizon' in the recipe must be one of the ", "following: ", paste(HORIZONS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } # Check time settings if (tolower(recipe$Analysis$Horizon) == "seasonal") { @@ -48,7 +48,7 @@ check_recipe <- function(recipe) { paste0("The element 'Time' in the recipe must contain all of the ", "following: ", paste(TIME_SETTINGS_SEASONAL, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } else if (tolower(recipe$Analysis$Horizon) == "decadal") { archive <- read_yaml(ARCHIVE_DECADAL)[[recipe$Run$filesystem]] @@ -57,7 +57,7 @@ check_recipe <- function(recipe) { paste0("The element 'Time' in the recipe must contain all of the ", "following: ", paste(TIME_SETTINGS_DECADAL, collapse = ", "), ".")) - error_status <- T + 
error_status <- TRUE } } else { archive <- NULL @@ -81,54 +81,95 @@ check_recipe <- function(recipe) { } # Check system names if (!is.null(archive)) { - if (!all(recipe$Analysis$Datasets$System$name %in% names(archive$System))) { + if (!all(recipe$Analysis$Datasets$System$name %in% + c(names(archive$System), 'Multimodel'))) { error(recipe$Run$logger, "The specified System name was not found in the archive.") - error_status <- T + error_status <- TRUE } # Check reference names if (!all(recipe$Analysis$Datasets$Reference$name %in% names(archive$Reference))) { error(recipe$Run$logger, "The specified Reference name was not found in the archive.") - error_status <- T + error_status <- TRUE } } + # Check multimodel + if (is.null(recipe$Analysis$Datasets$Multimodel) || + (is.logical(recipe$Analysis$Datasets$Multimodel) && + !(recipe$Analysis$Datasets$Multimodel))) { + recipe$Analysis$Datasets$Multimodel <- list(execute = FALSE) + } + if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'false') { + multimodel <- FALSE + } else { + multimodel <- TRUE + } + MULTIMODEL_METHODS <- c("pooled") ## to be added: mean, median... + MULTIMODEL_CREATEFROM <- c("calibration", "anomalies", "indicators") + if (multimodel) { + if (!is.null(recipe$Analysis$Datasets$Multimodel)) { + if (!tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% + c('true', 'false', 'both')) { + error(recipe$Run$logger, + paste("The specified execution for the multimodel is not valid.", + "Please specify yes/true, no/false or 'both'.")) + error_status <- TRUE + } + if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% + MULTIMODEL_METHODS) { + error(recipe$Run$logger, + paste("The specified approach for the multimodel is not valid.", + "Please specify pooled.")) #, mean or median.")) + error_status <- TRUE + } + if (!tolower(recipe$Analysis$Datasets$Multimodel$createFrom) %in% + MULTIMODEL_CREATEFROM) { + error(recipe$Run$logger, + paste("The specified 'createFrom' for the multimodel is not valid.", + "Please specify Calibration, Anomalies, Indicators.")) + error_status <- TRUE + } + } + } else { + recipe$Analysis$Datasets$Multimodel <- FALSE + } # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || (!is.integer(recipe$Analysis$Time$ftime_min))) { error(recipe$Run$logger, "The element 'ftime_min' must be an integer larger than 0.") - error_status <- T + error_status <- TRUE } if ((!(recipe$Analysis$Time$ftime_max > 0)) || (!is.integer(recipe$Analysis$Time$ftime_max))) { error(recipe$Run$logger, "The element 'ftime_max' must be an integer larger than 0.") - error_status <- T + error_status <- TRUE } if (recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { error(recipe$Run$logger, "'ftime_max' cannot be smaller than 'ftime_min'.") - error_status <- T + error_status <- TRUE } # Check consistency of hindcast years if (!(as.numeric(recipe$Analysis$Time$hcst_start) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_start > 0))) { error(recipe$Run$logger, "The element 'hcst_start' must be a valid year.") - error_status <- T + error_status <- TRUE } if (!(as.numeric(recipe$Analysis$Time$hcst_end) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_end > 0))) { error(recipe$Run$logger, "The element 'hcst_end' must be a valid year.") - error_status <- T + error_status <- TRUE } if (recipe$Analysis$Time$hcst_end < recipe$Analysis$Time$hcst_start) { error(recipe$Run$logger, "'hcst_end' cannot be smaller than 'hcst_start'.") - error_status <- T + error_status <- TRUE } ## TODO: Is this needed? 
if (is.null(recipe$Analysis$Time$fcst_year) || @@ -173,7 +214,14 @@ check_recipe <- function(recipe) { if (length(recipe$Analysis$Regrid) != 2) { error(recipe$Run$logger, "The 'Regrid' element must specify the 'method' and 'type'.") - error_status <- T + error_status <- TRUE + } + + if (recipe$Analysis$Regrid$type == 'to_system' && multimodel) { + error(recipe$Run$logger, + paste0("The 'Regrid$type' cannot be 'to_system' if ", + "'Multimodel$execute' is yes/true or both.")) + error_status <- TRUE } # TODO: Add Workflow checks? # ... @@ -181,7 +229,7 @@ check_recipe <- function(recipe) { if (length(recipe$Analysis$Horizon) > 1) { error(recipe$Run$logger, "Only one single Horizon can be specified in the recipe") - error_status <- T + error_status <- TRUE } ## TODO: Refine this @@ -213,7 +261,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } if (length(recipe$Analysis$Region) > 1) { @@ -230,7 +278,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } ## TODO: Implement multiple regions # nregions <- length(recipe$Analysis$Region) @@ -241,7 +289,7 @@ check_recipe <- function(recipe) { # paste0("Each region defined in element 'Region' ", # "should have 4 elements: ", # paste(limits, collapse = ", "), ".")) - # error_status <- T + # error_status <- TRUE # } # if (length(recipe$Analysis$Region) > 1) { # if (!("name" %in% names(recipe$Analysis$Region[[i]]))) { @@ -268,7 +316,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Calibration$method)) { error(recipe$Run$logger, "The 'Calibration' element 'method' must be specified.") - error_status <- T + error_status <- TRUE } SAVING_OPTIONS_CALIB <- c("all", "none", "exp_only", "fcst_only") if ((is.null(recipe$Analysis$Workflow$Calibration$save)) || @@ -277,7 +325,7 @@ check_recipe <- function(recipe) { paste0("Please specify which Calibration module outputs you want ", "to save with the 'save' parameter. The options are: ", paste(SAVING_OPTIONS_CALIB, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } # Anomalies @@ -286,12 +334,12 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, "Parameter 'compute' must be defined under 'Anomalies'.") - error_status <- T + error_status <- TRUE } else if (!(is.logical(recipe$Analysis$Workflow$Anomalies$compute))) { error(recipe$Run$logger, paste("Parameter 'Anomalies:compute' must be a logical value", "(True/False or yes/no).")) - error_status <- T + error_status <- TRUE } else if ((recipe$Analysis$Workflow$Anomalies$compute)) { # Cross-validation check if (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation)) { @@ -299,7 +347,7 @@ check_recipe <- function(recipe) { paste("If anomaly computation is requested, parameter", "'cross_validation' must be defined under 'Anomalies', and it must be a logical value (True/False or yes/no).")) - error_status <- T + error_status <- TRUE } # Saving checks SAVING_OPTIONS_ANOM <- c("all", "none", "exp_only", "fcst_only") @@ -309,7 +357,7 @@ check_recipe <- function(recipe) { paste0("Please specify which Anomalies module outputs you want ", "to save with the 'save' parameter. 
The options are: ", paste(SAVING_OPTIONS_ANOM, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } } @@ -336,21 +384,21 @@ check_recipe <- function(recipe) { paste0("The type of Downscaling request in the recipe is not ", "available. It must be one of the following: ", paste(DOWNSCAL_TYPES, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && (is.null(downscal_params$target_grid))) { error(recipe$Run$logger, paste("A target grid is required for the downscaling method", "requested in the recipe.")) - error_status <- T + error_status <- TRUE } if (downscal_params$type == "int") { if (is.null(downscal_params$int_method)) { error(recipe$Run$logger, paste("Downscaling type 'int' was requested, but no", "interpolation method is provided in the recipe.")) - error_status <- T + error_status <- TRUE } } else if (downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) { @@ -359,32 +407,32 @@ check_recipe <- function(recipe) { paste("Downscaling type", downscal_params$type, "was requested in the recipe, but no", "interpolation method is provided.")) - error_status <- T + error_status <- TRUE } } else if (downscal_params$type == "intbc") { if (is.null(downscal_params$bc_method)) { error(recipe$Run$logger, paste("Downscaling type 'intbc' was requested in the recipe, but", "no bias correction method is provided.")) - error_status <- T + error_status <- TRUE } else if (!(downscal_params$bc_method %in% BC_METHODS)) { error(recipe$Run$logger, paste0("The accepted Bias Correction methods for the downscaling", " module are: ", paste(BC_METHODS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } else if (downscal_params$type == "intlr") { if (length(downscal_params$lr_method) == 0) { error(recipe$Run$logger, paste("Downscaling type 'intlr' was requested in the recipe, but", "no linear regression method was provided.")) - error_status <- T + error_status <- TRUE } else if (!(downscal_params$lr_method %in% LR_METHODS)) { error(recipe$Run$logger, paste0("The accepted linear regression methods for the", " downscaling module are: ", paste(LR_METHODS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } else if (downscal_params$type == "analogs") { if (is.null(downscal_params$nanalogs)) { @@ -397,19 +445,19 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste("Downscaling type 'logreg' was requested in the recipe, but", "no interpolation method was provided.")) - error_status <- T + error_status <- TRUE } if (is.null(downscal_params$log_reg_method)) { error(recipe$Run$logger, paste("Downscaling type 'logreg' was requested in the recipe,", "but no logistic regression method is provided.")) - error_status <- T + error_status <- TRUE } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { error(recipe$Run$logger, paste0("The accepted logistic regression methods for the ", "downscaling module are: ", paste(LOGREG_METHODS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } } @@ -423,12 +471,12 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("Indices uses Anomalies as input, but Anomalies are missing", "in the recipe.")) - error_status <- T + error_status <- TRUE } else if (!(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, paste0("Indices uses Anomalies as input, but the parameter", "'Anomalies:compute' is set as no/False.")) - error_status <- T + error_status <- TRUE } 
recipe_indices <- tolower(names(recipe$Analysis$Workflow$Indices)) if (!all(recipe_indices %in% indices)) { @@ -436,7 +484,7 @@ check_recipe <- function(recipe) { paste0("Some of the indices under 'Indices' are not available.", "The available Indices are: 'NAO', 'Nino1+2', 'Nino3', ", "'Nino3.4' and 'Nino4'.")) - error_status <- T + error_status <- TRUE } # Check that variables correspond with indices requested if (("nao" %in% recipe_indices) && @@ -445,7 +493,7 @@ check_recipe <- function(recipe) { paste0("It is not possible to compute the NAO with some of the ", "variables requested. To compute the NAO, please make sure", "your recipe requests only psl and/or z500.")) - error_status <- T + error_status <- TRUE } if ((any(nino_indices %in% recipe_indices)) && (!all(recipe_variables %in% c("tos", "sst")))) { @@ -453,7 +501,7 @@ check_recipe <- function(recipe) { paste0("It is not possible to compute El Nino indices with some ", "of the variables requested. To compute El Nino, please ", "make sure your recipe requests only tos.")) - error_status <- T + error_status <- TRUE } } @@ -467,7 +515,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Skill$metric)) { error(recipe$Run$logger, "Parameter 'metric' must be defined under 'Skill'.") - error_status <- T + error_status <- TRUE } else { requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] @@ -476,7 +524,7 @@ check_recipe <- function(recipe) { paste0("Some of the metrics requested under 'Skill' are not ", "available in SUNSET. Check the documentation to see the ", "full list of accepted skill metrics.")) - error_status <- T + error_status <- TRUE } } # Saving checks @@ -487,7 +535,7 @@ check_recipe <- function(recipe) { paste0("Please specify whether you want to save the Skill metrics ", "with the 'save' parameter. The options are: ", paste(SAVING_OPTIONS_SKILL, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } @@ -496,12 +544,12 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, "Parameter 'percentiles' must be defined under 'Probabilities'.") - error_status <- T + error_status <- TRUE } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, paste("Parameter 'Probabilities:percentiles' expects a list.", "See documentation in the wiki for examples.")) - error_status <- T + error_status <- TRUE } # Saving checks SAVING_OPTIONS_PROBS <- c("all", "none", "bins_only", "percentiles_only") @@ -512,7 +560,7 @@ check_recipe <- function(recipe) { "and probability bins with the 'save' parameter. 
The ", "options are: ", paste(SAVING_OPTIONS_PROBS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } @@ -524,7 +572,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Visualization$plots)) { error(recipe$Run$logger, "The 'plots' element must be defined under 'Visualization'.") - error_status <- T + error_status <- TRUE } else { plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] @@ -532,7 +580,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("The options available for the plots are: ", paste(PLOT_OPTIONS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } # Check multi_panel option @@ -545,7 +593,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("Parameter 'Visualization:multi_panel' must be a logical ", "value: either 'yes/True' or 'no/False'")) - error_status <- T + error_status <- TRUE } # Check projection if (is.null(recipe$Analysis$Workflow$Visualization$projection)) { @@ -564,7 +612,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("Parameter Visualization:mask_terciles must be one of: ", "yes/True, no/False, 'both'")) - error_status <- T + error_status <- TRUE } if (is.null(recipe$Analysis$Workflow$Visualization$dots)) { warn(recipe$Run$logger, @@ -583,7 +631,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { error(recipe$Run$logger, "Parameter 'metric' must be defined under 'Scorecards'.") - error_status <- T + error_status <- TRUE } else { sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, ", | |,")[[1]] @@ -591,7 +639,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("All of the metrics requested under 'Scorecards' must ", "be requested in the 'Skill' section.")) - error_status <- T + error_status <- TRUE } } } @@ -610,28 +658,28 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste("Recipe element 'Run' must contain", "all of the following fields:", paste(RUN_FIELDS, collapse=", "), ".")) - error_status <- T + error_status <- TRUE } if (!is.character(recipe$Run$output_dir)) { error(recipe$Run$logger, paste("The Run element 'output_dir' in", recipe$name, "file", "should be a character string indicating the path where", "the outputs should be saved.")) - error_status <- T + error_status <- TRUE } if (!is.character(recipe$Run$code_dir)) { error(recipe$Run$logger, paste("The Run element 'code_dir' in", recipe$name, "file ", "should be a character string indicating the path", "where the code is.")) - error_status <- T + error_status <- TRUE } if (!is.logical(recipe$Run$Terminal)) { error(recipe$Run$logger, paste("The Run element 'Terminal' in", recipe$name, "file ", "should be a boolean value indicating whether or not to", "print the logs in the terminal.")) - error_status <- T + error_status <- TRUE } ## TODO: Review this case, since default value is allowed if (!is.character(recipe$Run$Loglevel) || @@ -640,7 +688,7 @@ check_recipe <- function(recipe) { paste("The Run element 'Loglevel' in", recipe$name, "file", "should be a character string specifying one of the levels available:", paste0(LOG_LEVELS, collapse='/'))) - error_status <- T + error_status <- TRUE } # --------------------------------------------------------------------- @@ -652,7 +700,7 @@ check_recipe <- function(recipe) { "email_address", "notify_completed", "notify_failed") # Autosubmit false by default if (is.null(recipe$Run$autosubmit)) { - 
recipe$Run$autosubmit <- F + recipe$Run$autosubmit <- FALSE } # Autosubmit configuration checks if (recipe$Run$autosubmit) { @@ -662,22 +710,22 @@ check_recipe <- function(recipe) { if (!("auto_conf" %in% names(recipe$Run))) { error(recipe$Run$logger, "The 'auto_conf' is missing from the 'Run' section of the recipe.") - error_status <- T + error_status <- TRUE } else if (!all(AUTO_PARAMS %in% names(recipe$Run$auto_conf))) { error(recipe$Run$logger, paste0("The element 'Run:auto_conf' must contain all of the ", "following: ", paste(AUTO_PARAMS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } # Check that the script is not NULL and exists if (is.null(recipe$Run$auto_conf$script)) { error(recipe$Run$logger, "A script must be provided to run the recipe with autosubmit.") - error_status <- T + error_status <- TRUE } else if (!file.exists(recipe$Run$auto_conf$script)) { error(recipe$Run$logger, "Could not find the file for the script in 'auto_conf'.") - error_status <- T + error_status <- TRUE } # Check that the experiment ID exists if (is.null(recipe$Run$auto_conf$expid)) { @@ -689,30 +737,55 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste("autosubmit expid -H", auto_specs$platform, "-d ")) - error_status <- T + error_status <- TRUE } else if (!dir.exists(paste0(auto_specs$experiment_dir, recipe$Run$auto_conf$expid))) { error(recipe$Run$logger, paste0("No folder in ", auto_specs$experiment_dir, " for the EXPID", recipe$Run$auto_conf$expid, ". Please make sure it is correct.")) - error_status <- T + error_status <- TRUE } if ((recipe$Run$auto_conf$email_notifications) && (is.null(recipe$Run$auto_conf$email_address))) { error(recipe$Run$logger, "Autosubmit notifications are enabled but email address is empty!") - error_status <- T + error_status <- TRUE } if (is.null(recipe$Run$auto_conf$hpc_user)) { error(recipe$Run$logger, "The 'Run:auto_conf:hpc_user' field can not be empty.") - error_status <- T + error_status <- TRUE } else if ((recipe$Run$filesystem == "esarchive") && (!substr(recipe$Run$auto_conf$hpc_user, 1, 5) == "bsc32")) { error(recipe$Run$logger, "Please check your hpc_user ID. 
It should look like: 'bsc32xxx'")
-      error_status <- T
+      error_status <- TRUE
+    }
+    # Multimodel-specific parameters
+    if (multimodel) {
+      if (is.null(recipe$Run$auto_conf$wallclock_multimodel)) {
+        warn(recipe$Run$logger,
+             paste("No parameter 'wallclock_multimodel' specified, 'wallclock'",
+                   "will be used."))
+        recipe$Run$auto_conf$wallclock_multimodel <- recipe$Run$auto_conf$wallclock
+      }
+      if (is.null(recipe$Run$auto_conf$custom_directives_multimodel) &&
+          !is.null(recipe$Run$auto_conf$custom_directives)) {
+        warn(recipe$Run$logger,
+             paste("No 'custom_directives_multimodel' specified, the",
+                   "single-model verification custom directives will be used."))
+        recipe$Run$auto_conf$custom_directives_multimodel <-
+          recipe$Run$auto_conf$custom_directives
+      }
+      if (is.null(recipe$Run$auto_conf$processors_multimodel) &&
+          !is.null(recipe$Run$auto_conf$processors_per_job)) {
+        warn(recipe$Run$logger,
+             paste("No 'processors_multimodel' specified, the",
+                   "'processors_per_job' parameters will be used."))
+        recipe$Run$auto_conf$processors_multimodel <-
+          recipe$Run$auto_conf$processors_per_job
+      }
+    }
   }
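The three fallbacks above share one shape: if a `*_multimodel` field is missing from `auto_conf`, log a warning and reuse the single-model value. That shared shape is also where the original copy-paste slips came from (the second condition tested `custom_directives_multimodel` against itself, and the `processors_multimodel` branch assigned `custom_directives_multimodel`). A hedged sketch of the factored-out pattern, with an illustrative helper name, assuming the `warn(logger, ...)` logging helper used elsewhere in the suite:

# Hypothetical helper (not part of the codebase): default a multimodel
# Autosubmit parameter to its single-model counterpart when it is missing.
fill_multimodel_default <- function(auto_conf, mm_field, sm_field, logger) {
  if (is.null(auto_conf[[mm_field]]) && !is.null(auto_conf[[sm_field]])) {
    warn(logger, paste0("No '", mm_field, "' specified, '", sm_field,
                        "' will be used."))
    auto_conf[[mm_field]] <- auto_conf[[sm_field]]   # copy single-model value
  }
  auto_conf
}

# auto_conf <- fill_multimodel_default(auto_conf, "wallclock_multimodel",
#                                      "wallclock", recipe$Run$logger)
# auto_conf <- fill_multimodel_default(auto_conf, "processors_multimodel",
#                                      "processors_per_job", recipe$Run$logger)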
diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R
index b22274cc5c0e7e5081a3f48f492cdcbfe8fba026..3b2e6eee3399fec5be8daebe097222951f65123e 100644
--- a/tools/divide_recipe.R
+++ b/tools/divide_recipe.R
@@ -21,7 +21,7 @@ divide_recipe <- function(recipe) {
               Run = recipe$Run[c("Loglevel", "output_dir", "Terminal",
                                  "code_dir", "logfile", "filesystem")])
-  # duplicate recipe by independent variables:รง
+  # duplicate recipe by independent variables:
   # If a single variable is not given inside a list, rebuild structure
   if (any(c("name", "freq", "units") %in% names(recipe$Analysis$Variables))) {
     variables <- recipe$Analysis$Variables
@@ -48,31 +48,55 @@ divide_recipe <- function(recipe) {
     recipe$Analysis$Datasets$System <- NULL
     recipe$Analysis$Datasets$System[[1]] <- system
   }
-
-  if (recipe$Analysis$Datasets$Multimodel %in% c(TRUE, 'both')) {
-    for (reci in 1:length(all_recipes)) {
-      all_recipes[[reci]]$Analysis$Datasets <-
-        list(System = recipe$Analysis$Datasets$System,
-             Multimodel = recipe$Analysis$Datasets$Multimodel,
-             Reference = NULL)
-    }
+  # Modify the saving of the individual models in case multimodel is yes or both
+  if (recipe$Analysis$Datasets$Multimodel$execute %in% c(TRUE, 'both')) {
+    # Create directory for multimodel recipes
+    dir.create(paste0(recipe$Run$output_dir, "/logs/recipes/multimodel/"),
+               recursive = TRUE)
+    n_models <- length(recipe$Analysis$Datasets$System) + 1
+    mm <- tolower(recipe$Analysis$Datasets$Multimodel$approach)
   } else {
-    for (sys in 1:length(recipe$Analysis$Datasets$System)) {
-      for (reci in 1:length(all_recipes)) {
+    n_models <- length(recipe$Analysis$Datasets$System)
+    mm <- FALSE
+  }
+  for (sys in 1:n_models) {
+    for (reci in 1:length(all_recipes)) {
+      if (sys == length(recipe$Analysis$Datasets$System)+1){
+        # seasonal only needs the model name; decadal also needs the members
+        if (tolower(recipe$Analysis$Horizon) == 'seasonal') {
+          m <- unlist(recipe$Analysis$Datasets$System)
+        } else if (tolower(recipe$Analysis$Horizon) == 'decadal') {
+          m <- recipe$Analysis$Datasets$System
+        }
        all_recipes[[reci]]$Analysis$Datasets <-
-          list(System = recipe$Analysis$Datasets$System[[sys]],
+          list(System = list(name = 'Multimodel',
+                             models = m),
               Multimodel = recipe$Analysis$Datasets$Multimodel,
               Reference = NULL)
-      }
-      if (sys == 1) {
-        recipes <- all_recipes
       } else {
-        recipes <- append(recipes, all_recipes)
+        all_recipes[[reci]]$Analysis$Datasets <-
+          list(System = recipe$Analysis$Datasets$System[[sys]],
+               Multimodel = 'no',
+               Reference = NULL)
       }
-    } # Rest of horizons
-    all_recipes <- recipes
-    rm(list = 'recipes')
+    }
+    if (sys == 1) {
+      recipes <- all_recipes
+    } else {
+      recipes <- append(recipes, all_recipes)
+    }
+  }
+  all_recipes <- recipes
+  rm(list = 'recipes')
+  for (reci in 1:length(all_recipes)) {
+    if (!isFALSE(mm) && tolower(all_recipes[[reci]]$Analysis$Datasets$System$name) != 'multimodel') {
+      all_recipes[[reci]]$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save <- 'all'
+      if (mm %in% c('mean','median')) {
+        all_recipes[[reci]]$Analysis$Workflow$Probabilities$save <- 'all'
+      }
+    }
   }
+
   # Check references
   # If a single reference is not given inside a list, rebuild structure
   if (c("name") %in% names(recipe$Analysis$Datasets$Reference)) {
@@ -147,23 +171,48 @@ divide_recipe <- function(recipe) {
     }
   } # Rest of horizons
   # Save all recipes in separate YAML files
+  chunk <- 1
+  split <- 1
+  chunk_to_recipe <- list()
+  split_to_recipe <- list()
+  total_models <- length(recipe$Analysis$Datasets$System)
   for (reci in 1:length(all_recipes)) {
-    if (reci < 10) {
-      recipe_number <- paste0("0", reci)
+    ## TODO: Document
+    recipe_model <- paste0("sys-",
+                           gsub('\\.', '', all_recipes[[reci]]$Analysis$Datasets$System$name))
+    #
+    recipe_split <- paste0("_ref-", all_recipes[[reci]]$Analysis$Datasets$Reference$name,
+                           "_var-", all_recipes[[reci]]$Analysis$Variables$name,
+                           "_reg-", all_recipes[[reci]]$Analysis$Region$name,
+                           "_sdate-", all_recipes[[reci]]$Analysis$Time$sdate)
+    recipe_name <- paste0(recipe_model, recipe_split)
+
+
+    if (all_recipes[[reci]]$Analysis$Datasets$System$name == 'Multimodel') {
+      recipe_dir <- paste0(recipe$Run$output_dir, "/logs/recipes/multimodel/")
+      split_to_recipe[split] <- recipe_split
+      split <- split + 1
     } else {
-      recipe_number <- reci
+      recipe_dir <- paste0(recipe$Run$output_dir, "/logs/recipes/")
+      chunk_to_recipe[chunk] <- recipe_name
+      chunk <- chunk + 1
     }
     write_yaml(all_recipes[[reci]],
-               paste0(recipe$Run$output_dir, "/logs/recipes/atomic_recipe_",
-                      recipe_number, ".yml"))
+               paste0(recipe_dir, "atomic_recipe_", recipe_name, ".yml"))
   }
+
+  # Print information for user
   info(recipe$Run$logger,
-       paste("The main recipe has been divided into", length(all_recipes),
-             "atomic recipes."))
+       paste("The main recipe has been divided into", length(chunk_to_recipe),
+             "single model atomic recipes, plus", length(split_to_recipe),
+             "multi-model atomic recipes."))
   text <- paste0("Check output directory ", recipe$Run$output_dir,
                  "/logs/recipes/ to see all the individual atomic recipes.")
   info(recipe$Run$logger, text)
   ## TODO: Change returns?
-  return(list(n_atomic_recipes = length(all_recipes),
-              outdir = recipe$Run$output_dir))
+  return(list(n_atomic_recipes = length(chunk_to_recipe), # length(all_recipes)
+              outdir = recipe$Run$output_dir,
+              chunk_to_recipe = chunk_to_recipe,
+              split_to_recipe = split_to_recipe))
 }
+
diff --git a/tools/write_autosubmit_conf.R b/tools/write_autosubmit_conf.R
index 95ca93f0f09f10f8b8b67a5076f24918c3d6db7e..5f3981974f85d74ae880928f918f419a2ea65462 100644
--- a/tools/write_autosubmit_conf.R
+++ b/tools/write_autosubmit_conf.R
@@ -1,5 +1,19 @@
-# Function to write autosubmit configuration from an Auto-S2S recipe
-write_autosubmit_conf <- function(recipe, nchunks) {
+# Function to write autosubmit configuration from a SUNSET recipe. The function
+# reads the corresponding AS configuration file templates and fills them with
+# the information needed to run the experiment. The modified configuration
+# files are saved in the `conf/` folder of the Autosubmit experiment.
+#
+# recipe: the SUNSET recipe
+# nchunks: the number of 'chunks' to be processed by Autosubmit, as returned
+# by divide_recipe().
+# chunk_to_recipe: list with the correspondence between the chunk number and
+# the name of the atomic recipe, as returned by divide_recipe().
+# split_to_recipe: list with the correspondence between the split number and
+# the name of the multi-model atomic recipe, as returned by divide_recipe().
+
+write_autosubmit_conf <- function(recipe, nchunks,
+                                  chunk_to_recipe,
+                                  split_to_recipe) {
   # Experiment ID
   expid <- recipe$Run$auto_conf$expid
   # Directory with the experiment templates
@@ -8,6 +22,12 @@ write_autosubmit_conf <- function(recipe, nchunks) {
   auto_specs <- read_yaml("conf/autosubmit.yml")[[recipe$Run$filesystem]]
   # Output directory
   dest_dir <- paste0(auto_specs$experiment_dir, expid, "/conf/")
+  proj_dir <- paste0(auto_specs$experiment_dir, expid, "/proj/auto-s2s/")
+  # Create project directory if it does not exist yet so that chunk_to_recipe
+  # and split_to_recipe files can be created
+  if (!dir.exists(proj_dir)) {
+    dir.create(proj_dir, recursive = TRUE)
+  }
   # Modify the configuration files according to the info in the recipe
   for (file in list.files(template_dir)) {
     conf_type <- strsplit(file, split = "[.]")[[1]][1]
@@ -19,6 +39,7 @@ write_autosubmit_conf <- function(recipe, nchunks) {
       # Section 1: autosubmit.conf
       ## expid, email notifications and address
       conf$config$EXPID <- expid
+      conf$config$AUTOSUBMIT_VERSION <- auto_specs$auto_version
       if (recipe$Run$auto_conf$email_notifications) {
         conf$mail$NOTIFICATIONS <- "True"
       } else {
@@ -35,6 +56,12 @@ write_autosubmit_conf <- function(recipe, nchunks) {
     } else if (conf_type == "jobs") {
       # Section 3: jobs
       ## wallclock, notify_on, platform?, processors,
+      # Create bash file to associate chunk number to recipe name
+      chunk_file <- paste0(proj_dir, "chunk_to_recipe")
+      .create_bash_file(fileout = chunk_file,
+                        dictionary = chunk_to_recipe,
+                        variable = "CHUNK")
+      # Define job parameters
       conf$JOBS$verification$WALLCLOCK <- recipe$Run$auto_conf$wallclock
       if (recipe$Run$auto_conf$notify_completed) {
         conf$JOBS$verification$NOTIFY_ON <- paste(conf$JOBS$verification$NOTIFY_ON,
@@ -46,6 +73,43 @@ write_autosubmit_conf <- function(recipe, nchunks) {
       }
       conf$JOBS$verification$PROCESSORS <- recipe$Run$auto_conf$processors_per_job # ncores?
      conf$JOBS$verification$CUSTOM_DIRECTIVES <- recipe$Run$auto_conf$custom_directives
+      # Only include the Multimodel job if the section exists in the recipe and
+      # is set to execute = 'True' or 'both'
+      if (!is.null(recipe$Analysis$Datasets$Multimodel) &&
+          tolower(recipe$Analysis$Datasets$Multimodel$execute) == "false") {
+        conf$JOBS$multimodel <- NULL
+        conf$JOBS$scorecards$DEPENDENCIES <- "verification"
+      } else {
+        conf$JOBS$scorecards$DEPENDENCIES <- "multimodel"
+        # Create bash file to associate split number to recipe name
+        split_file <- paste0(proj_dir, "split_to_recipe")
+        .create_bash_file(fileout = split_file,
+                          dictionary = split_to_recipe,
+                          variable = "SPLIT")
+        # Define multimodel dependencies in the format required by AS config
+        mm_dependencies <- lapply(split_to_recipe, grep, chunk_to_recipe)
+        mm_dependencies <- lapply(mm_dependencies, paste, collapse = ",")
+        names(mm_dependencies) <- paste(1:length(mm_dependencies))
+        for (split in names(mm_dependencies)) {
+          conf$JOBS$multimodel$DEPENDENCIES$verification$SPLITS_FROM[[split]]$CHUNKS_TO <-
+            mm_dependencies[[split]]
+        }
+        # 'Splits' parameter should be the number of multimodel jobs
+        conf$JOBS$multimodel$SPLITS <- length(mm_dependencies)
+        # Define the rest of the parameters
+        if (recipe$Run$auto_conf$notify_completed) {
+          conf$JOBS$multimodel$NOTIFY_ON <- paste(conf$JOBS$multimodel$NOTIFY_ON,
+                                                  "COMPLETED")
+        }
+        if (recipe$Run$auto_conf$notify_failed) {
+          conf$JOBS$multimodel$NOTIFY_ON <- paste(conf$JOBS$multimodel$NOTIFY_ON,
+                                                  "FAILED")
+        }
+
+        conf$JOBS$multimodel$PROCESSORS <- recipe$Run$auto_conf$processors_multimodel
+        conf$JOBS$multimodel$CUSTOM_DIRECTIVES <- recipe$Run$auto_conf$custom_directives_multimodel
+        conf$JOBS$multimodel$WALLCLOCK <- recipe$Run$auto_conf$wallclock_multimodel
+      }
       # Only include Scorecards job if section exists in the recipe and
       # is set to 'execute: True'
       if (!("Scorecards" %in% names(recipe$Analysis$Workflow)) ||
@@ -74,7 +138,6 @@ write_autosubmit_conf <- function(recipe, nchunks) {
         conf$common$RECIPE <- paste0(recipe$name, ".yml")
       }
       # Write config file inside autosubmit dir
-      ## TODO: Change write.type depending on autosubmit version
       write.config(conf, paste0(dest_dir, dest_file),
                    write.type = auto_specs$conf_format)
       Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F)
@@ -107,3 +170,15 @@ write_autosubmit_conf <- function(recipe, nchunks) {
     print(paste("nohup autosubmit run", expid, "& disown"))
   }
 }
+
+.create_bash_file <- function(fileout, dictionary, variable) {
+  file_connection <- file(fileout)
+  script_lines <- paste0("case $", variable, " in")
+  for (item in 1:length(dictionary)) {
+    script_command <- paste0("  ", item, ") recipe='", dictionary[[item]], "' ;;")
+    script_lines <- c(script_lines, script_command)
+  }
+  script_lines <- c(script_lines, "esac")
+  writeLines(script_lines, file_connection)
+  close(file_connection)
+}
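For reference, `.create_bash_file()` writes a plain bash `case` statement; the Autosubmit job scripts then `source` the generated `chunk_to_recipe` or `split_to_recipe` file so that the job's $CHUNK or $SPLIT number resolves to an atomic recipe name. A sketch with hypothetical recipe names built the way divide_recipe() composes them:

# Hypothetical mapping for two single-model chunks; the names mimic the
# "sys-..._ref-..._var-..._reg-..._sdate-..." pattern from divide_recipe().
chunk_to_recipe <- list("sys-ECMWF-SEAS51_ref-ERA5_var-tas_reg-Spain_sdate-0101",
                        "sys-CMCC-SPS35_ref-ERA5_var-tas_reg-Spain_sdate-0101")
.create_bash_file(fileout = "chunk_to_recipe",
                  dictionary = chunk_to_recipe,
                  variable = "CHUNK")
# The generated file then contains (indentation as produced by the paste0 call):
#   case $CHUNK in
#     1) recipe='sys-ECMWF-SEAS51_ref-ERA5_var-tas_reg-Spain_sdate-0101' ;;
#     2) recipe='sys-CMCC-SPS35_ref-ERA5_var-tas_reg-Spain_sdate-0101' ;;
#   esac
# After sourcing it, $recipe holds the name that the verification job script
# interpolates into the atomic recipe path.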