diff --git a/.gitignore b/.gitignore index 8fec461d052296bfd1779cceec4e33b7b8893afa..d17d76340a88f111f22792959ec31a0e53d8a4bf 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ out-logs/ /modules/Calibration/test_victoria.R modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml +conf/vitigeoss-vars-dict.yml diff --git a/README.md b/README.md index e540516abb7c8fc62f42f8db72f9a766cc11cba3..4df05eccb0fd30e154bc3f723f399045dff37503 100644 --- a/README.md +++ b/README.md @@ -9,13 +9,23 @@ The main developers of the tool are Victòria Agudetse (@vagudets), An-Chi Ho (@ Resources --------- -Here you can access a presentation containing information relevant to the tool: -[ESS Verification Suite](https://docs.google.com/presentation/d/1R8Gcz5R_NTgcBQvXBkCPG3jY31BVPDur/edit#slide=id.p1?target=_blank) +You can access the documentation of the Verification Suite through the wiki: +[Auto-s2s Wiki](https://earth.bsc.es/gitlab/es/auto-s2s/-/wikis/home?target=_blank) + +You may also find useful information in the slides from past user meetings: + +[User meeting September 2022](https://docs.google.com/presentation/d/14-qq__fblMt7xvJDaqS5UqfQMXWCf3Ju/edit#slide=id.p1?target=_blank) + +[User meeting June 2022](https://docs.google.com/presentation/d/1R8Gcz5R_NTgcBQvXBkCPG3jY31BVPDur/edit#slide=id.p1?target=_blank) Branching strategy ------------------ Branches containing developments that are to be merged into the tool must contain "dev-" at the beginning of the name, followed by a short, meaningful description of the development in question. E.g. "dev-loading-subseasonal" for the branch containing developments related to the loading of subseasonal datasets. -Users may have their own branches for personal use. These branches should start with "user-\", which can optionally be followed by a brief description. E.g. "user-vagudets-FOCUS". +Users who wish to incorporate their own developments into the core of the tool are encouraged to create a personal fork of the Auto-S2S repository to work on their projects. Please contact Victòria Agudetse at victoria.agudetse@bsc.es to discuss the first steps. + +Mailing list +------------ +User meetings, internal releases and news are announced through the mailing list. You can send an email to victoria.agudetse@bsc.es or an.ho@bsc.es to request a subscription. 
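The conf/archive.yml changes below add a human-readable "name" and "institution" field to every system and reference entry; modules/Saving/Saving.R reads them into the netCDF global attributes further down in this diff. A minimal sketch of how the new fields can be accessed, using the same read_yaml() call the modules use (the system key shown is one of the entries added below):

library(yaml)
archive <- read_yaml("conf/archive.yml")$archive
sys <- archive$System[["system5c3s"]]
# name and institution are the metadata fields introduced in this diff
paste0(sys$name, " (", sys$institution, ")")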
diff --git a/conf/archive.yml b/conf/archive.yml index 63abfb0ad9a4d203f76f5ff992d4f953baaee5b4..c50909c3ed556d255c81c23370a0322e90694f14 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -3,7 +3,9 @@ archive: src: "/esarchive/" System: - system5c3s: + system5c3s: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ecmwf/system5c3s/" daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/", @@ -17,6 +19,8 @@ archive: calendar: "proleptic_gregorian" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" system7c3s: + name: "Meteo-France System 7" + institution: "Meteo-France" src: "exp/meteofrance/system7c3s/" monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", "prlr":"_f24h/", "sfcWind": "_f6h/", @@ -27,6 +31,8 @@ archive: calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" system21_m1: + name: "DWD GCFS 2.1" + institution: "Deutscher Wetterdienst (DWD)" src: "exp/dwd/system21_m1/" monthly_mean: {"tas":"_f6h/", "prlr":"_f24h", "g500":"_f12h/", "sfcWind":"_f6h/", @@ -37,6 +43,8 @@ archive: calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system21_m1.txt" system35c3s: + name: "CMCC-SPS3.5" + institution: "Centro Euro-Mediterraneo sui Cambiamenti Climatici (CMCC)" src: "exp/cmcc/system35c3s/" monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", "prlr":"_f24h/", "sfcWind": "_f6h/", @@ -47,6 +55,8 @@ archive: calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system35c3s.txt" system2c3s: + name: "JMA System 2" + institution: "Japan Meteorological Agency (JMA)" src: "exp/jma/system2c3s/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", "tasmax":"_f6h/", "tasmin":"_f6h/"} @@ -56,6 +66,8 @@ archive: calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system2c3s.txt" eccc1: + name: "ECCC CanCM4i" + institution: "Environment and Climate Change Canada (ECCC)" src: "exp/eccc/eccc1/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", "tasmax":"_f6h/", "tasmin":"_f6h/"} @@ -65,6 +77,8 @@ archive: calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_eccc1.txt" glosea6_system600-c3s: + name: "UKMO GloSea 6 6.0" + institution: "UK Met Office (UKMO)" src: "exp/ukmo/glosea6_system600-c3s/" monthly_mean: {"tas":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/", "prlr":"_f24h/"} @@ -73,8 +87,10 @@ archive: hcst: 28 calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_ukmo600.txt" - ncep-cfsv2: + ncep-cfsv2: + name: "NCEP CFSv2" + institution: "NOAA NCEP" 
src: "exp/ncep/cfs-v2/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", "tasmax":"_f6h/", "tasmin":"_f6h/"} @@ -84,7 +99,9 @@ archive: calendar: "gregorian" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" Reference: - era5: + era5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", @@ -103,6 +120,8 @@ archive: calendar: "standard" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: + name: "ERA5-Land" + institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5land/" daily_mean: {"tas":"_f1h/", "rsds":"_f1h/", "prlr":"_f1h/", "sfcWind":"_f1h/"} @@ -111,7 +130,9 @@ archive: "sfcWind":"_f1h/", "rsds":"_f1h/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" - uerra: + uerra: + name: "ECMWF UERRA" + institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/uerra_mescan/" daily_mean: {"tas":"_f6h/"} monthly_mean: {"tas":"_f6h/"} diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 61e3f2b8eb5d781be670d619e2113c9e5619d229..2b74bff89ab2c27db06ba8e46a55b125fee21151 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -3,6 +3,8 @@ archive: System: # ---- EC-Earth3-i1: + name: "EC-Earth3-i1" + institution: "EC-Earth-Consortium" src: hcst: "exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: @@ -23,6 +25,8 @@ archive: #NOTE: EC-Earth3-i2 the first file of each sdate has 2 time step only (Nov-Dec). # The rest files are Jan to Dec. EC-Earth3-i2: + name: "EC-Earth3-i2" + institution: "EC-Earth-Consortium" src: hcst: "exp/CMIP6/dcppA-hindcast/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: @@ -41,6 +45,8 @@ archive: # ---- EC-Earth3-i4: + name: "EC-Earth3-i4" + institution: "EC-Earth-Consortium" src: hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" @@ -77,6 +83,8 @@ archive: # ---- HadGEM3-GC31-MM: + name: "HadGEM3-GC31-MM" + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast/" @@ -92,10 +100,12 @@ archive: calendar: "360-day" member: r1i1p1f2,r2i1p1f2,r3i1p1f2,r4i1p1f2,r5i1p1f2,r6i1p1f2,r7i1p1f2,r8i1p1f2,r9i1p1f2,r10i1p1f2 initial_month: 11 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gr/v20200316/tas_Amon_HadGEM3_dcppA-hindcast_s2018-r1i1p1f2_gr_201811-202903.nc" #'r432x324' + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gn/v20200417/tas_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s1960-r1i1p1f2_gn_196011-196012.nc" #'r432x324' # ---- BCC-CSM2-MR: + name: "BCC-CSM2-MR" + institution: "Beijing Climate Center, Beijing 100081, China" src: hcst: "exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/" fcst: @@ -115,8 +125,10 @@ archive: # ---- CanESM5: + name: "CanESM5" + institution: src: - hcst: "exp/canesm5/cmip6-dcppA-hindcast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/" + hcst: 
"exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/" fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/" first_dcppB_syear: 2020 monthly_mean: @@ -130,10 +142,13 @@ archive: calendar: "365_day" member: r1i1p2f1,r2i1p2f1,r3i1p2f1,r4i1p2f1,r5i1p2f1,r6i1p2f1,r7i1p2f1,r8i1p2f1, r9i1p2f1, r10i1p2f1, r11i1p2f1,r12i1p2f1,r13i1p2f1,r14i1p2f1,r15i1p2f1,r16i1p2f1,r17i1p2f1,r18i1p2f1, r19i1p2f1, r20i1p2f1,r21i1p2f1,r22i1p2f1,r23i1p2f1,r24i1p2f1,r25i1p2f1,r26i1p2f1,r27i1p2f1,r28i1p2f1, r29i1p2f1, r30i1p2f1, r31i1p2f1,r32i1p2f1,r33i1p2f1,r34i1p2f1,r35i1p2f1,r36i1p2f1,r37i1p2f1,r38i1p2f1, r39i1p2f1, r40i1p2f1 initial_month: 1 #next year Jan - reference_grid: "/esarchive/exp/canesm5/cmip6-dcppA-hindcast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_dcppA-hindcast_s2008-r1i1p2f1_gn_200901-201812.nc" + reference_grid: "/esarchive/exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_dcppA-hindcast_s2008-r1i1p2f1_gn_200901-201812.nc" # ---- +#NOTE: no data there CESM1-1-CAM5-CMIP5: + name: "CESM1-1-CAM5-CMIP5" + institution: src: hcst: "exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast" fcst: @@ -151,7 +166,10 @@ archive: reference_grid: "/esarchive/exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- +#NOTE: in tapes CMCC-CM2-SR5: + name: "CMCC-CM2-SR5" + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppB-forecast/" @@ -170,6 +188,8 @@ archive: # ---- FGOALS-f3-L: + name: "FGOALS-f3-L" + institution: "Chinese Academy of Sciences, Beijing 100029, China" src: hcst: "exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppB-forecast/" @@ -189,6 +209,8 @@ archive: # ---- IPSL-CM6A-LR: + name: "IPSL-CM6A-LR" + institution: "IPSL" src: hcst: "exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/" fcst: @@ -206,6 +228,8 @@ archive: # ---- MIROC6: + name: + institution: src: hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" fcst: @@ -223,6 +247,8 @@ archive: # ---- MPI-ESM1.2-HR: + name: "MPI-ESM1.2-HR" + institution: "MIROC" src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/" fcst: @@ -240,6 +266,8 @@ archive: # ---- MPI-ESM1.2-LR: + name: "MPI-ESM1.2-LR" + institution: "Max-Planck-Institute for Meteorology" src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/dcppA-hindcast/" fcst: @@ -257,6 +285,8 @@ archive: # ---- MRI-ESM2-0: + name: "MRI-ESM2-0" + institution: "Meteorological Research Institute, Japan" src: hcst: "exp/CMIP6/dcppA-hindcast/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/dcppA-hindcast/" fcst: @@ -275,6 +305,8 @@ archive: # ---- #NOTE: NorCPM1-i1 and i2 are under the same directory NorCPM1-i1: + name: "NorCPM1-i1" + institution: "NCC" src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: @@ -284,14 +316,16 @@ archive: version: {"tas":"v20200320", "pr":"v20200320", "psl":"v20200320"} daily_mean: grid: {"pr":"gn", "tas":"gn", 
"tasmax":"gn", "tasmin":"gn"} - version: {"pr":"v20191005", "tas":"v20200320", "tasmax":"v20191005", "tasmin":"v20191005"} + version: {"pr":"v20191005", "tas":"v20191029", "tasmax":"v20191005", "tasmin":"v20191005"} calendar: "noleap" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 10 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200320/tas_Amon_NorCPM1_dcppA-hindcast_s2008-r1i1p1f1_gn_200810-201812.nc" + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20191029/tas_Amon_NorCPM1_dcppA-hindcast_s2008-r1i1p1f1_gn_200810-201812.nc" # ---- NorCPM1-i2: + name: "NorCPM1-i2" + institution: "NCC" src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: @@ -312,6 +346,8 @@ archive: Reference: GHCNv4: + name: + institution: src: "obs/noaa/ghcn_v4/" monthly_mean: {"tas":"", "tasanomaly":""} daily_mean: @@ -319,6 +355,8 @@ archive: reference_grid: "/esarchive/obs/noaa/ghcn_v4/monthly_mean/tasanomaly/tasanomaly_201811.nc" # ---- ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" monthly_mean: {"tas":"_f1h-r1440x721cds", "prlr":"_f1h-r1440x721cds", "psl":"_f1h-r1440x721cds", "tos":"_f1h-r1440x721cds"} daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", @@ -333,6 +371,8 @@ archive: # ---- JRA-55: + name: "JRA-55" + institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/jma/jra55/" monthly_mean: {"tas":"_f6h", "psl":"_f6h", "tos":"", "pr":"_s0-3h", "prlr":"_s0-3h"} daily_mean: {"tas":"_f6h", "psl":"_f6h", "prlr":"_s0-3h", "sfcWind":"_f6h"} @@ -341,14 +381,18 @@ archive: # ---- GISTEMPv4: + name: "GISTEMPv4" + institution: "NASA Goddard Institute for Space Studies" src: "obs/noaa-nasa/ghcnersstgiss/" monthly_mean: {"tasanomaly":""} daily_mean: calendar: "standard" - reference_grid: "/esarchive/obs/noaa-nasa/ghcnersstgiss/monthly_mean/tasanomaly_200811.nc" + reference_grid: "/esarchive/obs/noaa-nasa/ghcnersstgiss/monthly_mean/tasanomaly/tasanomaly_200811.nc" # ---- HadCRUT4: + name: "HadCRUT4" + institution: "Met Office Hadley Centre / Climatic Research Unit, University of East Anglia" src: "obs/ukmo/hadcrut_v4.6/" monthly_mean: {"tasanomaly":""} daily_mean: @@ -357,9 +401,11 @@ archive: # ---- HadSLP2: + name: "HadSLP2" + institution: src: "obs/ukmo/hadslp_v2/" monthly_mean: {"psl":""} daily_mean: calendar: "proleptic_gregorian" - reference_grid: "/esarchive/obs/ukmo/hadslp_v2/monthly_mean/psl_200811.nc" + reference_grid: "/esarchive/obs/ukmo/hadslp_v2/monthly_mean/psl/psl_200811.nc" diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index f535c4e1295b4527c7634ada2118138f1bacedb1..5125215418cc5294fed85349b09b2392ee16ffae 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -1,41 +1,166 @@ vars: +## NOTE: The units field in this file corresponds to CMOR standards. +## Some variables in esarchive may have different units than stated here. +## Use with caution. 
# ECVs tas: units: "K" long_name: "Near-Surface Air Temperature" standard_name: "air_temperature" + accum: no + tos: + units: "degC" + long_name: "Sea Surface Temperature" + standard_name: "sea_surface_temperature" + accum: no # outname: "t2" tasmax: units: "K" long_name: "Maximum Near-Surface Air Temperature" standard_name: "air_temperature" + accum: no tasmin: units: "K" long_name: "Minimum Near-Surface Air Temperature" standard_name: "air_temperature" + accum: no + ts: + units: "K" + long_name: "Surface Temperature" + standard_name: "surface_temperature" + accum: no sfcWind: units: "m s-1" long_name: "Near-Surface Wind Speed" standard_name: "wind_speed" + accum: no + sfcWindmax: + units: "m s-1" + long_name: "Daily Maximum Near-Surface Wind Speed" + standard_name: "wind_speed" + accum: no # outname: "wind" rsds: units: "W m-2" long_name: "Surface Downwelling Shortwave Radiation" standard_name: "surface_downwelling_shortwave_flux_in_air" positive: "down" + accum: yes # outname: "rswin" prlr: - units: "mm" + units: "mm/day" long_name: "Total precipitation" standard_name: "total_precipitation_flux" #? Not in CF + accum: yes # outname: "acprec" g500: units: "m2 s-2" long_name: "Geopotential" standard_name: "geopotential" - + accum: no + pr: + units: "kg m-2 s-1" + long_name: "Precipitation" + standard_name: "precipitation_flux" + accum: yes + prc: + units: "kg m-2 s-1" + long_name: "Convective Precipitation" + standard_name: "convective_precipitation_flux" + accum: yes + psl: + units: "Pa" + long_name: "Sea Level Pressure" + standard_name: "air_pressure_at_mean_sea_level" + accum: no + clt: + units: "%" + long_name: "Total Cloud Cover Percentage" + standard_name: "cloud_area_fraction" + accum: no + hurs: + units: "%" + long_name: "Near-Surface Relative Humidity" + standard_name: "relative_humidity" + accum: no + hursmin: + units: "%" + long_name: "Daily Minimum Near-Surface Relative Humidity" + standard_name: "relative_humidity" + accum: no + hursmax: + units: "%" + long_name: "Daily Maximum Near-Surface Relative Humidity" + standard_name: "relative_humidity" + accum: no + hfls: + units: "W m-2" + long_name: "Surface Upward Latent Heat Flux" + standard_name: "surface_upward_latent_heat_flux" + accum: no + huss: + units: "1" + long_name: "Near-Surface Specific Humidity" + standard_name: "specific_humidity" + accum: no + rsut: + units: "W m-2" + long_name: "TOA Outgoing Shortwave Radiation" + standard_name: "toa_outgoing_shortwave_flux" + accum: no + rlut: + units: "W m-2" + long_name: "TOA Outgoing Longwave Radiation" + standard_name: "toa_outgoing_longwave_flux" + accum: no + rsdt: + units: "W m-2" + long_name: "TOA Incident Shortwave Radiation" + standard_name: "toa_incoming_shortwave_flux" + accum: no + ta: + units: "K" + long_name: "Air Temperature" + standard_name: "air_temperature" + accum: no + ua: + units: "m s-1" + long_name: "Eastward Wind" + standard_name: "eastward_wind" + accum: no + uas: + units: "m s-1" + long_name: "Eastward Near-Surface Wind" + standard_name: "eastward_wind" + accum: no + va: + units: "m s-1" + long_name: "Northward Wind" + standard_name: "northward_wind" + accum: no + vas: + units: "m s-1" + long_name: "Northward Near-Surface Wind" + standard_name: "northward_wind" + accum: no + zg: + units: "m" + long_name: "Geopotential Height" + standard_name: "geopotential_height" + accum: no + evspsbl: + units: "kg m-2 s-1" + long_name: "Evaporation Including Sublimation and Transpiration" + standard_name: "water_evapotranspiration_flux" + accum: no + hfss: + units: "W 
m-2" + long_name: "Surface Upward Sensible Heat Flux" + standard_name: "surface_upward_sensible_heat_flux" + accum: no + # Coordinates coords: longitude: @@ -48,6 +173,7 @@ coords: standard_name: "latitude" long_name: "Latitude" axis: "Y" +## TODO: Add plevels # Skill metrics metrics: diff --git a/conf/vars-dict.yml b/conf/vars-dict.yml deleted file mode 100644 index 04549d36001c848521f53fd704b752878b2eb862..0000000000000000000000000000000000000000 --- a/conf/vars-dict.yml +++ /dev/null @@ -1,114 +0,0 @@ - -vars: -# ECVs - tas: - units: "°C" - longname: "Daily mean temperature at surface" - outname: ~ - tasmin: - units: "°C" - longname: "Minimum daily temperature at surface" - outname: ~ - tasmax: - units: "°C" - longname: "Maximum daily temperature at surface" - outname: ~ - sfcwind: - units: "m/s" - longname: "Surface wind speed module" - outname: ~ - rsds: - units: "W/m2" - longname: "Surface solar radiation downwards" - outname: ~ - psl: - units: "hPa" - longname: "Mean sea level pressure" - outname: ~ - prlr: - units: "mm" - longname: "Total precipitation" - outname: ~ -# CFs - cfwnd1: - units: "%" - longname: "Wind Capacity factor IEC1" - outname: ~ - cfwnd2: - units: "%" - longname: "Wind Capacity factor IEC2" - outname: ~ - cfwnd3: - units: "%" - longname: "Wind Capacity factor IEC3" - outname: ~ - cfslr: - units: "%" - longname: "Solar Capacity factor" - outname: ~ -# Energy - edmnd: - units: "GW" - longname: "Electricity Demmand" - outname: ~ - wndpwo: - units: "GW" - longname: "Wind Power" - outname: ~ - dmndnetwnd: - units: "GW" - longname: "Demmand-net-Wind" - outname: ~ -# Indices - Spr32: - units: "days" - longname: > - Total count of days when daily maximum temp exceeded 32°C - from April 21st to June 21st - outname: ~ - SU35: - units: "days" - longname: > - Total count of days when daily maximum temp exceeded 35°C - from June 21st to September 21st - outname: ~ - SU36: - units: "days" - longname: > - Total count of days when daily maximum temp exceeded 36°C - from June 21st to September 21st - outname: ~ - SU40: - units: "days" - longname: > - Total count of days when daily maximum temp exceeded 40°C - from June 21st to September 21st - outname: ~ - GDD: - units: "days" - longname: > - The sum of the daily differences between daily mean - temperature and 10°C from April 1st to October 31st - outname: ~ - GST: - units: "°C" - longname: "The average temperature from April 1st to October 31st" - outname: ~ - SprTX: - units: "°C" - longname: "The average daily maximum temperature from April 1st to October 31st" - outname: ~ - WSDI: - units: "" - longname: > - The total count of days with at least 6 consecutives days - when the daily temperature maximum exceeds its 90th percentile - outname: ~ - SprR: - units: "mm" - longname: 'Total precipitation from April 21st to June 21st' - outname: ~ - HarR: - units: "mm" - longname: 'Total precipitation from August 21st to September 21st' - outname: ~ diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 85b1b0070612a4e96f9cf75652c2f56954491a00..59e5451a437d2ed414c6c7fd9060f300ed9bd029 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -1,9 +1,5 @@ -## TODO: Remove once Alba's fun is merged in CSTools -source("tools/tmp/CST_Calibration.R") - -## Entry params data and recipe? -calibrate_datasets <- function(data, recipe) { +calibrate_datasets <- function(recipe, data) { # Function that calibrates the hindcast using the method stated in the # recipe. 
If the forecast is not null, it calibrates it as well. # @@ -13,9 +9,9 @@ calibrate_datasets <- function(data, recipe) { method <- tolower(recipe$Analysis$Workflow$Calibration$method) if (method == "raw") { - warning("The Calibration module has been called, but the calibration ", - "method in the recipe is 'raw'. The hcst and fcst will not be ", - "calibrated.") + warn(recipe$Run$logger, "The Calibration module has been called, + but the calibration method in the recipe is 'raw'. + The hcst and fcst will not be calibrated.") fcst_calibrated <- data$fcst hcst_calibrated <- data$hcst CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####" @@ -53,8 +49,9 @@ calibrate_datasets <- function(data, recipe) { ## TODO: implement other calibration methods ## TODO: Restructure the code? if (!(method %in% CST_CALIB_METHODS)) { - stop("Calibration method in the recipe is not available for monthly", - " data.") + error(recipe$Run$logger, "Calibration method in the recipe is not + available for monthly data.") + stop() } else { ## Alba's version of CST_Calibration (pending merge) is being used # Calibrate the hindcast @@ -89,39 +86,43 @@ calibrate_datasets <- function(data, recipe) { } else if (recipe$Analysis$Variables$freq == "daily_mean") { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { - stop("Calibration method in the recipe is not available at daily ", - "frequency. Only quantile mapping 'qmap' is implemented.") + error(recipe$Run$logger, "Calibration method in the recipe is not + available for daily data. Only quantile mapping 'qmap' is + implemented.") + stop() } # Calibrate the hindcast + dim_order <- names(dim(data$hcst$data)) hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, exp_cor = NULL, - sample_dims = c("syear", - "time", - "ensemble"), - sample_length = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", method = "QUANT", - wet.day = FALSE, ncores = ncores, - na.rm = na.rm) + na.rm = na.rm, + wet.day = FALSE) + # Restore dimension order + hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) if (!is.null(data$fcst)) { # Calibrate the forecast fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, exp_cor = data$fcst, - sample_dims = c("syear", - "time", - "ensemble"), - sample_length = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", method = "QUANT", - wet.day = FALSE, ncores = ncores, - na.rm = na.rm) + na.rm = na.rm, + wet.day = FALSE) + # Restore dimension order + fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) } else { fcst_calibrated <- NULL } } } -print(CALIB_MSG) - ## TODO: Return observations too? 
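# The error-handling pattern introduced throughout this PR: log the message
# through the logger stored in the recipe, then stop(). A minimal standalone
# sketch, assuming a log4r-style logger in recipe$Run$logger (the threshold
# and appender below are illustrative, not taken from this diff):
library(log4r)
recipe <- list(Run = list(logger = logger(threshold = "INFO",
                                          appenders = console_appender())))
method <- "unsupported"  # hypothetical value to trigger the error branch
if (!(method %in% c("qmap"))) {
  error(recipe$Run$logger,
        "Calibration method in the recipe is not available for daily data.")
  stop()
}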
+ info(recipe$Run$logger, CALIB_MSG) return(list(hcst = hcst_calibrated, fcst = fcst_calibrated)) } diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index ee4381a03e05dc0f92d6e8cc31b3024cc541b7b9..8d54d63def1acb94639e0bf7732a040a7f98b206 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -5,24 +5,8 @@ source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") source("tools/libs.R") -# RECIPE FOR TESTING -# -------------------------------------------------------------------------------- -# recipe_file <- "modules/Loading/testing_recipes/recipe_3.yml" -# recipe_file <- "modules/Loading/testing_recipes/recipe_2.yml" -# recipe_file <- "modules/Loading/testing_recipes/recipe_1.yml" -load_datasets <- function(recipe_file) { - - recipe <- read_yaml(recipe_file) - recipe$filepath <- recipe_file - recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) - - ## TODO: this should come from the main script - # Create output folder and log: - logger <- prepare_outputs(recipe = recipe) - folder <- logger$foldername - log_file <- logger$logname - logger <- logger$logger +load_datasets <- function(recipe) { # ------------------------------------------- # Set params ----------------------------------------- @@ -40,7 +24,8 @@ load_datasets <- function(recipe_file) { store.freq <- recipe$Analysis$Variables$freq # get sdates array - sdates <- dates2load(recipe, logger) + ## LOGGER: Change dates2load to extract logger from recipe? + sdates <- dates2load(recipe, recipe$Run$logger) idxs <- NULL idxs$hcst <- get_timeidx(sdates$hcst, @@ -84,16 +69,19 @@ load_datasets <- function(recipe_file) { # ----------- obs.path <- paste0(archive$src, - obs.dir, store.freq, "/$var$", - reference_descrip[[store.freq]][[variable]],"$var$_$file_date$.nc") + obs.dir, store.freq, "/$var$", + reference_descrip[[store.freq]][[variable]], + "$var$_$file_date$.nc") hcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], + "$var$_$file_date$.nc") fcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], + "$var$_$file_date$.nc") # Define regrid parameters: #------------------------------------------------------------------- @@ -258,28 +246,28 @@ load_datasets <- function(recipe_file) { dim(dates) <- dim(dates_file) obs <- Start(dat = obs.path, - var = variable, - file_date = sort(unique(dates_file)), - time = dates, - time_var = 'time', - time_across = 'file_date', - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + 
latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) } # Adds ensemble dim to obs (for consistency with hcst/fcst) @@ -295,23 +283,77 @@ load_datasets <- function(recipe_file) { # Check for consistency between hcst and obs grid if (!(recipe$Analysis$Regrid$type == 'none')) { if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { - stop("hcst and obs don't share the same latitude.") + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") } if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { - stop("hcst and obs don't share the same longitude.") + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } } - + # Remove negative values in accumulated variables + dictionary <- read_yaml("conf/variable-dictionary.yml") + if (dictionary$vars[[variable]]$accum) { + info(recipe$Run$logger, + "Accumulated variable: setting negative values to zero.") + obs$data[obs$data < 0] <- 0 + hcst$data[hcst$data < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[fcst$data < 0] <- 0 + } + } + + # Convert prlr from m/s to mm/day + ## TODO: Make a unit conversion function? 
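# A sketch of the helper suggested by the TODO above; the function name is
# hypothetical and not part of this diff. Precipitation in m/s becomes
# mm/day via 86400 s/day * 1000 mm/m, mirroring the conversion applied below.
convert_prlr_units <- function(cube) {
  if (attr(cube$Variable, "variable")$units == "m s-1") {
    cube$data <- cube$data * 86400 * 1000  # m/s -> mm/day
    attr(cube$Variable, "variable")$units <- "mm/day"
  }
  return(cube)
}
# Possible usage on the s2dv_cube objects loaded above:
# obs <- convert_prlr_units(obs); hcst <- convert_prlr_units(hcst)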
+ if (variable == "prlr") { + # Verify that the units are m/s and the same in obs and hcst + if ((attr(obs$Variable, "variable")$units != + attr(hcst$Variable, "variable")$units) && + (attr(obs$Variable, "variable")$units == "m s-1")) { + + info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") + obs$data <- obs$data*84000*1000 + attr(obs$Variable, "variable")$units <- "mm/day" + hcst$data <- hcst$data*84000*1000 + attr(hcst$Variable, "variable")$units <- "mm/day" + if (!is.null(fcst)) { + fcst$data <- fcst$data*84000*1000 + attr(fcst$Variable, "variable")$units <- "mm/day" + } + } + } # Print a summary of the loaded data for the user, for each object - data_summary(hcst, store.freq) - data_summary(obs, store.freq) - if (!is.null(fcst)) { - data_summary(fcst, store.freq) + if (recipe$Run$logger$threshold <= 2) { + data_summary(hcst, recipe) + data_summary(obs, recipe) + if (!is.null(fcst)) { + data_summary(fcst, recipe) + } } - print("##### DATA LOADING COMPLETED SUCCESSFULLY #####") + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") ############################################################################ # @@ -328,7 +370,7 @@ load_datasets <- function(recipe_file) { # freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") # #if (any(is.na(hcst))){ - # fatal(logger, + # fatal(recipe$Run$logger, # paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", # " ################################################# ", # " ###### MISSING FILES #### ", @@ -342,7 +384,7 @@ load_datasets <- function(recipe_file) { #} # #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ - # fatal(logger, + # fatal(recipe$logger, # paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", # " ################################################# ", # " ###### MISSING FILES #### ", @@ -355,22 +397,12 @@ load_datasets <- function(recipe_file) { # quit(status=1) #} # - #info(logger, + #info(recipe$logger, # "######### DATA LOADING COMPLETED SUCCESFULLY ##############") ############################################################################ ############################################################################ - ## TODO: we need to define accumulated vars - #filters negative values in accum vars - #if (accum){ - # obs$data[obs$data < 0 ] <- 0 - # hcst$data[hcst$data < 0 ] <- 0 - # if (!is.null(fcst)){ - # fcst$data[fcst$data < 0 ] <- 0 - # } - #} - return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 7f99ac68b130a9fa2cd1073a645feac4872531ab..8046344b85bd15d75d684accf78efc2a95abb5ba 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -14,21 +14,21 @@ source("tools/libs.R") ## TODO: Remove once the fun is included in CSTools source("tools/tmp/as.s2dv_cube.R") - #==================================================================== # recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml" # recipe_file <- "modules/Loading/testing_recipes/recipe_decadal_daily.yml" -load_datasets <- function(recipe_file) { +load_datasets <- function(recipe) { - recipe <- read_yaml(recipe_file) - recipe$filename <- recipe_file archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Print Start() info or not DEBUG <- FALSE + ## TODO: this should come from the main script + # Create output folder and log: + #------------------------- # Read from recipe: #------------------------- @@ -103,7 +103,6 @@ 
load_datasets <- function(recipe_file) { Start_default_arg_list <- list( dat = path_list, #file.path(hcst.path, hcst.files), var = variable, - ensemble = member, syear = paste0(sdates_hcst), chunk = 'all', chunk_depends = 'syear', @@ -115,6 +114,7 @@ load_datasets <- function(recipe_file) { latitude_reorder = Sort(decreasing = TRUE), longitude = values(list(lons.min, lons.max)), longitude_reorder = circularsort, + ensemble = member, transform = regrid_params$fcst.transform, transform_extra_cells = 2, transform_params = list(grid = regrid_params$fcst.gridtype, @@ -144,7 +144,7 @@ load_datasets <- function(recipe_file) { # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) - hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'ensemble', 'syear', 'time', 'latitude', 'longitude')) + hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only @@ -161,11 +161,14 @@ load_datasets <- function(recipe_file) { tmp_time_attr <- attr(hcst, 'Variables')$common$time # change syear to c(sday, sweek, syear) - dim(hcst) <- c(dim(hcst)[1:3], sday = 1, sweek = 1, dim(hcst)[4:7]) - if (!identical(dim(tmp_time_attr), dim(hcst)[6:7])) { - stop("hcst has problem in matching data and time attr dimension.") + # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] + dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { + error(recipe$Run$logger, + "hcst data and time attribute dimensions do not match.") + stop() } - dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(hcst)[6:7]) + dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { @@ -213,8 +216,9 @@ load_datasets <- function(recipe_file) { # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions + ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) - fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'ensemble', 'syear', 'time', 'latitude', 'longitude')) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only @@ -231,11 +235,14 @@ load_datasets <- function(recipe_file) { tmp_time_attr <- attr(fcst, 'Variables')$common$time # change syear to c(sday, sweek, syear) - dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7]) - if (!identical(dim(tmp_time_attr), dim(fcst)[6:7])) { - stop("fcst has problem in matching data and time attr dimension.") + # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] + dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { + error(recipe$Run$logger, + "fcst data and time attribute dimensions do not match.") + stop() } - dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, 
dim(fcst)[6:7]) + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { @@ -247,8 +254,11 @@ load_datasets <- function(recipe_file) { fcst <- as.s2dv_cube(fcst) ) - if (!identical(dim(hcst$data)[-6], dim(fcst$data)[-6])) { - stop("hcst and fcst do not share the same dimension structure.") + # Only syear could be different + if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { + error(recipe$Run$logger, + "hcst and fcst do not share the same dimension structure.") + stop() } } else { @@ -334,20 +344,14 @@ load_datasets <- function(recipe_file) { # sday sweek syear time # 1 1 2 14 - -# # TODO: Reorder obs dims to match hcst dims? -# # Adds ensemble dim to obs (for consistency with hcst/fcst) -# default_dims <- c(dat = 1, var = 1, sweek = 1, -# sday = 1, syear = 1, time = 1, -# latitude = 1, longitude = 1, ensemble = 1) -# default_dims[names(dim(obs))] <- dim(obs) -# dim(obs) <- default_dims - - if (!identical(dim(obs), dim(hcst$data)[-3])) { - stop("obs and hcst dimensions do not match.") + # Only ensemble dim could be different + if (!identical(dim(obs), dim(hcst$data)[-9])) { + error(recipe$Run$logger, + "obs and hcst dimensions do not match.") + stop() } # Add ensemble dim to obs - dim(obs) <- c(dim(obs)[1:2], ensemble = 1, dim(obs)[3:8]) + dim(obs) <- c(dim(obs), ensemble = 1) # Change class from startR_array to s2dv_cube suppressWarnings( @@ -356,62 +360,164 @@ load_datasets <- function(recipe_file) { #------------------------------------------- -# Step 4. Print the summary of data -#------------------------------------------- - - # Print a summary of the loaded data for the user, for each object - data_summary(hcst, store.freq) - data_summary(obs, store.freq) - if (!is.null(fcst)) { - data_summary(fcst, store.freq) - } - - -#------------------------------------------- -# Step 5. Verify the consistance btwn hcst and obs +# Step 4. 
Verify the consistency of the data #------------------------------------------- # dimension if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { - stop("hcst and obs don't share the same dimension names.") + error(recipe$Run$logger, + "hcst and obs don't share the same dimension names.") + stop() } else { ens_ind <- which(names(dim(obs$data)) == 'ensemble') match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data))) - if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) stop("hcst and obs don't share the same dimension length.") + if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) { + error(recipe$Run$logger, + "hcst and obs don't share the same dimension length.") + stop() + } } # time attribute if (!identical(format(hcst$Dates$start, '%Y%m'), - format(obs$Dates$start, '%Y%m'))) - stop("hcst and obs don't share the same time.") + format(obs$Dates$start, '%Y%m'))) { + error(recipe$Run$logger, + "hcst and obs don't share the same time.") + stop() + } # lat and lon attributes - if (!identical(as.vector(hcst$lat), - as.vector(obs$lat))) - stop("hcst and obs don't share the same latitude.") - if (!identical(as.vector(hcst$lon), - as.vector(obs$lon))) - stop("hcst and obs don't share the same longitude.") + if (!(recipe$Analysis$Regrid$type == 'none')) { + if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + + if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + } # Check fcst if (!is.null(fcst)) { # dimension if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { - stop("hcst and fcst don't share the same dimension names.") + error(recipe$Run$logger, + "hcst and fcst don't share the same dimension names.") + stop() } else { ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) - if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) - stop("hcst and fcst don't share the same dimension length.") + if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) { + error(recipe$Run$logger, + "hcst and fcst don't share the same dimension length.") + stop() + } } # lat and lon attributes - if (!identical(as.vector(hcst$lat), - as.vector(fcst$lat))) - stop("hcst and fcst don't share the same latitude.") - if (!identical(as.vector(hcst$lon), - as.vector(fcst$lon))) - stop("hcst and fcst don't share the same longitude.") + if (!(recipe$Analysis$Regrid$type == 
'none')) { + if (!identical(as.vector(hcst$lat), as.vector(fcst$lat))) { + lat_error_msg <- paste("Latitude mismatch between hcst and fcst.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + fcst_lat_msg <- paste0("First fcst lat: ", fcst$lat[1], + "; Last fcst lat: ", fcst$lat[length(fcst$lat)]) + info(recipe$Run$logger, fcst_lat_msg) + stop("hcst and fcst don't share the same latitudes.") + } + + if (!identical(as.vector(hcst$lon), as.vector(fcst$lon))) { + lon_error_msg <- paste("Longitude mismatch between hcst and fcst.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + fcst_lon_msg <- paste0("First fcst lon: ", fcst$lon[1], + "; Last fcst lon: ", fcst$lon[length(fcst$lon)]) + info(recipe$Run$logger, fcst_lon_msg) + stop("hcst and fcst don't share the same longitudes.") + } + } + + } + +#------------------------------------------- +# Step 5. Tune data +#------------------------------------------- + # Remove negative values in accumulated variables + dictionary <- read_yaml("conf/variable-dictionary.yml") + if (dictionary$vars[[variable]]$accum) { + info(recipe$Run$logger, + " Accumulated variable: setting negative values to zero.") + obs$data[obs$data < 0] <- 0 + hcst$data[hcst$data < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[fcst$data < 0] <- 0 + } + } + # Convert precipitation to mm/day + ## TODO: Make a function? + if (variable == "prlr") { + # Verify that the units are m/s and the same in obs and hcst + if ((attr(obs$Variable, "variable")$units == + attr(hcst$Variable, "variable")$units) && + (attr(obs$Variable, "variable")$units == "m s-1")) { + + info(recipe$Run$logger, + "Converting precipitation from m/s to mm/day.") + obs$data <- obs$data*86400*1000 + attr(obs$Variable, "variable")$units <- "mm/day" + hcst$data <- hcst$data*86400*1000 + attr(hcst$Variable, "variable")$units <- "mm/day" + if (!is.null(fcst)) { + fcst$data <- fcst$data*86400*1000 + attr(fcst$Variable, "variable")$units <- "mm/day" + } + } } +------------------------------------------- +# Step 6. Print summary +#------------------------------------------- + + # Print a summary of the loaded data for the user, for each object + if (recipe$Run$logger$threshold <= 2) { + data_summary(hcst, recipe) + data_summary(obs, recipe) + if (!is.null(fcst)) { + data_summary(fcst, recipe) + } + } + + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/testing_recipes/recipe_5.yml b/modules/Loading/testing_recipes/recipe_system2c3s-prlr-nofcst.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_5.yml rename to modules/Loading/testing_recipes/recipe_system2c3s-prlr-nofcst.yml diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml new file mode 100644 index 0000000000000000000000000000000000000000..ca52e8cc161ece1eb0a23e1be391df1b2318bf5f --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml @@ -0,0 +1,46 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: rsds + freq: monthly_mean + Datasets: + System: + name: system5c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 1 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_2.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_2.yml rename to modules/Loading/testing_recipes/recipe_system5c3s-tas.yml diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml new file mode 100644 index 0000000000000000000000000000000000000000..197c109c0e5dbb7f2419131cc2f4a328995dc34f --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml @@ -0,0 +1,46 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: prlr + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 1 + remove_NAs: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_6.yml b/modules/Loading/testing_recipes/recipe_system7c3s-tas-specs.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_6.yml rename to modules/Loading/testing_recipes/recipe_system7c3s-tas-specs.yml diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml similarity index 98% rename from modules/Loading/testing_recipes/recipe_4.yml rename to modules/Loading/testing_recipes/recipe_system7c3s-tas.yml index 4e7896f526108a94abdda8672f000155d7522ccb..f83f91bb44f645ddbc84f6541f57fb82a09caa7b 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml @@ -18,7 +18,7 @@ Analysis: hcst_start: '1993' hcst_end: '2016' ftime_min: 1 - ftime_max: 3 + ftime_max: 6 Region: latmin: -10 latmax: 10 diff --git a/modules/Loading/testing_recipes/recipe_1.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_1.yml rename to modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml diff --git a/modules/Loading/testing_recipes/recipe_3.yml 
b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml similarity index 100% rename from modules/Loading/testing_recipes/recipe_3.yml rename to modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml diff --git a/modules/Loading/testing_recipes/recipe_test-logging.yml b/modules/Loading/testing_recipes/recipe_test-logging.yml new file mode 100644 index 0000000000000000000000000000000000000000..372f6d83bdd31b8258bd28aa42237c95a8111ff3 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_test-logging.yml @@ -0,0 +1,47 @@ +Description: + Author: V. Agudetse + Info: Light recipe to raise some errors/warnings and test the logging system + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 1 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: qmap + Skill: + metric: mean_bias bias_SS + Probabilities: + percentiles: + Indicators: + index: no + ncores: 1 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index f50a06ff760742f199ccf3582dd21f688b4081ee..ed0933f2a6c053b0e2a51880a072213789310292 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -2,10 +2,11 @@ source("modules/Saving/paths2save.R") -save_data <- function(recipe, archive, data, +save_data <- function(recipe, data, calibrated_data = NULL, skill_metrics = NULL, - probabilities = NULL) { + probabilities = NULL, + archive = NULL) { # Wrapper for the saving functions. # recipe: The auto-s2s recipe @@ -19,14 +20,20 @@ save_data <- function(recipe, archive, data, if (is.null(recipe)) { stop("The 'recipe' parameter is mandatory.") } - if (is.null(archive)) { - stop("The 'archive' parameter is mandatory.") - } + if (is.null(data)) { stop("The 'data' parameter is mandatory. It should be the output of", "load_datasets().") } - + if (is.null(archive)) { + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive.yml"))$archive + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive_decadal.yml"))$archive + } + } dict <- read_yaml("conf/variable-dictionary.yml") # Create output directory @@ -76,18 +83,21 @@ save_data <- function(recipe, archive, data, } } -get_global_attributes <- function(recipe) { +get_global_attributes <- function(recipe, archive) { # Generates metadata of interest to add to the global attributes of the # netCDF files. 
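# Usage sketch (illustrative, not part of this changeset): with the new
# optional archive argument, callers can omit it and save_data() resolves
# the correct archive file from recipe$Analysis$Horizon, e.g.:
#   save_data(recipe, data, calibrated_data = calibrated_data,
#             skill_metrics = skill_metrics, probabilities = probabilities)
# The global attributes written below then include institution_system and
# institution_reference, looked up from the name/institution fields added
# to conf/archive.yml earlier in this diff.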
parameters <- recipe$Analysis hcst_period <- paste0(parameters$Time$hcst_start, " to ", parameters$Time$hcst_end) current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) + system_name <- parameters$Datasets$System$name + reference_name <- parameters$Datasets$Reference$name attrs <- list(reference_period = hcst_period, - institution = "BSC-CNS", - system = parameters$Datasets$System$name, - reference = parameters$Datasets$Reference$name, + institution_system = archive$System[[system_name]]$institution, + institution_reference = archive$Reference[[reference_name]]$institution, + system = system_name, + reference = reference_name, calibration_method = parameters$Workflow$Calibration$method, computed_on = current_time) @@ -162,7 +172,7 @@ save_forecast <- function(data_cube, variable <- data_cube$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name - global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$System[[global_attributes$system]]$calendar @@ -205,7 +215,8 @@ save_forecast <- function(data_cube, leadtimes <- as.numeric(dates - init_date)/3600 syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) # expect dim = [sday = 1, sweek = 1, syear, time] + # expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) for (i in syears) { # Select year from array and rearrange dimensions fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) @@ -244,8 +255,9 @@ save_forecast <- function(data_cube, # Select start date if (fcst.horizon == 'decadal') { - #NOTE: Not good to use data_cube$load_parameters$dat1 because decadal data has been reshaped -# fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') + ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal + ## data has been reshaped + # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') # init_date is like "1990-11-01" init_date <- as.POSIXct(init_date) @@ -279,7 +291,7 @@ save_forecast <- function(data_cube, ArrayToNc(vars, outfile) } } - print("##### FCST SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, "##### FCST SAVED TO NETCDF FILE #####") } @@ -300,7 +312,7 @@ save_observations <- function(data_cube, variable <- data_cube$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name - global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$Reference[[global_attributes$reference]]$calendar @@ -327,7 +339,8 @@ save_observations <- function(data_cube, leadtimes <- as.numeric(dates - init_date)/3600 syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) # expect dim = [sday = 1, sweek = 1, syear, time] + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) for (i in syears) { # Select year from array and rearrange dimensions fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) @@ -414,11 +427,11 @@ save_observations <- function(data_cube, ArrayToNc(vars, outfile) } } - print("##### OBS SAVED TO NETCDF FILE #####") + 
info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") } ## TODO: Place inside a function somewhere -# if (tolower(agg) == "country"){ +# if (tolower(agg) == "country") { # load(mask.path) # grid <- europe.countries.iso # } else { @@ -445,7 +458,7 @@ save_metrics <- function(skill, } # Add global and variable attributes - global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) attr(skill[[1]], 'global_attrs') <- global_attributes for (i in 1:length(skill)) { @@ -531,7 +544,7 @@ save_metrics <- function(skill, vars <- c(vars, skill) ArrayToNc(vars, outfile) } - print("##### SKILL METRICS SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") } save_corr <- function(skill, @@ -553,7 +566,7 @@ save_corr <- function(skill, } # Add global and variable attributes - global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) attr(skill[[1]], 'global_attrs') <- global_attributes for (i in 1:length(skill)) { @@ -638,7 +651,8 @@ save_corr <- function(skill, vars <- c(vars, skill) ArrayToNc(vars, outfile) } - print("##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, + "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") } save_percentiles <- function(percentiles, @@ -659,7 +673,7 @@ save_percentiles <- function(percentiles, } # Add global and variable attributes - global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) attr(percentiles[[1]], 'global_attrs') <- global_attributes for (i in 1:length(percentiles)) { @@ -737,7 +751,7 @@ save_percentiles <- function(percentiles, vars <- c(vars, percentiles) ArrayToNc(vars, outfile) } - print("##### PERCENTILES SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") } save_probabilities <- function(probs, @@ -759,7 +773,7 @@ save_probabilities <- function(probs, variable <- data_cube$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name - global_attributes <- get_global_attributes(recipe) + global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$System[[global_attributes$system]]$calendar @@ -783,7 +797,8 @@ save_probabilities <- function(probs, leadtimes <- as.numeric(dates - init_date)/3600 syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) # expect dim = [sday = 1, sweek = 1, syear, time] + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) for (i in syears) { # Select year from array and rearrange dimensions probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') @@ -845,5 +860,5 @@ save_probabilities <- function(probs, ArrayToNc(vars, outfile) } } - print("##### PROBABILITIES SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, "##### PROBABILITIES SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/export_2_nc-s2s4e.R b/modules/Saving/export_2_nc-s2s4e.R deleted file mode 100644 index abf526e5f105021b41ece65ff5b52be09b8e4f51..0000000000000000000000000000000000000000 --- a/modules/Saving/export_2_nc-s2s4e.R +++ /dev/null @@ -1,583 +0,0 @@ -library(easyNCDF) - -save_bias <- function(variable, - data, - fcst.sdate, - outfile, - leadtimes, - grid, 
- agg, - fcst.type) { - - lalo <- c('longitude', 'latitude') #decathlon subseasonal - - ## TODO: Sort out different aggregation cases - # if (tolower(agg) == "global") { - # data <- Reorder(data, c(lalo,'time')) - # } else { - # data <- Reorder(data, c('country', 'time')) - # } - - if (variable %in% c("tas", "tasmin", "tasmax")) { - obs <- data; units <- "ºC"; - var.longname <- "Temperature bias" - } else { - # Unit conversion - data.conv <- convert_data(list(fcst=data,test=data),variable,leadtimes,fcst.type,"forecast") - obs <- data.conv$data$fcst; units <- data.conv$units; - var.longname <- data.conv$var.longname - remove(data.conv) - } - - if (tolower(agg) == "country"){ - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.sdname <- paste0("Country-Aggregated ", var.longname) - } else { - dims <- c(lalo,'time') - var.expname <- get_outname(variable,VARS_DICT) - var.sdname <- var.longname - } - - metadata <- list(obs = list(name = var.expname, - standard_name = var.sdname, - units = units)) - attr(obs, 'variables') <- metadata - names(dim(obs)) <- dims - - times <- get_times(fcst.type, leadtimes, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(list(country, time, time_step, obs), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - ArrayToNc(list(latlon$lat, latlon$lon, time, obs, time_step), outfile) - - } -} - -save_obs_country_file <- - function(variable, - obs, - fcst.sdate, - outfile, - fcst.type, - monthnames) { - - if (fcst.type == 'seasonal'){ - mask.path <- 'masks/mask_europe_system5_2.Rdata' - } else { - mask.path <- 'masks/mask_europe_S2S_ecmwf.Rdata' - } - - load(mask.path) - - ifelse(exists("lonlat_dctln"), - lalo <- c('longitude','latitude'), #decathlon subseasonal - lalo <- c('latitude','longitude')) #no decathlon - - obs <- Reorder(obs, c(lalo,'time')) -# obs <- Reorder(obs, c('latitude','longitude','time')) - - obs.country <-Apply(data=list(pointdata=obs), -# target_dims=c('latitude','longitude'), - target_dims=lalo, - output_dims=c('country'), - mask.path=mask.path, - ncores=2, - split_factor=1, - fun = Country_mean)[[1]] - - vars <- yaml.load_file(VARS_DICT)$vars - units <- vars[[variable]]$units - var.longname <- vars[[variable]]$longname - - metadata <- list(obs.country = - list( - name = paste0(variable, "_country"), - standard_name = paste0(var.longname, " (Country-Aggregated)"), - units = units - ) - ) - - attr(obs.country, 'variables') <- metadata - names(dim(obs.country)) <- c('Country', 'time') - - times <- get_times(fcst.type, monthnames, fcst.sdate) - time <- times$time - time_step <- times$time_step - - country <- 1:length(europe.countries.iso) - dim(country) <- length(country) - metadata <- list( country = list( - standard_name = paste(europe.countries.iso, collapse=" "), - units = 'Country ISO 3166-1 alpha 3 Code')) #if region, these units are incorrect. 
- attr(country, 'variables') <- metadata - names(dim(country)) <- 'Country' - - - ArrayToNc(list(country,time,time_step,obs.country), - outfile) - - } - -save_obs <- function(variable, - data, - fcst.sdate, - outfile, - leadtimes, - grid, - agg, - fcst.type) { - - lalo <- c('longitude','latitude') #decathlon subseasonal - - if (tolower(agg) == "global"){ - data <- Reorder(data, c(lalo,'time')) - } else { - data <- Reorder(data, c('country', 'time')) - } - - data.conv <- convert_data(list(fcst=data,test=data),variable,leadtimes,fcst.type,"forecast") - obs <- data.conv$data$fcst; units <- data.conv$units; - var.longname <- data.conv$var.longname - remove(data.conv) - - if (tolower(agg) == "country"){ - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.sdname <- paste0("Country-Aggregated ", var.longname) - } else { - dims <- c(lalo,'time') - var.expname <- get_outname(variable,VARS_DICT) - var.sdname <- var.longname - } - - metadata <- list(obs = list(name = var.expname, - standard_name = var.sdname, - units = units)) - attr(obs, 'variables') <- metadata - names(dim(obs)) <- dims - - times <- get_times(fcst.type, leadtimes, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(list(country, time, time_step, obs), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - ArrayToNc(list(latlon$lat, latlon$lon, time, obs, time_step), outfile) - - } -} - - -save_forecast <- function(variable, - fcst, - fcst.sdate, - outfile, - leadtimes, - grid, - agg, - fcst.type) { - - lalo <- c('longitude','latitude') #decathlon subseasonal - - if (tolower(agg) == "global"){ - fcst <- Reorder(fcst, c(lalo,'member', - 'time')) - } else { - fcst <- Reorder(fcst, c('country','member', 'time')) - } - - # Unit conversion - fcst.conv <- convert_data(list(fcst=fcst,test=fcst),variable,leadtimes,fcst.type,"forecast") - fcst <- fcst.conv$data$fcst; units <- fcst.conv$units; - var.longname <- fcst.conv$var.longname - remove(fcst.conv) - - if (tolower(agg) == "country"){ - dims <- c('Country', 'ensemble', 'time') - var.expname <- paste0(variable, '_country') - var.sdname <- paste0("Country-Aggregated ", var.longname) - } else { - dims <- c(lalo,'ensemble', 'time') - var.expname <- get_outname(variable,VARS_DICT) - var.sdname <- var.longname - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - units = units)) - attr(fcst, 'variables') <- metadata - names(dim(fcst)) <- dims - - times <- get_times(fcst.type, leadtimes, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(list(country, time, time_step, fcst), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - ArrayToNc(list(latlon$lat, latlon$lon, time, fcst, time_step), outfile) - - } -} - -save_probs <- function(variable, - probs, - fcst.sdate, - outfile, - monthnames, - grid, - agg, - fcst.type) { - - lalo <- c('longitude','latitude') #decathlon subseasonal - - if (tolower(agg) == "global"){ - probs <- lapply(probs, function(x){ - Reorder(x, c('bin',lalo, 'time'))}) - } - - pbn <- Subset(probs$tercile, 'bin', list(1), drop='selected') - pn <- Subset(probs$tercile, 'bin', list(2), drop='selected') - pan <- Subset(probs$tercile, 'bin', list(3), drop='selected') - p10 <- Subset(probs$extreme, 'bin', list(1), drop='selected') - p90 <- Subset(probs$extreme, 'bin', list(3), drop='selected') - - 
pn.sdname <- paste('Probability below normal category ', sep=''); - pan.sdname <- paste('Probability above normal category ', sep=''); - pbn.sdname <- paste('Probability normal category ', sep=''); - p10.sdname <- paste('Probability below extreme category ', sep=''); - p90.sdname <- paste('Probability above extreme category ', sep=''); - - if (tolower(agg) == "country"){ - dims <- c('Country', 'time') - pn.sdanme <- paste0('Country-Aggregated ', pn.sdname) - pbn.sdanme <- paste0('Country-Aggregated ', pbn.sdname) - pan.sdanme <- paste0('Country-Aggregated ', pan.sdname) - p10.sdanme <- paste0('Country-Aggregated ', p10.sdname) - p90.sdanme <- paste0('Country-Aggregated ', p90.sdname) - } else { - dims <- c(lalo, 'time') - pn.sdanme <- paste0('Global ', pn.sdname) - pbn.sdanme <- paste0('Global ', pbn.sdname) - pan.sdanme <- paste0('Global ', pan.sdname) - p10.sdanme <- paste0('Global ', p10.sdname) - p90.sdanme <- paste0('Global ', p90.sdname) - } - - metadata <- list(pbn = list(name = 'prob_bn', - standard_name = pbn.sdname ), - pn = list(name = 'prob_n', - standard_name = pn.sdname), - pan = list(name = 'prob_an', - standard_name = pan.sdname), - p10 = list(name = 'prob_bp10', - standard_name = p10.sdname), - p90 = list(name = 'prob_ap90', - standard_name = p90.sdname)) - - attr(pbn, 'variables') <- metadata[1] - attr(pn, 'variables') <- metadata[2] - attr(pan, 'variables') <- metadata[3] - attr(p10, 'variables') <- metadata[4] - attr(p90, 'variables') <- metadata[5] - - names(dim(pbn)) <- dims - names(dim(pn)) <- dims - names(dim(pan)) <- dims - names(dim(p10)) <- dims - names(dim(p90)) <- dims - - times <- get_times(fcst.type, monthnames, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(list(country, time, pbn, pn, pan, p10, p90, time_step), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - latitude <- latlon$lat; longitude <- latlon$lon - ArrayToNc(list(latitude, longitude, time, pbn, pn, pan, p10, p90, - time_step), outfile) - } - - } - -get_times <- function (fcst.type, leadtimes, sdate){ - - switch(tolower(fcst.type), - "seasonal" = {len <- length(leadtimes); ref <- 'months since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "sub_obs" = {len <- 52; ref <- 'week of the year '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- 4; ref <- 'weeks since '; - stdname <- ''} - ) - - time <- 1:len - dim(time) <- length(time) - #metadata <- list(time = list(standard_name = stdname, - metadata <- list(time = list( - units = paste0(ref, sdate, ' 00:00:00'))) - attr(time, 'variables') <- metadata - names(dim(time)) <- 'time' - - time_step <- 1 - dim(time_step) <- length(time_step) - metadata <- list(time_step = list(units = paste0( - ref, sdate, ' 00:00:00'))) - attr(time_step, 'variables') <- metadata - names(dim(time_step)) <- 'time_step' - - sdate <- 1:length(sdate) - dim(sdate) <- length(sdate) - metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), collapse=", "), - units = paste0('Init date'))) - attr(sdate, 'variables') <- metadata - names(dim(sdate)) <- 'sdate' - - return(list(time_step=time_step, time=time, sdate=sdate)) - -} - -get_countries <- function(europe.countries.iso){ - - country <- 1:length(europe.countries.iso) - dim(country) <- length(country) - metadata <- list( country = list( - standard_name = paste(europe.countries.iso, collapse=" "), - units = 'Country ISO 3166-1 alpha 3 Code')) - attr(country, 
'variables') <- metadata - names(dim(country)) <- 'Country' - return(country) - -} - -get_latlon <- function(lat, lon){ - - longitude <- lon - dim(longitude) <- length(longitude) - metadata <- list(longitude = list(units = 'degrees_east')) - attr(longitude, 'variables') <- metadata - names(dim(longitude)) <- 'longitude' - - latitude <- lat - dim(latitude) <- length(latitude) - metadata <- list(latitude = list(units = 'degrees_north')) - attr(latitude, 'variables') <- metadata - names(dim(latitude)) <- 'latitude' - - return(list(lat=latitude,lon=longitude)) - -} - -save_metrics <- function(variable, - skill, - fcst.sdate, - grid, - outfile, - monthnames, - fcst.type, - agg) -{ - - lalo <- c('longitude', 'latitude') - - ## TODO: Sort out aggregation - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x){ - Reorder(x[[1]], c(lalo, 'time'))}) - } - - for (i in 1:length(skill)) { - - metric <- names(skill[i]) - if (tolower(agg) == "country"){ - sdname <- paste0(names(metric), " region-aggregated metric") - dims <- c('Country', 'time') - } else { - sdname <- paste0(names(metric), " grid point metric") - dims <- c(lalo, 'time') - } - metadata <- list(name = metric, standard_name = sdname) - - attr(skill[i], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } - - times <- get_times(fcst.type, monthnames, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(append(country, time, skill, time_step), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill, list(time_step)) - ArrayToNc(vars, outfile) - } -} - -convert_seasosnal_prlr <- function(data2convert, leadtimes,filetype){ - - ind <- 1:length(leadtimes) - #dates <- paste0(leadtimes,"01") - # computes the last day of the month - lastday <- sapply(ind, function(x) - {as.integer(substr((seq(as.Date(leadtimes[x],"%Y%m%d"), - length=2,by="months")-1)[2], - 9, 10))}) - - if (filetype == "terciles"){ - ter <- sapply(ind, function(x) { - Subset(data2convert$tercile, along='time', - indices=x,drop='selected')*1000*3600*24*lastday[x]}, - simplify='array') - ext <- sapply(ind, function(x) { - Subset(data2convert$extreme, along='time', - indices=x,drop='selected')*1000*3600*24*lastday[x]}, - simplify='array') - - data2convert <- list(tercile=ter, extreme=ext) - } else { - - ens <- sapply(ind, function(x) { - Subset(data2convert$fcst, along='time', - indices=x,drop='selected')*1000*3600*24*lastday[x]}, - simplify='array') - - data2convert <- list(fcst=ens) - } - - return(data2convert) -} - -convert_data <- function(data,variable, leadtimes, fcst.type,filetype){ - - vars <- yaml.load_file(VARS_DICT)$vars - units <- vars[[variable]]$units - var.longname <- vars[[variable]]$longname - - if (variable %in% c("tas","tasmin","tasmax")){ - data <- lapply(data, function(x){ x - 273.15}) - } else if (variable %in% c("psl")){ - data <- lapply(data, function(x){ x/100}) - } else { - print("WARNING: NO DATA CONVERSION APPLIED") - } - - - return(list(data=data, units=units, var.longname=var.longname)) - -} - - -## TODO: implement lists as in save_metrics -save_terciles <- function(variable, - terciles, - fcst.sdate, - grid, - outfile, - leadtimes, - fcst.type, - agg) { - - lalo <- c('longitude','latitude') #decathlon subseasonal - - if (tolower(agg) == "global"){ - terciles <- lapply(terciles, function(x){ - Reorder(x, c('bin',lalo, 'time'))}) - } - - terciles.conv <- 
convert_data(terciles,variable,leadtimes,fcst.type,"terciles") - terciles <- terciles.conv$data; units <- terciles.conv$units; - var.longname <- terciles.conv$var.longname - remove(terciles.conv) - - p33 <- Subset(terciles$tercile, 'bin', list(1), drop='selected') - - p66 <- Subset(terciles$tercile, 'bin', list(2), drop='selected') - p10 <- Subset(terciles$extreme, 'bin', list(1), drop='selected') - p90 <- Subset(terciles$extreme, 'bin', list(2), drop='selected') - - p33.sdname <- paste('Lower Tercile ', sep=''); - p66.sdname <- paste('Upper Tercile ', sep=''); - p10.sdname <- paste('Lower extreme', sep=''); - p90.sdname <- paste('Upper extreme', sep=''); - - if (tolower(agg) == "country"){ - dims <- c('Country', 'time') - p33.sdanme <- paste0('Country-Aggregated ', p33.sdname) - p66.sdanme <- paste0('Country-Aggregated ', p66.sdname) - p10.sdanme <- paste0('Country-Aggregated ', p10.sdname) - p90.sdanme <- paste0('Country-Aggregated ', p90.sdname) - } else { - dims <- c(lalo, 'time') - p33.sdanme <- paste0('Global ', p33.sdname) - p66.sdanme <- paste0('Global ', p66.sdname) - p10.sdanme <- paste0('Gloabl ', p10.sdname) - p90.sdanme <- paste0('Global ', p90.sdname) - } - - metadata <- list(pbn = list(name = 'p33', - standard_name = p33.sdname ), - pn = list(name = 'p66', - standard_name = p66.sdname), - pan = list(name = 'p10', - standard_name = p10.sdname), - p10 = list(name = 'p90', - standard_name = p90.sdname)) - - attr(p33, 'variables') <- metadata[1] - attr(p66, 'variables') <- metadata[2] - attr(p10, 'variables') <- metadata[3] - attr(p90, 'variables') <- metadata[4] - - names(dim(p33)) <- dims - names(dim(p66)) <- dims - names(dim(p10)) <- dims - names(dim(p90)) <- dims - - times <- get_times(fcst.type, leadtimes, fcst.sdate) - time <- times$time - time_step <- times$time_step - - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(list(country, time, p33, p66, p10, p90, time_step), outfile) - - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - ArrayToNc(list(latlon$lat, latlon$lon, time, p33, p66, p10, p90, time_step), outfile) - } -} diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 70f6cc9243a6465d452d9d375e6f95bafccc8682..f48ebe7b058969568bee2c14d47394de18664c0e 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -39,7 +39,7 @@ get_dir <- function(recipe, agg = "global") { ## TODO: Get aggregation from recipe ## TODO: Add time frequency - outdir <- recipe$Run$output_dir + outdir <- paste0(recipe$Run$output_dir, "/outputs/") variable <- recipe$Analysis$Variables$name if (!is.null(recipe$Analysis$Time$fcst_year)) { if (tolower(recipe$Analysis$Horizon) == 'decadal') { diff --git a/modules/Skill/CRPS.R b/modules/Skill/CRPS.R deleted file mode 100644 index 942ec9e4796860aac501001b322b4cf5e024c1ef..0000000000000000000000000000000000000000 --- a/modules/Skill/CRPS.R +++ /dev/null @@ -1,119 +0,0 @@ -#'Compute the Continuous Ranked Probability Score -#' -#'The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous -#'version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill metric -#'to evaluate the full distribution of probabilistic forecasts. It has a negative -#'orientation (i.e., the higher-quality forecast the smaller CRPS) and it rewards -#'the forecast that has probability concentration around the observed value. In case -#'of a deterministic forecast, the CRPS is reduced to the mean absolute error. It has -#'the same units as the data. 
The function is based on enscrps_cpp from SpecsVerification. -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast. The default value is 'member'. -#'@param Fair A logical indicating whether to compute the FairCRPS (the -#' potential RPSS that the forecast would have with an infinite ensemble size). -#' The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A numerical array of CRPS with the same dimensions as "exp" except the -#''time_dim' and 'memb_dim' dimensions. -#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#'res <- CRPS(exp = exp, obs = obs) -#' -#'@import multiApply -#'@importFrom SpecsVerification enscrps_cpp -#'@importFrom ClimProjDiags Subset -#'@export -CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', - Fair = FALSE, ncores = NULL) { - - # Check inputs - ## exp and obs (1) - if (!is.array(exp) | !is.numeric(exp)) - stop('Parameter "exp" must be a numeric array.') - if (!is.array(obs) | !is.numeric(obs)) - stop('Parameter "obs" must be a numeric array.') - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - ## exp and obs (2) - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]),1)){ - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else {stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1).")} - } - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (!identical(length(name_exp), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim'.")) - } - ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { - stop("Parameter 'Fair' must be either TRUE or FALSE.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - - crps <- Apply(data = list(exp = 
exp, obs = obs), - target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim), - output_dims = time_dim, - fun = .CRPS, Fair = Fair, - ncores = ncores)$output1 - - # Return only the mean RPS - crps <- MeanDims(crps, time_dim, na.rm = FALSE) - - return(crps) -} - -.CRPS <- function(exp, obs, Fair = FALSE) { - # exp: [sdate, memb] - # obs: [sdate] - - if (Fair) { # FairCRPS - R_new <- Inf - } else {R_new <- NA} - - crps <- SpecsVerification::enscrps_cpp(ens = exp, obs = obs, R_new = R_new) - - return(crps) -} diff --git a/modules/Skill/CRPSS.R b/modules/Skill/CRPSS.R deleted file mode 100644 index 9f5edbd5fe1372fedc3c130cd49a16afa7b24fd6..0000000000000000000000000000000000000000 --- a/modules/Skill/CRPSS.R +++ /dev/null @@ -1,172 +0,0 @@ -#'Compute the Continuous Ranked Probability Skill Score -#' -#'The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the skill score -#'based on the Continuous Ranked Probability Score (CRPS; Wilks, 2011). It can be used to -#'assess whether a forecast presents an improvement or worsening with respect to -#'a reference forecast. The CRPSS ranges between minus infinite and 1. If the -#'CRPSS is positive, it indicates that the forecast has higher skill than the -#'reference forecast, while a negative value means that it has a lower skill. -#'Examples of reference forecasts are the climatological forecast (same -#'probabilities for all categories for all time steps), persistence, a previous -#'model version, and another model. It is computed as CRPSS = 1 - CRPS_exp / CRPS_ref. -#'The statistical significance is obtained based on a Random Walk test at the -#'95% confidence level (DelSole and Tippett, 2016). -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. -#'@param ref A named numerical array of the reference forecast data with at -#' least time dimension. The dimensions must be the same as 'exp' except -#' 'memb_dim'. If it is NULL, the climatological forecast is used as reference -#' forecast. The default value is NULL. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast and the reference forecast. The -#' default value is 'member'. -#'@param Fair A logical indicating whether to compute the FairCRPSS (the -#' potential CRPSS that the forecast would have with an infinite ensemble size). -#' The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'\item{$crpss}{ -#' A numerical array of the CRPSS with the same dimensions as "exp" except the -#' 'time_dim' and 'memb_dim' dimensions. -#'} -#'\item{$sign}{ -#' A logical array of the statistical significance of the CRPSS with the same -#' dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. 
-#'} -#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'res <- CRPSS(exp = exp, obs = obs) ## climatology as reference forecast -#'res <- CRPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast -#' -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#'@export -CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', - Fair = FALSE, ncores = NULL) { - - # Check inputs - ## exp, obs, and ref (1) - if (!is.array(exp) | !is.numeric(exp)) - stop('Parameter "exp" must be a numeric array.') - if (!is.array(obs) | !is.numeric(obs)) - stop('Parameter "obs" must be a numeric array.') - if (!is.null(ref)) { - if (!is.array(ref) | !is.numeric(ref)) - stop('Parameter "ref" must be a numeric array.') - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - if (!is.null(ref) & !time_dim %in% names(dim(ref))) { - stop("Parameter 'time_dim' is not found in 'ref' dimension.") - } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { - stop("Parameter 'memb_dim' is not found in 'ref' dimension.") - } - ## exp and obs (2) - if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]),1)){ - obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else {stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1).")} - } - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (!identical(length(name_exp), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) - } - if (!is.null(ref)) { - name_ref <- sort(names(dim(ref))) - name_ref <- name_ref[-which(name_ref == memb_dim)] - if (!identical(length(name_exp), length(name_ref)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) - } - } - ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { - stop("Parameter 'Fair' must be either TRUE or FALSE.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - - # Compute CRPSS - if (!is.null(ref)) { # use "ref" as reference forecast - data <- list(exp = exp, obs = obs, ref = ref) - target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim, - ref = c(time_dim, memb_dim)) - } else { - data <- list(exp = exp, obs = obs) - target_dims = list(exp = c(time_dim, 
memb_dim), - obs = time_dim) - } - output <- Apply(data, - target_dims = target_dims, - fun = .CRPSS, - Fair = Fair, - ncores = ncores) - - return(output) -} - -.CRPSS <- function(exp, obs, ref = NULL, Fair = FALSE) { - # exp: [sdate, memb] - # obs: [sdate] - # ref: [sdate, memb] or NULL - - # CRPS of the forecast - crps_exp <- .CRPS(exp = exp, obs = obs, Fair = Fair) - - # CRPS of the reference forecast - if (is.null(ref)){ - ## using climatology as reference forecast - ## all the time steps are used as if they were members - ## then, ref dimensions are [sdate, memb], both with length(sdate) - ref <- array(data = obs, dim = c(member = length(obs))) - ref <- InsertDim(data = ref, posdim = 1, lendim = length(obs), name = 'sdate') - } - crps_ref <- .CRPS(exp = ref, obs = obs, Fair = Fair) - - # CRPSS - crpss <- 1 - mean(crps_exp) / mean(crps_ref) - - # Significance - sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref)$signif - - return(list(crpss = crpss, sign = sign)) -} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 5b6807b62338ea1fb55dc857d8f5ec02c6d9cc4e..99f12346c21e98e87c4e78efcaf688c0bb37ee41 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -7,13 +7,13 @@ # - reliability diagram # - ask Carlos which decadal metrics he is currently using +source("modules/Skill/compute_quants.R") +source("modules/Skill/compute_probs.R") source("modules/Skill/s2s.metrics.R") -## TODO: Remove once the new version of s2dv is released -source("modules/Skill/CRPS.R") -source("modules/Skill/CRPSS.R") -source("modules/Skill/RandomWalkTest.R") -source("modules/Skill/Bias.R") -source("modules/Skill/AbsBiasSS.R") +## TODO: Remove when new version of s2dv is released +source("modules/Skill/tmp/RandomWalkTest.R") +source("modules/Skill/tmp/Bias.R") +source("modules/Skill/tmp/AbsBiasSS.R") ## TODO: Implement this in the future ## Which parameter are required? 
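The removed CRPS.R and CRPSS.R documented that, for a deterministic forecast, the CRPS reduces to the mean absolute error. A quick, self-contained check of that property on synthetic data, using SpecsVerification (the package the deleted wrappers were built on); this is an illustrative sketch, not part of the module:

```r
library(SpecsVerification)

set.seed(1)
obs <- rnorm(50)                    # synthetic observations
ens <- matrix(rnorm(50), ncol = 1)  # one-member ensemble = deterministic forecast

# The ensemble CRPS of a single-member forecast...
crps <- mean(EnsCrps(ens, obs))
# ...equals the mean absolute error:
mae <- mean(abs(ens[, 1] - obs))
all.equal(crps, mae)  # TRUE
```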
@@ -51,7 +51,7 @@ source("modules/Skill/AbsBiasSS.R") # " running Skill module ", "\n", # " it can call ", metric_fun )) -compute_skill_metrics <- function(exp, obs, recipe) { +compute_skill_metrics <- function(recipe, exp, obs) { # exp: s2dv_cube containing the hindcast # obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml @@ -184,7 +184,9 @@ compute_skill_metrics <- function(exp, obs, recipe) { metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', 'rpss'))) { - stop("Some of the requested metrics are not available.") + ## TODO: Test this scenario + warn(recipe$Run$logger, + "Some of the requested metrics are not available.") } capture.output( skill <- Compute_verif_metrics(exp$data, obs$data, @@ -201,11 +203,11 @@ compute_skill_metrics <- function(exp, obs, recipe) { skill_metrics[[ metric ]] <- skill } } - print("##### SKILL METRIC COMPUTATION COMPLETE #####") + info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####") return(skill_metrics) } -compute_probabilities <- function(data, recipe) { +compute_probabilities <- function(recipe, data) { if (is.null(recipe$Analysis$ncores)) { ncores <- 1 @@ -213,10 +215,8 @@ compute_probabilities <- function(data, recipe) { ncores <- recipe$Analysis$ncores } - ## TODO: Remove commented lines and include warning if quantile() - ## can not accept na.rm = FALSE if (is.null(recipe$Analysis$remove_NAs)) { - na.rm = T + na.rm = F } else { na.rm = recipe$Analysis$remove_NAs } @@ -224,45 +224,50 @@ compute_probabilities <- function(data, recipe) { named_probs <- list() named_quantiles <- list() if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { - stop("Quantiles and probability bins have been requested, but no ", - "thresholds are provided in the recipe.") + error(recipe$Run$logger, "Quantiles and probability bins have been + requested, but no thresholds are provided in the recipe.") + stop() } else { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { # Parse thresholds in recipe thresholds <- sapply(element, function (x) eval(parse(text = x))) - # Compute quantiles and probability bins - probs <- Compute_probs(data$data, thresholds, - ncores = ncores, - na.rm = TRUE) - # Separate quantiles into individual arrays and name them percentile_xx - for (i in seq(1:dim(probs$quantiles)['bin'][[1]])) { - named_quantiles <- append(named_quantiles, - list(probs$quantiles[i, , , , , , ,])) - names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", - as.integer(thresholds[i]*100)) + quants <- compute_quants(data$data, thresholds, + ncores = ncores, + na.rm = na.rm) + probs <- compute_probs(data$data, quants, + ncores = ncores, + na.rm = na.rm) + for (i in seq(1:dim(quants)['bin'][[1]])) { + named_quantiles <- append(named_quantiles, + list(ClimProjDiags::Subset(quants, + 'bin', i))) + names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", + as.integer(thresholds[i]*100)) } - # Separate probability bins into individual arrays and name them: - # prob_b = prob. below, prob_a = prob. 
above, prob_xx_to_yy - for (i in seq(1:dim(probs$probs)['bin'][[1]])) { - if (i == 1) { - name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) - } else if (i == dim(probs$probs)['bin'][[1]]) { - name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) - } else { - name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", - as.integer(thresholds[i]*100)) - } - named_probs <- append(named_probs, list(probs$probs[i, , , , , , , ,])) - names(named_probs)[length(named_probs)] <- name_i + for (i in seq(1:dim(probs)['bin'][[1]])) { + if (i == 1) { + name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == dim(probs)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs <- append(named_probs, + list(ClimProjDiags::Subset(probs, + 'bin', i))) + names(named_probs)[length(named_probs)] <- name_i } } named_probs <- lapply(named_probs, function(x) {.drop_dims(x)}) named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)}) } - print("##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") + info(recipe$Run$logger, + "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") return(list(probs=named_probs, percentiles=named_quantiles)) } +## TODO: Replace with ClimProjDiags::Subset .drop_dims <- function(metric_array) { # Drop all singleton dimensions metric_array <- drop(metric_array) diff --git a/modules/Skill/compute_probs.R b/modules/Skill/compute_probs.R new file mode 100644 index 0000000000000000000000000000000000000000..1c17b35872597c8215337b436e036b0026edf7f8 --- /dev/null +++ b/modules/Skill/compute_probs.R @@ -0,0 +1,38 @@ +## TODO: Document header +compute_probs <- function(data, quantiles, + ncores=1, quantile_dims=c('syear', 'ensemble'), + probs_dims=list('ensemble', 'bin'), + split_factor=1, na.rm=FALSE) { + + # Define na.rm behavior + if (na.rm) { + .c2p <- function(x, t) { + if (any(!is.na(x))) { + # If the array contains any non-NA values, call convert2prob + colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) + } else { + # If the array only contains NAs, return NA vector + rep(NA, dim(t)[['bin']] + 1) # vector with as many NAs as prob bins. 
+ } + } + } else { + .c2p <- function(x, t) { + if (any(is.na(x))) { + # If the array contains any NA values, return NA vector + rep(NA, dim(t)[['bin']] + 1) + } else { + # If there are no NAs, call convert2prob + colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) + } + } + } + + probs <- Apply(data = list(x = data, t = quantiles), + target_dims = probs_dims, + .c2p, + output_dims = "bin", + split_factor = split_factor, + ncores = ncores)[[1]] + + return(probs) +} diff --git a/modules/Skill/compute_quants.R b/modules/Skill/compute_quants.R new file mode 100644 index 0000000000000000000000000000000000000000..8c89e87e4bf41d2a77a0ddb1a3196a8f9643cdbd --- /dev/null +++ b/modules/Skill/compute_quants.R @@ -0,0 +1,33 @@ +## TODO: Document header + +compute_quants <- function(data, thresholds, + ncores=1, quantile_dims=c('syear', 'ensemble'), + probs_dims=list('ensemble', 'bin'), + split_factor=1, na.rm=FALSE) { + + # Define na.rm behavior + if (na.rm) { + .get_quantiles <- function(x, t) { + quantile(as.vector(x), t, na.rm = TRUE) + } + } else { + .get_quantiles <- function(x, t) { + if (any(is.na(x))) { + # If the array contains any NA values, return NA vector + rep(NA, length(t)) + } else { + # If there are no NAs, call quantile() + quantile(as.vector(x), t, na.rm = FALSE) + } + } + } + + quantiles <- Apply(data, + target_dims = quantile_dims, + function(x, t) {.get_quantiles(as.vector(x), thresholds)}, + output_dims = "bin", + ncores = ncores, + split_factor = split_factor)[[1]] + + return(quantiles) +} diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R index 7c0aa30e190ac7a00b10eeedbc74fa845047f446..04e9d801abd8efd0bdec6f39ec50b0d4fa1c34fc 100644 --- a/modules/Skill/s2s.metrics.R +++ b/modules/Skill/s2s.metrics.R @@ -1,7 +1,4 @@ -source("modules/Skill/s2s.probs.R") - - # MERGES verification dimns int single sdate dim along which the # verification metrics will be computed mergedims <- function(data, indims, outdim) { @@ -127,24 +124,36 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, } else if (metric == "frpss_sign") { - terciles_obs <- Compute_probs(obs, c(1/3, 2/3), - quantile_dims=c(time.dim), - ncores=ncores, - split_factor=1, - na.rm=na.rm) + terciles_obs <- compute_quants(obs, c(1/3, 2/3), + quantile_dims=c(time.dim), + ncores=ncores, + split_factor=1, + na.rm=na.rm) + + terciles_exp <- compute_quants(exp, c(1/3, 2/3), + quantile_dims=c(time.dim, 'ensemble'), + ncores=ncores, + split_factor=1, + na.rm=na.rm) + + probs_obs <- compute_probs(obs, terciles_obs, + quantile_dims=c(time.dim), + ncores=ncores, + split_factor=1, + na.rm=na.rm) - terciles_exp <- Compute_probs(exp, c(1/3, 2/3), - quantile_dims=c(time.dim, 'ensemble'), - ncores=ncores, - split_factor=1, - na.rm=na.rm) + probs_exp <- compute_probs(exp, terciles_exp, + quantile_dims=c(time.dim), + ncores=ncores, + split_factor=1, + na.rm=na.rm) - probs_clim = array(data = 1/3, dim = dim(terciles_obs$probs)) + probs_clim = array(data = 1/3, dim = dim(probs_obs)) frps <- NULL n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][] frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim, - probs_obs = terciles_obs$probs), + probs_obs = probs_obs), target_dims = 'bin', fun = .rps_from_probs, n_categories = 3, @@ -152,8 +161,8 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, Fair = TRUE, ncores = ncores)$output1 - frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs, - probs_obs = terciles_obs$probs), + frps$exp <- multiApply::Apply(data = 
list(probs_exp = probs_exp, + probs_obs = probs_obs), target_dims = 'bin', fun = .rps_from_probs, n_categories = 3, diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R deleted file mode 100644 index 889330453d15bb3e90e7f14de5eb5b456697643d..0000000000000000000000000000000000000000 --- a/modules/Skill/s2s.probs.R +++ /dev/null @@ -1,42 +0,0 @@ - - -Compute_probs <- function(data, thresholds, - ncores=1, quantile_dims=c('syear', 'ensemble'), - probs_dims=list('ensemble', 'bin'), - split_factor=1, na.rm=FALSE) { - - quantiles <- Apply(data, - quantile_dims, - function(x, na.rm) {quantile(as.vector(x), - thresholds,na.rm=na.rm)}, - output_dims="bin", - ncores=ncores, - na.rm=na.rm, - split_factor=split_factor)[[1]] - - if (na.rm == FALSE) { - c2p <- function(x, t) { - colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) - } - } else { - c2p <- function(x, t) { - if (any(!is.na(x))){ - colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) - } else { - rep(NA, dim(t)[['bin']] + 1) # vector with as many NAs as probability bins. - } - } - } - - probs <- Apply(data = list(x = data, t = quantiles), - target_dims = probs_dims, - c2p, - output_dims = "bin", - split_factor = split_factor, - ncores = ncores)[[1]] - - return(list(probs=probs, quantiles=quantiles)) - -} - - diff --git a/modules/Skill/AbsBiasSS.R b/modules/Skill/tmp/AbsBiasSS.R similarity index 97% rename from modules/Skill/AbsBiasSS.R rename to modules/Skill/tmp/AbsBiasSS.R index 0838f251a97e62a4e82f65423a6ccd567b2b6b77..0ceb009c7b4dc33b6f9788dc6cb8459f0e25767b 100644 --- a/modules/Skill/AbsBiasSS.R +++ b/modules/Skill/tmp/AbsBiasSS.R @@ -257,15 +257,16 @@ AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL, obs_data <- obs_data[good_values] } } + ## Bias of the exp - bias_exp <- .Bias(exp = exp_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = TRUE) + bias_exp <- .Bias(exp = exp_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = FALSE) ## Bias of the ref if (is.null(ref)) { ## Climatological forecast - ref_data <- mean(obs_data, na.rm = na.rm) + ref_data <- rep(mean(obs_data, na.rm = na.rm), length(obs_data)) } - bias_ref <- .Bias(exp = ref_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = TRUE) + bias_ref <- .Bias(exp = ref_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = FALSE) ## Skill score and significance - biasSS[i, j] <- 1 - bias_exp / bias_ref + biasSS[i, j] <- 1 - mean(bias_exp) / mean(bias_ref) sign[i, j] <- .RandomWalkTest(skill_A = bias_exp, skill_B = bias_ref)$signif } } diff --git a/modules/Skill/Bias.R b/modules/Skill/tmp/Bias.R similarity index 100% rename from modules/Skill/Bias.R rename to modules/Skill/tmp/Bias.R diff --git a/modules/Skill/RandomWalkTest.R b/modules/Skill/tmp/RandomWalkTest.R similarity index 100% rename from modules/Skill/RandomWalkTest.R rename to modules/Skill/tmp/RandomWalkTest.R diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R new file mode 100644 index 0000000000000000000000000000000000000000..ff0e9fd44d8e71b831819271d51c7f1374e20068 --- /dev/null +++ b/modules/Visualization/Visualization.R @@ -0,0 +1,353 @@ +#G# TODO: Remove once released in s2dv/CSTools +source("modules/Visualization/tmp/PlotMostLikelyQuantileMap.R") +source("modules/Visualization/tmp/PlotCombinedMap.R") + +## TODO: Add the possibility to read the data directly from netCDF +## TODO: Adapt to multi-model case +## TODO: Add param 'raw'? 
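The new compute_quants()/compute_probs() pair splits the old Compute_probs() into two steps, so that thresholds derived from one dataset can be reused to categorise another (as plot_most_likely_terciles() below does with the forecast). A minimal sketch of the two-step flow on synthetic data, assuming the module's default dimension names and that convert2prob() is provided by easyVerification:

```r
library(multiApply)
library(easyVerification)  # assumed provider of convert2prob()
source("modules/Skill/compute_quants.R")
source("modules/Skill/compute_probs.R")

set.seed(42)
# Toy hindcast: 30 start years x 10 ensemble members
data <- array(rnorm(300), dim = c(syear = 30, ensemble = 10))

# Step 1: tercile thresholds over 'syear' and 'ensemble' -> dim [bin = 2]
quants <- compute_quants(data, c(1/3, 2/3), na.rm = FALSE)

# Step 2: per-year category probabilities -> dim [bin = 3, syear = 30]
probs <- compute_probs(data, quants, na.rm = FALSE)
```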
+ +plot_data <- function(recipe, + data, + calibrated_data = NULL, + skill_metrics = NULL, + probabilities = NULL, + archive = NULL, + significance = F) { + + # Try to produce and save several basic plots. + # recipe: the auto-s2s recipe as read by read_yaml() + # archive: the auto-s2s archive as read by read_yaml() + # data: list containing the hcst, obs and (optional) fcst s2dv_cube objects + # calibrated_data: list containing the calibrated hcst and (optional) fcst + # s2dv_cube objects + # skill_metrics: list of arrays containing the computed skill metrics + # significance: Bool. Whether to include significance dots where applicable + + outdir <- paste0(get_dir(recipe), "/plots/") + dir.create(outdir, showWarnings = FALSE, recursive = TRUE) + + if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && + (is.null(data$fcst))) { + stop("The Visualization module has been called, but there is no data ", + "that can be plotted.") + } + + if (is.null(archive)) { + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive.yml"))$archive + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive_decadal.yml"))$archive + } + } + + # Plot skill metrics + if (!is.null(skill_metrics)) { + plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, + significance) + } + + # Plot forecast ensemble mean + if (!is.null(calibrated_data$fcst)) { + plot_ensemble_mean(recipe, archive, calibrated_data$fcst, outdir) + } else if (!is.null(data$fcst)) { + warn(recipe$Run$logger, "Only the uncalibrated forecast was provided. + Using this data to plot the forecast ensemble mean.") + plot_ensemble_mean(recipe, archive, data$fcst, outdir) + } + + # Plot Most Likely Terciles + if ((!is.null(probabilities)) && (!is.null(calibrated_data$fcst))) { + plot_most_likely_terciles(recipe, archive, calibrated_data$fcst, + probabilities$percentiles, outdir) + } else if ((!is.null(probabilities)) && (!is.null(data$fcst))) { + warn(recipe$Run$logger, "Only the uncalibrated forecast was provided. 
+ Using this data to plot the most likely terciles.") + plot_most_likely_terciles(recipe, archive, data$fcst, + probabilities$percentiles, outdir) + } +} + +plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, + outdir, significance = F) { + + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } + # Abort if skill_metrics is not list + if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { + stop("The element 'skill_metrics' must be a list of named arrays.") + } + + latitude <- data_cube$lat + longitude <- data_cube$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + init_month <- lubridate::month(as.numeric(substr(recipe$Analysis$Time$sdate, + start = 1, stop = 2)), + label = T, abb = T) + + # Group different metrics by type + skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", + "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", + "enscorr_specs") + scores <- c("rps", "frps", "crps", "frps_specs") + + for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { + + if (name %in% names(skill_metrics)) { + # Define plot characteristics and metric name to display in plot + if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", + "rpss_specs", "bss90_specs", "bss10_specs")) { + display_name <- toupper(strsplit(name, "_")[[1]][1]) + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + + } else if (name == "mean_bias_ss") { + display_name <- "Mean Bias Skill Score" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + + } else if (name %in% c("enscorr", "enscorr_specs")) { + display_name <- "Ensemble Mean Correlation" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + + } else if (name %in% scores) { + skill <- skill_metrics[[name]] + display_name <- toupper(strsplit(name, "_")[[1]][1]) + brks <- seq(0, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "Reds") + + } else if (name == "enssprerr") { + ## TODO: Adjust colorbar parameters + skill <- skill_metrics[[name]] + display_name <- "Spread-to-Error Ratio" + brks <- pretty(0:max(skill, na.rm = T), n = 20, min.n = 10) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + + } else if (name == "mean_bias") { + skill <- skill_metrics[[name]] + display_name <- "Mean Bias" + max_value <- max(abs(skill)) + ugly_intervals <- seq(-max_value, max_value, (max_value*2)/10) + brks <- pretty(ugly_intervals, n = 20, min.n = 10) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + } + + options(bitmapType = "cairo") + + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. 
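+      # The "dots" sublists built below are passed panel-by-panel to
+      # PlotEquiMap() through the 'special_args' argument of PlotLayout().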
+      significance_name <- paste0(name, "_significance")
+      if ((significance) && (significance_name %in% names(skill_metrics))) {
+        skill_significance <- skill_metrics[[significance_name]]
+        skill_significance <- Reorder(skill_significance, c("time",
+                                                            "longitude",
+                                                            "latitude"))
+        # Split skill significance into list of lists, along the time dimension
+        # This allows for plotting the significance dots correctly.
+        skill_significance <- ClimProjDiags::ArrayToList(skill_significance,
+                                                         dim = 'time',
+                                                         level = "sublist",
+                                                         names = "dots")
+      } else {
+        skill_significance <- NULL
+      }
+      # Define output file name and titles
+      outfile <- paste0(outdir, name, ".png")
+      toptitle <- paste(display_name, "-", data_cube$Variable$varName,
+                        "-", system_name, "-", init_month, hcst_period)
+      months <- unique(lubridate::month(data_cube$Dates$start,
+                                        label = T, abb = F))
+      titles <- as.vector(months)
+      # Plot
+      suppressWarnings(
+        PlotLayout(PlotEquiMap, c('longitude', 'latitude'),
+                   asplit(skill, MARGIN=1), # Splitting array into a list
+                   longitude, latitude,
+                   special_args = skill_significance,
+                   dot_symbol = 20,
+                   toptitle = toptitle,
+                   title_scale = 0.6,
+                   titles = titles,
+                   filled.continents=F,
+                   brks = brks,
+                   cols = col2,
+                   fileout = outfile,
+                   bar_label_digits = 3)
+      )
+    }
+  }
+
+  info(recipe$Run$logger,
+       "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####")
+}
+
+plot_ensemble_mean <- function(recipe, archive, fcst, outdir) {
+
+  # Abort if frequency is daily
+  if (recipe$Analysis$Variables$freq == "daily_mean") {
+    stop("Visualization functions not yet implemented for daily data.")
+  }
+
+  latitude <- fcst$lat
+  longitude <- fcst$lon
+  system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name
+  variable <- recipe$Analysis$Variables$name
+  units <- attr(fcst$Variable, "variable")$units
+  start_date <- paste0(recipe$Analysis$Time$fcst_year,
+                       recipe$Analysis$Time$sdate)
+
+  # Compute ensemble mean
+  ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble')
+  # Drop extra dims, add time dim if missing:
+  ensemble_mean <- drop(ensemble_mean)
+
+  if (!("time" %in% names(dim(ensemble_mean)))) {
+    dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean))
+  }
+  if (!'syear' %in% names(dim(ensemble_mean))) {
+    ensemble_mean <- Reorder(ensemble_mean, c("time", "longitude", "latitude"))
+  } else {
+    ensemble_mean <- Reorder(ensemble_mean, c("syear", "time", "longitude", "latitude"))
+  }
+  ## TODO: Redefine column colors, possibly depending on variable
+  if (variable == 'prlr') {
+    palette = "BrBG"
+    rev = F
+  } else {
+    palette = "RdBu"
+    rev = T
+  }
+
+  brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8)
+  col2 <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev)
+  # color <- colorRampPalette(col2)(length(brks) - 1)
+  options(bitmapType = "cairo")
+
+  for (i_syear in start_date) {
+    # Define name of output file and titles
+    if (length(start_date) == 1) {
+      i_ensemble_mean <- ensemble_mean
+      outfile <- paste0(outdir, "forecast_ensemble_mean.png")
+    } else {
+      i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ]
+      outfile <- paste0(outdir, "forecast_ensemble_mean_", i_syear, ".png")
+    }
+    toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name,
+                      "- Initialization:", i_syear)
+    months <- lubridate::month(fcst$Dates$start[1, 1, which(start_date == i_syear), ],
+                               label = T, abb = F)
+    titles <- as.vector(months)
+    # Plots
+    PlotLayout(PlotEquiMap, c('longitude', 'latitude'),
+               i_ensemble_mean, longitude, latitude,
+               filled.continents = F,
+               toptitle = toptitle,
+               title_scale = 0.6,
+               titles = titles,
+               units = units,
+               cols = col2,
+               brks = brks,
+               fileout = outfile,
+               bar_label_digits = 4)
+  }
+
+  info(recipe$Run$logger,
+       "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####")
+}
+
+plot_most_likely_terciles <- function(recipe, archive,
+                                      fcst,
+                                      percentiles,
+                                      outdir) {
+
+  # Abort if frequency is daily
+  if (recipe$Analysis$Variables$freq == "daily_mean") {
+    stop("Visualization functions not yet implemented for daily data.")
+  }
+
+  latitude <- fcst$lat
+  longitude <- fcst$lon
+  system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name
+  variable <- recipe$Analysis$Variables$name
+  start_date <- paste0(recipe$Analysis$Time$fcst_year,
+                       recipe$Analysis$Time$sdate)
+  if (is.null(recipe$Analysis$remove_NAs)) {
+    recipe$Analysis$remove_NAs <- FALSE
+  }
+  if (is.null(recipe$Analysis$ncores)) {
+    recipe$Analysis$ncores <- 1
+  }
+
+  # Compute probability bins for the forecast
+  if (is.null(percentiles$percentile_33) | is.null(percentiles$percentile_66)) {
+    stop("The quantile array does not contain the 33rd and 66th percentiles,",
+         " so the most likely tercile map cannot be plotted.")
+  }
+
+  terciles <- abind(percentiles$percentile_33, percentiles$percentile_66,
+                    along = 0)
+  names(dim(terciles)) <- c("bin", names(dim(percentiles$percentile_33)))
+  probs_fcst <- compute_probs(fcst$data, terciles,
+                              ncores = recipe$Analysis$ncores,
+                              na.rm = recipe$Analysis$remove_NAs)
+
+  # Drop extra dims, add time dim if missing:
+  probs_fcst <- drop(probs_fcst)
+  if (!("time" %in% names(dim(probs_fcst)))) {
+    dim(probs_fcst) <- c("time" = 1, dim(probs_fcst))
+  }
+  if (!'syear' %in% names(dim(probs_fcst))) {
+    probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude"))
+  } else {
+    probs_fcst <- Reorder(probs_fcst, c("syear", "time", "bin", "longitude", "latitude"))
+  }
+
+  for (i_syear in start_date) {
+    # Define name of output file and titles
+    if (length(start_date) == 1) {
+      i_probs_fcst <- probs_fcst
+      outfile <- paste0(outdir, "forecast_most_likely_tercile.png")
+    } else {
+      i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ]
+      outfile <- paste0(outdir, "forecast_most_likely_tercile_", i_syear, ".png")
+    }
+    toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-",
+                      "Initialization:", i_syear)
+    months <- lubridate::month(fcst$Dates$start[1, 1, which(start_date == i_syear), ],
+                               label = T, abb = F)
+    ## TODO: Ensure this works for daily and sub-daily cases
+    titles <- as.vector(months)
+
+    # Plots
+    ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked
+    ## on.
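+    # 'bin' holds the tercile categories; 'cat_dim' tells
+    # PlotMostLikelyQuantileMap() which dimension indexes them.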
+    suppressWarnings(
+      PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'),
+                 cat_dim = 'bin',
+                 i_probs_fcst, longitude, latitude,
+                 coast_width = 1.5,
+                 title_scale = 0.6,
+                 legend_scale = 0.8, #cex_bar_titles = 0.6,
+                 toptitle = toptitle,
+                 titles = titles,
+                 fileout = outfile,
+                 bar_label_digits = 2,
+                 triangle_ends = c(F, F), width = 11, height = 8)
+    )
+  }
+
+  info(recipe$Run$logger,
+       "##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####")
+}
diff --git a/modules/Visualization/tmp/PlotCombinedMap.R b/modules/Visualization/tmp/PlotCombinedMap.R
new file mode 100644
index 0000000000000000000000000000000000000000..a7b5fc9701765a9969a29ff27373405a9a89198e
--- /dev/null
+++ b/modules/Visualization/tmp/PlotCombinedMap.R
@@ -0,0 +1,608 @@
+#'Plot Multiple Lon-Lat Variables In a Single Map According to a Decision Function
+#'@description Plot a number of two-dimensional matrices with (longitude, latitude) dimensions on a single map with the cylindrical equidistant latitude and longitude projection.
+#'@author Nicolau Manubens, \email{nicolau.manubens@bsc.es}
+#'@author Veronica Torralba, \email{veronica.torralba@bsc.es}
+#'
+#'@param maps List of matrices to plot, each with (longitude, latitude) dimensions, or 3-dimensional array with the dimensions (longitude, latitude, map). Dimension names are required.
+#'@param lon Vector of longitudes. Must match the length of the corresponding dimension in 'maps'.
+#'@param lat Vector of latitudes. Must match the length of the corresponding dimension in 'maps'.
+#'@param map_select_fun Function that selects, for each grid point, which value to take among all the provided maps. This function receives as input a vector of values for the same grid point for all the provided maps, and must return a single selected value (not its index!) or NA. For example, the \code{min} and \code{max} functions are accepted.
+#'@param display_range Range of values to be displayed for all the maps. This must be a numeric vector c(range min, range max). The values in the parameter 'maps' can go beyond the limits specified in this range. If the selected value for a given grid point (according to 'map_select_fun') falls outside the range, it will be coloured with 'col_unknown_map'.
+#'@param map_dim Optional name for the dimension of 'maps' along which the multiple maps are arranged. Only applies when 'maps' is provided as a 3-dimensional array. Takes the value 'map' by default.
+#'@param brks Colour levels to be sent to PlotEquiMap. This parameter is optional and adjusted automatically by the function.
+#'@param cols List of vectors of colours to be sent to PlotEquiMap for the colour bar of each map. This parameter is optional and adjusted automatically by the function (up to 5 maps). The colours provided for each colour bar will be automatically interpolated to match the number of breaks. Each item in this list can be named, and the name will be used as title for the corresponding colour bar (equivalent to the parameter 'bar_titles').
+#'@param col_unknown_map Colour to use to paint the grid cells for which a map cannot be chosen according to 'map_select_fun' or for those values that go beyond 'display_range'. Takes the value 'white' by default.
+#'@param mask Optional numeric array with dimensions (latitude, longitude), with values in the range [0, 1], indicating the opacity of the mask over each grid point.
+#'  Cells with a 0 will result in no mask, whereas cells with a 1 will result in a totally opaque superimposed pixel coloured in 'col_mask'.
+#'@param col_mask Colour to be used for the superimposed mask (if specified in 'mask'). Takes the value 'grey' by default.
+#'@param dots Array with the same (latitude, longitude) dimensions as the maps, or with dimensions
+#'  c(n, lat, lon), where n is the number of dot/symbol layers to add to the
+#'  plot. A value of TRUE at a grid cell will draw a dot/symbol on the
+#'  corresponding square of the plot. By default all layers provided in 'dots'
+#'  are plotted with dots, but a symbol can be specified for each of the
+#'  layers via the parameter 'dot_symbol'.
+#'@param bar_titles Optional vector of character strings providing the titles to be shown on top of each of the colour bars.
+#'@param legend_scale Scale factor for the size of the colour bar labels. Takes 1 by default.
+#'@param cex_bar_titles Scale factor for the sizes of the bar titles. Takes 1.5 by default.
+#'@param fileout File where to save the plot. If not specified (default) a graphics device will pop up. Extensions allowed: eps/ps, jpeg, png, pdf, bmp and tiff.
+#'@param width File width, in the units specified in the parameter size_units (inches by default). Takes 8 by default.
+#'@param height File height, in the units specified in the parameter size_units (inches by default). Takes 5 by default.
+#'@param size_units Units of the size of the device (file or window) to plot in. Inches ('in') by default. See ?Devices and the creator function of the corresponding device.
+#'@param res Resolution of the device (file or window) to plot in. See ?Devices and the creator function of the corresponding device.
+#'@param drawleg Where to draw the common colour bar. Can take values TRUE,
+#'  FALSE or:\cr
+#'  'up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N'\cr
+#'  'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr
+#'  'right', 'r', 'R', 'east', 'e', 'E'\cr
+#'  'left', 'l', 'L', 'west', 'w', 'W'
+#'@param ... Additional parameters to be passed on to \code{PlotEquiMap}.
+
+#'@seealso \code{PlotMostLikelyQuantileMap} and \code{PlotEquiMap}
+#'
+#'@importFrom s2dv PlotEquiMap ColorBar
+#'@importFrom maps map
+#'@importFrom graphics box image layout mtext par plot.new
+#'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff
+#'@examples
+#'# Simple example
+#'x <- array(1:(20 * 10), dim = c(lat = 10, lon = 20)) / 200
+#'a <- x * 0.6
+#'b <- (1 - x) * 0.6
+#'c <- 1 - (a + b)
+#'lons <- seq(0, 359.5, length = 20)
+#'lats <- seq(-89.5, 89.5, length = 10)
+#'PlotCombinedMap(list(a, b, c), lons, lats,
+#'                toptitle = 'Maximum map',
+#'                map_select_fun = max,
+#'                display_range = c(0, 1),
+#'                bar_titles = paste('% of belonging to', c('a', 'b', 'c')),
+#'                brks = 20, width = 10, height = 8)
+#'
+#'Lon <- c(0:40, 350:359)
+#'Lat <- 51:26
+#'data <- rnorm(51 * 26 * 3)
+#'dim(data) <- c(map = 3, lon = 51, lat = 26)
+#'mask <- sample(c(0,1), replace = TRUE, size = 51 * 26)
+#'dim(mask) <- c(lat = 26, lon = 51)
+#'PlotCombinedMap(data, lon = Lon, lat = Lat, map_select_fun = max,
+#'                display_range = range(data), mask = mask,
+#'                width = 12, height = 8)
+#'
+#'@export
+PlotCombinedMap <- function(maps, lon, lat,
+                            map_select_fun, display_range,
+                            map_dim = 'map',
+                            brks = NULL, cols = NULL,
+                            col_unknown_map = 'white',
+                            mask = NULL, col_mask = 'grey',
+                            dots = NULL,
+                            bar_titles = NULL, legend_scale = 1,
+                            cex_bar_titles = 1.5,
+                            plot_margin = NULL, bar_margin = rep(0, 4),
+                            fileout = NULL, width = 8, height = 5,
+                            size_units = 'in', res = 100, drawleg = T,
+                            ...) {
+  args <- list(...)
+
+  # If there are any filenames to store the graphics, process them
+  # to select the right device
+  if (!is.null(fileout)) {
+    deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height,
+                                units = size_units, res = res)
+    saveToFile <- deviceInfo$fun
+    fileout <- deviceInfo$files
+  }
+
+  # Check maps
+  error <- FALSE
+  if (is.list(maps)) {
+    if (length(maps) < 1) {
+      stop("Parameter 'maps' must be of length >= 1 if provided as a list.")
+    }
+    check_fun <- function(x) {
+      is.numeric(x) && (length(dim(x)) == 2)
+    }
+    if (!all(sapply(maps, check_fun))) {
+      error <- TRUE
+    }
+    ref_dims <- dim(maps[[1]])
+    equal_dims <- all(sapply(maps, function(x) identical(dim(x), ref_dims)))
+    if (!equal_dims) {
+      stop("All arrays in parameter 'maps' must have the same dimension ",
+           "sizes and names when 'maps' is provided as a list of arrays.")
+    }
+    num_maps <- length(maps)
+    maps <- unlist(maps)
+    dim(maps) <- c(ref_dims, map = num_maps)
+    map_dim <- 'map'
+  }
+  if (!is.numeric(maps)) {
+    error <- TRUE
+  }
+  if (is.null(dim(maps))) {
+    error <- TRUE
+  }
+  if (length(dim(maps)) != 3) {
+    error <- TRUE
+  }
+  if (error) {
+    stop("Parameter 'maps' must be either a numeric array with 3 dimensions ",
+         "or a list of numeric arrays of the same size with the 'lon' and ",
+         "'lat' dimensions.")
+  }
+  dimnames <- names(dim(maps))
+
+  # Check map_dim
+  if (is.character(map_dim)) {
+    if (is.null(dimnames)) {
+      stop("Specified a dimension name in 'map_dim' but no dimension names provided ",
+           "in 'maps'.")
+    }
+    map_dim <- which(dimnames == map_dim)
+    if (length(map_dim) < 1) {
+      stop("Dimension 'map_dim' not found in 'maps'.")
+    } else {
+      map_dim <- map_dim[1]
+    }
+  } else if (!is.numeric(map_dim)) {
+    stop("Parameter 'map_dim' must be either a numeric value or a ",
+         "dimension name.")
+  }
+  if (length(map_dim) != 1) {
+    stop("Parameter 'map_dim' must be of length 1.")
+  }
+  map_dim <- round(map_dim)
+
+  # Work out lon_dim and lat_dim
+  lon_dim <- NULL
+  if (!is.null(dimnames)) {
+    lon_dim <- which(dimnames %in% c('lon', 'longitude'))[1]
+  }
+  if (length(lon_dim) < 1) {
+    lon_dim <- (1:3)[-map_dim][1]
+  }
+  lon_dim <- round(lon_dim)
+
+  lat_dim <- NULL
+  if (!is.null(dimnames)) {
+    lat_dim <- which(dimnames %in% c('lat', 'latitude'))[1]
+  }
+  if (length(lat_dim) < 1) {
+    lat_dim <- (1:3)[-map_dim][2]
+  }
+  lat_dim <- round(lat_dim)
+
+  # Check lon
+  if (!is.numeric(lon)) {
+    stop("Parameter 'lon' must be a numeric vector.")
+  }
+  if (length(lon) != dim(maps)[lon_dim]) {
+    stop("Parameter 'lon' does not match the longitude dimension in 'maps'.")
+  }
+
+  # Check lat
+  if (!is.numeric(lat)) {
+    stop("Parameter 'lat' must be a numeric vector.")
+  }
+  if (length(lat) != dim(maps)[lat_dim]) {
+    stop("Parameter 'lat' does not match the latitude dimension in 'maps'.")
+  }
+
+  # Check map_select_fun
+  if (is.numeric(map_select_fun)) {
+    if (length(dim(map_select_fun)) != 2) {
+      stop("Parameter 'map_select_fun' must be an array with dimensions ",
+           "'lon' and 'lat' if provided as an array.")
+    }
+    if (!identical(dim(map_select_fun), dim(maps)[-map_dim])) {
+      stop("The dimensions 'lon' and 'lat' in the 'map_select_fun' array must ",
+           "have the same size, name and order as in the 'maps' parameter.")
+    }
+  } else if (!is.function(map_select_fun)) {
+    stop("The parameter 'map_select_fun' must be a function or a numeric array.")
+  }
+
+  # Check display_range
+  if (!is.numeric(display_range) || length(display_range) != 2) {
+    stop("Parameter 'display_range' must be a numeric vector of length 2.")
+  }
+
+  # Check brks
+  if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) {
+    num_brks <- 5
+    if (is.numeric(brks)) {
+      num_brks <- brks
+    }
+    brks <- seq(from = display_range[1], to = display_range[2], length.out = num_brks)
+  }
+  if (!is.numeric(brks)) {
+    stop("Parameter 'brks' must be a numeric vector.")
+  }
+
+  # Check cols
+  col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"),
+                   c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"),
+                   c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"),
+                   c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"),
+                   c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497"))
+  if (is.null(cols)) {
+    if (length(col_sets) >= dim(maps)[map_dim]) {
+      chosen_sets <- 1:(dim(maps)[map_dim])
+      chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2)
+    } else {
+      chosen_sets <- array(1:length(col_sets), dim(maps)[map_dim])
+    }
+    cols <- col_sets[chosen_sets]
+  } else {
+    if (!is.list(cols)) {
+      stop("Parameter 'cols' must be a list of character vectors.")
+    }
+    if (!all(sapply(cols, is.character))) {
+      stop("Parameter 'cols' must be a list of character vectors.")
+    }
+    if (length(cols) != dim(maps)[map_dim]) {
+      stop("Parameter 'cols' must be a list of the same length as the number of ",
+           "maps in 'maps'.")
+    }
+  }
+  for (i in 1:length(cols)) {
+    if (length(cols[[i]]) != (length(brks) - 1)) {
+      cols[[i]] <- colorRampPalette(cols[[i]])(length(brks) - 1)
+    }
+  }
+
+  # Check bar_titles
+  if (is.null(bar_titles)) {
+    if (!is.null(names(cols))) {
+      bar_titles <- names(cols)
+    } else {
+      bar_titles <- paste0("Map ", 1:length(cols))
+    }
+  } else {
+    if (!is.character(bar_titles)) {
+      stop("Parameter 'bar_titles' must be a character vector.")
+    }
+    if (length(bar_titles) != length(cols)) {
+      stop("Parameter 'bar_titles' must be of the same length as the number of ",
+           "maps in 'maps'.")
+    }
+  }
+
+  # Check legend_scale
+  if (!is.numeric(legend_scale)) {
+    stop("Parameter 'legend_scale' must be numeric.")
+  }
+
+  # Check col_unknown_map
+  if (!is.character(col_unknown_map)) {
+    stop("Parameter 'col_unknown_map' must be a character string.")
+  }
+
+  # Check col_mask
+  if (!is.character(col_mask)) {
+    stop("Parameter 'col_mask' must be a character string.")
+  }
+
+  # Check mask
+  if (!is.null(mask)) {
+    if (!is.numeric(mask)) {
+      stop("Parameter 'mask' must be numeric.")
+    }
+    if (length(dim(mask)) != 2) {
+      stop("Parameter 'mask' must have two dimensions.")
+    }
+    if ((dim(mask)[1] != dim(maps)[lat_dim]) ||
+        (dim(mask)[2] != dim(maps)[lon_dim])) {
+      stop("Parameter 'mask' must have dimensions c(lat, lon).")
+    }
+  }
+  # Check dots
+  if (!is.null(dots)) {
+    if (length(dim(dots)) != 2) {
+      stop("Parameter 'dots' must have two dimensions.")
+    }
+    if ((dim(dots)[1] != dim(maps)[lat_dim]) ||
+        (dim(dots)[2] != dim(maps)[lon_dim])) {
+      stop("Parameter 'dots' must have dimensions c(lat, lon).")
+    }
+  }
+
+  #----------------------
+  # Identify the most likely map
+  #----------------------
+  brks_norm <- seq(0, 1, length.out = length(brks))
+  if (is.function(map_select_fun)) {
+    range_width <- display_range[2] - display_range[1]
+    ml_map <- apply(maps, c(lat_dim, lon_dim), function(x) {
+      if (any(is.na(x))) {
+        res <- NA
+      } else {
+        res <- which(x == map_select_fun(x))
+        if (length(res) > 0) {
+          res <- res[1]
+          if (map_select_fun(x) < display_range[1] ||
+              map_select_fun(x) > display_range[2]) {
+            res <- -0.5
+          } else {
+            res <- res + (map_select_fun(x) - display_range[1]) / range_width
+            if (map_select_fun(x) == display_range[1]) {
+              res <- res + brks_norm[2] / (length(brks) * 2)
+            }
+          }
+        } else {
+          res <- -0.5
+        }
+      }
+      res
+    })
+  } else {
+    stop("Providing 'map_select_fun' as array not implemented yet.")
+    ml_map <- map_select_fun
+  }
+  nmap <- dim(maps)[map_dim]
+  nlat <- length(lat)
+  nlon <- length(lon)
+
+  #----------------------
+  # Set latitudes from minimum to maximum
+  #----------------------
+  if (lat[1] > lat[nlat]) {
+    lat <- lat[nlat:1]
+    indices <- list(nlat:1, TRUE)
+    ml_map <- do.call("[", c(list(x = ml_map), indices))
+    if (!is.null(mask)) {
+      mask <- mask[nlat:1, ]
+    }
+    if (!is.null(dots)) {
+      dots <- dots[nlat:1, ]
+    }
+  }
+
+  #----------------------
+  # Set layout and parameters
+  #----------------------
+  # Open connection to graphical device
+  if (!is.null(fileout)) {
+    saveToFile(fileout)
+  } else if (names(dev.cur()) == 'null device') {
+    dev.new(units = size_units, res = res, width = width, height = height)
+  }
+  #NOTE: I think plot.new() is not necessary in any case.
+#  plot.new()
+  par(font.main = 1)
+  # If colorbars need to be plotted, re-define layout.
+  if (drawleg) {
+    layout(matrix(c(rep(1, nmap), 2:(nmap + 1)), 2, nmap, byrow = TRUE), heights = c(6, 1.5))
+  }
+
+  #----------------------
+  # Set colors and breaks and then PlotEquiMap
+  #----------------------
+  tcols <- c(col_unknown_map, cols[[1]])
+  for (k in 2:nmap) {
+    tcols <- append(tcols, c(col_unknown_map, cols[[k]]))
+  }
+
+  tbrks <- c(-1, brks_norm + rep(1:nmap, each = length(brks)))
+
+  if (is.null(plot_margin)) {
+    plot_margin <- c(5, 4, 4, 2) + 0.1 # default of par()$mar
+  }
+
+  PlotEquiMap(var = ml_map, lon = lon, lat = lat,
+              brks = tbrks, cols = tcols, drawleg = FALSE,
+              filled.continents = FALSE, dots = dots, mar = plot_margin, ...)
+ + #---------------------- + # Add overplot on top + #---------------------- + if (!is.null(mask)) { + dims_mask <- dim(mask) + latb <- sort(lat, index.return = TRUE) + dlon <- lon[2:dims_mask[2]] - lon[1:(dims_mask[2] - 1)] + wher <- which(dlon > (mean(dlon) + 1)) + if (length(wher) > 0) { + lon[(wher + 1):dims_mask[2]] <- lon[(wher + 1):dims_mask[2]] - 360 + } + lonb <- sort(lon, index.return = TRUE) + + cols_mask <- sapply(seq(from = 0, to = 1, length.out = 10), + function(x) adjustcolor(col_mask, alpha.f = x)) + image(lonb$x, latb$x, t(mask)[lonb$ix, latb$ix], + axes = FALSE, col = cols_mask, + breaks = seq(from = 0, to = 1, by = 0.1), + xlab='', ylab='', add = TRUE, xpd = TRUE) + if (!exists('coast_color')) { + coast_color <- 'black' + } + if (min(lon) < 0) { + map('world', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon -180 to 180). + } else { + map('world2', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon 0 to 360). + } + box() + } + + #---------------------- + # Add colorbars + #---------------------- + if ('toptitle' %in% names(args)) { + size_title <- 1 + if ('title_scale' %in% names(args)) { + size_title <- args[['title_scale']] + } + old_mar <- par('mar') + old_mar[3] <- old_mar[3] - (2 * size_title + 1) + par(mar = old_mar) + } + + if (drawleg) { + for (k in 1:nmap) { + ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, + draw_separators = TRUE, extra_margin = c(2, 0, 2, 0), + label_scale = legend_scale * 1.5) + if (!is.null(bar_titles)) { + mtext(bar_titles[[k]], 3, line = -3, cex = cex_bar_titles) + } + #TODO: Change to below code. Plot title together. extra_margin needs to be adjusted. +# ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, +# draw_separators = TRUE, extra_margin = c(1, 0, 1, 0), +# label_scale = legend_scale * 1.5, title = bar_titles[[k]], title_scale = cex_bar_titles) + } + } + + # If the graphic was saved to file, close the connection with the device + if (!is.null(fileout)) dev.off() +} + +# Color bar for PlotMostLikelyQuantileMap +multi_ColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits = NULL, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, label_scale = 1, extra_margin = rep(0, 4), + ...) 
{
+
+  minimum_value <- ceiling(1 / nmap * 10 * 1.1) * 10
+  display_range <- c(minimum_value, 100)
+
+  # Check brks
+  if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) {
+    num_brks <- 5
+    if (is.numeric(brks)) {
+      num_brks <- brks
+    }
+    brks <- seq(from = display_range[1], to = display_range[2], length.out = num_brks)
+  }
+  if (!is.numeric(brks)) {
+    stop("Parameter 'brks' must be a numeric vector.")
+  }
+  # Check cols
+  col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"),
+                   c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"),
+                   c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"),
+                   c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"),
+                   c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497"))
+  if (is.null(cols)) {
+    if (length(col_sets) >= nmap) {
+      chosen_sets <- 1:nmap
+      chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2)
+    } else {
+      chosen_sets <- array(1:length(col_sets), nmap)
+    }
+    cols <- col_sets[chosen_sets]
+  } else {
+    if (!is.list(cols)) {
+      stop("Parameter 'cols' must be a list of character vectors.")
+    }
+    if (!all(sapply(cols, is.character))) {
+      stop("Parameter 'cols' must be a list of character vectors.")
+    }
+    if (length(cols) != nmap) {
+      stop("Parameter 'cols' must be a list of the same length as the number ",
+           "of categories ('nmap').")
+    }
+  }
+  for (i in 1:length(cols)) {
+    if (length(cols[[i]]) != (length(brks) - 1)) {
+      cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1)
+    }
+  }
+
+  # Check bar_titles
+  if (is.null(bar_titles)) {
+    if (nmap == 3) {
+      bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)")
+    } else if (nmap == 5) {
+      bar_titles <- c("Low (%)", "Below normal (%)",
+                      "Normal (%)", "Above normal (%)", "High (%)")
+    } else {
+      bar_titles <- paste0("Cat. ", 1:nmap, " (%)")
+    }
+  }
+
+  if (plot) {
+    for (k in 1:nmap) {
+      s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg,
+                     bar_limits = bar_limits, var_limits = var_limits,
+                     triangle_ends = triangle_ends, plot = TRUE,
+                     draw_separators = draw_separators,
+                     title = bar_titles[[k]], title_scale = title_scale,
+                     label_scale = label_scale, extra_margin = extra_margin)
+    }
+  } else {
+    #TODO: col_inf and col_sup
+    return(list(brks = brks, cols = cols))
+  }
+
+}
+
+#TODO: use s2dv:::.SelectDevice and remove this function here?
+.SelectDevice <- function(fileout, width, height, units, res) {
+  # This function is used in the plot functions to check the extension of the
+  # files where the graphics will be stored and select the right R device to
+  # save them.
+  # If the vector of filenames ('fileout') has files with different
+  # extensions, then it will only accept the first one, changing all the rest
+  # of the filenames to use that extension.
+
+  # We extract the extension of the filenames: '.png', '.pdf', ...
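+  # (e.g. for fileout = c("map.png", "map.pdf") this yields c(".png", ".pdf"))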
+  ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout))
+
+  if (length(ext) != 0) {
+    # If there is an extension specified, select the correct device
+    ## units of width and height set to accept inches
+    if (ext[1] == ".png") {
+      saveToFile <- function(fileout) {
+        png(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".jpeg") {
+      saveToFile <- function(fileout) {
+        jpeg(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] %in% c(".eps", ".ps")) {
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".pdf") {
+      saveToFile <- function(fileout) {
+        pdf(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".svg") {
+      saveToFile <- function(fileout) {
+        svg(filename = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".bmp") {
+      saveToFile <- function(fileout) {
+        bmp(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".tiff") {
+      saveToFile <- function(fileout) {
+        tiff(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else {
+      warning("File extension not supported; '.eps' will be used by default.")
+      ## In case there is only one filename
+      fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1])
+      ext[1] <- ".eps"
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    }
+    # Change filenames when necessary
+    if (any(ext != ext[1])) {
+      warning(paste0("Some of the extensions of the filenames provided in 'fileout' are not ", ext[1], ". The extensions are being converted to ", ext[1], "."))
+      fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout)
+    }
+  } else {
+    # Default filenames when there is no specification
+    warning("No extensions were specified in the filenames; defaulting to '.eps'.")
+    fileout <- paste0(fileout, ".eps")
+    saveToFile <- postscript
+  }
+
+  # Return the correct function with the graphical device, and the correct
+  # filenames
+  list(fun = saveToFile, files = fileout)
+}
+
diff --git a/modules/Visualization/tmp/PlotMostLikelyQuantileMap.R b/modules/Visualization/tmp/PlotMostLikelyQuantileMap.R
new file mode 100644
index 0000000000000000000000000000000000000000..9f9f1914d8de30f26adc1b285a3c60a41f264b6a
--- /dev/null
+++ b/modules/Visualization/tmp/PlotMostLikelyQuantileMap.R
@@ -0,0 +1,196 @@
+#'Plot Maps of Most Likely Quantiles
+#'
+#'@author Veronica Torralba, \email{veronica.torralba@bsc.es}, Nicolau Manubens, \email{nicolau.manubens@bsc.es}
+#'@description This function receives as main input (via the parameter \code{probs}) a collection of longitude-latitude maps, each containing the probabilities (from 0 to 1) of the different grid cells of belonging to a category. As many categories as maps provided as inputs are understood to exist. The maps of probabilities must be provided on a common rectangular regular grid, and a vector with the longitudes and a vector with the latitudes of the grid must be provided. The input maps can be provided in two forms, either as a list of multiple two-dimensional arrays (one for each category) or as a three-dimensional array, where one of the dimensions corresponds to the different categories.
+#' +#'@param probs a list of bi-dimensional arrays with the named dimensions 'latitude' (or 'lat') and 'longitude' (or 'lon'), with equal size and in the same order, or a single tri-dimensional array with an additional dimension (e.g. 'bin') for the different categories. The arrays must contain probability values between 0 and 1, and the probabilities for all categories of a grid cell should not exceed 1 when added. +#'@param lon a numeric vector with the longitudes of the map grid, in the same order as the values along the corresponding dimension in \code{probs}. +#'@param lat a numeric vector with the latitudes of the map grid, in the same order as the values along the corresponding dimension in \code{probs}. +#'@param cat_dim the name of the dimension along which the different categories are stored in \code{probs}. This only applies if \code{probs} is provided in the form of 3-dimensional array. The default expected name is 'bin'. +#'@param bar_titles vector of character strings with the names to be drawn on top of the color bar for each of the categories. As many titles as categories provided in \code{probs} must be provided. +#'@param col_unknown_cat character string with a colour representation of the colour to be used to paint the cells for which no category can be clearly assigned. Takes the value 'white' by default. +#'@param drawleg Where to draw the common colour bar. Can take values TRUE, +#' FALSE or:\cr +#' 'up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N'\cr +#' 'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr +#' 'right', 'r', 'R', 'east', 'e', 'E'\cr +#' 'left', 'l', 'L', 'west', 'w', 'W' +#'@param ... additional parameters to be sent to \code{PlotCombinedMap} and \code{PlotEquiMap}. +#'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} +#' +#'@importFrom maps map +#'@importFrom graphics box image layout mtext par plot.new +#'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff +#'@examples +#'# Simple example +#'x <- array(1:(20 * 10), dim = c(lat = 10, lon = 20)) / 200 +#'a <- x * 0.6 +#'b <- (1 - x) * 0.6 +#'c <- 1 - (a + b) +#'lons <- seq(0, 359.5, length = 20) +#'lats <- seq(-89.5, 89.5, length = 10) +#'PlotMostLikelyQuantileMap(list(a, b, c), lons, lats, +#' toptitle = 'Most likely tercile map', +#' bar_titles = paste('% of belonging to', c('a', 'b', 'c')), +#' brks = 20, width = 10, height = 8) +#' +#'# More complex example +#'n_lons <- 40 +#'n_lats <- 20 +#'n_timesteps <- 100 +#'n_bins <- 4 +#' +#'# 1. Generation of sample data +#'lons <- seq(0, 359.5, length = n_lons) +#'lats <- seq(-89.5, 89.5, length = n_lats) +#' +#'# This function builds a 3-D gaussian at a specified point in the map. +#'make_gaussian <- function(lon, sd_lon, lat, sd_lat) { +#' w <- outer(lons, lats, function(x, y) dnorm(x, lon, sd_lon) * dnorm(y, lat, sd_lat)) +#' min_w <- min(w) +#' w <- w - min_w +#' w <- w / max(w) +#' w <- t(w) +#' names(dim(w)) <- c('lat', 'lon') +#' w +#'} +#' +#'# This function generates random time series (with values ranging 1 to 5) +#'# according to 2 input weights. +#'gen_data <- function(w1, w2, n) { +#' r <- sample(1:5, n, +#' prob = c(.05, .9 * w1, .05, .05, .9 * w2), +#' replace = TRUE) +#' r <- r + runif(n, -0.5, 0.5) +#' dim(r) <- c(time = n) +#' r +#'} +#' +#'# We build two 3-D gaussians. 
+#'w1 <- make_gaussian(120, 80, 20, 30) +#'w2 <- make_gaussian(260, 60, -10, 40) +#' +#'# We generate sample data (with dimensions time, lat, lon) according +#'# to the generated gaussians +#'sample_data <- multiApply::Apply(list(w1, w2), NULL, +#' gen_data, n = n_timesteps)$output1 +#' +#'# 2. Binning sample data +#'prob_thresholds <- 1:n_bins / n_bins +#'prob_thresholds <- prob_thresholds[1:(n_bins - 1)] +#'thresholds <- quantile(sample_data, prob_thresholds) +#' +#'binning <- function(x, thresholds) { +#' n_samples <- length(x) +#' n_bins <- length(thresholds) + 1 +#' +#' thresholds <- c(thresholds, max(x)) +#' result <- 1:n_bins +#' lower_threshold <- min(x) - 1 +#' for (i in 1:n_bins) { +#' result[i] <- sum(x > lower_threshold & x <= thresholds[i]) / n_samples +#' lower_threshold <- thresholds[i] +#' } +#' +#' dim(result) <- c(bin = n_bins) +#' result +#'} +#' +#'bins <- multiApply::Apply(sample_data, 'time', binning, thresholds)$output1 +#' +#'# 3. Plotting most likely quantile/bin +#'PlotMostLikelyQuantileMap(bins, lons, lats, +#' toptitle = 'Most likely quantile map', +#' bar_titles = paste('% of belonging to', letters[1:n_bins]), +#' mask = 1 - (w1 + w2 / max(c(w1, w2))), +#' brks = 20, width = 10, height = 8) +#' +#'@export +PlotMostLikelyQuantileMap <- function(probs, lon, lat, cat_dim = 'bin', + bar_titles = NULL, + col_unknown_cat = 'white', drawleg = T, + ...) { + # Check probs + error <- FALSE + if (is.list(probs)) { + if (length(probs) < 1) { + stop("Parameter 'probs' must be of length >= 1 if provided as a list.") + } + check_fun <- function(x) { + is.numeric(x) && (length(dim(x)) == 2) + } + if (!all(sapply(probs, check_fun))) { + error <- TRUE + } + ref_dims <- dim(probs[[1]]) + equal_dims <- all(sapply(probs, function(x) identical(dim(x), ref_dims))) + if (!equal_dims) { + stop("All arrays in parameter 'probs' must have the same dimension ", + "sizes and names when 'probs' is provided as a list of arrays.") + } + num_probs <- length(probs) + probs <- unlist(probs) + dim(probs) <- c(ref_dims, map = num_probs) + cat_dim <- 'map' + } + if (!is.numeric(probs)) { + error <- TRUE + } + if (is.null(dim(probs))) { + error <- TRUE + } + if (length(dim(probs)) != 3) { + error <- TRUE + } + if (error) { + stop("Parameter 'probs' must be either a numeric array with 3 dimensions ", + " or a list of numeric arrays of the same size with the 'lon' and ", + "'lat' dimensions.") + } + dimnames <- names(dim(probs)) + + # Check cat_dim + if (is.character(cat_dim)) { + if (is.null(dimnames)) { + stop("Specified a dimension name in 'cat_dim' but no dimension names provided ", + "in 'probs'.") + } + cat_dim <- which(dimnames == cat_dim) + if (length(cat_dim) < 1) { + stop("Dimension 'cat_dim' not found in 'probs'.") + } + cat_dim <- cat_dim[1] + } else if (!is.numeric(cat_dim)) { + stop("Parameter 'cat_dim' must be either a numeric value or a ", + "dimension name.") + } + if (length(cat_dim) != 1) { + stop("Parameter 'cat_dim' must be of length 1.") + } + cat_dim <- round(cat_dim) + nprobs <- dim(probs)[cat_dim] + + # Check bar_titles + if (is.null(bar_titles)) { + if (nprobs == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nprobs == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } else { + bar_titles <- paste0("Cat. ", 1:nprobs, " (%)") + } + } + + minimum_value <- ceiling(1 / nprobs * 10 * 1.1) * 10 + + # By now, the PlotCombinedMap function is included below in this file. 
+ # In the future, PlotCombinedMap will be part of s2dverification and will + # be properly imported. + PlotCombinedMap(probs * 100, lon, lat, map_select_fun = max, + display_range = c(minimum_value, 100), + map_dim = cat_dim, + bar_titles = bar_titles, + col_unknown_map = col_unknown_cat, + drawleg = drawleg, ...) +} diff --git a/modules/test_decadal.R b/modules/test_decadal.R index 8077b7e200142a638568e5e30ff60ac71d209407..80304f978052889c19eccdf85ba9f1e99488dfe8 100644 --- a/modules/test_decadal.R +++ b/modules/test_decadal.R @@ -3,18 +3,28 @@ source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml" -recipe <- read_yaml(recipe_file) -archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive +recipe <- prepare_outputs(recipe_file) +# archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) + # Calibrate datasets -calibrated_data <- calibrate_datasets(data, recipe) +calibrated_data <- calibrate_datasets(recipe, data) + # Compute skill metrics -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) + # Compute percentiles and probability bins -probabilities <- compute_probabilities(calibrated_data$hcst, recipe) +probabilities <- compute_probabilities(recipe, calibrated_data$hcst) + # Export all data to netCDF -save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) +save_data(recipe, data, calibrated_data, skill_metrics, probabilities) + +# Plot data +plot_data(recipe, data, calibrated_data, skill_metrics, probabilities, + significance = T) + diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R new file mode 100644 index 0000000000000000000000000000000000000000..d8eb5c4eba48b064ad463dc52b6e863d02b0589e --- /dev/null +++ b/modules/test_seasonal.R @@ -0,0 +1,23 @@ +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" +recipe <- prepare_outputs(recipe_file) +# archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + +# Load datasets +data <- load_datasets(recipe) +# Calibrate datasets +calibrated_data <- calibrate_datasets(recipe, data) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(recipe, calibrated_data$hcst) +# Export all data to netCDF +save_data(recipe, data, calibrated_data, skill_metrics, probabilities) +# Plot data +plot_data(recipe, data, calibrated_data, skill_metrics, probabilities, + significance = T) diff --git a/modules/test_victoria.R b/modules/test_victoria.R deleted file mode 100644 index eb5933b3547a7d7e2ff10e8d251aee0bed0a538f..0000000000000000000000000000000000000000 --- a/modules/test_victoria.R +++ /dev/null @@ -1,20 +0,0 @@ - -source("modules/Loading/Loading.R") -source("modules/Calibration/Calibration.R") -source("modules/Skill/Skill.R") -source("modules/Saving/Saving.R") - -recipe_file <- 
"modules/Loading/testing_recipes/recipe_4.yml" -recipe <- read_yaml(recipe_file) -archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive - -# Load datasets -data <- load_datasets(recipe_file) -# Calibrate datasets -calibrated_data <- calibrate_datasets(data, recipe) -# Compute skill metrics -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) -# Compute percentiles and probability bins -probabilities <- compute_probabilities(calibrated_data$hcst, recipe) -# Export all data to netCDF -save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index 720662d49a844ab4c4fc95e44bf4fe7094b95efe..c9833d2b34413422f49dcb13d24991ed624c0d80 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -8,12 +8,12 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_daily_1.yml" -recipe <- read_yaml(recipe_file) +recipe <- prepare_outputs(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) ## Calibrate datasets @@ -65,11 +65,11 @@ names(data$obs) ) expect_equal( dim(data$hcst$data), -c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 90, latitude = 7, longitude = 11) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 3, time = 90, latitude = 7, longitude = 11, ensemble = 3) ) expect_equal( dim(data$fcst$data), -c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 90, latitude = 7, longitude = 11) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 2, time = 90, latitude = 7, longitude = 11, ensemble = 3) ) expect_equal( dim(data$hcst$Dates$start), @@ -77,12 +77,12 @@ c(sday = 1, sweek = 1, syear = 3, time = 90) ) # hcst data expect_equal( -as.vector(drop(data$hcst$data)[1, 2:3, 1:3, 1, 1]), +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[1, 2:3, 1:3, 1, 1]), c(298.5787, 293.6479, 298.5042, 293.7802, 297.8072, 293.0764), tolerance = 0.0001 ) expect_equal( -as.vector(drop(data$hcst$data)[2, , 89:90, 1, 1]), +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[2, , 89:90, 1, 1]), c(301.6978, 308.9792, 308.4501, 302.1620, 307.6034, 307.6388), tolerance = 0.0001 ) @@ -99,12 +99,12 @@ tolerance = 0.0001 # fcst data expect_equal( -as.vector(drop(data$fcst$data)[1, , 1:3, 1, 1]), +as.vector(aperm(drop(data$fcst$data), c(5, 1:4))[1, , 1:3, 1, 1]), c(295.0745, 291.1006, 296.2279, 291.6309, 295.3123, 290.8995), tolerance = 0.0001 ) expect_equal( -as.vector(drop(data$fcst$data)[2, , 89:90, 1, 1]), +as.vector(aperm(drop(data$fcst$data), c(5, 1:4))[2, , 89:90, 1, 1]), c(305.3428, 305.0657, 305.5445, 305.5681), tolerance = 0.0001 ) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 6f43aaa1a2782b9511a246a25c72f8b4eac63486..5cf1922e73e1ee4bc1a8c46f9998cc64ecbc145b 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -6,27 +6,28 @@ source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_1.yml" -recipe <- read_yaml(recipe_file) +recipe <- 
prepare_outputs(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) # Calibrate datasets suppressWarnings({invisible(capture.output( - calibrated_data <- calibrate_datasets(data, recipe) + calibrated_data <- calibrate_datasets(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(calibrated_data$hcst, recipe) +probs <- compute_probabilities(recipe, calibrated_data$hcst) ))}) # Saving @@ -35,6 +36,14 @@ save_data(recipe = recipe, data = data, calibrated_data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs, archive = archive) ))}) +# Plotting +suppressWarnings({invisible(capture.output( +plot_data(recipe = recipe, archive = archive, data = data, + calibrated_data = calibrated_data, skill_metrics = skill_metrics, + probabilities = probs, significance = T) +))}) + + outdir <- get_dir(recipe) #====================================== @@ -75,18 +84,18 @@ names(data$obs) ) expect_equal( dim(data$hcst$data), -c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4, ensemble = 2) ) expect_equal( dim(data$fcst$data), -c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4, ensemble = 2) ) expect_equal( dim(data$hcst$Dates$start), c(sday = 1, sweek = 1, syear = 4, time = 3) ) expect_equal( -as.vector(drop(data$hcst$data)[,1:2,1,2,3]), +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[, 1:2,1,2,3]), c(291.3831, 291.6227, 292.3012, 290.9779), tolerance = 0.0001 ) @@ -140,11 +149,11 @@ class(calibrated_data$fcst), ) expect_equal( dim(calibrated_data$hcst$data), -c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4, ensemble = 2) ) expect_equal( dim(calibrated_data$fcst$data), -c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4, ensemble = 2) ) expect_equal( mean(calibrated_data$fcst$data), @@ -157,7 +166,7 @@ mean(calibrated_data$hcst$data), tolerance = 0.0001 ) expect_equal( -as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +as.vector(aperm(drop(calibrated_data$hcst$data), c(5, 1:4))[1, , 2, 3, 4]), c(286.3895, 286.6408, 290.6652, 288.3759), tolerance = 0.0001 ) @@ -251,7 +260,7 @@ test_that("4. 
Saving", { expect_equal( list.files(outdir), -c("tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", +c("plots", "tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-percentiles_month11.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-skill_month11.nc") @@ -261,7 +270,17 @@ c("tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "t #) +}) + + +test_that("5. Visualization", { +expect_equal( +list.files(paste0(outdir, "/plots/")), +c("forecast_ensemble_mean.png", "forecast_most_likely_tercile.png", + "rpss.png") +) + }) # Delete files -unlink(paste0(outdir, list.files(outdir))) +unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 8fd3525d17f1997e760d95246564d54450a48c49..4dd72ebf13c632479bbd34772a54509805b7c057 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -8,25 +8,24 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml" -recipe <- read_yaml(recipe_file) -archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive +recipe <- prepare_outputs(recipe_file) # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) # Calibrate datasets suppressWarnings({invisible(capture.output( - calibrated_data <- calibrate_datasets(data, recipe) + calibrated_data <- calibrate_datasets(recipe, data) ))}) # Compute skill metrics -suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +suppressMessages({invisible(capture.output( +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(calibrated_data$hcst, recipe) +probs <- compute_probabilities(recipe, calibrated_data$hcst) ))}) #====================================== @@ -67,11 +66,11 @@ names(data$obs) ) expect_equal( dim(data$hcst$data), -c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 14, latitude = 8, longitude = 5) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 3, time = 14, latitude = 8, longitude = 5, ensemble = 3) ) expect_equal( dim(data$fcst$data), -c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 14, latitude = 8, longitude = 5) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 2, time = 14, latitude = 8, longitude = 5, ensemble = 3) ) expect_equal( dim(data$hcst$Dates$start), @@ -83,7 +82,7 @@ c(sday = 1, sweek = 1, syear = 3, time = 14) #) # hcst data expect_equal( -as.vector(drop(data$hcst$data)[1, , 1:2, 2, 2]), +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[1,, 1:2, 2, 2]), c(272.8613, 271.0689, 270.8007, 273.5594, 272.1561, 272.8729), tolerance = 0.0001 ) @@ -99,7 +98,7 @@ tolerance = 0.0001 ) # fcst data expect_equal( -as.vector(drop(data$fcst$data)[1, , 1:2, 2, 2]), +as.vector(aperm(drop(data$fcst$data), c(5, 1:4))[1, , 1:2, 2, 2]), c(271.7708, 271.8424, 272.4980, 273.5842), tolerance = 0.0001 ) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 
b6be7f2b815b20130f6a30eb289bf844e0ff7ad2..7535e8dc6fcdedeb2eb7a69fa1f13e917e6178bc 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -8,25 +8,25 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_3.yml" -recipe <- read_yaml(recipe_file) +recipe <- prepare_outputs(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) # Calibrate datasets suppressWarnings({invisible(capture.output( - calibrated_data <- calibrate_datasets(data, recipe) + calibrated_data <- calibrate_datasets(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(calibrated_data$hcst, recipe) +probs <- compute_probabilities(recipe, calibrated_data$hcst) ))}) #====================================== @@ -63,7 +63,7 @@ names(data$obs) ) expect_equal( dim(data$hcst$data), -c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 25, longitude = 16) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 25, longitude = 16, ensemble = 3) ) expect_equal( dim(data$hcst$Dates$start), @@ -71,7 +71,7 @@ c(sday = 1, sweek = 1, syear = 4, time = 3) ) # hcst data expect_equal( -as.vector(drop(data$hcst$data)[3, , 2, 2, 2]), +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[3, , 2, 2, 2]), c(278.4305, 279.5065, 280.4110, 278.7608), tolerance = 0.0001 ) @@ -113,7 +113,7 @@ names(calibrated_data), c("hcst", "fcst") ) expect_equal( -as.vector(drop(calibrated_data$hcst$data)[3, , 2, 2, 2]), +as.vector(aperm(drop(calibrated_data$hcst$data), c(5, 1:4))[3, , 2, 2, 2]), c(279.0648, 281.0578, 282.6535, 280.3137), tolerance = 0.0001 ) diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index c37b551486bb65a409079e762bb5145740ed4f0a..5b771d77fd4bf22eb74626ec7b2170602234dd0b 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -6,21 +6,20 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-seasonal_daily_1.yml" - +recipe <- prepare_outputs(recipe_file) # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) -recipe <- read_yaml(recipe_file) - +# Calibrate data suppressWarnings({invisible(capture.output( -calibrated_data <- calibrate_datasets(data, recipe) +calibrated_data <- calibrate_datasets(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) ))}) test_that("1. 
Loading", { @@ -123,17 +122,17 @@ c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, lon ) expect_equal( mean(calibrated_data$hcst$data), -289.6468, +289.5354, tolerance = 0.0001 ) expect_equal( as.vector(drop(calibrated_data$hcst$data)[1, 1:4, 2, 3, 4]), -c(295.1077, 294.2161, 294.5801, 292.6326), +c(291.5555, 291.9029, 293.2685, 290.7782), tolerance = 0.0001 ) expect_equal( range(calibrated_data$hcst$data), -c(283.9447, 297.7496), +c(284.2823, 296.7545), tolerance = 0.0001 ) }) @@ -160,7 +159,7 @@ c(time = 31, latitude = 4, longitude = 4) ) expect_equal( skill_metrics$enscorr_specs[1:3, 1, 1], -c(0.8159317, 0.8956195, 0.8355627), +c(0.7509920, 0.6514916, 0.5118371), tolerance=0.0001 ) }) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 0f9149b9cc1a8b54be6123d265827e0a6b199993..86feedfbb14eb550cdf8cb26ee00283964d3df4d 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -4,35 +4,43 @@ source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") recipe_file <- "tests/recipes/recipe-seasonal_monthly_1.yml" -recipe <- read_yaml(recipe_file) +recipe <- prepare_outputs(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) ))}) +# Calibrate data suppressWarnings({invisible(capture.output( -calibrated_data <- calibrate_datasets(data, recipe) +calibrated_data <- calibrate_datasets(recipe, data) ))}) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(calibrated_data$hcst, recipe) +probs <- compute_probabilities(recipe, calibrated_data$hcst) ))}) # Saving suppressWarnings({invisible(capture.output( save_data(recipe = recipe, data = data, calibrated_data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs, archive = archive) + skill_metrics = skill_metrics, probabilities = probs) ))}) +# Plotting +suppressWarnings({invisible(capture.output( +plot_data(recipe = recipe, data = data, calibrated_data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T) +))}) outdir <- get_dir(recipe) # ------- TESTS -------- @@ -208,7 +216,7 @@ test_that("4. Saving", { expect_equal( list.files(outdir), -c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "tas_20201101.nc", +c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", "tas-skill_month11.nc") @@ -216,5 +224,14 @@ c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "t }) +test_that("5. 
Visualization", { +expect_equal( +list.files(paste0(outdir, "/plots/")), +c("crpss.png", "enscorr_specs.png", "enscorr.png", "forecast_ensemble_mean.png", + "forecast_most_likely_tercile.png", "rpss.png") +) + +}) + # Delete files -unlink(paste0(outdir, list.files(outdir))) +unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) diff --git a/tools/data_summary.R b/tools/data_summary.R index e211e202cf63cccc183cc31bd42c3466d0752f92..34b6bd6e47d54477e1afc8cd846c4f3f7b54b0bd 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -4,27 +4,29 @@ ## TODO: Adapt to daily/subseasonal cases ## TODO: Add check for missing files/NAs by dimension -data_summary <- function(object, frequency) { +data_summary <- function(data_cube, recipe) { # Get name, leadtime months and date range - object_name <- deparse(substitute(object)) - if (tolower(frequency) == "monthly_mean") { + object_name <- deparse(substitute(data_cube)) + if (recipe$Analysis$Variables$freq == "monthly_mean") { date_format <- '%b %Y' - } else if (tolower(frequency) == "daily_mean") { + } else if (recipe$Analysis$Variables$freq == "daily_mean") { date_format <- '%b %d %Y' } - months <- unique(format(as.Date(object$Dates[[1]]), format = '%B')) + months <- unique(format(as.Date(data_cube$Dates[[1]]), format = '%B')) months <- paste(as.character(months), collapse=", ") - sdate_min <- format(min(as.Date(object$Dates[[1]])), format = date_format) - sdate_max <- format(max(as.Date(object$Dates[[1]])), format = date_format) + sdate_min <- format(min(as.Date(data_cube$Dates[[1]])), format = date_format) + sdate_max <- format(max(as.Date(data_cube$Dates[[1]])), format = date_format) - print("DATA SUMMARY:") + # Create log instance and sink output to logfile and terminal + info(recipe$Run$logger, "DATA SUMMARY:") + sink(recipe$Run$logfile, append = TRUE, split = TRUE) print(paste0(object_name, " months: ", months)) print(paste0(object_name, " range: ", sdate_min, " to ", sdate_max)) print(paste0(object_name, " dimensions: ")) - print(dim(object$data)) + print(dim(data_cube$data)) print(paste0("Statistical summary of the data in ", object_name, ":")) - print(summary(object$data)) + print(summary(data_cube$data)) print("---------------------------------------------") - + sink() } diff --git a/tools/libs.R b/tools/libs.R index 2b29835927f2443e7f5e8f67cacc345db0436633..a0767f76c79105d9e60cc88cc76bde875136abc4 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -12,6 +12,8 @@ library(easyNCDF) library(CSTools) library(lubridate) library(PCICt) +library(RColorBrewer) +library(grDevices) # # library(parallel) # library(pryr) # To check mem usage. diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 18cc2e5827508e9d53839ccc6c45103aa956bcaf..8a6831788769db69196c0c4be100c2d00e7f2f68 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -5,52 +5,57 @@ #'the recipe. It returns an object of class logger that stores information on #'the recipe configuration and errors. 
#'
-#'@param recipe Auto-S2S configuration recipe as returned by read_yaml()
+#'@param recipe_file path to a YAML file with an Auto-S2S configuration recipe
 #'
-#'@return list contaning logger object, log filename and log directory name
+#'@return list containing the recipe with the logger, the log file name and the log directory name
 #'
 #'@import log4r
+#'@import yaml
 #'
 #'@examples
 #'setwd("/esarchive/scratch/vagudets/repos/auto-s2s/")
 #'library(yaml)
-#'recipe <- read_yaml("modules/data_load/recipe_1.yml")
-#'logger <- prepare_outputs(recipe)
-#'folder <- logger$foldername
-#'log_file <- logger$logname
-#'logger <- logger$logger
+#'recipe <- prepare_outputs("modules/data_load/recipe_1.yml")
+#'info(recipe$Run$logger, "This is an info message")
 #'
 #'@export
-prepare_outputs <- function(recipe) {
+prepare_outputs <- function(recipe_file) {
 
   # recipe: the content of the readed recipe
   # file: the recipe file name
+  recipe <- read_yaml(recipe_file)
+  recipe$recipe_path <- recipe_file
+  recipe$name <- tools::file_path_sans_ext(basename(recipe_file))
+
   output_dir = recipe$Run$output_dir
   # Create output folders:
   folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)), "_",
                         gsub(" ", "", gsub(":", "", gsub("-", "", Sys.time()))))
+
   print("Saving all outputs to:")
   print(output_dir)
   print(folder_name)
-  dir.create(file.path(output_dir, folder_name, 'plots'), recursive = TRUE)
-  dir.create(file.path(output_dir, folder_name, 'outputs'))
+  dir.create(file.path(output_dir, folder_name, 'outputs'), recursive = TRUE)
   dir.create(file.path(output_dir, folder_name, 'logs'))
   dir.create(file.path(output_dir, folder_name, 'logs', 'recipes'))
-  file.copy(recipe$filepath, file.path(output_dir, folder_name, 'logs'))
+  file.copy(recipe$recipe_path, file.path(output_dir, folder_name, 'logs',
+                                          'recipes'))
   logfile <- file.path(output_dir, folder_name, 'logs', 'log.txt')
+  file.create(logfile)
 
   # Set default behaviour of log output file:
   if (is.null(recipe$Run)) {
     recipe$Run <- list(Loglevel = 'INFO', Terminal = TRUE)
   }
-  if (is.null(recipe$Run$Loglevel)) {
+  if (is.null(recipe$Run$Loglevel)) {
     recipe$Run$Loglevel <- 'INFO'
   }
+
   if (!is.logical(recipe$Run$Terminal)) {
     recipe$Run$Terminal <- TRUE
   }
@@ -61,9 +66,13 @@ prepare_outputs <- function(recipe) {
                      layout = default_log_layout())))
   } else {
     logger <- logger(threshold = recipe$Run$Loglevel,
-                     appenders = list(file_appender(logfile, append = TRUE,
+                     appenders = list(file_appender(logfile, append = TRUE,
                                                     layout = default_log_layout())))
   }
-  return(list(logger = logger, logname = logfile,
-              foldername = file.path(output_dir, folder_name)))
+
+  recipe$Run$output_dir <- file.path(output_dir, folder_name)
+  recipe$Run$logger <- logger
+  recipe$Run$logfile <- logfile
+
+  return(recipe)
 }
diff --git a/tools/tmp/CST_Calibration.R b/tools/tmp/CST_Calibration.R
deleted file mode 100644
index 79b41951e13ed3ec2ba5f187165ef9c675b44ffa..0000000000000000000000000000000000000000
--- a/tools/tmp/CST_Calibration.R
+++ /dev/null
@@ -1,563 +0,0 @@
-#'Forecast Calibration
-#'
-#'@author Verónica Torralba, \email{veronica.torralba@bsc.es}
-#'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be}
-#'@description Equivalent to function \code{Calibration} but for objects of class \code{s2dv_cube}.
-#'
-#'@param exp an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal hindcast experiment data in the element named \code{$data}. The hindcast is used to calibrate the forecast in case the forecast is provided; if not, the same hindcast will be calibrated instead.
-#'@param obs an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the observed data in the element named \code{$data}.
-#'@param exp_cor an optional object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}. If the forecast is provided, it will be calibrated using the hindcast and observations; if not, the hindcast will be calibrated instead.
-#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}.
-#'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. In case the forecast is provided, any chosen eval.method is over-ruled and a third option is used.
-#'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result.
-#'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.
-#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}.
-#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.
-#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.
-#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'.
-#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.
-#'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.
-#'@return an object of class \code{s2dv_cube} containing the calibrated forecasts in the element \code{$data} with the same dimensions as the one in the exp object.
-#'
-#'@importFrom s2dv InsertDim
-#'@import abind
-#'
-#'@seealso \code{\link{CST_Load}}
-#'
-#'@examples
-#'# Example 1:
-#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
-#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
-#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
-#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
-#'lon <- seq(0, 30, 5)
-#'lat <- seq(0, 25, 5)
-#'exp <- list(data = mod1, lat = lat, lon = lon)
-#'obs <- list(data = obs1, lat = lat, lon = lon)
-#'attr(exp, 'class') <- 's2dv_cube'
-#'attr(obs, 'class') <- 's2dv_cube'
-#'a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample")
-#'str(a)
-#'
-#'# Example 2:
-#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
-#'mod2 <- 1 : (1 * 3 * 1 * 5 * 6 * 7)
-#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
-#'dim(mod2) <- c(dataset = 1, member = 3, sdate = 1, ftime = 5, lat = 6, lon = 7)
-#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
-#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
-#'lon <- seq(0, 30, 5)
-#'lat <- seq(0, 25, 5)
-#'exp <- list(data = mod1, lat = lat, lon = lon)
-#'obs <- list(data = obs1, lat = lat, lon = lon)
-#'exp_cor <- list(data = mod2, lat = lat, lon = lon)
-#'attr(exp, 'class') <- 's2dv_cube'
-#'attr(obs, 'class') <- 's2dv_cube'
-#'attr(exp_cor, 'class') <- 's2dv_cube'
-#'a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, cal.method = "evmos")
-#'str(a)
-#'@export
-
-CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min",
-                            eval.method = "leave-one-out", multi.model = FALSE,
-                            na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL,
-                            memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) {
-
-  if(!missing(multi.model) & !(cal.method == "mse_min")){
-    warning(paste0("The multi.model parameter is ignored when using the calibration method ", cal.method))
-  }
-
-  if(is.null(exp_cor)){ #exp will be used to calibrate and will also be calibrated: "calibrate hindcast"
-    if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) {
-      stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ",
-           "as output by CSTools::CST_Load.")
-    }
-    exp$data <- Calibration(exp = exp$data, obs = obs$data, exp_cor = NULL,
-                            cal.method = cal.method,
-                            eval.method = eval.method,
-                            multi.model = multi.model,
-                            na.fill = na.fill, na.rm = na.rm,
-                            apply_to = apply_to, alpha = alpha,
-                            memb_dim = memb_dim, sdate_dim = sdate_dim,
-                            ncores = ncores)
-    exp$Datasets <- c(exp$Datasets, obs$Datasets)
-    exp$source_files <- c(exp$source_files, obs$source_files)
-
-    return(exp)
-
-  }else{ #if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast"
-    eval.method = "hindcast-vs-forecast" #if exp_cor is provided, eval.method is overrruled (because if exp_cor is provided, the train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor"; no need for "leave-one-out" or "in-sample")
-    if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube") || !inherits(exp_cor, "s2dv_cube")) {
-      stop("Parameter 'exp', 'obs' and 'exp_cor' must be of the class 's2dv_cube', ",
-           "as output by CSTools::CST_Load.")
-    }
-    exp_cor$data <- Calibration(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data,
-                                cal.method = cal.method,
-                                eval.method = eval.method,
-                                multi.model = multi.model,
-                                na.fill = na.fill, na.rm = na.rm,
-                                apply_to = apply_to, alpha = alpha,
-                                memb_dim = memb_dim, sdate_dim = sdate_dim,
-                                ncores = ncores)
-    exp_cor$Datasets <- c(exp_cor$Datasets, obs$Datasets)
-    exp_cor$source_files <- c(exp_cor$source_files, exp$source_files, obs$source_files)
-
-    return(exp_cor)
-
-  }
-}
-
-
-
-#'Forecast Calibration
-#'
-#'@author Verónica Torralba, \email{veronica.torralba@bsc.es}
-#'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be}
-#'@description Five types of member-by-member bias correction can be performed. The \code{"bias"} method corrects the bias only, the \code{"evmos"} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014).
-#'@description Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible.
-#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x
-#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate predictions underestimate the predictability of the read world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146
-#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011
-#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. doi:10.1002/qj.2397
-#'
-#'@param exp a multidimensional array with named dimensions (at least 'sdate' and 'member') containing the seasonal hindcast experiment data. The hindcast is used to calibrate the forecast in case the forecast is provided; if not, the same hindcast will be calibrated instead.
-#'@param obs a multidimensional array with named dimensions (at least 'sdate') containing the observed data.
-#'@param exp_cor an optional multidimensional array with named dimensions (at least 'sdate' and 'member') containing the seasonal forecast experiment data. If the forecast is provided, it will be calibrated using the hindcast and observations; if not, the hindcast will be calibrated instead.
-#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}.
-#'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. In case the forecast is provided, any chosen eval.method is over-ruled and a third option is used.
-#'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result.
-#'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.
-#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}.
-#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.
-#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.
-#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'.
-#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.
-#'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.
-#'@return an array containing the calibrated forecasts with the same dimensions as the \code{exp} array.
-#'
-#'@importFrom s2dv InsertDim MeanDims Reorder
-#'@import abind
-#'@import multiApply
-#'@importFrom s2dverification Subset
-#'
-#'@seealso \code{\link{CST_Load}}
-#'
-#'@details
-#'Both the \code{na.fill} and \code{na.rm} parameters can be used to indicate how the function has to handle the NA values. The \code{na.fill} parameter checks whether there are more than three forecast-observations pairs to perform the computation. In case there are three or less pairs, the computation is not carried out, and the value returned by the function depends on the value of this parameter (either NA if \code{na.fill == TRUE} or the uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} is used to indicate the function whether to remove the missing values during the computation of the parameters needed to perform the calibration.
-#'
-#'@examples
-#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
-#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
-#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
-#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
-#'a <- Calibration(exp = mod1, obs = obs1)
-#'str(a)
-#'@export
-Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min",
-                        eval.method = "leave-one-out",
-                        multi.model = FALSE, na.fill = TRUE,
-                        na.rm = TRUE, apply_to = NULL, alpha = NULL,
-                        memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) {
-
-  dim.exp <- dim(exp)
-  amt.dims.exp <- length(dim.exp)
-  dim.obs <- dim(obs)
-  amt.dims.obs <- length(dim.obs)
-  dim.names.exp <- names(dim.exp)
-  dim.names.obs <- names(dim.obs)
-  if(!is.null(exp_cor)){
-    dim.exp_cor <- dim(exp_cor)
-    amt.dims.exp_cor <- length(dim.exp_cor)
-    dim.names.exp_cor <- names(dim.exp_cor)
-  }
-  if (is.null(memb_dim) || !is.character(memb_dim)) {
-    stop("Parameter 'memb_dim' should be a character string indicating the",
-         "name of the dimension where members are stored in 'exp'.")
-  }
-  if (length(memb_dim) > 1) {
-    memb_dim <- memb_dim[1]
-    warning("Parameter 'memb_dim' has length greater than 1 and only",
-            " the first element will be used.")
-  }
-
-  if (is.null(sdate_dim) || !is.character(sdate_dim)) {
-    stop("Parameter 'sdate_dim' should be a character string indicating the",
-         "name of the dimension where start dates are stored in 'exp'.")
-  }
-  if (length(sdate_dim) > 1) {
-    sdate_dim <- sdate_dim[1]
-    warning("Parameter 'sdate_dim' has length greater than 1 and only",
-            " the first element will be used.")
-  }
-  target.dim.names.exp <- c(memb_dim, sdate_dim)
-  target.dim.names.obs <- sdate_dim
-
-  if (!all(target.dim.names.exp %in% dim.names.exp)) {
-    stop("Parameter 'exp' must have the dimensions defined in memb_dim ",
-         "and sdate_dim.")
-  }
-
-  if(!is.null(exp_cor)){
-    if (!all(target.dim.names.exp %in% dim.names.exp_cor)) {
-      stop("Parameter 'exp_cor' must have the dimensions defined in memb_dim ",
-           "and sdate_dim.")
-    }
-  }
-
-  if (!all(c(sdate_dim) %in% dim.names.obs)) {
-    stop("Parameter 'obs' must have the dimension defined in sdate_dim ",
-         "parameter.")
-  }
-
-  if (any(is.na(exp))) {
-    warning("Parameter 'exp' contains NA values.")
-  }
-
-  if(!is.null(exp_cor)){
-    if (any(is.na(exp_cor))) {
-      warning("Parameter 'exp_cor' contains NA values.")
-    }
-  }
-
-  if (any(is.na(obs))) {
-    warning("Parameter 'obs' contains NA values.")
-  }
-
-  if (memb_dim %in% names(dim(obs))) {
-    obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected")
-  }
-  data.set.sufficiently.large.out <-
-    Apply(data = list(exp = exp, obs = obs),
-          target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs),
-          ncores = ncores,
-          fun = .data.set.sufficiently.large)$output1
-
-  if(!all(data.set.sufficiently.large.out)){
-    if(na.fill){
-      warning("Some forecast data could not be corrected due to data lack",
-              " and is replaced with NA values")
-    } else {
-      warning("Some forecast data could not be corrected due to data lack",
-              " and is replaced with uncorrected values")
-    }
-  }
-
-  if (!na.rm %in% c(TRUE,FALSE)) {
-    stop("Parameter 'na.rm' must be TRUE or FALSE.")
-  }
-  if (cal.method == 'rpc-based') {
-    if (is.null(apply_to)) {
-      apply_to <- 'sign'
-      warning("'apply_to' cannot be NULL for 'rpc-based' method so it has been set to 'sign', as in Eade et al. (2014).")
(2014).") - } else if (!apply_to %in% c('all','sign')) { - stop("'apply_to' must be either 'all' or 'sign' when 'rpc-based' method is used.") - } - if (apply_to == 'sign') { - if (is.null(alpha)) { - alpha <- 0.1 - warning("'alpha' cannot be NULL for 'rpc-based' method so it has been set to 0.1, as in Eade et al. (2014).") - } else if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1) { - stop("'alpha' must be a number between 0 and 1.") - } - } - } - - if(is.null(exp_cor)){ - calibrated <- Apply(data = list(exp = exp, obs = obs), - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), - ncores = ncores, output_dims = target.dim.names.exp, - fun = .cal)$output1 - dexes <- match(names(dim(exp)), names(dim(calibrated))) - calibrated <- aperm(calibrated, dexes) - dimnames(calibrated) <- dimnames(exp)[dexes] - }else{ - calibrated <- Apply(data = list(exp = exp, obs = obs, exp_cor = exp_cor), - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs, exp_cor = target.dim.names.exp), - ncores = ncores, output_dims = target.dim.names.exp, - fun = .cal)$output1 - dexes <- match(names(dim(exp_cor)), names(dim(calibrated))) - calibrated <- aperm(calibrated, dexes) - dimnames(calibrated) <- dimnames(exp_cor)[dexes] - } - - return(calibrated) -} - - -.data.set.sufficiently.large <- function(exp, obs){ - amt.min.samples <- 3 - amt.good.pts <- sum(!is.na(obs) & !apply(exp, c(2), function(x) all(is.na(x)))) - return(amt.good.pts > amt.min.samples) -} - -.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor){ - if(eval.method == "leave-one-out"){ - dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x, - train.dexes = seq(1, amt.points)[-x]))) - } else if (eval.method == "in-sample"){ - dexes.lst <- list(list(eval.dexes = seq(1, amt.points), - train.dexes = seq(1, amt.points))) - } else if (eval.method == "hindcast-vs-forecast"){ - dexes.lst <- list(list(eval.dexes = seq(1,amt.points_cor), - train.dexes = seq(1, amt.points))) - } else { - stop(paste0("unknown sampling method: ",eval.method)) - } - return(dexes.lst) -} - -.cal <- function(exp, obs, exp_cor = NULL, cal.method, eval.method, multi.model, na.fill, na.rm, apply_to, alpha) { - if(is.null(exp_cor)){ - exp_cor <- exp ## generate a copy of exp so that the same function can run - ## when exp_cor is provided and when it's not - } - obs <- as.vector(obs) - dims.fc <- dim(exp) - dims.fc_cor <- dim(exp_cor) ## new line - amt.mbr <- dims.fc[1] - amt.sdate <- dims.fc[2] - amt.sdate_cor <- dims.fc_cor[2] ## new line - var.cor.fc <- NA * exp_cor ## modified line (exp_cor instead of exp); - ## in case of exp_cor not provided, at this point exp_cor - ## is already the same as exp so no change - names(dim(var.cor.fc)) <- dims.fc - - if(!.data.set.sufficiently.large(exp = exp, obs = obs)){ - if(na.fill){ - return(var.cor.fc) - } else { - var.cor.fc[] <- exp[] - return(var.cor.fc) - } - } - eval.train.dexeses <- .make.eval.train.dexes(eval.method, amt.points = amt.sdate, - amt.points_cor = amt.sdate_cor) - amt.resamples <- length(eval.train.dexeses) - for (i.sample in seq(1, amt.resamples)) { - # defining training (tr) and evaluation (ev) subsets - eval.dexes <- 
-    train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes
-
-    fc.ev <- exp_cor[ , eval.dexes, drop = FALSE] ## modified line (exp_cor instead of exp)
-                                                  ## fc.ev is used to evaluate (not train; train should be done with exp (hindcast))
-    fc.tr <- exp[ , train.dexes]
-    obs.tr <- obs[train.dexes , drop = FALSE]
-
-    if(cal.method == "bias"){
-      var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm)
-    } else if(cal.method == "evmos"){ # forecast correction implemented
-      #calculate ensemble and observational characteristics
-      quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm)
-      #calculate value for regression parameters
-      init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm))
-      #correct evaluation subset
-      var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm)
-    } else if (cal.method == "mse_min"){
-      #calculate ensemble and observational characteristics
-      quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm)
-      #calculate value for regression parameters
-      init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm)
-      #correct evaluation subset
-      var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm)
-    } else if (cal.method == "crps_min"){
-      #calculate ensemble and observational characteristics
-      quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm)
-      #calculate initial value for regression parameters
-      init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001)
-      init.par[3] <- sqrt(init.par[3])
-      #calculate regression parameters on training dataset
-      optim.tmp <- optim(par = init.par,
-                         fn = .calc.crps.opt,
-                         gr = .calc.crps.grad.opt,
-                         quant.obs.fc = quant.obs.fc.tr,
-                         na.rm = na.rm,
-                         method = "BFGS")
-
-      mbm.par <- optim.tmp$par
-      #correct evaluation subset
-      var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm)
-    } else if (cal.method == 'rpc-based') {
-      ens_mean.ev <- multiApply::Apply(data = fc.ev, target_dims = names(amt.mbr), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean
-      ens_mean.tr <- multiApply::Apply(data = fc.tr, target_dims = names(amt.mbr), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean
-      ens_spread.tr <- multiApply::Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(amt.sdate), fun = "-")$output1 ## Ensemble spread
-      exp_mean.tr <- mean(fc.tr, na.rm = na.rm) ## Mean (climatology)
-      var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) ## Ensemble mean variance
-      var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) ## Variance of ensemble members about ensemble mean (= spread)
-      var_obs.tr <- var(obs.tr, na.rm = na.rm) ## Variance in the observations
-      r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) ## Correlation between observations and the ensemble mean
-      if ((apply_to == 'all') || (apply_to == 'sign' && cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) {
-        ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr
-        var.cor.fc[ , eval.dexes] <- s2dv::Reorder(data = multiApply::Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, ens_mean_cal = ens_mean_cal), target_dims = names(amt.sdate), fun = .CalibrationMembersRPC, var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1,
-                                                   order = names(dims.fc))
-        dim(var.cor.fc) <- dims.fc
-      } else { ## no significant -> replacing with observed climatology
-        var.cor.fc[ , eval.dexes] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev))
-      }
-    } else {
-      stop("unknown calibration method: ",cal.method)
-    }
-  }
-  return(var.cor.fc)
-}
-
-.calc.obs.fc.quant <- function(obs, fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations
-  amt.mbr <- dim(fc)[1]
-  obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr)
-  fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm)
-  cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs")
-  obs.av <- mean(obs, na.rm = na.rm)
-  obs.sd <- sd(obs, na.rm = na.rm)
-  return(
-    append(
-      .calc.fc.quant(fc = fc, na.rm = na.rm),
-      list(
-        obs.per.ens = obs.per.ens,
-        cor.obs.fc = cor.obs.fc,
-        obs.av = obs.av,
-        obs.sd = obs.sd
-      )
-    )
-  )
-}
-
-.calc.obs.fc.quant.ext <- function(obs, fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations
-  amt.mbr <- dim(fc)[1]
-  obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr)
-  fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm)
-  cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs")
-  obs.av <- mean(obs, na.rm = na.rm)
-  obs.sd <- sd(obs, na.rm = na.rm)
-
-  return(
-    append(
-      .calc.fc.quant.ext(fc = fc, na.rm = na.rm),
-      list(
-        obs.per.ens = obs.per.ens,
-        cor.obs.fc = cor.obs.fc,
-        obs.av = obs.av,
-        obs.sd = obs.sd
-      )
-    )
-  )
-}
-
-
-.calc.fc.quant <- function(fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts
-  amt.mbr <- dim(fc)[1]
-  fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm)
-  fc.ens.av.av <- mean(fc.ens.av, na.rm = na.rm)
-  fc.ens.av.sd <- sd(fc.ens.av, na.rm = na.rm)
-  fc.ens.av.per.ens <- InsertDim(fc.ens.av, posdim = 1, lendim = amt.mbr)
-  fc.ens.sd <- apply(fc, c(2), sd, na.rm = na.rm)
-  fc.ens.var.av.sqrt <- sqrt(mean(fc.ens.sd^2, na.rm = na.rm))
-  fc.dev <- fc - fc.ens.av.per.ens
-  fc.dev.sd <- sd(fc.dev, na.rm = na.rm)
-  fc.av <- mean(fc, na.rm = na.rm)
-  fc.sd <- sd(fc, na.rm = na.rm)
-  return(
-    list(
-      fc.ens.av = fc.ens.av,
-      fc.ens.av.av = fc.ens.av.av,
-      fc.ens.av.sd = fc.ens.av.sd,
-      fc.ens.av.per.ens = fc.ens.av.per.ens,
-      fc.ens.sd = fc.ens.sd,
-      fc.ens.var.av.sqrt = fc.ens.var.av.sqrt,
-      fc.dev = fc.dev,
-      fc.dev.sd = fc.dev.sd,
-      fc.av = fc.av,
-      fc.sd = fc.sd
-    )
-  )
-}
-
-.calc.fc.quant.ext <- function(fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts
-
-  amt.mbr <- dim(fc)[1]
-  repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr)
-  repmat2.tmp <- aperm(repmat1.tmp, c(2, 1, 3))
-  spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = na.rm)
-  spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr)
-
-  return(
-    append(.calc.fc.quant(fc, na.rm = na.rm),
-           list(spr.abs = spr.abs, spr.abs.per.ens = spr.abs.per.ens))
-  )
-}
-
-#Below are the core or elementary functions to calculate the regression parameters for the different methods
-.calc.mse.min.par <- function(quant.obs.fc, multi.model = F, na.rm){
-  par.out <- rep(NA, 3)
-
-  if(multi.model){
-    par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.ens.var.av.sqrt)
-  } else {
-    par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.dev.sd)
-  }
-  par.out[2] <- with(quant.obs.fc, abs(cor.obs.fc) * obs.sd / fc.ens.av.sd)
-  par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm)
-
-  return(par.out)
-}
-.calc.evmos.par <- function(quant.obs.fc, na.rm){
-  par.out <- rep(NA, 2)
-  par.out[2] <- with(quant.obs.fc, obs.sd / fc.sd)
-  par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm)
-  return(par.out)
-}
-#Below are the core or elementary functions to calculate the functions necessary for the minimization of crps
-.calc.crps.opt <- function(par, quant.obs.fc, na.rm){
-  return(
-    with(quant.obs.fc,
-         mean(abs(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens +
-                                 ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)), na.rm = na.rm) -
-         mean(abs((par[3])^2 * spr.abs + par[4]) / 2., na.rm = na.rm)
-    )
-  )
-}
-
-.calc.crps.grad.opt <- function(par, quant.obs.fc, na.rm){
-  sgn1 <- with(quant.obs.fc,sign(obs.per.ens -
-               (par[1] + par[2] * fc.ens.av.per.ens +
-                ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)))
-  sgn2 <- with(quant.obs.fc, sign((par[3])^2 + par[4] / spr.abs.per.ens))
-  sgn3 <- with(quant.obs.fc,sign((par[3])^2 * spr.abs + par[4]))
-  deriv.par1 <- mean(sgn1, na.rm = na.rm)
-  deriv.par2 <- with(quant.obs.fc, mean(sgn1 * fc.dev, na.rm = na.rm))
-  deriv.par3 <- with(quant.obs.fc,
-                     mean(2* par[3] * sgn1 * sgn2 * fc.ens.av.per.ens, na.rm = na.rm) -
-                     mean(spr.abs * sgn3, na.rm = na.rm) / 2.)
-  deriv.par4 <- with(quant.obs.fc,
-                     mean(sgn1 * sgn2 * fc.ens.av.per.ens / spr.abs.per.ens, na.rm = na.rm) -
-                     mean(sgn3, na.rm = na.rm) / 2.)
-  return(c(deriv.par1, deriv.par2, deriv.par3, deriv.par4))
-}
-
-#Below are the core or elementary functions to correct the evaluation set based on the regression parameters
-.correct.evmos.fc <- function(fc, par, na.rm){
-  quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm)
-  return(with(quant.fc.mp, par[1] + par[2] * fc))
-}
-.correct.mse.min.fc <- function(fc, par, na.rm){
-  quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm)
-  return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * par[3]))
-}
-.correct.crps.min.fc <- function(fc, par, na.rm){
-  quant.fc.mp <- .calc.fc.quant.ext(fc = fc, na.rm = na.rm)
-  return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * abs((par[3])^2 + par[4] / spr.abs)))
-}
-
-# Function to calibrate the individual members with the RPC-based method
-.CalibrationMembersRPC <- function(exp, ens_mean, ens_mean_cal, var_obs, var_noise, r){
-  member_cal <- (exp - ens_mean) * sqrt(var_obs) * sqrt(1 - r^2) / sqrt(var_noise) + ens_mean_cal
-  return(member_cal)
-}
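
A minimal usage sketch of the refactored prepare_outputs() above. The source() path is an assumption about where the function lives in the repository, and the recipe path is the illustrative one from the roxygen example; both may differ on a given system:

library(log4r)
library(yaml)
source("tools/prepare_outputs.R")  # assumed location of prepare_outputs()

# prepare_outputs() now reads the recipe file itself and returns the recipe
# with the logger, log file and output directory attached under recipe$Run:
recipe <- prepare_outputs("modules/data_load/recipe_1.yml")
info(recipe$Run$logger, paste("Saving outputs to", recipe$Run$output_dir))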
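
With tools/tmp/CST_Calibration.R deleted, the calibration functions presumably come from the maintained CSTools package rather than this temporary copy (an assumption; the diff does not state the replacement). A quick check against the package version, reusing Example 1 from the deleted roxygen header:

library(CSTools)
# Toy hindcast (3 members, 4 start dates) and matching observations:
mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7)
dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7)
# Member-by-member calibration of the hindcast against the observations:
a <- Calibration(exp = mod1, obs = obs1, cal.method = "mse_min")
str(a)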