From 597db0b7ba4612b4cd1468bbe991742623076610 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 7 Mar 2022 16:51:57 +0100 Subject: [PATCH 01/47] Changed recipe to recipe --- modules/data_load/load.R | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 9af22eec..4e47c33a 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -21,25 +21,25 @@ source("tools/libs.R") # ------------------------------------------------------------------------------------------- # Set params ----------------------------------------- -hcst.inityear <- recipe$params$Time$hcst_start -hcst.endyear <- recipe$params$Time$hcst_end -ltmin <- recipe$params$Time$leadtimemin -ltmax <- recipe$params$Time$leadtimemax -lats.min <- recipe$params$Region$latmin -lats.max <- recipe$params$Region$latmax -lons.min <- recipe$params$Region$lonmin -lons.max <- recipe$params$Region$lonmax -ref.name <- recipe$params$Datasets$Reference$name -exp.name <- recipe$params$Datasets$System$name - -variable <- recipe$params$Variables$name -store.freq <- recipe$params$Variables$freq +hcst.inityear <- recipe$Analysis$Time$hcst_start +hcst.endyear <- recipe$Analysis$Time$hcst_end +ltmin <- recipe$Analysis$Time$leadtimemin +ltmax <- recipe$Analysis$Time$leadtimemax +lats.min <- recipe$Analysis$Region$latmin +lats.max <- recipe$Analysis$Region$latmax +lons.min <- recipe$Analysis$Region$lonmin +lons.max <- recipe$Analysis$Region$lonmax +ref.name <- recipe$Analysis$Datasets$Reference$name +exp.name <- recipe$Analysis$Datasets$System$name + +variable <- recipe$Analysis$Variables$name +store.freq <- recipe$Analysis$Variables$freq stream <- verifications$stream sdates <- verifications$fcst.sdate ## TODO: define fcst.name -##fcst.name <- recipe$params$Datasets$System[[sys]]$name +##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name # get sdates array sdates <- dates2load(recipe, logger) -- 
GitLab From ab1bae507314c99a137a06f30b6d359873163aca Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 7 Mar 2022 17:05:58 +0100 Subject: [PATCH 02/47] Change recipe to read_yaml, adjust parameter names --- modules/data_load/load.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 4e47c33a..c15bc592 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -9,9 +9,9 @@ source("tools/libs.R") # RECIPE FOR TESTING # ------------------------------------------------------------------------------------------- -# recipe <- read_yaml("/esarchive/scratch/lpalma/git/auto-s2s/modules/data_load/recipe_1.yml") + recipe <- read_yaml("/esarchive/scratch/lpalma/git/auto-s2s/modules/data_load/recipe_1.yml") args <- NULL; args[1] <- "/esarchive/scratch/lpalma/git/auto-s2s/modules/data_load/recipe_1.yml" - recipe <- Recipe$new(filename=args[1]) + # recipe <- Recipe$new(filename=args[1]) recipe$filename <- args[1] # Create output folder and log: logger <- prepare_outputs(recipe = recipe) @@ -45,7 +45,7 @@ sdates <- verifications$fcst.sdate sdates <- dates2load(recipe, logger) # get esarchive datasets dict: -archive <- read_yaml(paste0(recipe$run_conf_conf$code_dir,"conf/archive.yml"))$archive +archive <- read_yaml(paste0(recipe$Run$code_dir,"conf/archive.yml"))$archive exp_descrip <- archive[[exp.name]] freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) -- GitLab From 9b3251fa05ff6292280b3e5db08ae87804544dee Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 7 Mar 2022 17:14:48 +0100 Subject: [PATCH 03/47] Changed recipe to recipe to work with read_yaml --- modules/data_load/regrid.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/data_load/regrid.R b/modules/data_load/regrid.R index a8091997..88c74236 100644 --- a/modules/data_load/regrid.R +++ b/modules/data_load/regrid.R @@ -1,27 +1,27 @@ get_regrid_params <- 
function(recipe,archive){ - exp.name <- recipe$params$Datasets$System$name - ref.name <- recipe$params$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + ref.name <- recipe$Analysis$Datasets$Reference$name exp_descrip <- archive[[exp.name]] reference_descrip <- archive[[ref.name]] - if (tolower(recipe$params$Regrid$type) == 'to_reference') { + if (tolower(recipe$Analysis$Regrid$type) == 'to_reference') { regrid_params <- list(fcst.gridtype=reference_descrip$regrid, - fcst.gridmethod=recipe$params$Regrid$method, + fcst.gridmethod=recipe$Analysis$Regrid$method, fcst.transform=CDORemapper, obs.gridtype=NULL, obs.gridmethod=NULL, obs.transform=NULL) - } else if (tolower(recipe$params$Regrid$type) == 'to_system') { + } else if (tolower(recipe$Analysis$Regrid$type) == 'to_system') { regrid_params <- list(fcst.gridtype=NULL, fcst.gridmethod=NULL, fcst.transform=NULL, obs.gridtype=exp_descrip$regrid, - obs.gridmethod=recipe$params$Regrid$method, + obs.gridmethod=recipe$Analysis$Regrid$method, obs.transform=CDORemapper) } ##TODO: Else condition? middle interpolation? -- GitLab From a26d10c0d6f17c933988544285b1f53a6edfda51 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 8 Mar 2022 09:08:07 +0100 Subject: [PATCH 04/47] Changed params to Analysis to read recipe --- modules/data_load/dates2load.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R index 17c5cb99..c7c392a5 100644 --- a/modules/data_load/dates2load.R +++ b/modules/data_load/dates2load.R @@ -3,7 +3,7 @@ # both for the hcst and fcst. 
dates2load <- function(recipe, logger){ - recipe <- recipe$params$Time + recipe <- recipe$Analysis$Time # hcst dates @@ -41,6 +41,3 @@ add_dims <- function(data, type){ return(data) } - - - -- GitLab From 624fc20c87d6c046414a4adb1bd005b007361d9f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 8 Mar 2022 09:17:06 +0100 Subject: [PATCH 05/47] Added calls to yaml and CSTools, removed call to add_dims.R --- tools/libs.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/libs.R b/tools/libs.R index 77dcf7d7..7b010f54 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -2,12 +2,13 @@ library(log4r) library(startR) library(ClimProjDiags) library(multiApply) +library(yaml) # library(s2dverification) # library(ncdf4) # library(abind) # library(easyVerification) # library(easyNCDF) -# library(CSTools) + library(CSTools) # # library(parallel) # library(pryr) # To check mem usage. @@ -25,4 +26,4 @@ library(multiApply) source("tools/check_recipe.R") source("tools/prepare_outputs.R") source("tools/divide_recipe.R") -source("tools/add_dims.R") # Not sure if necessary yet +# source("tools/add_dims.R") # Not sure if necessary yet -- GitLab From 4eaa6ed61b8c6ee32ffad3a80f950d0829203b81 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 8 Mar 2022 09:33:18 +0100 Subject: [PATCH 06/47] Added second atomic recipe for testing --- modules/data_load/recipe_2.yml | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 modules/data_load/recipe_2.yml diff --git a/modules/data_load/recipe_2.yml b/modules/data_load/recipe_2.yml new file mode 100644 index 00000000..8569bfba --- /dev/null +++ b/modules/data_load/recipe_2.yml @@ -0,0 +1,43 @@ +Description: + Author: V. 
Agudetse + '': split version +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system5c3s + Multimodel: no + Reference: + name: era5 + Time: + sdate: + fcst_syear: '2017' + fcst_sday: '0701' + hcst_start: '1993' + hcst_end: '2016' + leadtimemin: 2 + leadtimemax: 4 + Region: + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 360 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: SBC + Skill: + metric: RPSS + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From 27a1840f799b1c1c3c8f8901323e6535cbac89cc Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 8 Mar 2022 09:49:29 +0100 Subject: [PATCH 07/47] Changed sourced modules, recipe, added TODOs --- modules/data_load/load.R | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index c15bc592..6b82924d 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -1,23 +1,23 @@ source("modules/data_load/dates2load.R") source("modules/data_load/regrid.R") -source("modules/data_load/s2dv_cube.R") -source("recipe.R") +# source("modules/data_load/s2dv_cube.R") +# source("recipe.R") # Load required libraries source("tools/libs.R") # RECIPE FOR TESTING # ------------------------------------------------------------------------------------------- - recipe <- read_yaml("/esarchive/scratch/lpalma/git/auto-s2s/modules/data_load/recipe_1.yml") - args <- NULL; args[1] <- "/esarchive/scratch/lpalma/git/auto-s2s/modules/data_load/recipe_1.yml" - # recipe <- Recipe$new(filename=args[1]) - recipe$filename <- args[1] +## TODO: Get only the last part of the path as the recipe$filename? 
+recipe <- read_yaml("/esarchive/scratch/vagudets/repos/auto-s2s/modules/data_load/recipe_2.yml") +args <- NULL; args[1] <- "/esarchive/scratch/vagudets/repos/auto-s2s/modules/data_load/recipe_2.yml" +recipe$filename <- args[1] # Create output folder and log: - logger <- prepare_outputs(recipe = recipe) - folder <- logger$foldername - log_file <- logger$logname - logger <- logger$logger +logger <- prepare_outputs(recipe = recipe) +folder <- logger$foldername +log_file <- logger$logname +logger <- logger$logger # ------------------------------------------------------------------------------------------- # Set params ----------------------------------------- @@ -35,8 +35,9 @@ exp.name <- recipe$Analysis$Datasets$System$name variable <- recipe$Analysis$Variables$name store.freq <- recipe$Analysis$Variables$freq -stream <- verifications$stream -sdates <- verifications$fcst.sdate +## TODO: Examine this verifications part, verify if it's necessary +# stream <- verifications$stream +# sdates <- verifications$fcst.sdate ## TODO: define fcst.name ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name @@ -75,7 +76,7 @@ hcst.path <- paste0(archive$src, # Regrid: #------------------------------------------------------------------- -regrid_params <- get_regrid_params(recipe,archive) +regrid_params <- get_regrid_params(recipe, archive) # Timeseries load -- GitLab From 17cebf13332c1fc35998101d607031914dbe5368 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 8 Mar 2022 13:03:17 +0100 Subject: [PATCH 08/47] Change lon bound from 360 to 359.9 --- modules/data_load/recipe_2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data_load/recipe_2.yml b/modules/data_load/recipe_2.yml index 8569bfba..2775cdf5 100644 --- a/modules/data_load/recipe_2.yml +++ b/modules/data_load/recipe_2.yml @@ -24,7 +24,7 @@ Analysis: latmin: -90 latmax: 90 lonmin: 0 - lonmax: 360 + lonmax: 359.9 Regrid: method: bilinear type: to_system -- GitLab From 
99780b056182ab31fe538a3b9adc1aabe5789d87 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 8 Mar 2022 16:25:18 +0100 Subject: [PATCH 09/47] Changed param name regrid to reference_grid as discussed --- conf/archive.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index e70d4f2d..2b945396 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -15,7 +15,7 @@ archive: nmember: fcst: 51 hcst: 25 - regrid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" era5: src: "recon/ecmwf/era5/" @@ -26,15 +26,15 @@ archive: lon_circular_sort: ini: 0 end: 361 - regrid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: src: "recon/ecmwf/era5land/" daily_mean: {"tas":"_f1h/","rsds":"_f1h/", "prlr":"_f1h/","sfcWind":"_f1h/"} - regrid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" + reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" uerra: src: "recon/ecmwf/uerra_mescan/" daily_mean: {"tas":"_f6h/"} - grid: "/esarchive/recon/ecmwf/uerra_mescan/daily_mean/tas_f6h/tas_201805.nc" + reference_grid: "/esarchive/recon/ecmwf/uerra_mescan/daily_mean/tas_f6h/tas_201805.nc" -- GitLab From 3f00139dc15278364d7fd05c6d0448dcd2428605 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 8 Mar 2022 16:33:19 +0100 Subject: [PATCH 10/47] Added a couple systems to the archive --- conf/archive.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/conf/archive.yml b/conf/archive.yml index 2b945396..eb5a0cfa 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -16,6 +16,30 @@ archive: fcst: 51 hcst: 25 reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + 
system7c3s: + src: "exp/meteofrance/system7c3s/" + monthly_mean: {"tas":"_f6h/","g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/"} + lat_decreasing_sort: TRUE + lon_circular_sort: + ini: 0 + end: 361 + nmember: + fcst: 51 + hcst: 25 + reference_grid: "/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_20191001.nc" + system35c3s: + src: "/exp/cmcc/system35c3s/" + monthly_mean: {"tas":"_f6h/","g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/"} + lat_decreasing_sort: TRUE + lon_circular_sort: + ini: 0 + end: 361 + nmember: + fcst: 50 + hcst: 40 + reference_grid: "/esarchive/exp/cmcc/system35c3s/monthly_mean/tas_f6h/tas_20210101.nc" era5: src: "recon/ecmwf/era5/" -- GitLab From 50c5867df17cdf6a205ae274954452718ca166e1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 8 Mar 2022 17:00:52 +0100 Subject: [PATCH 11/47] Removed exp lat_decreasing_sort and lon_circular_sort from the archive --- conf/archive.yml | 12 ------------ modules/data_load/load.R | 17 +++++++++++------ 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index eb5a0cfa..b689003c 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -8,10 +8,6 @@ archive: "prlr":"_s0-24h/","sfcWind":"_f6h/"} monthly_mean: {"tas":"_f6h/","rsds":"_s0-24h/", "prlr":"_s0-24h/","sfcWind":"_f6h/"} - lat_decreasing_sort: TRUE - lon_circular_sort: - ini: 0 - end: 361 nmember: fcst: 51 hcst: 25 @@ -20,10 +16,6 @@ archive: src: "exp/meteofrance/system7c3s/" monthly_mean: {"tas":"_f6h/","g500":"_f12h/", "prlr":"_f24h/", "sfcWind": "_f6h/"} - lat_decreasing_sort: TRUE - lon_circular_sort: - ini: 0 - end: 361 nmember: fcst: 51 hcst: 25 @@ -32,10 +24,6 @@ archive: src: "/exp/cmcc/system35c3s/" monthly_mean: {"tas":"_f6h/","g500":"_f12h/", "prlr":"_f24h/", "sfcWind": "_f6h/"} - lat_decreasing_sort: TRUE - lon_circular_sort: - ini: 0 - end: 361 nmember: fcst: 50 hcst: 40 diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 
6b82924d..0af64c3e 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -78,6 +78,13 @@ hcst.path <- paste0(archive$src, #------------------------------------------------------------------- regrid_params <- get_regrid_params(recipe, archive) +# Longitude sort +#------------------------------------------------------------------- +if (lons.min >= 0) { + circularsort <- CircularSort(0, 360) +} else { + circularsort <- CircularSort(-180, 180) +} # Timeseries load #------------------------------------------------------------------- @@ -87,12 +94,9 @@ hcst <- Start(dat = hcst.path, file_date = sdates$hcst, time = indices(ltmin:ltmax), latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - exp_descrip$lat_decreasing_sort), + latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - exp_descrip$lon_circular_sort$ini, - exp_descrip$lon_circular_sort$end), + longitude_reorder = circularsort, transform = regrid_params$fcst.transform, transform_params = list(grid = regrid_params$fcst.gridtype, method = regrid_params$fcst.gridmethod, @@ -155,8 +159,9 @@ dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") dim(dates_file) <- dim(Subset(hcst, along=c('dat','var', 'latitude', 'longitude', 'member'), - list(1,1,1,1,1), drop="selected")) + list(1,1,1,1,1), drop="selected")) +## TODO: Change latitude and longitude reorder of obs Start call obs <- Start(dat = obs.path, var = variable, file_date = dates_file, -- GitLab From 908455cb121e720f09bfacff1592670807ca0495 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 9 Mar 2022 12:38:22 +0100 Subject: [PATCH 12/47] Changed variable name to match archive --- modules/data_load/regrid.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/data_load/regrid.R b/modules/data_load/regrid.R index 88c74236..5d0a6efe 100644 --- a/modules/data_load/regrid.R +++ b/modules/data_load/regrid.R @@ -8,7 
+8,7 @@ get_regrid_params <- function(recipe,archive){ if (tolower(recipe$Analysis$Regrid$type) == 'to_reference') { - regrid_params <- list(fcst.gridtype=reference_descrip$regrid, + regrid_params <- list(fcst.gridtype=reference_descrip$reference_grid, fcst.gridmethod=recipe$Analysis$Regrid$method, fcst.transform=CDORemapper, obs.gridtype=NULL, @@ -20,7 +20,7 @@ get_regrid_params <- function(recipe,archive){ regrid_params <- list(fcst.gridtype=NULL, fcst.gridmethod=NULL, fcst.transform=NULL, - obs.gridtype=exp_descrip$regrid, + obs.gridtype=exp_descrip$reference_grid, obs.gridmethod=recipe$Analysis$Regrid$method, obs.transform=CDORemapper) -- GitLab From 185584172ba95a4a78e84eef8b9a92bbbe0eb5ba Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 9 Mar 2022 12:57:44 +0100 Subject: [PATCH 13/47] Formatting --- modules/data_load/load.R | 21 +++++++++++---------- modules/data_load/recipe_2.yml | 8 ++++---- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 0af64c3e..50b2cfb2 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -46,7 +46,7 @@ store.freq <- recipe$Analysis$Variables$freq sdates <- dates2load(recipe, logger) # get esarchive datasets dict: -archive <- read_yaml(paste0(recipe$Run$code_dir,"conf/archive.yml"))$archive +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive exp_descrip <- archive[[exp.name]] freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) @@ -68,11 +68,11 @@ hcst.nmember <- exp_descrip$nmember$hcst # ----------- obs.path <- paste0(archive$src, obs.dir, store.freq, "/$var$", - reference_descrip[[store.freq]][[variable]],"$var$_$file_date$.nc") + reference_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") hcst.path <- paste0(archive$src, hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]],"$var$_$file_date$.nc") + exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") # 
Regrid: #------------------------------------------------------------------- @@ -103,8 +103,8 @@ hcst <- Start(dat = hcst.path, crop = c(lons.min, lons.max, lats.min, lats.max)), transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), member = c('ensemble')), member = indices(1:hcst.nmember), return_vars = list(latitude = 'dat', @@ -128,9 +128,10 @@ startR_to_s2dv <- function(startR_array){ names(dim(dates_end)) <- dates_dims s2dv_object <- s2dv_cube(data = attr2array(startR_array), - lon = attr2array(attr(startR_array,'Variables')$dat1$longitude), - lat = attr2array(attr(startR_array,'Variables')$dat1$latitude), - Variable = list(varName = names(attr(startR_array, "Variables")$common)[2], + lon = attr2array(attr(startR_array, 'Variables')$dat1$longitude), + lat = attr2array(attr(startR_array, 'Variables')$dat1$latitude), + Variable = list(varName = names(attr(startR_array, + 'Variables')$common)[2], level = NULL), Dates = list(start = dates_start, end = dates_end), @@ -172,8 +173,8 @@ obs <- Start(dat = obs.path, longitude_reorder = CircularSort( reference_descrip$lon_circular_sort$ini, reference_descrip$lon_circular_sort$end), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), transform = regrid_params$obs.transform, transform_params = list(grid = regrid_params$obs.gridtype, method = regrid_params$obs.gridmethod, diff --git a/modules/data_load/recipe_2.yml b/modules/data_load/recipe_2.yml index 2775cdf5..d2376b23 100644 --- a/modules/data_load/recipe_2.yml +++ b/modules/data_load/recipe_2.yml @@ -5,7 +5,7 @@ Analysis: Horizon: Seasonal Variables: name: tas - freq: monthly_mean + freq: daily_mean Datasets: System: name: system5c3s @@ -21,10 +21,10 @@ Analysis: leadtimemin: 2 
leadtimemax: 4 Region: - latmin: -90 - latmax: 90 + latmin: -10 + latmax: 10 lonmin: 0 - lonmax: 359.9 + lonmax: 20 Regrid: method: bilinear type: to_system -- GitLab From 481f6ed6637492e76b00f2cd665968f2ef97a01a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 9 Mar 2022 16:34:21 +0100 Subject: [PATCH 14/47] formatting --- modules/data_load/regrid.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data_load/regrid.R b/modules/data_load/regrid.R index 5d0a6efe..a9b4f62a 100644 --- a/modules/data_load/regrid.R +++ b/modules/data_load/regrid.R @@ -1,5 +1,5 @@ -get_regrid_params <- function(recipe,archive){ +get_regrid_params <- function(recipe, archive){ exp.name <- recipe$Analysis$Datasets$System$name ref.name <- recipe$Analysis$Datasets$Reference$name -- GitLab From 714426fd1354d30fb2b43c81462253ae33150b97 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 9 Mar 2022 16:34:35 +0100 Subject: [PATCH 15/47] Change latitude and longitude reorder of obs Start call --- modules/data_load/load.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 50b2cfb2..c9be5d82 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -113,7 +113,10 @@ hcst <- Start(dat = hcst.path, split_multiselected_dims = TRUE, retrieve = TRUE) +# Conversion from startR_array to s2dv_array +#------------------------------------------------------------------- +## TODO: Terminology? 
attr2array <- function(attr){ return(array(as.vector(attr), dim(attr))) } @@ -162,17 +165,14 @@ dim(dates_file) <- dim(Subset(hcst, 'latitude', 'longitude', 'member'), list(1,1,1,1,1), drop="selected")) -## TODO: Change latitude and longitude reorder of obs Start call + obs <- Start(dat = obs.path, var = variable, file_date = dates_file, latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - reference_descrip$lat_decreasing_sort), + latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - reference_descrip$lon_circular_sort$ini, - reference_descrip$lon_circular_sort$end), + longitude_reorder = circularsort synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), transform = regrid_params$obs.transform, -- GitLab From efa5555bbaf6cd8c8c9e79d23af875cab36bfd32 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 9 Mar 2022 16:35:24 +0100 Subject: [PATCH 16/47] Remove longitude and latitude sort for reference --- conf/archive.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index b689003c..9fe54e15 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -34,10 +34,6 @@ archive: daily_mean: {"tas":"_f1h-r1440x721cds/","rsds":"_f1h-r1440x721cds/", "prlr":"_f1h-r1440x721cds/","sfcWind":"_f1h-r1440x721cds/"} monthly_mean: {"tas":"_f1h-r1440x721cds/"} - lat_decreasing_sort: TRUE - lon_circular_sort: - ini: 0 - end: 361 reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: src: "recon/ecmwf/era5land/" -- GitLab From ed75301565ec07533af6498a6e96496b8cd10ae7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 9 Mar 2022 16:57:38 +0100 Subject: [PATCH 17/47] fixed typo and format --- modules/data_load/load.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R 
index c9be5d82..3363f96b 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -172,20 +172,20 @@ obs <- Start(dat = obs.path, latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort + longitude_reorder = circularsort, synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), transform = regrid_params$obs.transform, transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), + method = regrid_params$obs.gridmethod, + crop = c(lons.min, lons.max, + lats.min, lats.max)), transform_vars = c('latitude', 'longitude'), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) ## TODO: Create s2dv object -- GitLab From c6a028664ff9f8696f7d98709b409d015cf8d5da Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 10 Mar 2022 09:45:59 +0100 Subject: [PATCH 18/47] Removed deprecated argument 'crop' from 'transform_params', added comments --- modules/data_load/load.R | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 3363f96b..2c6d5daa 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -116,7 +116,6 @@ hcst <- Start(dat = hcst.path, # Conversion from startR_array to s2dv_array #------------------------------------------------------------------- -## TODO: Terminology? 
attr2array <- function(attr){ return(array(as.vector(attr), dim(attr))) } @@ -165,6 +164,8 @@ dim(dates_file) <- dim(Subset(hcst, 'latitude', 'longitude', 'member'), list(1,1,1,1,1), drop="selected")) +## TODO: Not working for daily obs +## TODO: Not loading the correct dates? obs <- Start(dat = obs.path, var = variable, @@ -177,15 +178,13 @@ obs <- Start(dat = obs.path, longitude = c('lon', 'longitude')), transform = regrid_params$obs.transform, transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) ## TODO: Create s2dv object -- GitLab From f54eb608e0884014ae2e2633ca218f08a40dd0f2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 10 Mar 2022 09:46:21 +0100 Subject: [PATCH 19/47] test monthly data --- modules/data_load/recipe_2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data_load/recipe_2.yml b/modules/data_load/recipe_2.yml index d2376b23..a63eaceb 100644 --- a/modules/data_load/recipe_2.yml +++ b/modules/data_load/recipe_2.yml @@ -5,7 +5,7 @@ Analysis: Horizon: Seasonal Variables: name: tas - freq: daily_mean + freq: monthly_mean Datasets: System: name: system5c3s -- GitLab From 48108e0237ada0d523a75c2e688b11e82e849be5 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 10 Mar 2022 13:07:04 +0100 Subject: [PATCH 20/47] Removed deprecated argument 'crop' from 'transform_params' from hcst start call --- modules/data_load/load.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/modules/data_load/load.R b/modules/data_load/load.R index 2c6d5daa..9cf29136 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -99,9 +99,7 @@ hcst <- Start(dat = hcst.path, longitude_reorder = circularsort, transform = regrid_params$fcst.transform, transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), + method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), -- GitLab From c5b4d79de28c4f72a0f6997f9bb52b3325073d03 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 10 Mar 2022 15:41:47 +0100 Subject: [PATCH 21/47] Rearranged some parts of the code --- modules/data_load/load.R | 68 ++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 9cf29136..4d9fe10f 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -86,7 +86,7 @@ if (lons.min >= 0) { circularsort <- CircularSort(-180, 180) } -# Timeseries load +# Hindcast timeseries load #------------------------------------------------------------------- hcst <- Start(dat = hcst.path, @@ -111,6 +111,38 @@ hcst <- Start(dat = hcst.path, split_multiselected_dims = TRUE, retrieve = TRUE) +# Get forecast dates +dates <- attr(hcst, 'Variables')$common$time +dates_file <- sapply(dates, format, '%Y%m%d') +dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") +dim(dates_file) <- dim(Subset(hcst, + along=c('dat','var', + 'latitude', 'longitude', 'member'), + list(1,1,1,1,1), drop="selected")) + +# Reference timeseries load +#------------------------------------------------------------------- +## TODO: Not working for daily obs + +obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + 
latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + # Conversion from startR_array to s2dv_array #------------------------------------------------------------------- @@ -147,45 +179,13 @@ startR_to_s2dv <- function(startR_array){ } hcst <- startR_to_s2dv(hcst) - +obs <- startR_to_s2dv(obs) ## TODO: new files checker? #hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) #hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] #try(hcst.NA_files <- hcst.NA_files[order(hcst.NA_files)]) -dates <- attr(hcst, 'Variables')$common$time -dates_file <- sapply(dates, format, '%Y%m%d') -dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") -dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member'), - list(1,1,1,1,1), drop="selected")) - -## TODO: Not working for daily obs -## TODO: Not loading the correct dates? 
- -obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude')), - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - ## TODO: Create s2dv object - dates_file <- paste0(dates_file, '01') dim(dates_file) <- dim(Subset(hcst, along=c('dat','var', -- GitLab From 46d625ebeaa914bc5afa3f09b526663b2296b171 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 11 Mar 2022 15:01:22 +0100 Subject: [PATCH 22/47] formatting --- modules/data_load/load.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 4d9fe10f..7096170d 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -126,13 +126,13 @@ dim(dates_file) <- dim(Subset(hcst, obs <- Start(dat = obs.path, var = variable, - file_date = dates_file, + file_date = dates_file, latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), longitude_reorder = circularsort, synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude')), + longitude = c('lon', 'longitude')), transform = regrid_params$obs.transform, transform_params = list(grid = regrid_params$obs.gridtype, method = regrid_params$obs.gridmethod), -- GitLab From 8eef8ed66b1614c4894920bce754636ecac8f8ee Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 11 Mar 2022 16:25:25 +0100 Subject: [PATCH 23/47] formatting --- modules/data_load/load.R 
| 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 7096170d..253d2de3 100644 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -71,8 +71,8 @@ obs.path <- paste0(archive$src, reference_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") hcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") # Regrid: #------------------------------------------------------------------- @@ -88,7 +88,7 @@ if (lons.min >= 0) { # Hindcast timeseries load #------------------------------------------------------------------- - +## TODO: Adapt to daily data hcst <- Start(dat = hcst.path, var = variable, file_date = sdates$hcst, -- GitLab From f4c151c8ba7e5e22745d5a386c00e1be71e8e032 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 14 Mar 2022 10:50:07 +0100 Subject: [PATCH 24/47] Format fixes --- modules/data_load/load.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) mode change 100644 => 100755 modules/data_load/load.R diff --git a/modules/data_load/load.R b/modules/data_load/load.R old mode 100644 new mode 100755 index 253d2de3..35ddc9c6 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -94,9 +94,9 @@ hcst <- Start(dat = hcst.path, file_date = sdates$hcst, time = indices(ltmin:ltmax), latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), + latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, + longitude_reorder = circularsort, transform = regrid_params$fcst.transform, transform_params = list(grid = regrid_params$fcst.gridtype, method = regrid_params$fcst.gridmethod), @@ -126,7 +126,7 @@ dim(dates_file) <- dim(Subset(hcst, obs <- Start(dat = obs.path, var = variable, - file_date = dates_file, + 
file_date = dates_file, latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), @@ -171,7 +171,6 @@ startR_to_s2dv <- function(startR_array){ source_files = attr2array(attr(startR_array, "Files")) #Datasets = list(exp1 = list(InitializationsDates = list(Member_1 = "01011990", # Members = "Member_1"))) - ) return(s2dv_object) -- GitLab From 5d1c4a1e004f4129f4dcd221eb716b98b699f952 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 14 Mar 2022 10:50:55 +0100 Subject: [PATCH 25/47] Changes for testing --- modules/data_load/recipe_1.yml | 4 ++-- modules/data_load/recipe_2.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/data_load/recipe_1.yml b/modules/data_load/recipe_1.yml index 69ef8245..ca99311f 100644 --- a/modules/data_load/recipe_1.yml +++ b/modules/data_load/recipe_1.yml @@ -39,5 +39,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/data_load/recipe_2.yml b/modules/data_load/recipe_2.yml index a63eaceb..d7272e90 100644 --- a/modules/data_load/recipe_2.yml +++ b/modules/data_load/recipe_2.yml @@ -5,13 +5,13 @@ Analysis: Horizon: Seasonal Variables: name: tas - freq: monthly_mean + freq: daily_mean Datasets: System: name: system5c3s Multimodel: no Reference: - name: era5 + name: era5land Time: sdate: fcst_syear: '2017' -- GitLab From c9c010c379c4680d0b1e3207942d34eeae18b3fe Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 14 Mar 2022 10:51:33 +0100 Subject: [PATCH 26/47] Added monthly mean to era5land --- conf/archive.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/archive.yml b/conf/archive.yml index 9fe54e15..ec0f743c 100644 --- a/conf/archive.yml +++ b/conf/archive.yml 
@@ -39,6 +39,7 @@ archive: src: "recon/ecmwf/era5land/" daily_mean: {"tas":"_f1h/","rsds":"_f1h/", "prlr":"_f1h/","sfcWind":"_f1h/"} + monthly_mean: {"tas":"_f1h/"} reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" uerra: src: "recon/ecmwf/uerra_mescan/" -- GitLab From ae0529429ff53276337fb9f74610aeae6b27da06 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 15 Mar 2022 11:43:44 +0100 Subject: [PATCH 27/47] Rename dim according to conventions --- modules/data_load/load.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 35ddc9c6..4f868ec9 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -10,8 +10,8 @@ source("tools/libs.R") # RECIPE FOR TESTING # ------------------------------------------------------------------------------------------- ## TODO: Get only the last part of the path as the recipe$filename? -recipe <- read_yaml("/esarchive/scratch/vagudets/repos/auto-s2s/modules/data_load/recipe_2.yml") -args <- NULL; args[1] <- "/esarchive/scratch/vagudets/repos/auto-s2s/modules/data_load/recipe_2.yml" +recipe <- read_yaml("modules/data_load/recipe_1.yml") +args <- NULL; args[1] <- "modules/data_load/recipe_1.yml" recipe$filename <- args[1] # Create output folder and log: logger <- prepare_outputs(recipe = recipe) @@ -74,7 +74,7 @@ hcst.path <- paste0(archive$src, hcst.dir, store.freq, "/$var$", exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") -# Regrid: +# Define regrid parameters: #------------------------------------------------------------------- regrid_params <- get_regrid_params(recipe, archive) @@ -102,9 +102,8 @@ hcst <- Start(dat = hcst.path, method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - member = c('ensemble')), - member = indices(1:hcst.nmember), + 
longitude = c('lon', 'longitude')), + ensemble = indices(1:hcst.nmember), return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), @@ -127,6 +126,7 @@ dim(dates_file) <- dim(Subset(hcst, obs <- Start(dat = obs.path, var = variable, file_date = dates_file, +# time = indices(ltmin:ltmax), latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), @@ -137,11 +137,11 @@ obs <- Start(dat = obs.path, transform_params = list(grid = regrid_params$obs.gridtype, method = regrid_params$obs.gridmethod), transform_vars = c('latitude', 'longitude'), - return_vars = list(latitude = 'dat', + return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + split_multiselected_dims = TRUE, + retrieve = TRUE) # Conversion from startR_array to s2dv_array #------------------------------------------------------------------- -- GitLab From df4f75f1cc5b9b7659972f6d86385ace8631a1c5 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 15 Mar 2022 11:46:36 +0100 Subject: [PATCH 28/47] Finish renaming 'member' to 'ensemble' --- modules/data_load/load.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 4f868ec9..c394a007 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -116,7 +116,7 @@ dates_file <- sapply(dates, format, '%Y%m%d') dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") dim(dates_file) <- dim(Subset(hcst, along=c('dat','var', - 'latitude', 'longitude', 'member'), + 'latitude', 'longitude', 'ensemble'), list(1,1,1,1,1), drop="selected")) # Reference timeseries load @@ -188,13 +188,13 @@ obs <- startR_to_s2dv(obs) dates_file <- paste0(dates_file, '01') dim(dates_file) <- dim(Subset(hcst, along=c('dat','var', - 'latitude', 'longitude', 'member'), + 'latitude', 'longitude', 'ensemble'), list(1,1,1,1,1), 
drop="selected")) file_dates <- paste0(file_dates, '01') dim(file_dates) <- dim(Subset(hcst, along=c('dat','var', - 'latitude', 'longitude', 'member', 'time'), + 'latitude', 'longitude', 'ensemble', 'time'), list(1,1,1,1,1,1), drop="selected")) @@ -240,7 +240,7 @@ info(logger, default_dims <- c(dat = 1, var = 1, sweek = 1, sday = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, member = 1) + latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(obs))] <- dim(obs) dim(obs) <- default_dims -- GitLab From 80f6b3aa56ed0c13b329686caf3ddc450410d666 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 15 Mar 2022 12:39:45 +0100 Subject: [PATCH 29/47] Removed duplicated dates from dates_file array --- modules/data_load/load.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index c394a007..541b2094 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -114,8 +114,9 @@ hcst <- Start(dat = hcst.path, dates <- attr(hcst, 'Variables')$common$time dates_file <- sapply(dates, format, '%Y%m%d') dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") +dates_file <- dates_file[!duplicated(dates_file)] # Avoid duplicates dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', + along=c('dat', 'var', 'latitude', 'longitude', 'ensemble'), list(1,1,1,1,1), drop="selected")) @@ -123,7 +124,7 @@ dim(dates_file) <- dim(Subset(hcst, #------------------------------------------------------------------- ## TODO: Not working for daily obs -obs <- Start(dat = obs.path, +obs <- Start(dat = obs.path, var = variable, file_date = dates_file, # time = indices(ltmin:ltmax), -- GitLab From 27ed5c97ad6ab8ac4aae8edddc4c3ca48875adcd Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 15 Mar 2022 12:39:55 +0100 Subject: [PATCH 30/47] testing --- modules/data_load/recipe_1.yml | 6 +++--- modules/data_load/recipe_2.yml | 2 +- 2 files changed, 4 insertions(+), 4 
deletions(-) diff --git a/modules/data_load/recipe_1.yml b/modules/data_load/recipe_1.yml index ca99311f..a6cdda67 100644 --- a/modules/data_load/recipe_1.yml +++ b/modules/data_load/recipe_1.yml @@ -21,10 +21,10 @@ Analysis: leadtimemin: 2 leadtimemax: 4 Region: - latmin: -90 - latmax: 90 + latmin: -10 + latmax: 10 lonmin: 0 - lonmax: 360 + lonmax: 20 Regrid: method: bilinear type: to_system diff --git a/modules/data_load/recipe_2.yml b/modules/data_load/recipe_2.yml index d7272e90..d2376b23 100644 --- a/modules/data_load/recipe_2.yml +++ b/modules/data_load/recipe_2.yml @@ -11,7 +11,7 @@ Analysis: name: system5c3s Multimodel: no Reference: - name: era5land + name: era5 Time: sdate: fcst_syear: '2017' -- GitLab From 85f296f49e4839374498b7d50c9321c69194fee6 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 16 Mar 2022 08:55:55 +0100 Subject: [PATCH 31/47] Add second level 'System' and 'Reference' to archive --- conf/archive.yml | 89 +++++++++++++++++++------------------- modules/data_load/load.R | 16 ++++--- modules/data_load/regrid.R | 4 +- 3 files changed, 56 insertions(+), 53 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index ec0f743c..0feb34b8 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -1,49 +1,50 @@ archive: - src: "/esarchive/" - system5c3s: - src: "exp/ecmwf/system5c3s/" - daily_mean: {"tas":"_f6h/","rsds":"_s0-24h/", - "prlr":"_s0-24h/","sfcWind":"_f6h/"} - monthly_mean: {"tas":"_f6h/","rsds":"_s0-24h/", - "prlr":"_s0-24h/","sfcWind":"_f6h/"} - nmember: - fcst: 51 - hcst: 25 - reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" - system7c3s: - src: "exp/meteofrance/system7c3s/" - monthly_mean: {"tas":"_f6h/","g500":"_f12h/", - "prlr":"_f24h/", "sfcWind": "_f6h/"} - nmember: - fcst: 51 - hcst: 25 - reference_grid: "/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_20191001.nc" - system35c3s: - src: "/exp/cmcc/system35c3s/" - monthly_mean: 
{"tas":"_f6h/","g500":"_f12h/", - "prlr":"_f24h/", "sfcWind": "_f6h/"} - nmember: - fcst: 50 - hcst: 40 - reference_grid: "/esarchive/exp/cmcc/system35c3s/monthly_mean/tas_f6h/tas_20210101.nc" - - era5: - src: "recon/ecmwf/era5/" - daily_mean: {"tas":"_f1h-r1440x721cds/","rsds":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/","sfcWind":"_f1h-r1440x721cds/"} - monthly_mean: {"tas":"_f1h-r1440x721cds/"} - reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" - era5land: - src: "recon/ecmwf/era5land/" - daily_mean: {"tas":"_f1h/","rsds":"_f1h/", - "prlr":"_f1h/","sfcWind":"_f1h/"} - monthly_mean: {"tas":"_f1h/"} - reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" - uerra: - src: "recon/ecmwf/uerra_mescan/" - daily_mean: {"tas":"_f6h/"} - reference_grid: "/esarchive/recon/ecmwf/uerra_mescan/daily_mean/tas_f6h/tas_201805.nc" + src: "/esarchive/" + System: + system5c3s: + src: "exp/ecmwf/system5c3s/" + daily_mean: {"tas":"_f6h/","rsds":"_s0-24h/", + "prlr":"_s0-24h/","sfcWind":"_f6h/"} + monthly_mean: {"tas":"_f6h/","rsds":"_s0-24h/", + "prlr":"_s0-24h/","sfcWind":"_f6h/"} + nmember: + fcst: 51 + hcst: 25 + reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + system7c3s: + src: "exp/meteofrance/system7c3s/" + monthly_mean: {"tas":"_f6h/","g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/"} + nmember: + fcst: 51 + hcst: 25 + reference_grid: "/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_20191001.nc" + system35c3s: + src: "/exp/cmcc/system35c3s/" + monthly_mean: {"tas":"_f6h/","g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/"} + nmember: + fcst: 50 + hcst: 40 + reference_grid: "/esarchive/exp/cmcc/system35c3s/monthly_mean/tas_f6h/tas_20210101.nc" + Reference: + era5: + src: "recon/ecmwf/era5/" + daily_mean: {"tas":"_f1h-r1440x721cds/","rsds":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/","sfcWind":"_f1h-r1440x721cds/"} + 
monthly_mean: {"tas":"_f1h-r1440x721cds/"} + reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + era5land: + src: "recon/ecmwf/era5land/" + daily_mean: {"tas":"_f1h/","rsds":"_f1h/", + "prlr":"_f1h/","sfcWind":"_f1h/"} + monthly_mean: {"tas":"_f1h/"} + reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" + uerra: + src: "recon/ecmwf/uerra_mescan/" + daily_mean: {"tas":"_f6h/"} + reference_grid: "/esarchive/recon/ecmwf/uerra_mescan/daily_mean/tas_f6h/tas_201805.nc" diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 541b2094..1e841c98 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -47,10 +47,10 @@ sdates <- dates2load(recipe, logger) # get esarchive datasets dict: archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive -exp_descrip <- archive[[exp.name]] +exp_descrip <- archive$System[[exp.name]] freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) -reference_descrip <- archive[[ref.name]] +reference_descrip <- archive$Reference[[ref.name]] freq.obs <- unlist(reference_descrip[[store.freq]][variable]) obs.dir <- reference_descrip$src fcst.dir <- exp_descrip$src @@ -88,7 +88,6 @@ if (lons.min >= 0) { # Hindcast timeseries load #------------------------------------------------------------------- -## TODO: Adapt to daily data hcst <- Start(dat = hcst.path, var = variable, file_date = sdates$hcst, @@ -110,7 +109,8 @@ hcst <- Start(dat = hcst.path, split_multiselected_dims = TRUE, retrieve = TRUE) -# Get forecast dates +# Get forecast dates +## TODO: Adapt for daily data dates <- attr(hcst, 'Variables')$common$time dates_file <- sapply(dates, format, '%Y%m%d') dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") @@ -118,11 +118,11 @@ dates_file <- dates_file[!duplicated(dates_file)] # Avoid duplicates dim(dates_file) <- dim(Subset(hcst, along=c('dat', 'var', 'latitude', 'longitude', 'ensemble'), - list(1,1,1,1,1), 
drop="selected")) + list(1,1,1,1,1), drop="selected")) -# Reference timeseries load +# Reference load #------------------------------------------------------------------- -## TODO: Not working for daily obs +## TODO: Time dimension not working for daily obs obs <- Start(dat = obs.path, var = variable, @@ -181,6 +181,8 @@ startR_to_s2dv <- function(startR_array){ hcst <- startR_to_s2dv(hcst) obs <- startR_to_s2dv(obs) +## TODO: Review/modify code from here onwards. + ## TODO: new files checker? #hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) #hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] diff --git a/modules/data_load/regrid.R b/modules/data_load/regrid.R index a9b4f62a..9aaab6e4 100644 --- a/modules/data_load/regrid.R +++ b/modules/data_load/regrid.R @@ -3,8 +3,8 @@ get_regrid_params <- function(recipe, archive){ exp.name <- recipe$Analysis$Datasets$System$name ref.name <- recipe$Analysis$Datasets$Reference$name - exp_descrip <- archive[[exp.name]] - reference_descrip <- archive[[ref.name]] + exp_descrip <- archive$System[[exp.name]] + reference_descrip <- archive$Reference[[ref.name]] if (tolower(recipe$Analysis$Regrid$type) == 'to_reference') { -- GitLab From b3c5f4962a816b09e112701a6d9f7131ecb85d3f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 17 Mar 2022 13:11:18 +0100 Subject: [PATCH 32/47] Workaround for loading of daily obs --- modules/data_load/load.R | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 1e841c98..04b98d17 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -10,8 +10,8 @@ source("tools/libs.R") # RECIPE FOR TESTING # ------------------------------------------------------------------------------------------- ## TODO: Get only the last part of the path as the recipe$filename? 
-recipe <- read_yaml("modules/data_load/recipe_1.yml") -args <- NULL; args[1] <- "modules/data_load/recipe_1.yml" +recipe <- read_yaml("modules/data_load/recipe_2.yml") +args <- NULL; args[1] <- "modules/data_load/recipe_2.yml" recipe$filename <- args[1] # Create output folder and log: logger <- prepare_outputs(recipe = recipe) @@ -115,10 +115,18 @@ dates <- attr(hcst, 'Variables')$common$time dates_file <- sapply(dates, format, '%Y%m%d') dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") dates_file <- dates_file[!duplicated(dates_file)] # Avoid duplicates -dim(dates_file) <- dim(Subset(hcst, - along=c('dat', 'var', - 'latitude', 'longitude', 'ensemble'), - list(1,1,1,1,1), drop="selected")) +## Workaround to fix dimension issues after Start() call +if (store.freq == "monthly_mean") { + dim(dates_file) <- dim(Subset(hcst, + along=c('dat', 'var', + 'latitude', 'longitude', 'ensemble'), + list(1,1,1,1,1), drop="selected")) +} else if (store.freq == "daily_mean") { + dim(dates_file) <- dim(Subset(hcst, + along=c('dat', 'var', + 'latitude', 'longitude', 'ensemble', 'time'), + list(1,1,1,1,1,1), drop="selected")) +} # Reference load #------------------------------------------------------------------- @@ -127,7 +135,7 @@ dim(dates_file) <- dim(Subset(hcst, obs <- Start(dat = obs.path, var = variable, file_date = dates_file, -# time = indices(ltmin:ltmax), + time = indices(ltmin:ltmax), latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), -- GitLab From 53501200321fd219b98c76f9c3246eafbd638b82 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 21 Mar 2022 15:54:44 +0100 Subject: [PATCH 33/47] Provisional fix for monthly and daily data until startR bug is fixed --- modules/data_load/load.R | 76 +++++++++++++++++----------------- modules/data_load/recipe_1.yml | 2 +- 2 files changed, 40 insertions(+), 38 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R 
index 04b98d17..752c81f0 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -1,4 +1,3 @@ - source("modules/data_load/dates2load.R") source("modules/data_load/regrid.R") # source("modules/data_load/s2dv_cube.R") @@ -10,8 +9,8 @@ source("tools/libs.R") # RECIPE FOR TESTING # ------------------------------------------------------------------------------------------- ## TODO: Get only the last part of the path as the recipe$filename? -recipe <- read_yaml("modules/data_load/recipe_2.yml") -args <- NULL; args[1] <- "modules/data_load/recipe_2.yml" +recipe <- read_yaml("modules/data_load/recipe_1.yml") +args <- NULL; args[1] <- "modules/data_load/recipe_1.yml" recipe$filename <- args[1] # Create output folder and log: logger <- prepare_outputs(recipe = recipe) @@ -101,7 +100,8 @@ hcst <- Start(dat = hcst.path, method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude')), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), ensemble = indices(1:hcst.nmember), return_vars = list(latitude = 'dat', longitude = 'dat', @@ -110,47 +110,49 @@ hcst <- Start(dat = hcst.path, retrieve = TRUE) # Get forecast dates -## TODO: Adapt for daily data +## TODO: Replace with new code once An-Chi's fix is merged in startR dates <- attr(hcst, 'Variables')$common$time -dates_file <- sapply(dates, format, '%Y%m%d') -dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") -dates_file <- dates_file[!duplicated(dates_file)] # Avoid duplicates -## Workaround to fix dimension issues after Start() call +dates_file <- sapply(dates, format, '%Y%m') +dates.dims <- c(sday = 1, sweek = 1, time = 1) +dates.dims[names(dim(dates))] <- dim(dates) +dim(dates) <- dates.dims +names(dim(dates))[4] <- "sdate" # to be changed to 'syear' + +# For monthly data, dates are changed to day 1 of month correct retrieval. 
if (store.freq == "monthly_mean") { - dim(dates_file) <- dim(Subset(hcst, - along=c('dat', 'var', - 'latitude', 'longitude', 'ensemble'), - list(1,1,1,1,1), drop="selected")) -} else if (store.freq == "daily_mean") { - dim(dates_file) <- dim(Subset(hcst, - along=c('dat', 'var', - 'latitude', 'longitude', 'ensemble', 'time'), - list(1,1,1,1,1,1), drop="selected")) + dates_hcst <- dates # store dates dims + dates <- lubridate::floor_date(dates, unit = 'month') + dim(dates) <- dim(dates_hcst) # restore dates dims } +dim(dates_file) <- dim(dates) + # Reference load #------------------------------------------------------------------- -## TODO: Time dimension not working for daily obs obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - time = indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude')), - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude=c('lat','latitude'), + longitude=c('lon','longitude')), + 
return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) # Conversion from startR_array to s2dv_array #------------------------------------------------------------------- diff --git a/modules/data_load/recipe_1.yml b/modules/data_load/recipe_1.yml index a6cdda67..cdb7b0be 100644 --- a/modules/data_load/recipe_1.yml +++ b/modules/data_load/recipe_1.yml @@ -15,7 +15,7 @@ Analysis: Time: sdate: fcst_syear: '2017' - fcst_sday: '0701' + fcst_sday: '0101' hcst_start: '1993' hcst_end: '2016' leadtimemin: 2 -- GitLab From f9d166592c0663d44b448fad1f7b013552b1208a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 22 Mar 2022 14:28:03 +0100 Subject: [PATCH 34/47] Add middle interpolation case --- modules/data_load/regrid.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/data_load/regrid.R b/modules/data_load/regrid.R index 9aaab6e4..0d3c96de 100644 --- a/modules/data_load/regrid.R +++ b/modules/data_load/regrid.R @@ -24,7 +24,15 @@ get_regrid_params <- function(recipe, archive){ obs.gridmethod=recipe$Analysis$Regrid$method, obs.transform=CDORemapper) - } ##TODO: Else condition? middle interpolation? 
+ } else { + ##TODO: Define conditions and recipe params for middle interpolation case + regrid_params <- list(fcst.gridtype=recipe$Analysis$Regrid$type, # r256x128 + fcst.gridmethod=recipe$Analysis$Regrid$method, + fcst.transform=CDORemapper, + obs.gridtype=recipe$Analysis$Regrid$type, + obs.gridmethod=recipe$Analysis$Regrid$method, + obs.transform=CDORemapper) + } return(regrid_params) -- GitLab From 3955854ab9bcfe61581213e1d81260fe996503c4 Mon Sep 17 00:00:00 2001 From: lpalma Date: Tue, 22 Mar 2022 14:42:46 +0100 Subject: [PATCH 35/47] modules for nord3v2, testing_recipes dir and changes in load.R --- MODULES | 12 +- modules/data_load/fcst_seas.load.R-OLD | 249 ------------------ modules/data_load/load.R | 15 +- modules/data_load/s2dv_cube.R | 185 ------------- .../data_load/{dates2load.R => sdates2load.R} | 6 +- .../{ => testing_recipes}/recipe_1.yml | 0 .../{ => testing_recipes}/recipe_2.yml | 4 +- 7 files changed, 22 insertions(+), 449 deletions(-) delete mode 100644 modules/data_load/fcst_seas.load.R-OLD delete mode 100644 modules/data_load/s2dv_cube.R rename modules/data_load/{dates2load.R => sdates2load.R} (88%) rename modules/data_load/{ => testing_recipes}/recipe_1.yml (100%) rename modules/data_load/{ => testing_recipes}/recipe_2.yml (84%) diff --git a/MODULES b/MODULES index a37e52d6..a5e3edc2 100644 --- a/MODULES +++ b/MODULES @@ -11,6 +11,16 @@ if [ $BSC_MACHINE == "power" ]; then module load CDO/1.9.4-foss-2018b module load R/3.6.1-foss-2018b +elif [ $BSC_MACHINE == "nord3v2" ]; then + + module use /gpfs/projects/bsc32/software/suselinux/11/modules/all + module unuse /apps/modules/modulefiles/applications /apps/modules/modulefiles/compilers /apps/modules/modulefiles/tools /apps/modules/modulefiles/libraries /apps/modules/modulefiles/environment + + + module load CDO/1.9.8-foss-2019b + module load R/3.6.2-foss-2019b + module load OpenMPI/4.0.5-GCC-8.3.0-nord3-v2 + elif [ $BSC_MACHINE == "nord3" ]; then module use 
/gpfs/projects/bsc32/software/suselinux/11/modules/all @@ -19,8 +29,8 @@ elif [ $BSC_MACHINE == "nord3" ]; then module unuse /apps/modules/modulefiles/libraries /apps/modules/modulefiles/environment module unuse /apps/modules/PRACE - module load R/3.6.2-foss-2019b module load CDO/1.9.8-foss-2019b + module load R/3.6.2-foss-2019b else diff --git a/modules/data_load/fcst_seas.load.R-OLD b/modules/data_load/fcst_seas.load.R-OLD deleted file mode 100644 index 5e6a4b29..00000000 --- a/modules/data_load/fcst_seas.load.R-OLD +++ /dev/null @@ -1,249 +0,0 @@ - - -fcst.month <- substr(fcst.sdate,5,6) -fcst.year <- substr(fcst.sdate,1,4) - -file_dates.fcst <- paste(fcst.year, fcst.month, sep = "") -file_dates <- paste(strtoi(hcst.inityear):strtoi(hcst.endyear), - fcst.month, sep = "") - - -file_dates <- add_dims(file_dates) -file_dates.fcst <- add_dims(file_dates.fcst) -# Take parameters from conf/archive for datasets: -table <- read_yaml(paste0(conf$code_dir, "conf/archive.yml")) -dataset_descrip <- table$archive[which(names(table$archive) == fcst.name)][[1]] -freq.hcst <- unlist(dataset_descrip[store.freq][[1]][variable]) -reference_descrip <- table$archive[which(names(table$archive) == - tolower(ref.name))][[1]] -freq.obs <- unlist(reference_descrip[store.freq][[1]][variable]) -obs.dir <- reference_descrip$src -fcst.dir <- dataset_descrip$src -hcst.dir <- dataset_descrip$src -fcst.nmember <- dataset_descrip$nmember$fcst -hcst.nmember <- dataset_descrip$nmember$hcst - -if ("accum" %in% names(reference_descrip)) { - accum <- unlist(reference_descrip$accum[store.freq][[1]]) -} else { - accum <- FALSE -} -# ----------- - obs.path <- paste0("/esarchive/", - obs.dir, store.freq, "/$var$", - freq.obs,"$var$_$file_date$.nc") - - hcst.path <- paste0("/esarchive/", - hcst.dir, store.freq, "/$var$", - freq.hcst,"$var$_$file_date$01", - ".nc") - - fcst.path <- paste0("/esarchive/", - fcst.dir, store.freq, "/$var$", - freq.hcst,"$var$_$file_date$01", - ".nc") 
-#------------------------------------------------------------------- -# Regrid: -if (tolower(recipe$Analysis$Regrid$type) == 'reference') { - fcst.gridtype <- reference_descrip$regrid - fcst.gridmethod <- recipe$Analysis$Regrid$method - fcst.tranform <- CDORemapper - obs.gridtype <- NULL - obs.gridmethod <- NULL - obs.tranform <- NULL -} else if (tolower(recipe$Analysis$Regrid$type) == 'system') { - fcst.gridtype <- NULL - fcst.gridmethod <- NULL - fcst.transform <- NULL - obs.gridtype <- dataset_descrip$regrid - obs.gridmethod <- recipe$Analysis$Regrid$method - obs.transform <- CDORemapper -} else { - fcst.gridtype <- recipe$Analysis$Regrid$type - fcst.gridmethod <- recipe$Analysis$Regrid$method - fcst.transform <- CDORemapper - obs.gridtype <- recipe$Analysis$Regrid$type - obs.gridmethod <- recipe$Analysis$Regrid$method - obs.transform <- CDORemapper -} - - - # Timeseries load - #------------------------------------------------------------------- - - if (tolower(stream) == "fcst"){ - - fcst <- Start(dat = fcst.path, - var = variable, - file_date = file_dates.fcst, - time = indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - dataset_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - dataset_descrip$lon_circular_sort$ini, - dataset_descrip$lon_circular_sort$end), - transform = fcst.transform, - transform_params = list(grid = fcst.gridtype, - method = fcst.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude'), - member = c('ensemble')), - member = indices(1:fcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - } - - hcst <- Start(dat = hcst.path, - var = variable, - file_date = file_dates, - time = 
indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - dataset_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - dataset_descrip$lon_circular_sort$ini, - dataset_descrip$lon_circular_sort$end), - transform = fcst.transform, - transform_params = list(grid = fcst.gridtype, - method = fcst.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude'), - member = c('ensemble')), - member = indices(1:hcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) - hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] - try(hcst.NA_files <- hcst.NA_files[order(hcst.NA_files)]) - - dates <- attr(hcst, 'Variables')$common$time - dates_file <- sapply(dates, format, '%Y%m%d') - dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") - dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member'), - list(1,1,1,1,1), drop="selected")) - - obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - reference_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - reference_descrip$lon_circular_sort$ini, - reference_descrip$lon_circular_sort$end), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - transform = obs.transform, - transform_params = list(grid = obs.gridtype, - method = obs.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - return_vars = list(latitude = 'dat', - longitude = 
'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - dates_file <- paste0(dates_file, '01') - dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member'), - list(1,1,1,1,1), drop="selected")) - - file_dates <- paste0(file_dates, '01') - dim(file_dates) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member', 'time'), - list(1,1,1,1,1,1), drop="selected")) - - - - obs.NA_dates.ind <- Apply(obs, - fun=(function(x){ all(is.na(x))}), - target_dims=c('time', 'latitude', 'longitude'))[[1]] - obs.NA_dates <- dates_file[obs.NA_dates.ind] - obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] - obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", - freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") - - if (any(is.na(hcst))){ - fatal(logger, - paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", - " ################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "hcst files:", - hcst.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status = 1) - } - - if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ - fatal(logger, - paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", - " ################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "obs files:", - obs.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status=1) - } - - info(logger, - "######### DATA LOADING COMPLETED SUCCESFULLY ##############") - - default_dims <- c(dat = 1, var = 1, sweek = 1, - sday = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, member = 1) - - default_dims[names(dim(obs))] <- dim(obs) - dim(obs) <- default_dims - - 
lon <- attr(obs, 'Variables')$dat1$longitude - lat <- attr(obs, 'Variables')$dat1$latitude - hcst.times <- attr(hcst, 'Variables')$common$time - hcst.times <- sort(unique(sapply(as.character(hcst.times), - substr, 1, 10))) - - #obs<-Subset(obs,c('dat','var'),list(1,1),drop='selected') - #hcst<-Subset(hcst,c('dat','var'),list(1,1),drop='selected') - #if (stream == "fcst"){ - # fcst<-Subset(fcst,c('dat','var'),list(1,1),drop='selected') - #} - - #filters negative values in accum vars - if (accum){ - obs[obs < 0 ] <- 0 - hcst[hcst < 0 ] <- 0 - if (stream == "fcst"){ fcst[fcst < 0 ] <- 0 } - } - - sdates.hcst <- file_dates - sdates.fcst <- file_dates.fcst - leadtimes.hcst <- dates_file - diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 1e841c98..70b4b7bd 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -1,17 +1,15 @@ -source("modules/data_load/dates2load.R") -source("modules/data_load/regrid.R") -# source("modules/data_load/s2dv_cube.R") -# source("recipe.R") -# Load required libraries +# Load required libraries/funs +source("modules/data_load/sdates2load.R") +source("modules/data_load/regrid.R") source("tools/libs.R") # RECIPE FOR TESTING # ------------------------------------------------------------------------------------------- ## TODO: Get only the last part of the path as the recipe$filename? 
-recipe <- read_yaml("modules/data_load/recipe_1.yml") -args <- NULL; args[1] <- "modules/data_load/recipe_1.yml" +args <- NULL; args[1] <- "modules/data_load/testing_recipes/recipe_2.yml" +recipe <- read_yaml(args[1]) recipe$filename <- args[1] # Create output folder and log: logger <- prepare_outputs(recipe = recipe) @@ -43,7 +41,7 @@ store.freq <- recipe$Analysis$Variables$freq ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name # get sdates array -sdates <- dates2load(recipe, logger) +sdates <- sdates2load(recipe, logger) # get esarchive datasets dict: archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive @@ -268,4 +266,3 @@ if (accum){ sdates.hcst <- file_dates leadtimes.hcst <- dates_file - diff --git a/modules/data_load/s2dv_cube.R b/modules/data_load/s2dv_cube.R deleted file mode 100644 index a69a0fa2..00000000 --- a/modules/data_load/s2dv_cube.R +++ /dev/null @@ -1,185 +0,0 @@ -## TODO: NEEDED IMPROVEMENTS FOR ESS TOOL: -# - Having more than one variables could be required (for indices). -# - New checks for new dimensions -# - Instead of lat and lons, the object could have regions. - - - -#'Creation of a 's2dv_cube' object -#' -#'@description This function allows to create a 's2dv_cube' object by passing information through its parameters. This function will be needed if the data hasn't been loaded using CST_Load or has been transformed with other methods. A 's2dv_cube' object has many different components including metadata. This function will allow to create 's2dv_cube' objects even if not all elements are defined and for each expected missed parameter a warning message will be returned. -#' -#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -#' -#'@param data an array with any number of named dimensions, typically an object output from CST_Load, with the following dimensions: dataset, member, sdate, ftime, lat and lon. 
-#'@param lon an array with one dimension containing the longitudes and attributes: dim, cdo_grid_name, data_across_gw, array_across_gw, first_lon, last_lon and projection. -#'@param lat an array with one dimension containing the latitudes and attributes: dim, cdo_grid_name, first_lat, last_lat and projection. -#'@param Variable a list of two elements: \code{varName} a character string indicating the abbreviation of a variable name and \code{level} a character string indicating the level (e.g., "2m"), if it is not required it could be set as NULL. -#'@param Datasets a named list with the dataset model with two elements: \code{InitiatlizationDates}, containing a list of the start dates for each member named with the names of each member, and \code{Members} containing a vector with the member names (e.g., "Member_1") -#'@param Dates a named list of two elements: \code{start}, an array of dimensions (sdate, time) with the POSIX initial date of each forecast time of each starting date, and \code{end}, an array of dimensions (sdate, time) with the POSIX final date of each forecast time of each starting date. -#'@param when a time stamp of the date issued by the Load() call to obtain the data. -#'@param source_files a vector of character strings with complete paths to all the found files involved in the Load() call. -#' -#'@return The function returns an object of class 's2dv_cube'. 
-#' -#'@seealso \code{\link[s2dverification]{Load}} and \code{\link{CST_Load}} -#'@examples -#'exp_original <- 1:100 -#'dim(exp_original) <- c(lat = 2, time = 10, lon = 5) -#'exp1 <- s2dv_cube(data = exp_original) -#'class(exp1) -#'exp2 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50)) -#'class(exp2) -#'exp3 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m')) -#'class(exp3) -#'exp4 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999))) -#'class(exp4) -#'exp5 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999)), -#' when = "2019-10-23 19:15:29 CET") -#'class(exp5) -#'exp6 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999)), -#' when = "2019-10-23 19:15:29 CET", -#' source_files = c("/path/to/file1.nc", "/path/to/file2.nc")) -#'class(exp6) -#'exp7 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999)), -#' when = "2019-10-23 19:15:29 CET", -#' source_files = c("/path/to/file1.nc", "/path/to/file2.nc"), -#' Datasets = list( -#' exp1 = list(InitializationsDates = list(Member_1 = "01011990", -#' Members = "Member_1")))) -#'class(exp7) -#'dim(exp_original) <- c(dataset 
= 1, member = 1, sdate = 2, ftime = 5, lat = 2, lon = 5) -#'exp8 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999))) -#'class(exp8) -#'@export -s2dv_cube <- function(data, lon = NULL, lat = NULL, Variable = NULL, Datasets = NULL, - Dates = NULL, when = NULL, source_files = NULL) { - - TEMPORAL_DIMS <- c('syear','sday','sweek', 'time') - - if (is.null(data) | !is.array(data) | is.null(names(dim(data)))) { - stop("Parameter 'data' must be an array with named dimensions.") - } - dims <- dim(data) - if (is.null(lon)) { - if (!any(c('lon', 'longitude') %in% names(dims))) { - warning("Parameter 'lon' is not provided but data contains a ", - "longitudinal dimension.") - } else { - warning("Parameter 'lon' is not provided so the data is from an ", - "unknown location.") - } - } - if (is.null(lat)) { - if (!any(c('lat', 'latitude') %in% names(dims))) { - warning("Parameter 'lat' is not provided but data contains a ", - "latitudinal dimension.") - } else { - warning("Parameter 'lat' is not provided so the data is from an ", - "unknown location.") - } - } - if (is.null(Variable)) { - warning("Parameter 'Variable' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if (is.null(Datasets)) { - warning("Parameter 'Datasets' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if (is.null(Dates)) { - if (!any(TEMPORAL_DIMS %in% names(dims))) { - warning("Parameter 'Dates' is not provided but data contains a ", - "temporal dimension.") - } else { - warning("Parameter 'Dates' is not provided so the data is from an ", - "unknown time period.") - } - } - if (is.null(when)) { - warning("Parameter 'when' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if 
(is.null(source_files)) { - warning("Parameter 'source_files' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if (!is.null(Variable)) { - if (!is.list(Variable)) { - Variable <- list(Variable) - } - if (names(Variable)[1] != 'varName' | names(Variable)[2] != 'level') { - warning("The name of the first elment of parameter 'Variable' is ", - "expected to be 'varName' and the second 'level'.") - } - if (!is.character(Variable[[1]])) { - warning("The element 'Varname' of parameter 'Variable' must be ", - "a character.") - } - } - # Dimensions comparison - if (!is.null(lon)) { - if (any(names(dims) %in% c('lon', 'longitude'))) { - if (dims[(names(dims) %in% c('lon', 'longitude'))] != length(lon) & - dims[(names(dims) %in% c('lon', 'longitude'))] != 1) { - stop("Length of parameter 'lon' doesn't match the length of ", - "longitudinal dimension in parameter 'data'.") - } - } - } - if (!is.null(lat)) { - if (any(names(dims) %in% c('lat', 'latitude'))) { - if (dims[(names(dims) %in% c('lat', 'latitude'))] != length(lat) & - dims[(names(dims) %in% c('lat', 'latitude'))] != 1) { - stop("Length of parameter 'lat' doesn't match the length of ", - "latitudinal dimension in parameter 'data'.") - } - } - } - if (!is.null(Dates)) { - if (!is.list(Dates)) { - stop("Parameter 'Dates' must be a list.") - } else { - if (length(Dates) > 2) { - warning("Parameter 'Dates' is a list with more than 2 ", - "elements and only the first two will be used.") - Dates <- Dates[1 : 2] - } - if (names(Dates)[1] != 'start' | names(Dates)[2] != 'end') { - warning("The name of the first element of parameter 'Dates' ", - "is expected to be 'start' and the second 'end'.") - } - if (length(Dates[[1]]) != length(Dates[[2]]) & - length(Dates) == 2) { - stop("The length of the elements in parameter 'Dates' must ", - "be equal.") - } - time_dims <- dims[names(dims) %in% TEMPORAL_DIMS] - if (prod(time_dims) != length(Dates[[1]])) { - stop("The length of the temporal 
dimension doesn't match ", - " with the length of elements in parameter 'Dates'.") - } - } - } - object <- list(data = data, lon = lon, lat = lat, Variable = Variable, - Datasets = Datasets, Dates = Dates, when = when, - source_files = source_files) - class(object) <- 's2dv_cube' - return(object) -} diff --git a/modules/data_load/dates2load.R b/modules/data_load/sdates2load.R similarity index 88% rename from modules/data_load/dates2load.R rename to modules/data_load/sdates2load.R index c7c392a5..2fa8b0ad 100644 --- a/modules/data_load/dates2load.R +++ b/modules/data_load/sdates2load.R @@ -1,7 +1,7 @@ # Taking the recipe returns the array of sdates to be loaded # both for the hcst and fcst. -dates2load <- function(recipe, logger){ +sdates2load <- function(recipe, logger){ recipe <- recipe$Analysis$Time @@ -14,7 +14,7 @@ dates2load <- function(recipe, logger){ # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$sdate$fcst_syear)){ file_dates.fcst <- paste0(recipe$sdate$fcst_syear,recipe$sdate$fcst_sday) - file_dates.fcst <- add_dims(file_dates.fcst, "fcst") + file_dates.fcst <- .add_dims(file_dates.fcst, "fcst") } else { file_dates.fcst <- NULL info(logger, @@ -27,7 +27,7 @@ dates2load <- function(recipe, logger){ } # adds the correspondent dims to each sdate array -add_dims <- function(data, type){ +.add_dims <- function(data, type){ if (type == "hcst"){ default_dims <- c(sday = 1, sweek = 1, diff --git a/modules/data_load/recipe_1.yml b/modules/data_load/testing_recipes/recipe_1.yml similarity index 100% rename from modules/data_load/recipe_1.yml rename to modules/data_load/testing_recipes/recipe_1.yml diff --git a/modules/data_load/recipe_2.yml b/modules/data_load/testing_recipes/recipe_2.yml similarity index 84% rename from modules/data_load/recipe_2.yml rename to modules/data_load/testing_recipes/recipe_2.yml index d2376b23..f7e40feb 100644 --- a/modules/data_load/recipe_2.yml +++ b/modules/data_load/testing_recipes/recipe_2.yml @@ 
-39,5 +39,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ -- GitLab From 545a52841d233ff2196b0319161257eaf69253a4 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 23 Mar 2022 12:20:50 +0100 Subject: [PATCH 36/47] Add loading of forecast --- modules/data_load/load.R | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 752c81f0..eec6efbd 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -73,6 +73,10 @@ hcst.path <- paste0(archive$src, hcst.dir, store.freq, "/$var$", exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") +fcst.path <- paste0(archive$src, + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], "$var$_$fcst_syear$.nc") + # Define regrid parameters: #------------------------------------------------------------------- regrid_params <- get_regrid_params(recipe, archive) @@ -85,7 +89,7 @@ if (lons.min >= 0) { circularsort <- CircularSort(-180, 180) } -# Hindcast timeseries load +# Load hindcast #------------------------------------------------------------------- hcst <- Start(dat = hcst.path, var = variable, @@ -109,7 +113,31 @@ hcst <- Start(dat = hcst.path, split_multiselected_dims = TRUE, retrieve = TRUE) -# Get forecast dates +# Load forecast +#------------------------------------------------------------------- +fcst <- Start(dat = fcst.path, + var = variable, + fcst_syear = sdates$fcst, + time = indices(ltmin:ltmax), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = 
list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'fcst_syear'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + +# Get reference dates from hcst Start call ## TODO: Replace with new code once An-Chi's fix is merged in startR dates <- attr(hcst, 'Variables')$common$time dates_file <- sapply(dates, format, '%Y%m') @@ -118,7 +146,8 @@ dates.dims[names(dim(dates))] <- dim(dates) dim(dates) <- dates.dims names(dim(dates))[4] <- "sdate" # to be changed to 'syear' -# For monthly data, dates are changed to day 1 of month correct retrieval. +# For monthly data, dates are changed to day 1 of month for correct retrieval. +## TODO: Separate calls for monthly and daily? if (store.freq == "monthly_mean") { dates_hcst <- dates # store dates dims dates <- lubridate::floor_date(dates, unit = 'month') @@ -127,7 +156,7 @@ if (store.freq == "monthly_mean") { dim(dates_file) <- dim(dates) -# Reference load +# Load reference #------------------------------------------------------------------- obs <- Start(dat = obs.path, @@ -189,6 +218,7 @@ startR_to_s2dv <- function(startR_array){ } hcst <- startR_to_s2dv(hcst) +fcst <- startR_to_s2dv(fcst) obs <- startR_to_s2dv(obs) ## TODO: Review/modify code from here onwards. 
-- GitLab From 57512bcd52852940bf40f87f845e14bfc4a32ae5 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 24 Mar 2022 12:35:29 +0100 Subject: [PATCH 37/47] Use new sd2v_cube() fun (in development) --- modules/data_load/load.R | 8 +++++--- tools/libs.R | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index eec6efbd..64e9831b 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -1,6 +1,6 @@ source("modules/data_load/dates2load.R") source("modules/data_load/regrid.R") -# source("modules/data_load/s2dv_cube.R") +source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R") # source("recipe.R") # Load required libraries @@ -190,7 +190,6 @@ attr2array <- function(attr){ return(array(as.vector(attr), dim(attr))) } - startR_to_s2dv <- function(startR_array){ dates_dims <- c("syear","time") @@ -198,6 +197,8 @@ startR_to_s2dv <- function(startR_array){ dates_end <- attr(startR_array, 'Variables')$common$time names(dim(dates_start)) <- dates_dims names(dim(dates_end)) <- dates_dims + ## TODO: change this line? + time_dims <- c("time", "sday", "sweek", "syear", "sdate", "fcst_syear") s2dv_object <- s2dv_cube(data = attr2array(startR_array), lon = attr2array(attr(startR_array, 'Variables')$dat1$longitude), @@ -207,7 +208,8 @@ startR_to_s2dv <- function(startR_array){ level = NULL), Dates = list(start = dates_start, end = dates_end), - when = Sys.time(), + time_dims = time_dims, + when = Sys.time(), source_files = attr2array(attr(startR_array, "Files")) #Datasets = list(exp1 = list(InitializationsDates = list(Member_1 = "01011990", # Members = "Member_1"))) diff --git a/tools/libs.R b/tools/libs.R index 7b010f54..75804970 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -8,7 +8,7 @@ library(yaml) # library(abind) # library(easyVerification) # library(easyNCDF) - library(CSTools) +# library(CSTools) # # library(parallel) # library(pryr) # To check mem usage. 
-- GitLab From b4d5f125653272b3225435f5a03bbc5a8eb41001 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 25 Mar 2022 17:01:38 +0100 Subject: [PATCH 38/47] Add CDO-compatible grid to regrid param description --- recipes/seasonal_oper.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/seasonal_oper.yml b/recipes/seasonal_oper.yml index 3e3d04e3..5e5f61fc 100644 --- a/recipes/seasonal_oper.yml +++ b/recipes/seasonal_oper.yml @@ -43,7 +43,7 @@ Analysis: - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} Regrid: method: bilinear # str mandatory - type: to_system # str either to_system or to_reference mandatory + type: to_system # str either to_system, to_reference or CDO-compatible grid mandatory Data_load: module: "modules/data_load/seas5.load.R" Workflow: -- GitLab From 926ce97d7832e2fac2e6a4fd0aed6ef20ab9194b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 25 Mar 2022 17:06:31 +0100 Subject: [PATCH 39/47] Separate daily vs monthly call for obs --- modules/data_load/load.R | 92 +++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 39 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 64e9831b..ae4b00ff 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -1,7 +1,6 @@ source("modules/data_load/dates2load.R") -source("modules/data_load/regrid.R") +source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R") -# source("recipe.R") # Load required libraries source("tools/libs.R") @@ -137,51 +136,66 @@ fcst <- Start(dat = fcst.path, split_multiselected_dims = TRUE, retrieve = TRUE) +# Load reference +#------------------------------------------------------------------- + # Get reference dates from hcst Start call -## TODO: Replace with new code once An-Chi's fix is merged in startR +## TODO: Replace with new code once An-Chi's fix is released in StartR 
dates <- attr(hcst, 'Variables')$common$time dates_file <- sapply(dates, format, '%Y%m') dates.dims <- c(sday = 1, sweek = 1, time = 1) dates.dims[names(dim(dates))] <- dim(dates) dim(dates) <- dates.dims names(dim(dates))[4] <- "sdate" # to be changed to 'syear' - -# For monthly data, dates are changed to day 1 of month for correct retrieval. -## TODO: Separate calls for monthly and daily? -if (store.freq == "monthly_mean") { - dates_hcst <- dates # store dates dims - dates <- lubridate::floor_date(dates, unit = 'month') - dim(dates) <- dim(dates_hcst) # restore dates dims -} - dim(dates_file) <- dim(dates) -# Load reference -#------------------------------------------------------------------- - -obs <- Start(dat = obs.path, - var = variable, - file_date = sort(unique(dates_file)), - time = dates, - time_var = 'time', - time_across = 'file_date', - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude=c('lat','latitude'), +# Separate Start() call for monthly vs daily data +if (store.freq == "monthly_mean") { + obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude=c('lat','latitude'), longitude=c('lon','longitude')), - return_vars = 
list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) +} else if (store.freq == "daily_mean") { + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude=c('lat','latitude'), + longitude=c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) +} # Conversion from startR_array to s2dv_array #------------------------------------------------------------------- @@ -192,12 +206,12 @@ attr2array <- function(attr){ startR_to_s2dv <- function(startR_array){ - dates_dims <- c("syear","time") + dates_dims <- c("syear", "time") dates_start <- attr(startR_array, 'Variables')$common$time dates_end <- attr(startR_array, 'Variables')$common$time names(dim(dates_start)) <- dates_dims names(dim(dates_end)) <- dates_dims - ## TODO: change this line? + ## TODO: change this line when time attributes work correctly? 
time_dims <- c("time", "sday", "sweek", "syear", "sdate", "fcst_syear") s2dv_object <- s2dv_cube(data = attr2array(startR_array), -- GitLab From 6a1825f393e390802a8f5538b58c2fb352b6ec6e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 28 Mar 2022 16:50:36 +0200 Subject: [PATCH 40/47] Ensure correct obs dates for daily case --- modules/data_load/load.R | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index ae4b00ff..56eecf19 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -8,8 +8,8 @@ source("tools/libs.R") # RECIPE FOR TESTING # ------------------------------------------------------------------------------------------- ## TODO: Get only the last part of the path as the recipe$filename? -recipe <- read_yaml("modules/data_load/recipe_1.yml") -args <- NULL; args[1] <- "modules/data_load/recipe_1.yml" +recipe <- read_yaml("modules/data_load/recipe_2.yml") +args <- NULL; args[1] <- "modules/data_load/recipe_2.yml" recipe$filename <- args[1] # Create output folder and log: logger <- prepare_outputs(recipe = recipe) @@ -140,13 +140,19 @@ fcst <- Start(dat = fcst.path, #------------------------------------------------------------------- # Get reference dates from hcst Start call + ## TODO: Replace with new code once An-Chi's fix is released in StartR dates <- attr(hcst, 'Variables')$common$time +# Get year and month for file_date dates_file <- sapply(dates, format, '%Y%m') dates.dims <- c(sday = 1, sweek = 1, time = 1) dates.dims[names(dim(dates))] <- dim(dates) +# Set hour to 12:00 to ensure correct date retrieval for daily data +lubridate::hour(dates) <- 12 +lubridate::minute(dates) <- 00 +# Restore correct dimensions dim(dates) <- dates.dims -names(dim(dates))[4] <- "sdate" # to be changed to 'syear' +names(dim(dates))[4] <- "sdate" ## TODO: change to 'syear' dim(dates_file) <- dim(dates) # Separate Start() call for monthly vs daily data 
@@ -164,8 +170,8 @@ if (store.freq == "monthly_mean") { transform_params = list(grid = regrid_params$obs.gridtype, method = regrid_params$obs.gridmethod), transform_vars = c('latitude', 'longitude'), - synonims = list(latitude=c('lat','latitude'), - longitude=c('lon','longitude')), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), @@ -188,8 +194,8 @@ if (store.freq == "monthly_mean") { transform_params = list(grid = regrid_params$obs.gridtype, method = regrid_params$obs.gridmethod), transform_vars = c('latitude', 'longitude'), - synonims = list(latitude=c('lat','latitude'), - longitude=c('lon','longitude')), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), @@ -197,6 +203,8 @@ if (store.freq == "monthly_mean") { retrieve = TRUE) } +# TODO: Reorder obs dims to match hcst dims? 
+ # Conversion from startR_array to s2dv_array #------------------------------------------------------------------- -- GitLab From 8ef407eca7252c2cea74a2714434886c99b7aef4 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 29 Mar 2022 09:15:18 +0200 Subject: [PATCH 41/47] Added tasmin and tasmax to SEAS5 monthly --- conf/archive.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/archive.yml b/conf/archive.yml index 0feb34b8..30d4c81a 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -8,7 +8,8 @@ archive: daily_mean: {"tas":"_f6h/","rsds":"_s0-24h/", "prlr":"_s0-24h/","sfcWind":"_f6h/"} monthly_mean: {"tas":"_f6h/","rsds":"_s0-24h/", - "prlr":"_s0-24h/","sfcWind":"_f6h/"} + "prlr":"_s0-24h/","sfcWind":"_f6h/", + "tasmin":"_f24h/","tasmax":"_f24h/"} nmember: fcst: 51 hcst: 25 -- GitLab From 7e62b726a0a96d2baeefbaff1f2a7765c37171f5 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 29 Mar 2022 10:06:08 +0200 Subject: [PATCH 42/47] Changed 'sdate' dim name back to 'syear' --- modules/data_load/dates2load.R | 2 +- modules/data_load/load.R | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R index c7c392a5..a00aefad 100644 --- a/modules/data_load/dates2load.R +++ b/modules/data_load/dates2load.R @@ -31,7 +31,7 @@ add_dims <- function(data, type){ if (type == "hcst"){ default_dims <- c(sday = 1, sweek = 1, - sdate = length(data)) + syear = length(data)) } else { default_dims <- c(fcst_syear = length(data)) } diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 56eecf19..29e141d4 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -152,7 +152,7 @@ lubridate::hour(dates) <- 12 lubridate::minute(dates) <- 00 # Restore correct dimensions dim(dates) <- dates.dims -names(dim(dates))[4] <- "sdate" ## TODO: change to 'syear' +names(dim(dates))[4] <- "syear" dim(dates_file) <- dim(dates) # Separate 
Start() call for monthly vs daily data @@ -220,7 +220,7 @@ startR_to_s2dv <- function(startR_array){ names(dim(dates_start)) <- dates_dims names(dim(dates_end)) <- dates_dims ## TODO: change this line when time attributes work correctly? - time_dims <- c("time", "sday", "sweek", "syear", "sdate", "fcst_syear") + time_dims <- c("time", "sday", "sweek", "syear", "fcst_syear") s2dv_object <- s2dv_cube(data = attr2array(startR_array), lon = attr2array(attr(startR_array, 'Variables')$dat1$longitude), @@ -245,6 +245,7 @@ hcst <- startR_to_s2dv(hcst) fcst <- startR_to_s2dv(fcst) obs <- startR_to_s2dv(obs) + ## TODO: Review/modify code from here onwards. ## TODO: new files checker? -- GitLab From 909e99622d830acf55188aa4659f1e916bb9c9f1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 30 Mar 2022 15:31:25 +0200 Subject: [PATCH 43/47] Add fun headers and small changes --- modules/data_load/dates2load.R | 21 ++++++++++++++++----- modules/data_load/load.R | 2 +- modules/data_load/recipe_1.yml | 2 +- modules/data_load/recipe_2.yml | 6 +++--- tools/prepare_outputs.R | 23 +++++++++++++++++++++++ 5 files changed, 44 insertions(+), 10 deletions(-) diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R index a00aefad..f5d673b9 100644 --- a/modules/data_load/dates2load.R +++ b/modules/data_load/dates2load.R @@ -1,6 +1,17 @@ - -# Taking the recipe returns the array of sdates to be loaded -# both for the hcst and fcst. +#'Read requested dates from recipe and return array of dates to be loaded +#' +#'The purpose of this function is to read the recipe configuration data for +#'Auto-S2S workflows, retrieve the start and end dates for the hindcast and +#'the forecast dates, and return two arrays containing the requested dates, to +#'be passed to the Start() call. If no fcst date is provided, it returns an +#'empty object. 
+#' +#'@param recipe Auto-S2S configuration recipe as returned by read_yaml() +#'@param logger object of class logger containing log output file information +#' +#'@return a list of two arrays containing dates for hcst and fcst +#' +#'@export dates2load <- function(recipe, logger){ recipe <- recipe$Analysis$Time @@ -13,7 +24,7 @@ dates2load <- function(recipe, logger){ # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$sdate$fcst_syear)){ - file_dates.fcst <- paste0(recipe$sdate$fcst_syear,recipe$sdate$fcst_sday) + file_dates.fcst <- paste0(recipe$sdate$fcst_syear, recipe$sdate$fcst_sday) file_dates.fcst <- add_dims(file_dates.fcst, "fcst") } else { file_dates.fcst <- NULL @@ -21,7 +32,7 @@ dates2load <- function(recipe, logger){ paste("fcst_year empty in the recipe, creating empty fcst object...")) } - return(list(hcst=file_dates,fcst=file_dates.fcst)) + return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun } diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 29e141d4..61c0531e 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -7,7 +7,7 @@ source("tools/libs.R") # RECIPE FOR TESTING # ------------------------------------------------------------------------------------------- -## TODO: Get only the last part of the path as the recipe$filename? +## TODO: Should this part go to OperationalCS.R? 
recipe <- read_yaml("modules/data_load/recipe_2.yml") args <- NULL; args[1] <- "modules/data_load/recipe_2.yml" recipe$filename <- args[1] diff --git a/modules/data_load/recipe_1.yml b/modules/data_load/recipe_1.yml index cdb7b0be..c7cdfca1 100644 --- a/modules/data_load/recipe_1.yml +++ b/modules/data_load/recipe_1.yml @@ -5,7 +5,7 @@ Analysis: Horizon: Seasonal Variables: name: tas - freq: monthly_mean + freq: daily_mean Datasets: System: name: system5c3s diff --git a/modules/data_load/recipe_2.yml b/modules/data_load/recipe_2.yml index d2376b23..0f44c0c6 100644 --- a/modules/data_load/recipe_2.yml +++ b/modules/data_load/recipe_2.yml @@ -5,16 +5,16 @@ Analysis: Horizon: Seasonal Variables: name: tas - freq: daily_mean + freq: monthly_mean Datasets: System: - name: system5c3s + name: system7c3s Multimodel: no Reference: name: era5 Time: sdate: - fcst_syear: '2017' + fcst_syear: '2020' fcst_sday: '0701' hcst_start: '1993' hcst_end: '2016' diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index b440612d..06f8270f 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -1,3 +1,26 @@ +#'Read recipe YAML file and create and store logfile info +#' +#'The purpose of this function is to read the recipe configuration for Auto-S2S +#'workflows and create logfiles stored in the output directory specified in +#'the recipe. It returns an object of class logger that stores information on +#'the recipe configuration and errors. 
+#' +#'@param recipe Auto-S2S configuration recipe as returned by read_yaml() +#' +#'@return list containing logger object, log filename and log directory name +#' +#'@import log4r +#' +#'@examples +#'setwd("/esarchive/scratch/vagudets/repos/auto-s2s/") +#'library(yaml) +#'recipe <- read_yaml("modules/data_load/recipe_1.yml") +#'logger <- prepare_outputs(recipe) +#'folder <- logger$foldername +#'log_file <- logger$logname +#'logger <- logger$logger +#' +#'@export prepare_outputs <- function(recipe) { -- GitLab From 8ed851a4fc56a80fa6bfd40e5b5a8b641942c784 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 30 Mar 2022 15:35:11 +0200 Subject: [PATCH 44/47] Add grid description files for archive --- conf/archive.yml | 6 +++--- conf/grid_description/griddes_system35c3s.txt | 19 +++++++++++++++++++ conf/grid_description/griddes_system7c3s.txt | 19 +++++++++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 conf/grid_description/griddes_system35c3s.txt create mode 100644 conf/grid_description/griddes_system7c3s.txt diff --git a/conf/archive.yml b/conf/archive.yml index 30d4c81a..57574919 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -21,15 +21,15 @@ archive: nmember: fcst: 51 hcst: 25 - reference_grid: "/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_20191001.nc" + reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system7c3s.txt" system35c3s: - src: "/exp/cmcc/system35c3s/" + src: "exp/cmcc/system35c3s/" monthly_mean: {"tas":"_f6h/","g500":"_f12h/", "prlr":"_f24h/", "sfcWind": "_f6h/"} nmember: fcst: 50 hcst: 40 - reference_grid: "/esarchive/exp/cmcc/system35c3s/monthly_mean/tas_f6h/tas_20210101.nc" + reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system35c3s.txt" Reference: era5: src: "recon/ecmwf/era5/" diff --git a/conf/grid_description/griddes_system35c3s.txt b/conf/grid_description/griddes_system35c3s.txt new file 
mode 100644 index 00000000..3b1e1a98 --- /dev/null +++ b/conf/grid_description/griddes_system35c3s.txt @@ -0,0 +1,19 @@ +# Grid description file for CMCC 3.5 (C3S) +# Serves as reference_grid for archive.yml +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_system7c3s.txt b/conf/grid_description/griddes_system7c3s.txt new file mode 100644 index 00000000..b6f18478 --- /dev/null +++ b/conf/grid_description/griddes_system7c3s.txt @@ -0,0 +1,19 @@ +# Grid description file for Meteofrance System 7 (C3S) +# Serves as reference_grid for archive.yml +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = longitude +xlongname = "longitude" +xunits = "degrees_east" +yname = latitude +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 -- GitLab From 776714657317cf08e609de15d631a2f2fa2fdca6 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 31 Mar 2022 15:56:24 +0200 Subject: [PATCH 45/47] small changes --- conf/grid_description/griddes_system35c3s.txt | 2 +- modules/data_load/dates2load.R | 4 ++-- modules/data_load/recipe_2.yml | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/grid_description/griddes_system35c3s.txt b/conf/grid_description/griddes_system35c3s.txt index 3b1e1a98..a1248680 100644 --- a/conf/grid_description/griddes_system35c3s.txt +++ b/conf/grid_description/griddes_system35c3s.txt @@ -1,4 +1,4 @@ -# Grid description file for CMCC 3.5 (C3S) +# Grid description file for CMCC SPSv3.5 (C3S) # Serves as reference_grid for archive.yml # # gridID 2 diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R index f5d673b9..fc26d6f0 100644 --- a/modules/data_load/dates2load.R +++ 
b/modules/data_load/dates2load.R @@ -1,4 +1,4 @@ -#'Read requested dates from recipe and return array of dates to be loaded +#'Read requested dates from recipe and return array of file dates to be loaded #' #'The purpose of this function is to read the recipe configuration data for #'Auto-S2S workflows, retrieve the start and end dates for the hindcast and @@ -9,7 +9,7 @@ #'@param recipe Auto-S2S configuration recipe as returned by read_yaml() #'@param logger object of class logger containing log output file information #' -#'@return a list of two arrays containing dates for hcst and fcst +#'@return a list of two arrays containing file dates for hcst and fcst #' #'@export dates2load <- function(recipe, logger){ diff --git a/modules/data_load/recipe_2.yml b/modules/data_load/recipe_2.yml index 0f44c0c6..a39886e7 100644 --- a/modules/data_load/recipe_2.yml +++ b/modules/data_load/recipe_2.yml @@ -8,14 +8,14 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: system35c3s Multimodel: no Reference: - name: era5 + name: era5land Time: sdate: fcst_syear: '2020' - fcst_sday: '0701' + fcst_sday: '1101' hcst_start: '1993' hcst_end: '2016' leadtimemin: 2 -- GitLab From 53fc839cbf1cb89d7cdfec13151166d01b6eb44b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 31 Mar 2022 15:58:44 +0200 Subject: [PATCH 46/47] clean-up & update --- conf/grid_description/griddes_system21_m1.txt | 17 ++ modules/data_load/fcst_seas.load.R-OLD | 249 ------------------ modules/data_load/regrid.R | 41 --- modules/data_load/s2dv_cube.R | 185 ------------- recipe.R | 36 --- 5 files changed, 17 insertions(+), 511 deletions(-) create mode 100644 conf/grid_description/griddes_system21_m1.txt delete mode 100644 modules/data_load/fcst_seas.load.R-OLD delete mode 100644 modules/data_load/regrid.R delete mode 100644 modules/data_load/s2dv_cube.R delete mode 100644 recipe.R diff --git a/conf/grid_description/griddes_system21_m1.txt 
b/conf/grid_description/griddes_system21_m1.txt new file mode 100644 index 00000000..bf9ac52b --- /dev/null +++ b/conf/grid_description/griddes_system21_m1.txt @@ -0,0 +1,17 @@ +# Grid description file for DWD GCFS2.1 (CDS) +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/modules/data_load/fcst_seas.load.R-OLD b/modules/data_load/fcst_seas.load.R-OLD deleted file mode 100644 index 5e6a4b29..00000000 --- a/modules/data_load/fcst_seas.load.R-OLD +++ /dev/null @@ -1,249 +0,0 @@ - - -fcst.month <- substr(fcst.sdate,5,6) -fcst.year <- substr(fcst.sdate,1,4) - -file_dates.fcst <- paste(fcst.year, fcst.month, sep = "") -file_dates <- paste(strtoi(hcst.inityear):strtoi(hcst.endyear), - fcst.month, sep = "") - - -file_dates <- add_dims(file_dates) -file_dates.fcst <- add_dims(file_dates.fcst) -# Take parameters from conf/archive for datasets: -table <- read_yaml(paste0(conf$code_dir, "conf/archive.yml")) -dataset_descrip <- table$archive[which(names(table$archive) == fcst.name)][[1]] -freq.hcst <- unlist(dataset_descrip[store.freq][[1]][variable]) -reference_descrip <- table$archive[which(names(table$archive) == - tolower(ref.name))][[1]] -freq.obs <- unlist(reference_descrip[store.freq][[1]][variable]) -obs.dir <- reference_descrip$src -fcst.dir <- dataset_descrip$src -hcst.dir <- dataset_descrip$src -fcst.nmember <- dataset_descrip$nmember$fcst -hcst.nmember <- dataset_descrip$nmember$hcst - -if ("accum" %in% names(reference_descrip)) { - accum <- unlist(reference_descrip$accum[store.freq][[1]]) -} else { - accum <- FALSE -} -# ----------- - obs.path <- paste0("/esarchive/", - obs.dir, store.freq, "/$var$", - freq.obs,"$var$_$file_date$.nc") - - hcst.path <- paste0("/esarchive/", - hcst.dir, store.freq, "/$var$", - freq.hcst,"$var$_$file_date$01", - ".nc") - 
- fcst.path <- paste0("/esarchive/", - fcst.dir, store.freq, "/$var$", - freq.hcst,"$var$_$file_date$01", - ".nc") -#------------------------------------------------------------------- -# Regrid: -if (tolower(recipe$Analysis$Regrid$type) == 'reference') { - fcst.gridtype <- reference_descrip$regrid - fcst.gridmethod <- recipe$Analysis$Regrid$method - fcst.tranform <- CDORemapper - obs.gridtype <- NULL - obs.gridmethod <- NULL - obs.tranform <- NULL -} else if (tolower(recipe$Analysis$Regrid$type) == 'system') { - fcst.gridtype <- NULL - fcst.gridmethod <- NULL - fcst.transform <- NULL - obs.gridtype <- dataset_descrip$regrid - obs.gridmethod <- recipe$Analysis$Regrid$method - obs.transform <- CDORemapper -} else { - fcst.gridtype <- recipe$Analysis$Regrid$type - fcst.gridmethod <- recipe$Analysis$Regrid$method - fcst.transform <- CDORemapper - obs.gridtype <- recipe$Analysis$Regrid$type - obs.gridmethod <- recipe$Analysis$Regrid$method - obs.transform <- CDORemapper -} - - - # Timeseries load - #------------------------------------------------------------------- - - if (tolower(stream) == "fcst"){ - - fcst <- Start(dat = fcst.path, - var = variable, - file_date = file_dates.fcst, - time = indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - dataset_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - dataset_descrip$lon_circular_sort$ini, - dataset_descrip$lon_circular_sort$end), - transform = fcst.transform, - transform_params = list(grid = fcst.gridtype, - method = fcst.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude'), - member = c('ensemble')), - member = indices(1:fcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - 
retrieve = TRUE) - } - - hcst <- Start(dat = hcst.path, - var = variable, - file_date = file_dates, - time = indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - dataset_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - dataset_descrip$lon_circular_sort$ini, - dataset_descrip$lon_circular_sort$end), - transform = fcst.transform, - transform_params = list(grid = fcst.gridtype, - method = fcst.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude'), - member = c('ensemble')), - member = indices(1:hcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) - hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] - try(hcst.NA_files <- hcst.NA_files[order(hcst.NA_files)]) - - dates <- attr(hcst, 'Variables')$common$time - dates_file <- sapply(dates, format, '%Y%m%d') - dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") - dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member'), - list(1,1,1,1,1), drop="selected")) - - obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - reference_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - reference_descrip$lon_circular_sort$ini, - reference_descrip$lon_circular_sort$end), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - transform = obs.transform, - transform_params = list(grid = obs.gridtype, - method = obs.gridmethod, - crop = c(lons.min, lons.max, - lats.min, 
lats.max)), - transform_vars = c('latitude', 'longitude'), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - dates_file <- paste0(dates_file, '01') - dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member'), - list(1,1,1,1,1), drop="selected")) - - file_dates <- paste0(file_dates, '01') - dim(file_dates) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member', 'time'), - list(1,1,1,1,1,1), drop="selected")) - - - - obs.NA_dates.ind <- Apply(obs, - fun=(function(x){ all(is.na(x))}), - target_dims=c('time', 'latitude', 'longitude'))[[1]] - obs.NA_dates <- dates_file[obs.NA_dates.ind] - obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] - obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", - freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") - - if (any(is.na(hcst))){ - fatal(logger, - paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", - " ################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "hcst files:", - hcst.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status = 1) - } - - if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ - fatal(logger, - paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", - " ################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "obs files:", - obs.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status=1) - } - - info(logger, - "######### DATA LOADING COMPLETED SUCCESFULLY ##############") - - default_dims <- c(dat = 1, var = 1, sweek = 1, - sday = 1, syear = 1, time = 1, - 
latitude = 1, longitude = 1, member = 1) - - default_dims[names(dim(obs))] <- dim(obs) - dim(obs) <- default_dims - - lon <- attr(obs, 'Variables')$dat1$longitude - lat <- attr(obs, 'Variables')$dat1$latitude - hcst.times <- attr(hcst, 'Variables')$common$time - hcst.times <- sort(unique(sapply(as.character(hcst.times), - substr, 1, 10))) - - #obs<-Subset(obs,c('dat','var'),list(1,1),drop='selected') - #hcst<-Subset(hcst,c('dat','var'),list(1,1),drop='selected') - #if (stream == "fcst"){ - # fcst<-Subset(fcst,c('dat','var'),list(1,1),drop='selected') - #} - - #filters negative values in accum vars - if (accum){ - obs[obs < 0 ] <- 0 - hcst[hcst < 0 ] <- 0 - if (stream == "fcst"){ fcst[fcst < 0 ] <- 0 } - } - - sdates.hcst <- file_dates - sdates.fcst <- file_dates.fcst - leadtimes.hcst <- dates_file - diff --git a/modules/data_load/regrid.R b/modules/data_load/regrid.R deleted file mode 100644 index 0d3c96de..00000000 --- a/modules/data_load/regrid.R +++ /dev/null @@ -1,41 +0,0 @@ - -get_regrid_params <- function(recipe, archive){ - - exp.name <- recipe$Analysis$Datasets$System$name - ref.name <- recipe$Analysis$Datasets$Reference$name - exp_descrip <- archive$System[[exp.name]] - reference_descrip <- archive$Reference[[ref.name]] - - if (tolower(recipe$Analysis$Regrid$type) == 'to_reference') { - - regrid_params <- list(fcst.gridtype=reference_descrip$reference_grid, - fcst.gridmethod=recipe$Analysis$Regrid$method, - fcst.transform=CDORemapper, - obs.gridtype=NULL, - obs.gridmethod=NULL, - obs.transform=NULL) - - } else if (tolower(recipe$Analysis$Regrid$type) == 'to_system') { - - regrid_params <- list(fcst.gridtype=NULL, - fcst.gridmethod=NULL, - fcst.transform=NULL, - obs.gridtype=exp_descrip$reference_grid, - obs.gridmethod=recipe$Analysis$Regrid$method, - obs.transform=CDORemapper) - - } else { - ##TODO: Define conditions and recipe params for middle interpolation case - regrid_params <- list(fcst.gridtype=recipe$Analysis$Regrid$type, # r256x128 - 
fcst.gridmethod=recipe$Analysis$Regrid$method, - fcst.transform=CDORemapper, - obs.gridtype=recipe$Analysis$Regrid$type, - obs.gridmethod=recipe$Analysis$Regrid$method, - obs.transform=CDORemapper) - } - - return(regrid_params) - -} - - diff --git a/modules/data_load/s2dv_cube.R b/modules/data_load/s2dv_cube.R deleted file mode 100644 index a69a0fa2..00000000 --- a/modules/data_load/s2dv_cube.R +++ /dev/null @@ -1,185 +0,0 @@ -## TODO: NEEDED IMPROVEMENTS FOR ESS TOOL: -# - Having more than one variables could be required (for indices). -# - New checks for new dimensions -# - Instead of lat and lons, the object could have regions. - - - -#'Creation of a 's2dv_cube' object -#' -#'@description This function allows to create a 's2dv_cube' object by passing information through its parameters. This function will be needed if the data hasn't been loaded using CST_Load or has been transformed with other methods. A 's2dv_cube' object has many different components including metadata. This function will allow to create 's2dv_cube' objects even if not all elements are defined and for each expected missed parameter a warning message will be returned. -#' -#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -#' -#'@param data an array with any number of named dimensions, typically an object output from CST_Load, with the following dimensions: dataset, member, sdate, ftime, lat and lon. -#'@param lon an array with one dimension containing the longitudes and attributes: dim, cdo_grid_name, data_across_gw, array_across_gw, first_lon, last_lon and projection. -#'@param lat an array with one dimension containing the latitudes and attributes: dim, cdo_grid_name, first_lat, last_lat and projection. -#'@param Variable a list of two elements: \code{varName} a character string indicating the abbreviation of a variable name and \code{level} a character string indicating the level (e.g., "2m"), if it is not required it could be set as NULL. 
-#'@param Datasets a named list with the dataset model with two elements: \code{InitiatlizationDates}, containing a list of the start dates for each member named with the names of each member, and \code{Members} containing a vector with the member names (e.g., "Member_1") -#'@param Dates a named list of two elements: \code{start}, an array of dimensions (sdate, time) with the POSIX initial date of each forecast time of each starting date, and \code{end}, an array of dimensions (sdate, time) with the POSIX final date of each forecast time of each starting date. -#'@param when a time stamp of the date issued by the Load() call to obtain the data. -#'@param source_files a vector of character strings with complete paths to all the found files involved in the Load() call. -#' -#'@return The function returns an object of class 's2dv_cube'. -#' -#'@seealso \code{\link[s2dverification]{Load}} and \code{\link{CST_Load}} -#'@examples -#'exp_original <- 1:100 -#'dim(exp_original) <- c(lat = 2, time = 10, lon = 5) -#'exp1 <- s2dv_cube(data = exp_original) -#'class(exp1) -#'exp2 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50)) -#'class(exp2) -#'exp3 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m')) -#'class(exp3) -#'exp4 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999))) -#'class(exp4) -#'exp5 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999)), -#' when = "2019-10-23 19:15:29 CET") -#'class(exp5) -#'exp6 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = 
c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999)), -#' when = "2019-10-23 19:15:29 CET", -#' source_files = c("/path/to/file1.nc", "/path/to/file2.nc")) -#'class(exp6) -#'exp7 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999)), -#' when = "2019-10-23 19:15:29 CET", -#' source_files = c("/path/to/file1.nc", "/path/to/file2.nc"), -#' Datasets = list( -#' exp1 = list(InitializationsDates = list(Member_1 = "01011990", -#' Members = "Member_1")))) -#'class(exp7) -#'dim(exp_original) <- c(dataset = 1, member = 1, sdate = 2, ftime = 5, lat = 2, lon = 5) -#'exp8 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999))) -#'class(exp8) -#'@export -s2dv_cube <- function(data, lon = NULL, lat = NULL, Variable = NULL, Datasets = NULL, - Dates = NULL, when = NULL, source_files = NULL) { - - TEMPORAL_DIMS <- c('syear','sday','sweek', 'time') - - if (is.null(data) | !is.array(data) | is.null(names(dim(data)))) { - stop("Parameter 'data' must be an array with named dimensions.") - } - dims <- dim(data) - if (is.null(lon)) { - if (!any(c('lon', 'longitude') %in% names(dims))) { - warning("Parameter 'lon' is not provided but data contains a ", - "longitudinal dimension.") - } else { - warning("Parameter 'lon' is not provided so the data is from an ", - "unknown location.") - } - } - if (is.null(lat)) { - if (!any(c('lat', 'latitude') %in% names(dims))) { - warning("Parameter 'lat' is not provided but data contains a ", - "latitudinal 
dimension.") - } else { - warning("Parameter 'lat' is not provided so the data is from an ", - "unknown location.") - } - } - if (is.null(Variable)) { - warning("Parameter 'Variable' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if (is.null(Datasets)) { - warning("Parameter 'Datasets' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if (is.null(Dates)) { - if (!any(TEMPORAL_DIMS %in% names(dims))) { - warning("Parameter 'Dates' is not provided but data contains a ", - "temporal dimension.") - } else { - warning("Parameter 'Dates' is not provided so the data is from an ", - "unknown time period.") - } - } - if (is.null(when)) { - warning("Parameter 'when' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if (is.null(source_files)) { - warning("Parameter 'source_files' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if (!is.null(Variable)) { - if (!is.list(Variable)) { - Variable <- list(Variable) - } - if (names(Variable)[1] != 'varName' | names(Variable)[2] != 'level') { - warning("The name of the first elment of parameter 'Variable' is ", - "expected to be 'varName' and the second 'level'.") - } - if (!is.character(Variable[[1]])) { - warning("The element 'Varname' of parameter 'Variable' must be ", - "a character.") - } - } - # Dimensions comparison - if (!is.null(lon)) { - if (any(names(dims) %in% c('lon', 'longitude'))) { - if (dims[(names(dims) %in% c('lon', 'longitude'))] != length(lon) & - dims[(names(dims) %in% c('lon', 'longitude'))] != 1) { - stop("Length of parameter 'lon' doesn't match the length of ", - "longitudinal dimension in parameter 'data'.") - } - } - } - if (!is.null(lat)) { - if (any(names(dims) %in% c('lat', 'latitude'))) { - if (dims[(names(dims) %in% c('lat', 'latitude'))] != length(lat) & - dims[(names(dims) %in% c('lat', 'latitude'))] != 1) { - stop("Length of parameter 'lat' 
doesn't match the length of ", - "latitudinal dimension in parameter 'data'.") - } - } - } - if (!is.null(Dates)) { - if (!is.list(Dates)) { - stop("Parameter 'Dates' must be a list.") - } else { - if (length(Dates) > 2) { - warning("Parameter 'Dates' is a list with more than 2 ", - "elements and only the first two will be used.") - Dates <- Dates[1 : 2] - } - if (names(Dates)[1] != 'start' | names(Dates)[2] != 'end') { - warning("The name of the first element of parameter 'Dates' ", - "is expected to be 'start' and the second 'end'.") - } - if (length(Dates[[1]]) != length(Dates[[2]]) & - length(Dates) == 2) { - stop("The length of the elements in parameter 'Dates' must ", - "be equal.") - } - time_dims <- dims[names(dims) %in% TEMPORAL_DIMS] - if (prod(time_dims) != length(Dates[[1]])) { - stop("The length of the temporal dimension doesn't match ", - " with the length of elements in parameter 'Dates'.") - } - } - } - object <- list(data = data, lon = lon, lat = lat, Variable = Variable, - Datasets = Datasets, Dates = Dates, when = when, - source_files = source_files) - class(object) <- 's2dv_cube' - return(object) -} diff --git a/recipe.R b/recipe.R deleted file mode 100644 index 6286e804..00000000 --- a/recipe.R +++ /dev/null @@ -1,36 +0,0 @@ -library(R6) -library(yaml) - -Recipe <- R6Class("Recipe", - public = list( - filename = NULL, - dir = NULL, - run_conf = NULL, - info = NULL, - params = NULL, - initialize = function(filename = NA, dir = NA) { - - if (is.na(dir)){ - self$filename <- basename(filename) - self$dir <- dirname(filename) - } else { - self$filename <- filename - self$dir <- dir - } - yml <- read_yaml(self$get_filepath()) - self$run_conf <- yml$Run - self$info <- yml$Description - self$params <- yml$Analysis - }, - - get_filepath = function() { - return(paste0(self$dir,"/",self$filename)) - } - - ) -) - - -#recipe <- Recipe$new(filename="/esarchive/scratch/lpalma/git/auto-s2s/recipes/seasonal_oper_atomic.yml") -#recipe <- 
Recipe$new("seasonal_oper_atomic.yml","/esarchive/scratch/lpalma/git/auto-s2s/recipes/") - -- GitLab From 27be2e84c721c93055cec620047557d5dc71e017 Mon Sep 17 00:00:00 2001 From: lpalma Date: Fri, 1 Apr 2022 09:48:56 +0200 Subject: [PATCH 47/47] wrapped laod fun, change dims in s2dv Dates and tested CTS_Calibration --- modules/Calibration/Calibration.R | 255 ++++++-- modules/data_load/dates2load.R | 4 +- modules/data_load/load.R | 607 +++++++++--------- .../data_load/testing_recipes/recipe_2.yml | 4 +- modules/test.R | 31 + 5 files changed, 558 insertions(+), 343 deletions(-) create mode 100644 modules/test.R diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index b86ea959..295bb406 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -1,42 +1,217 @@ -# Code to apply any correction method: -# simple bias adjustment -# variance inflation -# quantile mapping - -## Which parameter are required? -if (!("obs" %in% ls()) || is.null(obs)) { - error(logger, - "There is no object 'obs' in the global environment or it is NULL") - stop("EXECUTION FAILED") -} -if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) { - error(logger, - "There is no object 'fcst' in the global environment or it is NULL") - stop("EXECUTION FAILED") -} -if (!("hcst" %in% ls()) || is.null(hcst)) { - error(logger, - "There is no object 'hcst' in the global environment or it is NULL") - stop("EXECUTION FAILED") -} -if (!("method" %in% ls()) || is.null(method)) { - warn(logger, - "Calibration method not found and it is set as 'SBC'.") - method <- 'SBC' -} -if (method %in% c('SBC', 'SimpleBiasCorrection')) { - cal_fun <- "CSTools::Calibration" - cal_method <- "bias" -} else if (method %in% c("Inflation", "VarianceInflation")) { - cal_fun <- "CSTools::Calibration" - cal_method <- "evmos" -} else if (method %in% c("QM", "QuantileMapping")) { - cal_fun <- "CSTools::QuantileMapping" -} else { - error(logger, "Unknown calibration 
method definde in the recipe.") - stop("EXECUTION FAILED") +## Code to apply any correction method: +## simple bias adjustment +## variance inflation +## quantile mapping +# +### Which parameter are required? +#if (!("obs" %in% ls()) || is.null(obs)) { +# error(logger, +# "There is no object 'obs' in the global environment or it is NULL") +# stop("EXECUTION FAILED") +#} +#if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) { +# error(logger, +# "There is no object 'fcst' in the global environment or it is NULL") +# stop("EXECUTION FAILED") +#} +#if (!("hcst" %in% ls()) || is.null(hcst)) { +# error(logger, +# "There is no object 'hcst' in the global environment or it is NULL") +# stop("EXECUTION FAILED") +#} +#if (!("method" %in% ls()) || is.null(method)) { +# warn(logger, +# "Calibration method not found and it is set as 'SBC'.") +# method <- 'SBC' +#} +#if (method %in% c('SBC', 'SimpleBiasCorrection')) { +# cal_fun <- "CSTools::Calibration" +# cal_method <- "bias" +#} else if (method %in% c("Inflation", "VarianceInflation")) { +# cal_fun <- "CSTools::Calibration" +# cal_method <- "evmos" +#} else if (method %in% c("QM", "QuantileMapping")) { +# cal_fun <- "CSTools::QuantileMapping" +#} else { +# error(logger, "Unknown calibration method definde in the recipe.") +# stop("EXECUTION FAILED") +#} +#info(logger, paste("#-------------------------- ", "\n", +# "running Calibration module", "\n", +# "it can call", cal_fun )) + +#' Function that bias-adjust the S2S hindcast +# +#' this function bias adjust the hindcast data using +#' the leave-one-out method and the specified fun +# +#'@param obs A numeric array with named dimensions, +#' representing the observational data used +#' in the bias-adjustment method. +#' 'sday','syear','member' are mandatory dims + +#'@param hcst A numeric array with named dimensions, +#' representing the system hindcast data used +#' in the bias-adjustment method. 
+#'              'sday','syear','member' are mandatory dims
+
+#'@param method Name of the calibration method. It must be one of
+#'              CST_CALIB_METHODS; it is forwarded as 'cal.method'
+#'              to CSTools::CST_Calibration.
+
+#'@param mm TRUE if the experiment data is made up of
+#'          multiple systems (dat > 1)
+
+#'@param na.rm Forwarded as 'na.rm' to CSTools::CST_Calibration.
+#'             The default value is TRUE.
+
+#'@param split_factor Currently unused by this function.
+
+#'@param ncores An integer indicating the number of cores to
+#'              use for parallel computation. The default value is 32.
+
+#'@return The bias adjusted hindcast. If 'method' is not one of
+#'        CST_CALIB_METHODS a warning is raised and the hindcast is
+#'        returned without bias adjustment.
+#'
+
+CST_CALIB_METHODS <- c("bias","evmos","mse_min","crps_min","rpc-based")
+
+hcst_calib <- function(obs, hcst, method, mm = FALSE, na.rm = TRUE,
+                       split_factor = 1, ncores = 32)
+{
+
+  # Replicates observations for bias adjusting each
+  # system of the multi-model separately
+  if (mm) {
+    dat_axis <- which(names(dim(obs)) == 'dat')
+    obs.mm <- obs
+    # seq_len() yields no iterations when hcst holds a single system,
+    # whereas 1:(n - 1) would iterate over c(1, 0).
+    for (dat in seq_len(dim(hcst)['dat'][[1]] - 1)) {
+      # Accumulate on obs.mm (the previous code referenced an
+      # undefined object 'obs2').
+      obs.mm <- abind(obs.mm, obs, along = dat_axis)
+    }
+    names(dim(obs.mm)) <- names(dim(obs))
+    obs <- obs.mm
+    remove(obs.mm)
+  }
+
+  if (method %in% CST_CALIB_METHODS) {
+
+    # Hcst calibration
+    # NOTE(review): CSTools documents CST_Calibration as working on
+    # 's2dv_cube' objects, while the array operations in this function
+    # treat obs/hcst as plain arrays -- confirm the expected input type.
+    hcst <- CSTools::CST_Calibration(hcst, obs,
+                                     cal.method = method,
+                                     eval.method = "leave-one-out",
+                                     multi.model = mm,
+                                     na.fill = TRUE,
+                                     na.rm = na.rm,
+                                     apply_to = NULL,
+                                     alpha = NULL,
+                                     memb_dim = "member",
+                                     sdate_dim = "syear",
+                                     ncores = ncores)
+
+  } else {
+    # No logger is available inside this function, so signal the
+    # problem with a warning instead of failing silently.
+    warning("Calibration method '", method,
+            "' is not implemented in CSTools; ",
+            "returning the hindcast without bias adjustment.",
+            call. = FALSE)
+  }
+
+  # Non-finite values are flagged as missing. The calibrated object
+  # must NOT be removed here: it is still merged, subset and returned.
+  hcst[!is.finite(hcst)] <- NA
+
+  # Merges all the ensembles from the different systems into
+  # one single ensemble
+  if (mm) {
+    hcst <- MergeDims(hcst,
+                      merge_dims = c('dat', 'member'),
+                      rename_dim = 'member')
+    hcst <- drop_na_dim(hcst, 'member')
+  }
+
+  # removes dat and var dimensions if needed
+  try(hcst <- Subset(hcst,
+                     c('var'), list(1), drop = 'selected'))
+  try(hcst <- Subset(hcst,
+                     c('dat'), list(1), drop = 'selected'))
+
+  return(hcst)
+
 }
-info(logger, paste("#-------------------------- ", "\n",
-                   "running Calibration module", "\n",
-                   "it can call", cal_fun ))
+#' Function that bias-adjust the S2S 
forecast +# +#' this function bias adjust the forecast data using +#' hindcast and observational data +# +#'@param obs A numeric array with named dimensions, +#' representing the observational data used +#' in the bias-adjustment method. +#' 'sday','syear','member' are mandatory dims + +#'@param hcst A numeric array with named dimensions, +#' representing the system hindcast data used +#' in the bias-adjustment method. +#' 'sday','syear','member' are mandatory dims + +#'@param fcst A numeric array with named dimensions, +#' representing the system forecast to be +#' bias-adjusted. +#' 'sday','syear','member' are mandatory dims + +#'@param fun bias adjustment function + +#'@param mm TRUE if the experiment data is conformed by +#' multiple systems (dat > 1) + +#'@param ncores An integer indicating the number of cores to +#' use for parallel computation. The default value is NULL. + +#'@param target_dims Dims needed to do the calibration +#'@param output_dims Output dims from the calib fun + +#'@return A numeric array with the bias adjusted forecast, +#' + +## TODO: +## needs and update: look at hcst version +#fcst_calib <- function(obs, hcst, fcst, fun, mm=F, +# na.rm=T, split_factor=1,ncores=32, +# target_dims=c('sday','syear','member'), +# output_dims=c('member')) +#{ +# +# # Replicates observations for bias adjusting each +# # system of the multi-model separately +# if(mm){ +# obs.mm <- obs +# for(dat in 1:(dim(hcst)['dat'][[1]]-1)){ +# obs.mm <- abind(obs2,obs, +# along=which(names(dim(obs)) == 'dat')) +# } +# names(dim(obs.mm)) <- names(dim(obs)) +# obs <- obs.mm +# remove(obs.mm) +# } +# +# # Fcst Calibration +# calibrated_fcst <-Apply(data=list(obs=obs,hcst=hcst,fcst=fcst), +# extra_info=list(na.rm=na.rm), +# target_dims=target_dims, +# output_dims=output_dims, +# na.rm=na.rm, +# ncores=ncores, +# fun = .fcstcal)[[1]] +# +# calibrated_fcst[!is.finite(calibrated_fcst)] <- NA +# +# # Merges all the ensembles from the different systems into +# # one single 
ensemble +# if(mm){ +# calibrated_fcst <- MergeDims(calibrated_fcst, +# merge_dims=c('dat','member'), +# rename_dim='member') +# calibrated_fcst <- drop_na_dim(calibrated_fcst,'member') +# } +# +# # removes dat and var dimensions if needed +# try(calibrated_fcst<-Subset(calibrated_fcst, +# c('var'),list(1),drop='selected')) +# try(calibrated_fcst<-Subset(calibrated_fcst, +# c('dat'),list(1),drop='selected')) +# +# return(calibrated_fcst) +# +# +#} diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R index 4c851838..eda5f239 100644 --- a/modules/data_load/dates2load.R +++ b/modules/data_load/dates2load.R @@ -20,12 +20,12 @@ dates2load <- function(recipe, logger){ file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate$fcst_sday) - file_dates <- add_dims(file_dates, "hcst") + file_dates <- .add_dims(file_dates, "hcst") # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$sdate$fcst_syear)){ file_dates.fcst <- paste0(recipe$sdate$fcst_syear, recipe$sdate$fcst_sday) - file_dates.fcst <- add_dims(file_dates.fcst, "fcst") + file_dates.fcst <- .add_dims(file_dates.fcst, "fcst") } else { file_dates.fcst <- NULL info(logger, diff --git a/modules/data_load/load.R b/modules/data_load/load.R index ed0ba1be..a6b2a5f0 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -3,238 +3,51 @@ ## TODO: remove paths to personal scratchs source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R") - # Load required libraries/funs source("modules/data_load/dates2load.R") -source("modules/data_load/regrid.R") source("tools/libs.R") -# RECIPE FOR TESTING -# ------------------------------------------------------------------------------------------- -## TODO: Get only the last part of the path as the recipe$filename? 
-args <- NULL; args[1] <- "modules/data_load/testing_recipes/recipe_2.yml" -recipe <- read_yaml(args[1]) -recipe$filename <- args[1] -# Create output folder and log: -logger <- prepare_outputs(recipe = recipe) -folder <- logger$foldername -log_file <- logger$logname -logger <- logger$logger -# ------------------------------------------------------------------------------------------- - -# Set params ----------------------------------------- -hcst.inityear <- recipe$Analysis$Time$hcst_start -hcst.endyear <- recipe$Analysis$Time$hcst_end -ltmin <- recipe$Analysis$Time$leadtimemin -ltmax <- recipe$Analysis$Time$leadtimemax -lats.min <- recipe$Analysis$Region$latmin -lats.max <- recipe$Analysis$Region$latmax -lons.min <- recipe$Analysis$Region$lonmin -lons.max <- recipe$Analysis$Region$lonmax -ref.name <- recipe$Analysis$Datasets$Reference$name -exp.name <- recipe$Analysis$Datasets$System$name - -variable <- recipe$Analysis$Variables$name -store.freq <- recipe$Analysis$Variables$freq - -## TODO: Examine this verifications part, verify if it's necessary -# stream <- verifications$stream -# sdates <- verifications$fcst.sdate - -## TODO: define fcst.name -##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name - -# get sdates array -sdates <- dates2load(recipe, logger) - -# get esarchive datasets dict: -archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive -exp_descrip <- archive$System[[exp.name]] - -freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) -reference_descrip <- archive$Reference[[ref.name]] -freq.obs <- unlist(reference_descrip[[store.freq]][variable]) -obs.dir <- reference_descrip$src -fcst.dir <- exp_descrip$src -hcst.dir <- exp_descrip$src -fcst.nmember <- exp_descrip$nmember$fcst -hcst.nmember <- exp_descrip$nmember$hcst - -## TODO: it is necessary? 
-##if ("accum" %in% names(reference_descrip)) { -## accum <- unlist(reference_descrip$accum[store.freq][[1]]) -##} else { -## accum <- FALSE -##} - -# ----------- -obs.path <- paste0(archive$src, - obs.dir, store.freq, "/$var$", - reference_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") - -hcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") - -fcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], "$var$_$fcst_syear$.nc") - -# Define regrid parameters: -#------------------------------------------------------------------- -regrid_params <- get_regrid_params(recipe, archive) - -# Longitude sort -#------------------------------------------------------------------- -if (lons.min >= 0) { - circularsort <- CircularSort(0, 360) -} else { - circularsort <- CircularSort(-180, 180) -} - -# Load hindcast -#------------------------------------------------------------------- -hcst <- Start(dat = hcst.path, - var = variable, - file_date = sdates$hcst, - time = indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = indices(1:hcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - -# Load forecast -#------------------------------------------------------------------- -fcst <- Start(dat = fcst.path, - var = variable, - fcst_syear = sdates$fcst, - time = indices(ltmin:ltmax), - latitude = 
values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = indices(1:fcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'fcst_syear'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - -# Load reference -#------------------------------------------------------------------- - -# Get reference dates from hcst Start call - -## TODO: Replace with new code once An-Chi's fix is released in StartR -dates <- attr(hcst, 'Variables')$common$time -# Get year and month for file_date -dates_file <- sapply(dates, format, '%Y%m') -dates.dims <- c(sday = 1, sweek = 1, time = 1) -dates.dims[names(dim(dates))] <- dim(dates) -# Set hour to 12:00 to ensure correct date retrieval for daily data -lubridate::hour(dates) <- 12 -lubridate::minute(dates) <- 00 -# Restore correct dimensions -dim(dates) <- dates.dims -names(dim(dates))[4] <- "syear" -dim(dates_file) <- dim(dates) - -# Separate Start() call for monthly vs daily data -if (store.freq == "monthly_mean") { - obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - 
return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) -} else if (store.freq == "daily_mean") { - obs <- Start(dat = obs.path, - var = variable, - file_date = sort(unique(dates_file)), - time = dates, - time_var = 'time', - time_across = 'file_date', - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) -} - -# TODO: Reorder obs dims to match hcst dims? 
- +## TODO: Move to CSOperational # Conversion from startR_array to s2dv_array #------------------------------------------------------------------- - attr2array <- function(attr){ return(array(as.vector(attr), dim(attr))) } startR_to_s2dv <- function(startR_array){ - dates_dims <- c("syear", "time") + dates_dims <- dim(Subset(startR_array, + along=c('dat','var', + 'latitude', 'longitude', 'ensemble'), + list(1,1,1,1,1), drop="selected")) + + #sdates_dims <- dim(Subset(startR_array, + # along=c('dat','var','time','sweek','sday', + # 'latitude', 'longitude', 'ensemble'), + # list(1,1,1,1,1,1,1,1), drop="selected")) + dates_start <- attr(startR_array, 'Variables')$common$time dates_end <- attr(startR_array, 'Variables')$common$time - names(dim(dates_start)) <- dates_dims - names(dim(dates_end)) <- dates_dims + #sdates <- unlist(attr(startR_array, 'FileSelectors')$dat1$file_date) + + dim(dates_start) <- dates_dims + dim(dates_end) <- dates_dims + #dim(sdates) <- sdates_dims ## TODO: change this line when time attributes work correctly? 
time_dims <- c("time", "sday", "sweek", "syear", "fcst_syear") s2dv_object <- s2dv_cube(data = attr2array(startR_array), - lon = attr2array(attr(startR_array, 'Variables')$dat1$longitude), - lat = attr2array(attr(startR_array, 'Variables')$dat1$latitude), + lon = attr2array(attr(startR_array, + 'Variables')$dat1$longitude), + lat = attr2array(attr(startR_array, + 'Variables')$dat1$latitude), Variable = list(varName = names(attr(startR_array, - 'Variables')$common)[2], + 'Variables')$common)[2], level = NULL), Dates = list(start = dates_start, end = dates_end), - time_dims = time_dims, + #sdate = sdates), + time_dims = time_dims, when = Sys.time(), source_files = attr2array(attr(startR_array, "Files")) #Datasets = list(exp1 = list(InitializationsDates = list(Member_1 = "01011990", @@ -245,95 +58,291 @@ startR_to_s2dv <- function(startR_array){ } -hcst <- startR_to_s2dv(hcst) -fcst <- startR_to_s2dv(fcst) -obs <- startR_to_s2dv(obs) - - -## TODO: Review/modify code from here onwards. - -## TODO: new files checker? 
-#hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) -#hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] -#try(hcst.NA_files <- hcst.NA_files[order(hcst.NA_files)]) - -dates_file <- paste0(dates_file, '01') -dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'ensemble'), - list(1,1,1,1,1), drop="selected")) - -file_dates <- paste0(file_dates, '01') -dim(file_dates) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'ensemble', 'time'), - list(1,1,1,1,1,1), drop="selected")) - - - -obs.NA_dates.ind <- Apply(obs, - fun=(function(x){ all(is.na(x))}), - target_dims=c('time', 'latitude', 'longitude'))[[1]] -obs.NA_dates <- dates_file[obs.NA_dates.ind] -obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] -obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", - freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") - -if (any(is.na(hcst))){ - fatal(logger, - paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", - " ################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "hcst files:", - hcst.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status = 1) -} - -if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ - fatal(logger, - paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", - " ################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "obs files:", - obs.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status=1) -} - -info(logger, - "######### DATA LOADING COMPLETED SUCCESFULLY ##############") - -default_dims <- c(dat = 1, var = 1, sweek = 1, - sday = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) 
- -default_dims[names(dim(obs))] <- dim(obs) -dim(obs) <- default_dims - -lon <- attr(obs, 'Variables')$dat1$longitude -lat <- attr(obs, 'Variables')$dat1$latitude -hcst.times <- attr(hcst, 'Variables')$common$time -hcst.times <- sort(unique(sapply(as.character(hcst.times), - substr, 1, 10))) +# RECIPE FOR TESTING +# -------------------------------------------------------------------------------- +# recipe_file <- "modules/data_load/testing_recipes/recipe_2.yml" + +load_datasets <- function(recipe_file){ + + ## TODO: Get only the last part of the path as the recipe$filename? + recipe <- read_yaml(recipe_file) + recipe$filename <- recipe_file + + ## TODO: this should come from the main script + # Create output folder and log: + logger <- prepare_outputs(recipe = recipe) + folder <- logger$foldername + log_file <- logger$logname + logger <- logger$logger + # ------------------------------------------- + + # Set params ----------------------------------------- + hcst.inityear <- recipe$Analysis$Time$hcst_start + hcst.endyear <- recipe$Analysis$Time$hcst_end + ltmin <- recipe$Analysis$Time$leadtimemin + ltmax <- recipe$Analysis$Time$leadtimemax + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + + variable <- recipe$Analysis$Variables$name + store.freq <- recipe$Analysis$Variables$freq + + ## TODO: Examine this verifications part, verify if it's necessary + # stream <- verifications$stream + # sdates <- verifications$fcst.sdate + + ## TODO: define fcst.name + ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name + + # get sdates array + sdates <- dates2load(recipe, logger) + + # get esarchive datasets dict: + archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + exp_descrip <- archive$System[[exp.name]] + + 
freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) + reference_descrip <- archive$Reference[[ref.name]] + freq.obs <- unlist(reference_descrip[[store.freq]][variable]) + obs.dir <- reference_descrip$src + fcst.dir <- exp_descrip$src + hcst.dir <- exp_descrip$src + fcst.nmember <- exp_descrip$nmember$fcst + hcst.nmember <- exp_descrip$nmember$hcst + + ## TODO: it is necessary? + ##if ("accum" %in% names(reference_descrip)) { + ## accum <- unlist(reference_descrip$accum[store.freq][[1]]) + ##} else { + ## accum <- FALSE + ##} + + # ----------- + obs.path <- paste0(archive$src, + obs.dir, store.freq, "/$var$", + reference_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") + + hcst.path <- paste0(archive$src, + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") + + fcst.path <- paste0(archive$src, + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], "$var$_$fcst_syear$.nc") + + # Define regrid parameters: + #------------------------------------------------------------------- + regrid_params <- get_regrid_params(recipe, archive) + + # Longitude sort + #------------------------------------------------------------------- + if (lons.min >= 0) { + circularsort <- CircularSort(0, 360) + } else { + circularsort <- CircularSort(-180, 180) + } + + # Load hindcast + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + time = indices(ltmin:ltmax), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = 
c('member', 'ensemble')), + ensemble = indices(1:hcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + hcst <- startR_to_s2dv(hcst) + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)){ + + fcst <- Start(dat = fcst.path, + var = variable, + fcst_syear = sdates$fcst, + time = indices(ltmin:ltmax), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'fcst_syear'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + fcst <- startR_to_s2dv(fcst) -#obs<-Subset(obs,c('dat','var'),list(1,1),drop='selected') -#hcst<-Subset(hcst,c('dat','var'),list(1,1),drop='selected') -#if (stream == "fcst"){ -# fcst<-Subset(fcst,c('dat','var'),list(1,1),drop='selected') -#} + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + # Get reference dates from hcst Start call + + ## TODO: Replace with new code once An-Chi's fix is released in StartR + #dates <- attr(hcst, 'Variables')$common$time + dates <- hcst$Dates$start + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dates.dims <- c(sday = 1, sweek = 1, time = 1) + dates.dims[names(dim(dates))] <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + 
lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dates.dims + names(dim(dates))[4] <- "syear" + dim(dates_file) <- dim(dates) + + # Separate Start() call for monthly vs daily data + if (store.freq == "monthly_mean") { + + obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq == "daily_mean") { + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + } + + # TODO: Reorder obs dims to match hcst dims? 
+ # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sweek = 1, + sday = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + obs <- startR_to_s2dv(obs) + + ############################################################################ + # + # CHECKS ON MISSING FILES + # + ############################################################################ + + #obs.NA_dates.ind <- Apply(obs, + # fun=(function(x){ all(is.na(x))}), + # target_dims=c('time', 'latitude', 'longitude'))[[1]] + #obs.NA_dates <- dates_file[obs.NA_dates.ind] + #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] + #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", + # freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") + # + #if (any(is.na(hcst))){ + # fatal(logger, + # paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "hcst files:", + # hcst.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status = 1) + #} + # + #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ + # fatal(logger, + # paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "obs files:", + # obs.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status=1) + #} + # + #info(logger, + # "######### DATA LOADING COMPLETED SUCCESFULLY ##############") + + ############################################################################ + 
############################################################################ + + ## TODO: we need to define accumulated vars + #filters negative values in accum vars + #if (accum){ + # obs$data[obs$data < 0 ] <- 0 + # hcst$data[hcst$data < 0 ] <- 0 + # if (!is.null(fcst)){ + # fcst$data[fcst$data < 0 ] <- 0 + # } + #} + + return(list(hcst = hcst, fcst = fcst, obs = obs)) -#filters negative values in accum vars -if (accum){ - obs[obs < 0 ] <- 0 - hcst[hcst < 0 ] <- 0 } - -sdates.hcst <- file_dates -leadtimes.hcst <- dates_file diff --git a/modules/data_load/testing_recipes/recipe_2.yml b/modules/data_load/testing_recipes/recipe_2.yml index 89cb68ce..b8d81ad7 100644 --- a/modules/data_load/testing_recipes/recipe_2.yml +++ b/modules/data_load/testing_recipes/recipe_2.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system35c3s + name: system5c3s Multimodel: no Reference: - name: era5land + name: era5 Time: sdate: fcst_syear: '2020' diff --git a/modules/test.R b/modules/test.R new file mode 100644 index 00000000..9bd30de4 --- /dev/null +++ b/modules/test.R @@ -0,0 +1,31 @@ + + + + +recipe_file <- "modules/data_load/testing_recipes/recipe_2.yml" +source("modules/data_load/load.R") + +data <- load_datasets(recipe_file) + +hcst <- data$hcst +obs <- data$obs + +method <- "bias" +mm=F +ncores=4 +na.rm=T + +# Hcst calibration +hcst <- CSTools::CST_Calibration(hcst,obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + + -- GitLab