diff --git a/conf/archive.yml b/conf/archive.yml index e70d4f2d9916b19ee1bd767f85936e5c2de30bd5..0feb34b8e5c2c005326e8c1e90faa4ab20cb1615 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -1,40 +1,50 @@ archive: - src: "/esarchive/" - system5c3s: - src: "exp/ecmwf/system5c3s/" - daily_mean: {"tas":"_f6h/","rsds":"_s0-24h/", - "prlr":"_s0-24h/","sfcWind":"_f6h/"} - monthly_mean: {"tas":"_f6h/","rsds":"_s0-24h/", - "prlr":"_s0-24h/","sfcWind":"_f6h/"} - lat_decreasing_sort: TRUE - lon_circular_sort: - ini: 0 - end: 361 - nmember: - fcst: 51 - hcst: 25 - regrid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" - - era5: - src: "recon/ecmwf/era5/" - daily_mean: {"tas":"_f1h-r1440x721cds/","rsds":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/","sfcWind":"_f1h-r1440x721cds/"} - monthly_mean: {"tas":"_f1h-r1440x721cds/"} - lat_decreasing_sort: TRUE - lon_circular_sort: - ini: 0 - end: 361 - regrid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" - era5land: - src: "recon/ecmwf/era5land/" - daily_mean: {"tas":"_f1h/","rsds":"_f1h/", - "prlr":"_f1h/","sfcWind":"_f1h/"} - regrid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" - uerra: - src: "recon/ecmwf/uerra_mescan/" - daily_mean: {"tas":"_f6h/"} - grid: "/esarchive/recon/ecmwf/uerra_mescan/daily_mean/tas_f6h/tas_201805.nc" + src: "/esarchive/" + System: + system5c3s: + src: "exp/ecmwf/system5c3s/" + daily_mean: {"tas":"_f6h/","rsds":"_s0-24h/", + "prlr":"_s0-24h/","sfcWind":"_f6h/"} + monthly_mean: {"tas":"_f6h/","rsds":"_s0-24h/", + "prlr":"_s0-24h/","sfcWind":"_f6h/"} + nmember: + fcst: 51 + hcst: 25 + reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + system7c3s: + src: "exp/meteofrance/system7c3s/" + monthly_mean: {"tas":"_f6h/","g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/"} + nmember: + fcst: 51 + hcst: 25 + reference_grid: "/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_20191001.nc" + system35c3s: + src: "/exp/cmcc/system35c3s/" + monthly_mean: {"tas":"_f6h/","g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/"} + nmember: + fcst: 50 + hcst: 40 + reference_grid: "/esarchive/exp/cmcc/system35c3s/monthly_mean/tas_f6h/tas_20210101.nc" + Reference: + era5: + src: "recon/ecmwf/era5/" + daily_mean: {"tas":"_f1h-r1440x721cds/","rsds":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/","sfcWind":"_f1h-r1440x721cds/"} + monthly_mean: {"tas":"_f1h-r1440x721cds/"} + reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + era5land: + src: "recon/ecmwf/era5land/" + daily_mean: {"tas":"_f1h/","rsds":"_f1h/", + "prlr":"_f1h/","sfcWind":"_f1h/"} + monthly_mean: {"tas":"_f1h/"} + reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" + uerra: + src: "recon/ecmwf/uerra_mescan/" + daily_mean: {"tas":"_f6h/"} + reference_grid: "/esarchive/recon/ecmwf/uerra_mescan/daily_mean/tas_f6h/tas_201805.nc" diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R index 17c5cb99c138e05c0824875cbd688c444901d3a5..c7c392a52330100bafd5c1fb7c54fc2acdb49757 100644 --- a/modules/data_load/dates2load.R +++ b/modules/data_load/dates2load.R @@ -3,7 +3,7 @@ # both for the hcst and fcst. dates2load <- function(recipe, logger){ - recipe <- recipe$params$Time + recipe <- recipe$Analysis$Time # hcst dates @@ -41,6 +41,3 @@ add_dims <- function(data, type){ return(data) } - - - diff --git a/modules/data_load/load.R b/modules/data_load/load.R old mode 100644 new mode 100755 index 9af22eec0e7a48edfe192703f3ab5afb00883a35..1e841c98d6db9cf33ec8f0c4405c728a681c76e5 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -1,55 +1,56 @@ source("modules/data_load/dates2load.R") source("modules/data_load/regrid.R") -source("modules/data_load/s2dv_cube.R") -source("recipe.R") +# source("modules/data_load/s2dv_cube.R") +# source("recipe.R") # Load required libraries source("tools/libs.R") # RECIPE FOR TESTING # ------------------------------------------------------------------------------------------- -# recipe <- read_yaml("/esarchive/scratch/lpalma/git/auto-s2s/modules/data_load/recipe_1.yml") - args <- NULL; args[1] <- "/esarchive/scratch/lpalma/git/auto-s2s/modules/data_load/recipe_1.yml" - recipe <- Recipe$new(filename=args[1]) - recipe$filename <- args[1] +## TODO: Get only the last part of the path as the recipe$filename? +recipe <- read_yaml("modules/data_load/recipe_1.yml") +args <- NULL; args[1] <- "modules/data_load/recipe_1.yml" +recipe$filename <- args[1] # Create output folder and log: - logger <- prepare_outputs(recipe = recipe) - folder <- logger$foldername - log_file <- logger$logname - logger <- logger$logger +logger <- prepare_outputs(recipe = recipe) +folder <- logger$foldername +log_file <- logger$logname +logger <- logger$logger # ------------------------------------------------------------------------------------------- # Set params ----------------------------------------- -hcst.inityear <- recipe$params$Time$hcst_start -hcst.endyear <- recipe$params$Time$hcst_end -ltmin <- recipe$params$Time$leadtimemin -ltmax <- recipe$params$Time$leadtimemax -lats.min <- recipe$params$Region$latmin -lats.max <- recipe$params$Region$latmax -lons.min <- recipe$params$Region$lonmin -lons.max <- recipe$params$Region$lonmax -ref.name <- recipe$params$Datasets$Reference$name -exp.name <- recipe$params$Datasets$System$name - -variable <- recipe$params$Variables$name -store.freq <- recipe$params$Variables$freq - -stream <- verifications$stream -sdates <- verifications$fcst.sdate +hcst.inityear <- recipe$Analysis$Time$hcst_start +hcst.endyear <- recipe$Analysis$Time$hcst_end +ltmin <- recipe$Analysis$Time$leadtimemin +ltmax <- recipe$Analysis$Time$leadtimemax +lats.min <- recipe$Analysis$Region$latmin +lats.max <- recipe$Analysis$Region$latmax +lons.min <- recipe$Analysis$Region$lonmin +lons.max <- recipe$Analysis$Region$lonmax +ref.name <- recipe$Analysis$Datasets$Reference$name +exp.name <- recipe$Analysis$Datasets$System$name + +variable <- recipe$Analysis$Variables$name +store.freq <- recipe$Analysis$Variables$freq + +## TODO: Examine this verifications part, verify if it's necessary +# stream <- verifications$stream +# sdates <- verifications$fcst.sdate ## TODO: define fcst.name -##fcst.name <- recipe$params$Datasets$System[[sys]]$name +##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name # get sdates array sdates <- dates2load(recipe, logger) # get esarchive datasets dict: -archive <- read_yaml(paste0(recipe$run_conf_conf$code_dir,"conf/archive.yml"))$archive -exp_descrip <- archive[[exp.name]] +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive +exp_descrip <- archive$System[[exp.name]] freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) -reference_descrip <- archive[[ref.name]] +reference_descrip <- archive$Reference[[ref.name]] freq.obs <- unlist(reference_descrip[[store.freq]][variable]) obs.dir <- reference_descrip$src fcst.dir <- exp_descrip$src @@ -67,47 +68,84 @@ hcst.nmember <- exp_descrip$nmember$hcst # ----------- obs.path <- paste0(archive$src, obs.dir, store.freq, "/$var$", - reference_descrip[[store.freq]][[variable]],"$var$_$file_date$.nc") + reference_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") hcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]],"$var$_$file_date$.nc") + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") -# Regrid: +# Define regrid parameters: #------------------------------------------------------------------- -regrid_params <- get_regrid_params(recipe,archive) +regrid_params <- get_regrid_params(recipe, archive) - -# Timeseries load +# Longitude sort #------------------------------------------------------------------- +if (lons.min >= 0) { + circularsort <- CircularSort(0, 360) +} else { + circularsort <- CircularSort(-180, 180) +} +# Hindcast timeseries load +#------------------------------------------------------------------- hcst <- Start(dat = hcst.path, var = variable, file_date = sdates$hcst, time = indices(ltmin:ltmax), latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - exp_descrip$lat_decreasing_sort), + latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - exp_descrip$lon_circular_sort$ini, - exp_descrip$lon_circular_sort$end), + longitude_reorder = circularsort, transform = regrid_params$fcst.transform, transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), + method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude'), - member = c('ensemble')), - member = indices(1:hcst.nmember), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + ensemble = indices(1:hcst.nmember), return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), split_multiselected_dims = TRUE, retrieve = TRUE) +# Get forecast dates +## TODO: Adapt for daily data +dates <- attr(hcst, 'Variables')$common$time +dates_file <- sapply(dates, format, '%Y%m%d') +dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") +dates_file <- dates_file[!duplicated(dates_file)] # Avoid duplicates +dim(dates_file) <- dim(Subset(hcst, + along=c('dat', 'var', + 'latitude', 'longitude', 'ensemble'), + list(1,1,1,1,1), drop="selected")) + +# Reference load +#------------------------------------------------------------------- +## TODO: Time dimension not working for daily obs + +obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, +# time = indices(ltmin:ltmax), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + +# Conversion from startR_array to s2dv_array +#------------------------------------------------------------------- attr2array <- function(attr){ return(array(as.vector(attr), dim(attr))) @@ -123,9 +161,10 @@ startR_to_s2dv <- function(startR_array){ names(dim(dates_end)) <- dates_dims s2dv_object <- s2dv_cube(data = attr2array(startR_array), - lon = attr2array(attr(startR_array,'Variables')$dat1$longitude), - lat = attr2array(attr(startR_array,'Variables')$dat1$latitude), - Variable = list(varName = names(attr(startR_array, "Variables")$common)[2], + lon = attr2array(attr(startR_array, 'Variables')$dat1$longitude), + lat = attr2array(attr(startR_array, 'Variables')$dat1$latitude), + Variable = list(varName = names(attr(startR_array, + 'Variables')$common)[2], level = NULL), Dates = list(start = dates_start, end = dates_end), @@ -133,7 +172,6 @@ startR_to_s2dv <- function(startR_array){ source_files = attr2array(attr(startR_array, "Files")) #Datasets = list(exp1 = list(InitializationsDates = list(Member_1 = "01011990", # Members = "Member_1"))) - ) return(s2dv_object) @@ -141,57 +179,25 @@ startR_to_s2dv <- function(startR_array){ } hcst <- startR_to_s2dv(hcst) +obs <- startR_to_s2dv(obs) +## TODO: Review/modify code from here onwards. ## TODO: new files checker? #hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) #hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] #try(hcst.NA_files <- hcst.NA_files[order(hcst.NA_files)]) -dates <- attr(hcst, 'Variables')$common$time -dates_file <- sapply(dates, format, '%Y%m%d') -dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") -dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member'), - list(1,1,1,1,1), drop="selected")) - -obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - reference_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - reference_descrip$lon_circular_sort$ini, - reference_descrip$lon_circular_sort$end), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - ## TODO: Create s2dv object - dates_file <- paste0(dates_file, '01') dim(dates_file) <- dim(Subset(hcst, along=c('dat','var', - 'latitude', 'longitude', 'member'), + 'latitude', 'longitude', 'ensemble'), list(1,1,1,1,1), drop="selected")) file_dates <- paste0(file_dates, '01') dim(file_dates) <- dim(Subset(hcst, along=c('dat','var', - 'latitude', 'longitude', 'member', 'time'), + 'latitude', 'longitude', 'ensemble', 'time'), list(1,1,1,1,1,1), drop="selected")) @@ -237,7 +243,7 @@ info(logger, default_dims <- c(dat = 1, var = 1, sweek = 1, sday = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, member = 1) + latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(obs))] <- dim(obs) dim(obs) <- default_dims diff --git a/modules/data_load/recipe_1.yml b/modules/data_load/recipe_1.yml index 69ef8245e666a9c62977bde3e00d3a45924568db..a6cdda674965bbe8c524842432bcb0ca293abdeb 100644 --- a/modules/data_load/recipe_1.yml +++ b/modules/data_load/recipe_1.yml @@ -21,10 +21,10 @@ Analysis: leadtimemin: 2 leadtimemax: 4 Region: - latmin: -90 - latmax: 90 + latmin: -10 + latmax: 10 lonmin: 0 - lonmax: 360 + lonmax: 20 Regrid: method: bilinear type: to_system @@ -39,5 +39,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/data_load/recipe_2.yml b/modules/data_load/recipe_2.yml new file mode 100644 index 0000000000000000000000000000000000000000..d2376b233fffbb1beec99410d8c9cd040b450b37 --- /dev/null +++ b/modules/data_load/recipe_2.yml @@ -0,0 +1,43 @@ +Description: + Author: V. Agudetse + '': split version +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: system5c3s + Multimodel: no + Reference: + name: era5 + Time: + sdate: + fcst_syear: '2017' + fcst_sday: '0701' + hcst_start: '1993' + hcst_end: '2016' + leadtimemin: 2 + leadtimemax: 4 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: SBC + Skill: + metric: RPSS + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/data_load/regrid.R b/modules/data_load/regrid.R index a80919977a34cd3b2c5c7298dfa96ee8dddeebca..9aaab6e40bc33ef17c8facef8fa088c1556c89ac 100644 --- a/modules/data_load/regrid.R +++ b/modules/data_load/regrid.R @@ -1,27 +1,27 @@ -get_regrid_params <- function(recipe,archive){ +get_regrid_params <- function(recipe, archive){ - exp.name <- recipe$params$Datasets$System$name - ref.name <- recipe$params$Datasets$Reference$name - exp_descrip <- archive[[exp.name]] - reference_descrip <- archive[[ref.name]] + exp.name <- recipe$Analysis$Datasets$System$name + ref.name <- recipe$Analysis$Datasets$Reference$name + exp_descrip <- archive$System[[exp.name]] + reference_descrip <- archive$Reference[[ref.name]] - if (tolower(recipe$params$Regrid$type) == 'to_reference') { + if (tolower(recipe$Analysis$Regrid$type) == 'to_reference') { - regrid_params <- list(fcst.gridtype=reference_descrip$regrid, - fcst.gridmethod=recipe$params$Regrid$method, + regrid_params <- list(fcst.gridtype=reference_descrip$reference_grid, + fcst.gridmethod=recipe$Analysis$Regrid$method, fcst.transform=CDORemapper, obs.gridtype=NULL, obs.gridmethod=NULL, obs.transform=NULL) - } else if (tolower(recipe$params$Regrid$type) == 'to_system') { + } else if (tolower(recipe$Analysis$Regrid$type) == 'to_system') { regrid_params <- list(fcst.gridtype=NULL, fcst.gridmethod=NULL, fcst.transform=NULL, - obs.gridtype=exp_descrip$regrid, - obs.gridmethod=recipe$params$Regrid$method, + obs.gridtype=exp_descrip$reference_grid, + obs.gridmethod=recipe$Analysis$Regrid$method, obs.transform=CDORemapper) } ##TODO: Else condition? middle interpolation? diff --git a/tools/libs.R b/tools/libs.R index 77dcf7d7b65ad717c02cbfc1b6cbe2813cc8a20d..7b010f544f8b7ba4b10d249bccd4ca12071a347c 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -2,12 +2,13 @@ library(log4r) library(startR) library(ClimProjDiags) library(multiApply) +library(yaml) # library(s2dverification) # library(ncdf4) # library(abind) # library(easyVerification) # library(easyNCDF) -# library(CSTools) + library(CSTools) # # library(parallel) # library(pryr) # To check mem usage. @@ -25,4 +26,4 @@ library(multiApply) source("tools/check_recipe.R") source("tools/prepare_outputs.R") source("tools/divide_recipe.R") -source("tools/add_dims.R") # Not sure if necessary yet +# source("tools/add_dims.R") # Not sure if necessary yet