diff --git a/MODULES b/MODULES index a37e52d667da53f97f084b6a8c2932f54b6aca26..a5e3edc24a273820a45907c0ccfea8f16a763112 100644 --- a/MODULES +++ b/MODULES @@ -11,6 +11,16 @@ if [ $BSC_MACHINE == "power" ]; then module load CDO/1.9.4-foss-2018b module load R/3.6.1-foss-2018b +elif [ $BSC_MACHINE == "nord3v2" ]; then + + module use /gpfs/projects/bsc32/software/suselinux/11/modules/all + module unuse /apps/modules/modulefiles/applications /apps/modules/modulefiles/compilers /apps/modules/modulefiles/tools /apps/modules/modulefiles/libraries /apps/modules/modulefiles/environment + + + module load CDO/1.9.8-foss-2019b + module load R/3.6.2-foss-2019b + module load OpenMPI/4.0.5-GCC-8.3.0-nord3-v2 + elif [ $BSC_MACHINE == "nord3" ]; then module use /gpfs/projects/bsc32/software/suselinux/11/modules/all @@ -19,8 +29,8 @@ elif [ $BSC_MACHINE == "nord3" ]; then module unuse /apps/modules/modulefiles/libraries /apps/modules/modulefiles/environment module unuse /apps/modules/PRACE - module load R/3.6.2-foss-2019b module load CDO/1.9.8-foss-2019b + module load R/3.6.2-foss-2019b else diff --git a/conf/archive.yml b/conf/archive.yml index 0feb34b8e5c2c005326e8c1e90faa4ab20cb1615..57574919711c876d1306e0ef5cf90375a424374d 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -8,7 +8,8 @@ archive: daily_mean: {"tas":"_f6h/","rsds":"_s0-24h/", "prlr":"_s0-24h/","sfcWind":"_f6h/"} monthly_mean: {"tas":"_f6h/","rsds":"_s0-24h/", - "prlr":"_s0-24h/","sfcWind":"_f6h/"} + "prlr":"_s0-24h/","sfcWind":"_f6h/", + "tasmin":"_f24h/","tasmax":"_f24h/"} nmember: fcst: 51 hcst: 25 @@ -20,15 +21,15 @@ archive: nmember: fcst: 51 hcst: 25 - reference_grid: "/esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_20191001.nc" + reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system7c3s.txt" system35c3s: - src: "/exp/cmcc/system35c3s/" + src: "exp/cmcc/system35c3s/" monthly_mean: {"tas":"_f6h/","g500":"_f12h/", "prlr":"_f24h/", "sfcWind": "_f6h/"} 
nmember: fcst: 50 hcst: 40 - reference_grid: "/esarchive/exp/cmcc/system35c3s/monthly_mean/tas_f6h/tas_20210101.nc" + reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system35c3s.txt" Reference: era5: src: "recon/ecmwf/era5/" diff --git a/conf/grid_description/griddes_system21_m1.txt b/conf/grid_description/griddes_system21_m1.txt new file mode 100644 index 0000000000000000000000000000000000000000..bf9ac52b4971bf02f2bdd932f3be4f1d866e47ac --- /dev/null +++ b/conf/grid_description/griddes_system21_m1.txt @@ -0,0 +1,17 @@ +# Grid description file for DWD GCFS2.1 (CDS) +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_system35c3s.txt b/conf/grid_description/griddes_system35c3s.txt new file mode 100644 index 0000000000000000000000000000000000000000..a1248680cb468b6fc77a111ae2a683afb47c26a4 --- /dev/null +++ b/conf/grid_description/griddes_system35c3s.txt @@ -0,0 +1,19 @@ +# Grid description file for CMCC SPSv3.5 (C3S) +# Serves as reference_grid for archive.yml +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_system7c3s.txt b/conf/grid_description/griddes_system7c3s.txt new file mode 100644 index 0000000000000000000000000000000000000000..b6f18478e416d212a75c004ae02c27d795bc0495 --- /dev/null +++ b/conf/grid_description/griddes_system7c3s.txt @@ -0,0 +1,19 @@ +# Grid description file for Meteofrance System 7 (C3S) +# Serves as reference_grid for archive.ym +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 
+xname = longitude +xlongname = "longitude" +xunits = "degrees_east" +yname = latitude +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index b86ea9593af21822b9eadd7967cb5df35c03b7ad..295bb40667cb07d5ebb942240046b3b6b950d741 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -1,42 +1,217 @@ -# Code to apply any correction method: -# simple bias adjustment -# variance inflation -# quantile mapping - -## Which parameter are required? -if (!("obs" %in% ls()) || is.null(obs)) { - error(logger, - "There is no object 'obs' in the global environment or it is NULL") - stop("EXECUTION FAILED") -} -if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) { - error(logger, - "There is no object 'fcst' in the global environment or it is NULL") - stop("EXECUTION FAILED") -} -if (!("hcst" %in% ls()) || is.null(hcst)) { - error(logger, - "There is no object 'hcst' in the global environment or it is NULL") - stop("EXECUTION FAILED") -} -if (!("method" %in% ls()) || is.null(method)) { - warn(logger, - "Calibration method not found and it is set as 'SBC'.") - method <- 'SBC' -} -if (method %in% c('SBC', 'SimpleBiasCorrection')) { - cal_fun <- "CSTools::Calibration" - cal_method <- "bias" -} else if (method %in% c("Inflation", "VarianceInflation")) { - cal_fun <- "CSTools::Calibration" - cal_method <- "evmos" -} else if (method %in% c("QM", "QuantileMapping")) { - cal_fun <- "CSTools::QuantileMapping" -} else { - error(logger, "Unknown calibration method definde in the recipe.") - stop("EXECUTION FAILED") +## Code to apply any correction method: +## simple bias adjustment +## variance inflation +## quantile mapping +# +### Which parameter are required? 
+#if (!("obs" %in% ls()) || is.null(obs)) { +# error(logger, +# "There is no object 'obs' in the global environment or it is NULL") +# stop("EXECUTION FAILED") +#} +#if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) { +# error(logger, +# "There is no object 'fcst' in the global environment or it is NULL") +# stop("EXECUTION FAILED") +#} +#if (!("hcst" %in% ls()) || is.null(hcst)) { +# error(logger, +# "There is no object 'hcst' in the global environment or it is NULL") +# stop("EXECUTION FAILED") +#} +#if (!("method" %in% ls()) || is.null(method)) { +# warn(logger, +# "Calibration method not found and it is set as 'SBC'.") +# method <- 'SBC' +#} +#if (method %in% c('SBC', 'SimpleBiasCorrection')) { +# cal_fun <- "CSTools::Calibration" +# cal_method <- "bias" +#} else if (method %in% c("Inflation", "VarianceInflation")) { +# cal_fun <- "CSTools::Calibration" +# cal_method <- "evmos" +#} else if (method %in% c("QM", "QuantileMapping")) { +# cal_fun <- "CSTools::QuantileMapping" +#} else { +# error(logger, "Unknown calibration method definde in the recipe.") +# stop("EXECUTION FAILED") +#} +#info(logger, paste("#-------------------------- ", "\n", +# "running Calibration module", "\n", +# "it can call", cal_fun )) + +#' Function that bias-adjust the S2S hindcast +# +#' this function bias adjust the hindcast data using +#' the leave-one-out method and the specified fun +# +#'@param obs A numeric array with named dimensions, +#' representing the observational data used +#' in the bias-adjustment method. +#' 'sday','syear','member' are mandatory dims + +#'@param hcst A numeric array with named dimensions, +#' representing the system hindcast data used +#' in the bias-adjustment method. +#' 'sday','syear','member' are mandatory dims + +#'@param fun bias adjustment function + +#'@param mm TRUE if the experiment data is conformed by +#' multiple systems (dat > 1) + +#'@param ncores An integer indicating the number of cores to +#' use for parallel computation. 
The default value is NULL. + +#'@param target_dims Dims needed to do the calibration +#'@param output_dims Output dims from the calib fun + +#'@return A numeric array with the bias adjusted hindcast, +#' + +CST_CALIB_METHODS <- c("bias","evmos","mse_min","crps_min","rpc-based") + +hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, + split_factor=1, ncores=32) +{ + + # Replicates observations for bias adjusting each + # system of the multi-model separately + if(mm){ + obs.mm <- obs + for(dat in 1:(dim(hcst)['dat'][[1]]-1)){ + obs.mm <- abind(obs2,obs, + along = which(names(dim(obs)) == 'dat')) + } + names(dim(obs.mm)) <- names(dim(obs)) + obs <- obs.mm + remove(obs.mm) + } + + if (method %in% CST_CALIB_METHODS) { + + # Hcst calibration + hcst <- CSTools::CST_Calibration(hcst,obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "member", + sdate_dim = "syear", + ncores = ncores) + + } else { + #error(logger, + # "Calibration method is not implemented in CSTools") + } + + hcst[!is.finite(hcst)] <- NA + remove(hcst) + + # Merges all the ensembles from the different systems into + # one single ensemble + if(mm){ + hcst <- MergeDims(hcst, + merge_dims=c('dat','member'), + rename_dim='member') + hcst <- drop_na_dim(hcst,'member') + } + + # removes dat and var dimensions if needed + try(hcst<-Subset(hcst, + c('var'),list(1),drop='selected')) + try(hcst<-Subset(hcst, + c('dat'),list(1),drop='selected')) + + return(hcst) + } -info(logger, paste("#-------------------------- ", "\n", - "running Calibration module", "\n", - "it can call", cal_fun )) +#' Function that bias-adjust the S2S forecast +# +#' this function bias adjust the forecast data using +#' hindcast and observational data +# +#'@param obs A numeric array with named dimensions, +#' representing the observational data used +#' in the bias-adjustment method. 
+#' 'sday','syear','member' are mandatory dims + +#'@param hcst A numeric array with named dimensions, +#' representing the system hindcast data used +#' in the bias-adjustment method. +#' 'sday','syear','member' are mandatory dims + +#'@param fcst A numeric array with named dimensions, +#' representing the system forecast to be +#' bias-adjusted. +#' 'sday','syear','member' are mandatory dims + +#'@param fun bias adjustment function + +#'@param mm TRUE if the experiment data is conformed by +#' multiple systems (dat > 1) + +#'@param ncores An integer indicating the number of cores to +#' use for parallel computation. The default value is NULL. + +#'@param target_dims Dims needed to do the calibration +#'@param output_dims Output dims from the calib fun + +#'@return A numeric array with the bias adjusted forecast, +#' + +## TODO: +## needs and update: look at hcst version +#fcst_calib <- function(obs, hcst, fcst, fun, mm=F, +# na.rm=T, split_factor=1,ncores=32, +# target_dims=c('sday','syear','member'), +# output_dims=c('member')) +#{ +# +# # Replicates observations for bias adjusting each +# # system of the multi-model separately +# if(mm){ +# obs.mm <- obs +# for(dat in 1:(dim(hcst)['dat'][[1]]-1)){ +# obs.mm <- abind(obs2,obs, +# along=which(names(dim(obs)) == 'dat')) +# } +# names(dim(obs.mm)) <- names(dim(obs)) +# obs <- obs.mm +# remove(obs.mm) +# } +# +# # Fcst Calibration +# calibrated_fcst <-Apply(data=list(obs=obs,hcst=hcst,fcst=fcst), +# extra_info=list(na.rm=na.rm), +# target_dims=target_dims, +# output_dims=output_dims, +# na.rm=na.rm, +# ncores=ncores, +# fun = .fcstcal)[[1]] +# +# calibrated_fcst[!is.finite(calibrated_fcst)] <- NA +# +# # Merges all the ensembles from the different systems into +# # one single ensemble +# if(mm){ +# calibrated_fcst <- MergeDims(calibrated_fcst, +# merge_dims=c('dat','member'), +# rename_dim='member') +# calibrated_fcst <- drop_na_dim(calibrated_fcst,'member') +# } +# +# # removes dat and var dimensions if needed +# 
try(calibrated_fcst<-Subset(calibrated_fcst, +# c('var'),list(1),drop='selected')) +# try(calibrated_fcst<-Subset(calibrated_fcst, +# c('dat'),list(1),drop='selected')) +# +# return(calibrated_fcst) +# +# +#} diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R index c7c392a52330100bafd5c1fb7c54fc2acdb49757..eda5f2392ff23c953d173c2a7bd4d91147d37824 100644 --- a/modules/data_load/dates2load.R +++ b/modules/data_load/dates2load.R @@ -1,6 +1,17 @@ - -# Taking the recipe returns the array of sdates to be loaded -# both for the hcst and fcst. +#'Read requested dates from recipe and return array of file dates to be loaded +#' +#'The purpose of this function is to read the recipe configuration data for +#'Auto-S2S workflows, retrieve the start and end dates for the hindcast and +#'the forecast dates, and return two arrays containing the requested dates, to +#'be passed to the Start() call. If no fcst date is provided, it returns an +#'empty object. +#' +#'@param recipe Auto-S2S configuration recipe as returned by read_yaml() +#'@param logger object of class logger containing log output file information +#' +#'@return a list of two arrays containing file dates for hcst and fcst +#' +#'@export dates2load <- function(recipe, logger){ recipe <- recipe$Analysis$Time @@ -9,29 +20,29 @@ dates2load <- function(recipe, logger){ file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate$fcst_sday) - file_dates <- add_dims(file_dates, "hcst") + file_dates <- .add_dims(file_dates, "hcst") # fcst dates (if fcst_year empty it creates an empty object) if (! 
is.null(recipe$sdate$fcst_syear)){ - file_dates.fcst <- paste0(recipe$sdate$fcst_syear,recipe$sdate$fcst_sday) - file_dates.fcst <- add_dims(file_dates.fcst, "fcst") + file_dates.fcst <- paste0(recipe$sdate$fcst_syear, recipe$sdate$fcst_sday) + file_dates.fcst <- .add_dims(file_dates.fcst, "fcst") } else { file_dates.fcst <- NULL info(logger, paste("fcst_year empty in the recipe, creating empty fcst object...")) } - return(list(hcst=file_dates,fcst=file_dates.fcst)) + return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun } # adds the correspondent dims to each sdate array -add_dims <- function(data, type){ +.add_dims <- function(data, type){ if (type == "hcst"){ default_dims <- c(sday = 1, sweek = 1, - sdate = length(data)) + syear = length(data)) } else { default_dims <- c(fcst_syear = length(data)) } diff --git a/modules/data_load/fcst_seas.load.R-OLD b/modules/data_load/fcst_seas.load.R-OLD deleted file mode 100644 index 5e6a4b29536d71e160e0297fad3773815cfa2b63..0000000000000000000000000000000000000000 --- a/modules/data_load/fcst_seas.load.R-OLD +++ /dev/null @@ -1,249 +0,0 @@ - - -fcst.month <- substr(fcst.sdate,5,6) -fcst.year <- substr(fcst.sdate,1,4) - -file_dates.fcst <- paste(fcst.year, fcst.month, sep = "") -file_dates <- paste(strtoi(hcst.inityear):strtoi(hcst.endyear), - fcst.month, sep = "") - - -file_dates <- add_dims(file_dates) -file_dates.fcst <- add_dims(file_dates.fcst) -# Take parameters from conf/archive for datasets: -table <- read_yaml(paste0(conf$code_dir, "conf/archive.yml")) -dataset_descrip <- table$archive[which(names(table$archive) == fcst.name)][[1]] -freq.hcst <- unlist(dataset_descrip[store.freq][[1]][variable]) -reference_descrip <- table$archive[which(names(table$archive) == - tolower(ref.name))][[1]] -freq.obs <- unlist(reference_descrip[store.freq][[1]][variable]) -obs.dir <- reference_descrip$src -fcst.dir <- dataset_descrip$src -hcst.dir <- dataset_descrip$src -fcst.nmember <- 
dataset_descrip$nmember$fcst -hcst.nmember <- dataset_descrip$nmember$hcst - -if ("accum" %in% names(reference_descrip)) { - accum <- unlist(reference_descrip$accum[store.freq][[1]]) -} else { - accum <- FALSE -} -# ----------- - obs.path <- paste0("/esarchive/", - obs.dir, store.freq, "/$var$", - freq.obs,"$var$_$file_date$.nc") - - hcst.path <- paste0("/esarchive/", - hcst.dir, store.freq, "/$var$", - freq.hcst,"$var$_$file_date$01", - ".nc") - - fcst.path <- paste0("/esarchive/", - fcst.dir, store.freq, "/$var$", - freq.hcst,"$var$_$file_date$01", - ".nc") -#------------------------------------------------------------------- -# Regrid: -if (tolower(recipe$Analysis$Regrid$type) == 'reference') { - fcst.gridtype <- reference_descrip$regrid - fcst.gridmethod <- recipe$Analysis$Regrid$method - fcst.tranform <- CDORemapper - obs.gridtype <- NULL - obs.gridmethod <- NULL - obs.tranform <- NULL -} else if (tolower(recipe$Analysis$Regrid$type) == 'system') { - fcst.gridtype <- NULL - fcst.gridmethod <- NULL - fcst.transform <- NULL - obs.gridtype <- dataset_descrip$regrid - obs.gridmethod <- recipe$Analysis$Regrid$method - obs.transform <- CDORemapper -} else { - fcst.gridtype <- recipe$Analysis$Regrid$type - fcst.gridmethod <- recipe$Analysis$Regrid$method - fcst.transform <- CDORemapper - obs.gridtype <- recipe$Analysis$Regrid$type - obs.gridmethod <- recipe$Analysis$Regrid$method - obs.transform <- CDORemapper -} - - - # Timeseries load - #------------------------------------------------------------------- - - if (tolower(stream) == "fcst"){ - - fcst <- Start(dat = fcst.path, - var = variable, - file_date = file_dates.fcst, - time = indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - dataset_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - dataset_descrip$lon_circular_sort$ini, - dataset_descrip$lon_circular_sort$end), - transform = 
fcst.transform, - transform_params = list(grid = fcst.gridtype, - method = fcst.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude'), - member = c('ensemble')), - member = indices(1:fcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - } - - hcst <- Start(dat = hcst.path, - var = variable, - file_date = file_dates, - time = indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = - dataset_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - dataset_descrip$lon_circular_sort$ini, - dataset_descrip$lon_circular_sort$end), - transform = fcst.transform, - transform_params = list(grid = fcst.gridtype, - method = fcst.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude'), - member = c('ensemble')), - member = indices(1:hcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) - hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] - try(hcst.NA_files <- hcst.NA_files[order(hcst.NA_files)]) - - dates <- attr(hcst, 'Variables')$common$time - dates_file <- sapply(dates, format, '%Y%m%d') - dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") - dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member'), - list(1,1,1,1,1), drop="selected")) - - obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = 
Sort(decreasing = - reference_descrip$lat_decreasing_sort), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort( - reference_descrip$lon_circular_sort$ini, - reference_descrip$lon_circular_sort$end), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - transform = obs.transform, - transform_params = list(grid = obs.gridtype, - method = obs.gridmethod, - crop = c(lons.min, lons.max, - lats.min, lats.max)), - transform_vars = c('latitude', 'longitude'), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - dates_file <- paste0(dates_file, '01') - dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member'), - list(1,1,1,1,1), drop="selected")) - - file_dates <- paste0(file_dates, '01') - dim(file_dates) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'member', 'time'), - list(1,1,1,1,1,1), drop="selected")) - - - - obs.NA_dates.ind <- Apply(obs, - fun=(function(x){ all(is.na(x))}), - target_dims=c('time', 'latitude', 'longitude'))[[1]] - obs.NA_dates <- dates_file[obs.NA_dates.ind] - obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] - obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", - freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") - - if (any(is.na(hcst))){ - fatal(logger, - paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", - " ################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "hcst files:", - hcst.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status = 1) - } - - if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ - fatal(logger, - paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", - " 
################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "obs files:", - obs.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status=1) - } - - info(logger, - "######### DATA LOADING COMPLETED SUCCESFULLY ##############") - - default_dims <- c(dat = 1, var = 1, sweek = 1, - sday = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, member = 1) - - default_dims[names(dim(obs))] <- dim(obs) - dim(obs) <- default_dims - - lon <- attr(obs, 'Variables')$dat1$longitude - lat <- attr(obs, 'Variables')$dat1$latitude - hcst.times <- attr(hcst, 'Variables')$common$time - hcst.times <- sort(unique(sapply(as.character(hcst.times), - substr, 1, 10))) - - #obs<-Subset(obs,c('dat','var'),list(1,1),drop='selected') - #hcst<-Subset(hcst,c('dat','var'),list(1,1),drop='selected') - #if (stream == "fcst"){ - # fcst<-Subset(fcst,c('dat','var'),list(1,1),drop='selected') - #} - - #filters negative values in accum vars - if (accum){ - obs[obs < 0 ] <- 0 - hcst[hcst < 0 ] <- 0 - if (stream == "fcst"){ fcst[fcst < 0 ] <- 0 } - } - - sdates.hcst <- file_dates - sdates.fcst <- file_dates.fcst - leadtimes.hcst <- dates_file - diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 1e841c98d6db9cf33ec8f0c4405c728a681c76e5..a6b2a5f063b444cf5c7a6cdfeabcbab49826a173 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -1,174 +1,54 @@ -source("modules/data_load/dates2load.R") -source("modules/data_load/regrid.R") -# source("modules/data_load/s2dv_cube.R") -# source("recipe.R") -# Load required libraries +## TODO: remove paths to personal scratchs +source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") +source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R") +# Load required libraries/funs +source("modules/data_load/dates2load.R") 
source("tools/libs.R") -# RECIPE FOR TESTING -# ------------------------------------------------------------------------------------------- -## TODO: Get only the last part of the path as the recipe$filename? -recipe <- read_yaml("modules/data_load/recipe_1.yml") -args <- NULL; args[1] <- "modules/data_load/recipe_1.yml" -recipe$filename <- args[1] -# Create output folder and log: -logger <- prepare_outputs(recipe = recipe) -folder <- logger$foldername -log_file <- logger$logname -logger <- logger$logger -# ------------------------------------------------------------------------------------------- - -# Set params ----------------------------------------- -hcst.inityear <- recipe$Analysis$Time$hcst_start -hcst.endyear <- recipe$Analysis$Time$hcst_end -ltmin <- recipe$Analysis$Time$leadtimemin -ltmax <- recipe$Analysis$Time$leadtimemax -lats.min <- recipe$Analysis$Region$latmin -lats.max <- recipe$Analysis$Region$latmax -lons.min <- recipe$Analysis$Region$lonmin -lons.max <- recipe$Analysis$Region$lonmax -ref.name <- recipe$Analysis$Datasets$Reference$name -exp.name <- recipe$Analysis$Datasets$System$name - -variable <- recipe$Analysis$Variables$name -store.freq <- recipe$Analysis$Variables$freq - -## TODO: Examine this verifications part, verify if it's necessary -# stream <- verifications$stream -# sdates <- verifications$fcst.sdate - -## TODO: define fcst.name -##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name - -# get sdates array -sdates <- dates2load(recipe, logger) - -# get esarchive datasets dict: -archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive -exp_descrip <- archive$System[[exp.name]] - -freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) -reference_descrip <- archive$Reference[[ref.name]] -freq.obs <- unlist(reference_descrip[[store.freq]][variable]) -obs.dir <- reference_descrip$src -fcst.dir <- exp_descrip$src -hcst.dir <- exp_descrip$src -fcst.nmember <- exp_descrip$nmember$fcst -hcst.nmember <- 
exp_descrip$nmember$hcst - -## TODO: it is necessary? -##if ("accum" %in% names(reference_descrip)) { -## accum <- unlist(reference_descrip$accum[store.freq][[1]]) -##} else { -## accum <- FALSE -##} - -# ----------- -obs.path <- paste0(archive$src, - obs.dir, store.freq, "/$var$", - reference_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") - -hcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") - -# Define regrid parameters: -#------------------------------------------------------------------- -regrid_params <- get_regrid_params(recipe, archive) - -# Longitude sort -#------------------------------------------------------------------- -if (lons.min >= 0) { - circularsort <- CircularSort(0, 360) -} else { - circularsort <- CircularSort(-180, 180) -} - -# Hindcast timeseries load -#------------------------------------------------------------------- -hcst <- Start(dat = hcst.path, - var = variable, - file_date = sdates$hcst, - time = indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude')), - ensemble = indices(1:hcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - -# Get forecast dates -## TODO: Adapt for daily data -dates <- attr(hcst, 'Variables')$common$time -dates_file <- sapply(dates, format, '%Y%m%d') -dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") -dates_file <- dates_file[!duplicated(dates_file)] # Avoid duplicates -dim(dates_file) <- dim(Subset(hcst, - 
along=c('dat', 'var', - 'latitude', 'longitude', 'ensemble'), - list(1,1,1,1,1), drop="selected")) - -# Reference load -#------------------------------------------------------------------- -## TODO: Time dimension not working for daily obs - -obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, -# time = indices(ltmin:ltmax), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude')), - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - +## TODO: Move to CSOperational # Conversion from startR_array to s2dv_array #------------------------------------------------------------------- - attr2array <- function(attr){ return(array(as.vector(attr), dim(attr))) } - startR_to_s2dv <- function(startR_array){ - dates_dims <- c("syear","time") + dates_dims <- dim(Subset(startR_array, + along=c('dat','var', + 'latitude', 'longitude', 'ensemble'), + list(1,1,1,1,1), drop="selected")) + + #sdates_dims <- dim(Subset(startR_array, + # along=c('dat','var','time','sweek','sday', + # 'latitude', 'longitude', 'ensemble'), + # list(1,1,1,1,1,1,1,1), drop="selected")) + dates_start <- attr(startR_array, 'Variables')$common$time dates_end <- attr(startR_array, 'Variables')$common$time - names(dim(dates_start)) <- dates_dims - names(dim(dates_end)) <- dates_dims + #sdates <- unlist(attr(startR_array, 'FileSelectors')$dat1$file_date) + + dim(dates_start) <- dates_dims + dim(dates_end) <- dates_dims + #dim(sdates) <- sdates_dims + ## TODO: change this line when time attributes work correctly? 
+ time_dims <- c("time", "sday", "sweek", "syear", "fcst_syear") s2dv_object <- s2dv_cube(data = attr2array(startR_array), - lon = attr2array(attr(startR_array, 'Variables')$dat1$longitude), - lat = attr2array(attr(startR_array, 'Variables')$dat1$latitude), + lon = attr2array(attr(startR_array, + 'Variables')$dat1$longitude), + lat = attr2array(attr(startR_array, + 'Variables')$dat1$latitude), Variable = list(varName = names(attr(startR_array, - 'Variables')$common)[2], + 'Variables')$common)[2], level = NULL), Dates = list(start = dates_start, end = dates_end), - when = Sys.time(), + #sdate = sdates), + time_dims = time_dims, + when = Sys.time(), source_files = attr2array(attr(startR_array, "Files")) #Datasets = list(exp1 = list(InitializationsDates = list(Member_1 = "01011990", # Members = "Member_1"))) @@ -178,94 +58,291 @@ startR_to_s2dv <- function(startR_array){ } -hcst <- startR_to_s2dv(hcst) -obs <- startR_to_s2dv(obs) - -## TODO: Review/modify code from here onwards. - -## TODO: new files checker? 
-#hcst.NA_files <- c(attributes(hcst)$NotFoundFiles) -#hcst.NA_files <- hcst.NA_files[!is.na(hcst.NA_files)] -#try(hcst.NA_files <- hcst.NA_files[order(hcst.NA_files)]) - -dates_file <- paste0(dates_file, '01') -dim(dates_file) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'ensemble'), - list(1,1,1,1,1), drop="selected")) - -file_dates <- paste0(file_dates, '01') -dim(file_dates) <- dim(Subset(hcst, - along=c('dat','var', - 'latitude', 'longitude', 'ensemble', 'time'), - list(1,1,1,1,1,1), drop="selected")) - - - -obs.NA_dates.ind <- Apply(obs, - fun=(function(x){ all(is.na(x))}), - target_dims=c('time', 'latitude', 'longitude'))[[1]] -obs.NA_dates <- dates_file[obs.NA_dates.ind] -obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] -obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", - freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") - -if (any(is.na(hcst))){ - fatal(logger, - paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", - " ################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "hcst files:", - hcst.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status = 1) -} - -if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ - fatal(logger, - paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", - " ################################################# ", - " ###### MISSING FILES #### ", - " ################################################# ", - "obs files:", - obs.NA_files, - " ################################################# ", - " ################################################# ", - sep="\n")) - quit(status=1) -} - -info(logger, - "######### DATA LOADING COMPLETED SUCCESFULLY ##############") - -default_dims <- c(dat = 1, var = 1, sweek = 1, - sday = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) 
- -default_dims[names(dim(obs))] <- dim(obs) -dim(obs) <- default_dims - -lon <- attr(obs, 'Variables')$dat1$longitude -lat <- attr(obs, 'Variables')$dat1$latitude -hcst.times <- attr(hcst, 'Variables')$common$time -hcst.times <- sort(unique(sapply(as.character(hcst.times), - substr, 1, 10))) +# RECIPE FOR TESTING +# -------------------------------------------------------------------------------- +# recipe_file <- "modules/data_load/testing_recipes/recipe_2.yml" + +load_datasets <- function(recipe_file){ + + ## TODO: Get only the last part of the path as the recipe$filename? + recipe <- read_yaml(recipe_file) + recipe$filename <- recipe_file + + ## TODO: this should come from the main script + # Create output folder and log: + logger <- prepare_outputs(recipe = recipe) + folder <- logger$foldername + log_file <- logger$logname + logger <- logger$logger + # ------------------------------------------- + + # Set params ----------------------------------------- + hcst.inityear <- recipe$Analysis$Time$hcst_start + hcst.endyear <- recipe$Analysis$Time$hcst_end + ltmin <- recipe$Analysis$Time$leadtimemin + ltmax <- recipe$Analysis$Time$leadtimemax + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + + variable <- recipe$Analysis$Variables$name + store.freq <- recipe$Analysis$Variables$freq + + ## TODO: Examine this verifications part, verify if it's necessary + # stream <- verifications$stream + # sdates <- verifications$fcst.sdate + + ## TODO: define fcst.name + ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name + + # get sdates array + sdates <- dates2load(recipe, logger) + + # get esarchive datasets dict: + archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + exp_descrip <- archive$System[[exp.name]] + + 
freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) + reference_descrip <- archive$Reference[[ref.name]] + freq.obs <- unlist(reference_descrip[[store.freq]][variable]) + obs.dir <- reference_descrip$src + fcst.dir <- exp_descrip$src + hcst.dir <- exp_descrip$src + fcst.nmember <- exp_descrip$nmember$fcst + hcst.nmember <- exp_descrip$nmember$hcst + + ## TODO: it is necessary? + ##if ("accum" %in% names(reference_descrip)) { + ## accum <- unlist(reference_descrip$accum[store.freq][[1]]) + ##} else { + ## accum <- FALSE + ##} + + # ----------- + obs.path <- paste0(archive$src, + obs.dir, store.freq, "/$var$", + reference_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") + + hcst.path <- paste0(archive$src, + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc") + + fcst.path <- paste0(archive$src, + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], "$var$_$fcst_syear$.nc") + + # Define regrid parameters: + #------------------------------------------------------------------- + regrid_params <- get_regrid_params(recipe, archive) + + # Longitude sort + #------------------------------------------------------------------- + if (lons.min >= 0) { + circularsort <- CircularSort(0, 360) + } else { + circularsort <- CircularSort(-180, 180) + } + + # Load hindcast + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + time = indices(ltmin:ltmax), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = 
c('member', 'ensemble')), + ensemble = indices(1:hcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + hcst <- startR_to_s2dv(hcst) + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)){ + + fcst <- Start(dat = fcst.path, + var = variable, + fcst_syear = sdates$fcst, + time = indices(ltmin:ltmax), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'fcst_syear'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + fcst <- startR_to_s2dv(fcst) -#obs<-Subset(obs,c('dat','var'),list(1,1),drop='selected') -#hcst<-Subset(hcst,c('dat','var'),list(1,1),drop='selected') -#if (stream == "fcst"){ -# fcst<-Subset(fcst,c('dat','var'),list(1,1),drop='selected') -#} + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + # Get reference dates from hcst Start call + + ## TODO: Replace with new code once An-Chi's fix is released in StartR + #dates <- attr(hcst, 'Variables')$common$time + dates <- hcst$Dates$start + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dates.dims <- c(sday = 1, sweek = 1, time = 1) + dates.dims[names(dim(dates))] <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + 
lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dates.dims + names(dim(dates))[4] <- "syear" + dim(dates_file) <- dim(dates) + + # Separate Start() call for monthly vs daily data + if (store.freq == "monthly_mean") { + + obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq == "daily_mean") { + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + } + + # TODO: Reorder obs dims to match hcst dims? 
+ # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sweek = 1, + sday = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + obs <- startR_to_s2dv(obs) + + ############################################################################ + # + # CHECKS ON MISSING FILES + # + ############################################################################ + + #obs.NA_dates.ind <- Apply(obs, + # fun=(function(x){ all(is.na(x))}), + # target_dims=c('time', 'latitude', 'longitude'))[[1]] + #obs.NA_dates <- dates_file[obs.NA_dates.ind] + #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] + #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", + # freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") + # + #if (any(is.na(hcst))){ + # fatal(logger, + # paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "hcst files:", + # hcst.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status = 1) + #} + # + #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ + # fatal(logger, + # paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "obs files:", + # obs.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status=1) + #} + # + #info(logger, + # "######### DATA LOADING COMPLETED SUCCESFULLY ##############") + + ############################################################################ + 
############################################################################ + + ## TODO: we need to define accumulated vars + #filters negative values in accum vars + #if (accum){ + # obs$data[obs$data < 0 ] <- 0 + # hcst$data[hcst$data < 0 ] <- 0 + # if (!is.null(fcst)){ + # fcst$data[fcst$data < 0 ] <- 0 + # } + #} + + return(list(hcst = hcst, fcst = fcst, obs = obs)) -#filters negative values in accum vars -if (accum){ - obs[obs < 0 ] <- 0 - hcst[hcst < 0 ] <- 0 } - -sdates.hcst <- file_dates -leadtimes.hcst <- dates_file - diff --git a/modules/data_load/regrid.R b/modules/data_load/regrid.R deleted file mode 100644 index 9aaab6e40bc33ef17c8facef8fa088c1556c89ac..0000000000000000000000000000000000000000 --- a/modules/data_load/regrid.R +++ /dev/null @@ -1,33 +0,0 @@ - -get_regrid_params <- function(recipe, archive){ - - exp.name <- recipe$Analysis$Datasets$System$name - ref.name <- recipe$Analysis$Datasets$Reference$name - exp_descrip <- archive$System[[exp.name]] - reference_descrip <- archive$Reference[[ref.name]] - - if (tolower(recipe$Analysis$Regrid$type) == 'to_reference') { - - regrid_params <- list(fcst.gridtype=reference_descrip$reference_grid, - fcst.gridmethod=recipe$Analysis$Regrid$method, - fcst.transform=CDORemapper, - obs.gridtype=NULL, - obs.gridmethod=NULL, - obs.transform=NULL) - - } else if (tolower(recipe$Analysis$Regrid$type) == 'to_system') { - - regrid_params <- list(fcst.gridtype=NULL, - fcst.gridmethod=NULL, - fcst.transform=NULL, - obs.gridtype=exp_descrip$reference_grid, - obs.gridmethod=recipe$Analysis$Regrid$method, - obs.transform=CDORemapper) - - } ##TODO: Else condition? middle interpolation? 
- - return(regrid_params) - -} - - diff --git a/modules/data_load/s2dv_cube.R b/modules/data_load/s2dv_cube.R deleted file mode 100644 index a69a0fa229daf29e946fb802a983dc8da3aab325..0000000000000000000000000000000000000000 --- a/modules/data_load/s2dv_cube.R +++ /dev/null @@ -1,185 +0,0 @@ -## TODO: NEEDED IMPROVEMENTS FOR ESS TOOL: -# - Having more than one variables could be required (for indices). -# - New checks for new dimensions -# - Instead of lat and lons, the object could have regions. - - - -#'Creation of a 's2dv_cube' object -#' -#'@description This function allows to create a 's2dv_cube' object by passing information through its parameters. This function will be needed if the data hasn't been loaded using CST_Load or has been transformed with other methods. A 's2dv_cube' object has many different components including metadata. This function will allow to create 's2dv_cube' objects even if not all elements are defined and for each expected missed parameter a warning message will be returned. -#' -#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -#' -#'@param data an array with any number of named dimensions, typically an object output from CST_Load, with the following dimensions: dataset, member, sdate, ftime, lat and lon. -#'@param lon an array with one dimension containing the longitudes and attributes: dim, cdo_grid_name, data_across_gw, array_across_gw, first_lon, last_lon and projection. -#'@param lat an array with one dimension containing the latitudes and attributes: dim, cdo_grid_name, first_lat, last_lat and projection. -#'@param Variable a list of two elements: \code{varName} a character string indicating the abbreviation of a variable name and \code{level} a character string indicating the level (e.g., "2m"), if it is not required it could be set as NULL. 
-#'@param Datasets a named list with the dataset model with two elements: \code{InitiatlizationDates}, containing a list of the start dates for each member named with the names of each member, and \code{Members} containing a vector with the member names (e.g., "Member_1") -#'@param Dates a named list of two elements: \code{start}, an array of dimensions (sdate, time) with the POSIX initial date of each forecast time of each starting date, and \code{end}, an array of dimensions (sdate, time) with the POSIX final date of each forecast time of each starting date. -#'@param when a time stamp of the date issued by the Load() call to obtain the data. -#'@param source_files a vector of character strings with complete paths to all the found files involved in the Load() call. -#' -#'@return The function returns an object of class 's2dv_cube'. -#' -#'@seealso \code{\link[s2dverification]{Load}} and \code{\link{CST_Load}} -#'@examples -#'exp_original <- 1:100 -#'dim(exp_original) <- c(lat = 2, time = 10, lon = 5) -#'exp1 <- s2dv_cube(data = exp_original) -#'class(exp1) -#'exp2 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50)) -#'class(exp2) -#'exp3 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m')) -#'class(exp3) -#'exp4 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999))) -#'class(exp4) -#'exp5 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999)), -#' when = "2019-10-23 19:15:29 CET") -#'class(exp5) -#'exp6 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = 
c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999)), -#' when = "2019-10-23 19:15:29 CET", -#' source_files = c("/path/to/file1.nc", "/path/to/file2.nc")) -#'class(exp6) -#'exp7 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999)), -#' when = "2019-10-23 19:15:29 CET", -#' source_files = c("/path/to/file1.nc", "/path/to/file2.nc"), -#' Datasets = list( -#' exp1 = list(InitializationsDates = list(Member_1 = "01011990", -#' Members = "Member_1")))) -#'class(exp7) -#'dim(exp_original) <- c(dataset = 1, member = 1, sdate = 2, ftime = 5, lat = 2, lon = 5) -#'exp8 <- s2dv_cube(data = exp_original, lon = seq(-10, 10, 5), lat = c(45, 50), -#' Variable = list(varName = 'tas', level = '2m'), -#' Dates = list(start = paste0(rep("01", 10), rep("01", 10), 1990:1999), -#' end = paste0(rep("31", 10), rep("01", 10), 1990:1999))) -#'class(exp8) -#'@export -s2dv_cube <- function(data, lon = NULL, lat = NULL, Variable = NULL, Datasets = NULL, - Dates = NULL, when = NULL, source_files = NULL) { - - TEMPORAL_DIMS <- c('syear','sday','sweek', 'time') - - if (is.null(data) | !is.array(data) | is.null(names(dim(data)))) { - stop("Parameter 'data' must be an array with named dimensions.") - } - dims <- dim(data) - if (is.null(lon)) { - if (!any(c('lon', 'longitude') %in% names(dims))) { - warning("Parameter 'lon' is not provided but data contains a ", - "longitudinal dimension.") - } else { - warning("Parameter 'lon' is not provided so the data is from an ", - "unknown location.") - } - } - if (is.null(lat)) { - if (!any(c('lat', 'latitude') %in% names(dims))) { - warning("Parameter 'lat' is not provided but data contains a ", - "latitudinal 
dimension.") - } else { - warning("Parameter 'lat' is not provided so the data is from an ", - "unknown location.") - } - } - if (is.null(Variable)) { - warning("Parameter 'Variable' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if (is.null(Datasets)) { - warning("Parameter 'Datasets' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if (is.null(Dates)) { - if (!any(TEMPORAL_DIMS %in% names(dims))) { - warning("Parameter 'Dates' is not provided but data contains a ", - "temporal dimension.") - } else { - warning("Parameter 'Dates' is not provided so the data is from an ", - "unknown time period.") - } - } - if (is.null(when)) { - warning("Parameter 'when' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if (is.null(source_files)) { - warning("Parameter 'source_files' is not provided so the metadata ", - "of 's2dv_cube' object will be incomplete.") - } - if (!is.null(Variable)) { - if (!is.list(Variable)) { - Variable <- list(Variable) - } - if (names(Variable)[1] != 'varName' | names(Variable)[2] != 'level') { - warning("The name of the first elment of parameter 'Variable' is ", - "expected to be 'varName' and the second 'level'.") - } - if (!is.character(Variable[[1]])) { - warning("The element 'Varname' of parameter 'Variable' must be ", - "a character.") - } - } - # Dimensions comparison - if (!is.null(lon)) { - if (any(names(dims) %in% c('lon', 'longitude'))) { - if (dims[(names(dims) %in% c('lon', 'longitude'))] != length(lon) & - dims[(names(dims) %in% c('lon', 'longitude'))] != 1) { - stop("Length of parameter 'lon' doesn't match the length of ", - "longitudinal dimension in parameter 'data'.") - } - } - } - if (!is.null(lat)) { - if (any(names(dims) %in% c('lat', 'latitude'))) { - if (dims[(names(dims) %in% c('lat', 'latitude'))] != length(lat) & - dims[(names(dims) %in% c('lat', 'latitude'))] != 1) { - stop("Length of parameter 'lat' 
doesn't match the length of ", - "latitudinal dimension in parameter 'data'.") - } - } - } - if (!is.null(Dates)) { - if (!is.list(Dates)) { - stop("Parameter 'Dates' must be a list.") - } else { - if (length(Dates) > 2) { - warning("Parameter 'Dates' is a list with more than 2 ", - "elements and only the first two will be used.") - Dates <- Dates[1 : 2] - } - if (names(Dates)[1] != 'start' | names(Dates)[2] != 'end') { - warning("The name of the first element of parameter 'Dates' ", - "is expected to be 'start' and the second 'end'.") - } - if (length(Dates[[1]]) != length(Dates[[2]]) & - length(Dates) == 2) { - stop("The length of the elements in parameter 'Dates' must ", - "be equal.") - } - time_dims <- dims[names(dims) %in% TEMPORAL_DIMS] - if (prod(time_dims) != length(Dates[[1]])) { - stop("The length of the temporal dimension doesn't match ", - " with the length of elements in parameter 'Dates'.") - } - } - } - object <- list(data = data, lon = lon, lat = lat, Variable = Variable, - Datasets = Datasets, Dates = Dates, when = when, - source_files = source_files) - class(object) <- 's2dv_cube' - return(object) -} diff --git a/modules/data_load/recipe_1.yml b/modules/data_load/testing_recipes/recipe_1.yml similarity index 94% rename from modules/data_load/recipe_1.yml rename to modules/data_load/testing_recipes/recipe_1.yml index a6cdda674965bbe8c524842432bcb0ca293abdeb..c7cdfca14de6d2bc9ededa3f8382a4048e39cf61 100644 --- a/modules/data_load/recipe_1.yml +++ b/modules/data_load/testing_recipes/recipe_1.yml @@ -5,7 +5,7 @@ Analysis: Horizon: Seasonal Variables: name: tas - freq: monthly_mean + freq: daily_mean Datasets: System: name: system5c3s @@ -15,7 +15,7 @@ Analysis: Time: sdate: fcst_syear: '2017' - fcst_sday: '0701' + fcst_sday: '0101' hcst_start: '1993' hcst_end: '2016' leadtimemin: 2 diff --git a/modules/data_load/recipe_2.yml b/modules/data_load/testing_recipes/recipe_2.yml similarity index 75% rename from modules/data_load/recipe_2.yml rename to 
modules/data_load/testing_recipes/recipe_2.yml index d2376b233fffbb1beec99410d8c9cd040b450b37..b8d81ad7be688b0079b275e83a653a5033e04ec5 100644 --- a/modules/data_load/recipe_2.yml +++ b/modules/data_load/testing_recipes/recipe_2.yml @@ -5,7 +5,7 @@ Analysis: Horizon: Seasonal Variables: name: tas - freq: daily_mean + freq: monthly_mean Datasets: System: name: system5c3s @@ -14,8 +14,8 @@ Analysis: name: era5 Time: sdate: - fcst_syear: '2017' - fcst_sday: '0701' + fcst_syear: '2020' + fcst_sday: '1101' hcst_start: '1993' hcst_end: '2016' leadtimemin: 2 @@ -39,5 +39,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/modules/test.R b/modules/test.R new file mode 100644 index 0000000000000000000000000000000000000000..9bd30de4d9dfddc4f90fd7972a398914bc63fd56 --- /dev/null +++ b/modules/test.R @@ -0,0 +1,31 @@ + + + + +recipe_file <- "modules/data_load/testing_recipes/recipe_2.yml" +source("modules/data_load/load.R") + +data <- load_datasets(recipe_file) + +hcst <- data$hcst +obs <- data$obs + +method <- "bias" +mm=F +ncores=4 +na.rm=T + +# Hcst calibration +hcst <- CSTools::CST_Calibration(hcst,obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + + diff --git a/recipe.R b/recipe.R deleted file mode 100644 index 6286e80456050edddeb68942ce1f241ca0f15135..0000000000000000000000000000000000000000 --- a/recipe.R +++ /dev/null @@ -1,36 +0,0 @@ -library(R6) -library(yaml) - -Recipe <- R6Class("Recipe", - public = list( - filename = NULL, - dir = NULL, - run_conf = NULL, - info = NULL, - params = NULL, - initialize = function(filename = NA, dir = NA) { - - if 
(is.na(dir)){ - self$filename <- basename(filename) - self$dir <- dirname(filename) - } else { - self$filename <- filename - self$dir <- dir - } - yml <- read_yaml(self$get_filepath()) - self$run_conf <- yml$Run - self$info <- yml$Description - self$params <- yml$Analysis - }, - - get_filepath = function() { - return(paste0(self$dir,"/",self$filename)) - } - - ) -) - - -#recipe <- Recipe$new(filename="/esarchive/scratch/lpalma/git/auto-s2s/recipes/seasonal_oper_atomic.yml") -#recipe <- Recipe$new("seasonal_oper_atomic.yml","/esarchive/scratch/lpalma/git/auto-s2s/recipes/") - diff --git a/recipes/seasonal_oper.yml b/recipes/seasonal_oper.yml index 3e3d04e3e2c7a7ab183d47cc9b32c70cad68c11b..5e5f61fcdd2249bf0f97c4932439994547bda7cd 100644 --- a/recipes/seasonal_oper.yml +++ b/recipes/seasonal_oper.yml @@ -43,7 +43,7 @@ Analysis: - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} Regrid: method: bilinear # str mandatory - type: to_system # str either to_system or to_reference mandatory + type: to_system # str either to_system, to_reference or CDO-compatible grid mandatory Data_load: module: "modules/data_load/seas5.load.R" Workflow: diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index b440612dd901c3a908ce77a54dd10729008b57f5..06f8270fcfd89ddc3412777536cb4149334e8cf8 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -1,3 +1,26 @@ +#'Read recipe YAML file and create and store logfile info +#' +#'The purpose of this function is to read the recipe configuration for Auto-S2S +#'workflows and create logfiles stored in the output directory specified in +#'the recipe. It returns an object of class logger that stores information on +#'the recipe configuration and errors.
+#' +#'@param recipe Auto-S2S configuration recipe as returned by read_yaml() +#' +#'@return list containing logger object, log filename and log directory name +#' +#'@import log4r +#' +#'@examples +#'setwd("/esarchive/scratch/vagudets/repos/auto-s2s/") +#'library(yaml) +#'recipe <- read_yaml("modules/data_load/recipe_1.yml") +#'logger <- prepare_outputs(recipe) +#'folder <- logger$foldername +#'log_file <- logger$logname +#'logger <- logger$logger +#' +#'@export prepare_outputs <- function(recipe) {