From 11969d0ea5abd7106ab7ecfda2c21ae2193d76ea Mon Sep 17 00:00:00 2001 From: Bert Van Schaeybroeck Date: Mon, 14 Jun 2021 14:19:21 +0000 Subject: [PATCH] Use Case 3 code --- .../UseCase3_data_preparation_SCHEME_model.R | 436 ++++++++++++++++++ 1 file changed, 436 insertions(+) create mode 100644 inst/doc/UseCase3_data_preparation_SCHEME_model.R diff --git a/inst/doc/UseCase3_data_preparation_SCHEME_model.R b/inst/doc/UseCase3_data_preparation_SCHEME_model.R new file mode 100644 index 00000000..0ff104db --- /dev/null +++ b/inst/doc/UseCase3_data_preparation_SCHEME_model.R @@ -0,0 +1,436 @@ +rm(list = ls()) +library(CSTools) +library(s2dverification) +library(CSTools) +library(ClimProjDiags) + +#SETUP PARAMETERS (TO FIX BEFORE RUNNING SCRIPT): +#------------------------------------------------ +var.to.use <- "prlr" #which variable to correct (prlr=precipitation, tasmin, tasmax, tas) + +init.yr <- 1993 #initial year (for ECMWF Sys5 = 1993) +end.yr <- 2019 #end year (for ECMWF Sys5 = 2019) +amt.ftime <- 214 +n.cores.to.use <- 20 +use.chirps <- T +eval.method.to.use <- "leave-one-out" +domain.high.res <- "greece_high_res" +domain.low.res <- "greece_low_res" +sdate.mon.to.use <- 5 #Month of startdate for ECMWF Sys5 possibilities are 5 (May) or 11 (November) +sdate.day.to.use <- 1 #Day of startdate for ECMWF Sys5 only possibility is 1 + +#LOCAL PARAMETERS (to be adjusted for each working system) +#--------------------------------------------------------- +dir.rdata <- "/mnt/HDS_URCLIM/URCLIM/bertvs/medscope/data/greece_rdata/" + +#dir.scripts <- "/mnt/netapp/home/bertvs/ARCHIVE_bertvs/R/medscope/D3.1/" +dir.scratch <- "/scratch/bertvs/" +#Base path for C3S forecast (experiment) dataset: +dir.c3s <- "/mnt/HDS_URCLIM/URCLIM/bertvs/medscope/data/" +#Base path for ERA5 reference or obs dataset: +dir.era5 <- "/mnt/HDS_BREGILABEPOC/BREGILABEPOC/era5/europe/daily/per_mon/" +#Base path for CHIRPS (reference or obs) rainfall dataset: +dir.chirps <- "/mnt/HDS_MEDYCLIM/MEDYCLIM/PREDANTAR/climate_data/obs/chirps/" +dir.chirps.low.res <- paste0(dir.chirps, "/", domain.low.res, "/per_mon/") +dir.chirps.high.res <- paste0(dir.chirps, "/", domain.high.res, "/per_mon/") + + +#AUXILIARY FUNCTIONS +#------------------- +set.msk <- function(x, msk, const){ + x[msk] = const + return(x) +} + +#FIXED PARAMETERS: +#----------------- +greece.coor.vec <- c( + lonmin = 18.975, + lonmax = 24.025, + latmin = 37.975, + latmax = 43.025) +greece.coor.lst <- list( + lon.min = 18.975, + lon.max = 24.025, + lat.min = 37.975, + lat.max = 43.025) +coor.to.use <- greece.coor.lst + +europe.coor <- list( + lon.min = -27, + lon.max = 45, + lat.min = 33, + lat.max = 73.5) + +#Large-scale pressure field metadata (necessary for analogs) +var.msl <- "mean_sea_level_pressure" +nc.var.name.msl <- "msl" + +#Depending on the variable loaded, different datasets and metadata are used +if(var.to.use == "prlr"){ #Precipitation + var.era5 <- "total_precipitation" + time.era5 <- "daily" + nc.var.name.era5 <- "tp" + var.chirps <- "precip" + time.chirps <- "daily" + nc.var.name.chirps <- "precip" + cal.meth.to.use <- "bias" #method for bias calibration + chirps.low.res.daily <- list( + name = "chirps_low_res", + path = paste0(dir.chirps.low.res, "chirps-v2.0.$YEAR$.days_greece_low_res-$MONTH$.nc"), + nc_var_name = nc.var.name.chirps) + chirps.high.res.daily <- list( + name = "chirps_high_res", + path = paste0(dir.chirps.high.res, + "chirps-v2.0.$YEAR$.days_greece_high_res-$MONTH$.nc"), + nc_var_name = nc.var.name.chirps) + #unit conversions + mul.cor.era5 <- 1000 * 24 + add.cor.era5 <- 0 + mul.cor.chirps <- 1 + add.cor.chirps <- 0 + mul.cor.exp <- 1000 * 3600 * 24 + add.cor.exp <- 0 + if(use.chirps){ + add.cor.obs <- add.cor.chirps + mul.cor.obs <- mul.cor.chirps + } else { + add.cor.obs <- add.cor.era5 + mul.cor.obs <- mul.cor.er5 + } +} else if(var.to.use == "tas"){ + var.era5 <- "2m_temperature" + time.era5 <- "daily" + nc.var.name.era5 <- "t2m" + cal.meth.to.use <- "mse_min" + + #unit conversions + mul.cor.era5 <- 0 + add.cor.era5 <- 0 + mul.cor.exp <- 0 + add.cor.exp <- 0 +} else if(var.to.use == "tasmin"){ + var.era5 <- "2m_temperature" + time.era5 <- "daily_min" + nc_var_name.era5 <- "t2m" + cal.meth.to.use <- "mse_min" #method for bias calibration + + #unit conversions + mul.cor.era5 <- 0 + add.cor.era5 <- 0 + mul.cor.exp <- 0 + add.cor.exp <- 0 +} else if(var.to.use == "tasmax"){ + var.era5 <- "2m_temperature" + time.era5 <- "daily_max" + nc_var_name.era5 <- "t2m" + cal.meth.to.use <- "mse_min" #method for bias calibration + + #unit conversions + mul.cor.era5 <- 0 + add.cor.era5 <- 0 + mul.cor.exp <- 0 + add.cor.exp <- 0 +} + + +#Experiment path specification: +ecmwf.s5.daily <- list( + name = "ecmwfS5", + path = paste0(dir.c3s, + "C3S/$EXP_NAME$/$STORE_FREQ$/$VAR_NAME$/", + "$VAR_NAME$_$START_DATE$.nc")) +#Reference or obs path specifications (ERA5 data available over Europe): +era5.daily <- list(name = "era5", + path = paste0(dir.era5, "era5-", time.era5, + "-europe-", var.era5, "-$YEAR$-$MONTH$.nc"), + nc_var_name = nc.var.name.era5) +#Reference or obs path specifications for pressure field (ERA5 data available over Europe): +msl.era5.daily <- list(name = "msl", + path = paste0(dir.era5, "era5-", time.era5, + "-europe-", var.msl, "-$YEAR$-$MONTH$.nc"), + nc_var_name = nc.var.name.msl) + +#UNIVERSAL PARAMETERS: +#--------------------- +amt.mon.per.yr <- 12 +amt.day.per.mon <- 31 +sdate.day.str <- formatC(sdate.day.to.use, width = 2, flag = "0") +sdate.mon.str <- formatC(sdate.mon.to.use, width = 2, flag = "0") +day.lst <- formatC(seq(1, amt.day.per.mon), width = 2, flag = "0") +yr.lst <- seq(init.yr, end.yr) +amt.yr <- length(yr.lst) +sdate.lst <- paste0(yr.lst, sdate.mon.str, sdate.day.str ) + + +#START +#----- + +#1. LOAD THE DATA +#---------------- + +if(use.chirps){ + obs.set.to.use <- chirps.low.res.daily +} else { + obs.set.to.use <- era5.daily +} + +#Load mean sea level pressure field from ERA5 (no need to set the units) + +file.to.load <- paste0(dir.rdata, "msl_all.RData") +if(file.exists(file.to.load)){ + load(file.to.load, verbose = T) +} else { + msl.all <- CST_Load( + var = "psl", #nc.var.name.msl, + obs = list(msl.era5.daily), + exp = list(ecmwf.s5.daily), + nmember = NULL, + sdates = sdate.lst, + lonmin = europe.coor$lon.min, + lonmax = europe.coor$lon.max, + latmin = europe.coor$lat.min, + latmax = europe.coor$lat.max, + output = "lonlat", + storefreq = "daily", + nprocs = n.cores.to.use) + save(file = file.to.load, msl.all) +} + +#Data manipulation: first split lead times per month. Then merge all data per month and all sdates. +#This merged dataset will be used to calibrate (per month) and find analogs (per month). +obs.msl.eur.split <- CST_SplitDim(msl.all$obs, split_dim = c("ftime")) +exp.msl.eur.split <- CST_SplitDim(msl.all$exp, split_dim = c("ftime")) +obs.msl.eur.merge <- CST_MergeDims( + obs.msl.eur.split, + merge_dims = c("sdate", "ftime"), + rename_dim = "sdate") +exp.msl.eur.merge <- CST_MergeDims( + exp.msl.eur.split, + merge_dims = c("sdate", "ftime"), + rename_dim = "sdate") + +#Load observational and forecast set of variable that needs to be calibrated and downscaled: +file.to.load <- paste0(dir.rdata, "data_all.RData") +if(file.exists(file.to.load)){ + load(file.to.load, verbose = T) +} else { + data.all <- CST_Load( + var = var.to.use, + obs = list(obs.set.to.use), + exp = list(ecmwf.s5.daily), + nmember = NULL, + sdates = sdate.lst, + lonmin = coor.to.use$lon.min, + lonmax = coor.to.use$lon.max, + latmin = coor.to.use$lat.min, + latmax = coor.to.use$lat.max, + output = "lonlat", + storefreq = "daily", + nprocs = n.cores.to.use) + save(file = file.to.load, data.all) +} +#Set the units: +data.all$obs$data <- data.all$obs$data * mul.cor.obs + add.cor.obs +data.all$exp$data <- data.all$exp$data * mul.cor.exp + add.cor.exp + +#Data manipulation: first split lead times per month. Then merge all data per month and all sdates. +#This merged dataset will be used to calibrate (per month) and find analogs (per month). +obs.split <- CST_SplitDim(data.all$obs, split_dim = c("ftime")) +exp.split <- CST_SplitDim(data.all$exp, split_dim = c("ftime")) +obs.merge <- CST_MergeDims( + obs.split, + merge_dims = c("sdate", "ftime"), + rename_dim = "sdate") +exp.merge <- CST_MergeDims( + exp.split, + merge_dims = c("sdate", "ftime"), + rename_dim = "sdate") + +#Calibrate the exp data (per month) +cal.merge <- CST_Calibration( + exp = exp.merge, + obs = obs.merge, + cal.method = cal.meth.to.use, + eval.method = eval.method.to.use) +cal.merge$data[cal.merge$data < 0] <- 0 + +#LOAD HIGH RES CHIRPS DATA +file.to.load <- paste0(dir.rdata, "obs_high_res.RData") +if(file.exists(file.to.load)){ + load(file.to.load, verbose = T) +} else { + obs.high.res <- CST_Load(var = var.to.use, + obs = list(chirps.high.res.daily), + exp = NULL, + sdates = sdate.lst, + nmember = 1, + leadtimemax = amt.ftime, + sampleperiod = 1, + lonmin = coor.to.use$lon.min, + lonmax = coor.to.use$lon.max, + latmin = coor.to.use$lat.min, + latmax = coor.to.use$lat.max, + output = "lonlat", + storefreq = "daily", + nprocs = n.cores.to.use) + save(file = file.to.load, obs.high.res) +} +#set the units +obs.high.res$data <- obs.high.res$data * mul.cor.chirps + + add.cor.chirps +#split per month +obs.high.res.split <- CST_SplitDim( + obs.high.res, + split_dim = c("ftime")) +#merge lead times and sdates +obs.high.res.merge <- CST_MergeDims( + obs.high.res.split, + merge_dims = c("sdate", "ftime"), + rename_dim = "sdate") + +#LOAD LOW RES CHIRPS DATA +file.to.load <- paste0(dir.rdata, "obs_low_res.RData") +if(file.exists(file.to.load)){ + load(file.to.load, verbose = T) +} else { + obs.low.res <- CST_Load(var = var.to.use, + obs = list(chirps.low.res.daily), + exp = NULL, + sdates = sdate.lst, + nmember = 1, + leadtimemax = amt.ftime, + sampleperiod = 1, + lonmin = coor.to.use$lon.min, + lonmax = coor.to.use$lon.max, + latmin = coor.to.use$lat.min, + latmax = coor.to.use$lat.max, + output = "lonlat", + storefreq = "daily", + nprocs = n.cores.to.use) + save(file = file.to.load, obs.low.res) +} +#set units +obs.low.res$data <- obs.low.res$data * mul.cor.chirps + + add.cor.chirps +#split per month +obs.low.res.split <- CST_SplitDim( + obs.low.res, + split_dim = c("ftime")) +#merge lead times and sdates +obs.low.res.merge <- CST_MergeDims( + obs.low.res.split, + merge_dims = c("sdate", "ftime"), + rename_dim = "sdate") + + +#2. PROCESS THE DATA +#------------------- + +#amount of ensemble members from experiment. For ECMWF Sys5 it is 25: +amt.mbr <- as.numeric(dim(cal.merge$data)["member"]) +lon.low.res <- as.vector(cal.merge$lon) +lat.low.res <- as.vector(cal.merge$lat) +lon.high.res <- as.vector(obs.high.res$lon) +lat.high.res <- as.vector(obs.high.res$lat) +lon.eur <- as.vector(obs.msl.eur.merge$lon) +lat.eur <- as.vector(obs.msl.eur.merge$lat) + +#amount of lead times in months. For ECMWF Sys5 it is 7: +amt.lead.mon <- as.numeric(dim(cal.merge$data)["monthly"]) +mon.seq.tmp <- seq(sdate.mon.to.use, sdate.mon.to.use + amt.lead.mon - 1) +mon.seq.tmp <- ((mon.seq.tmp - 1) %% amt.mon.per.yr) + 1 +lead.mon.lst <- formatC(mon.seq.tmp, width = 2, flag = "0") +#amount of starting days from experiment. For ECMWF Sys5 it is 837: +amt.sdate <- as.numeric(dim(cal.merge$data)["sdate"]) + +sub.time <- outer( + as.vector(t(outer(yr.lst, day.lst, paste, sep="-"))), + lead.mon.lst, + paste, sep = "-") +#This step is necessary to set the non-existent dates to NA +sub.time <- format(as.Date(sub.time, format("%Y-%d-%m")), "%Y-%m-%d") +dim(sub.time) <- c(sdate = amt.yr * amt.day.per.mon, time = amt.lead.mon) + +cal.high.res.merge <- obs.high.res.merge +cal.high.res.merge$data[] <- NA + +#Determine spatial points with all obs.high.res.merge (CHIRPS) data equal to NA. These are the points over sea. +is.na.high.res.obs <- apply( + obs.high.res.merge$data, + MARGIN = c(4, 5), + FUN = function(x){all(is.na(x))}) +#Determine spatial points with all obs.low.res.merge (CHIRPS) data equal to NA. These are the points over sea. +is.na.low.res.obs <- apply( + obs.low.res.merge$data, + MARGIN = c(4, 5), + FUN = function(x){all(is.na(x))}) + +#Set all calibrated exp data (cal.merge) equal to NA at the sea point. +cal.merge.tmp = Apply( + data = list(x = cal.merge$data), + target_dims = list(x = c("lat", "lon")), + fun = set.msk, + msk = is.na.low.res.obs, + const = 0, + output_dims = list(c("lat", "lon")) + )$output1 +dex.match <- match(names(dim(cal.merge$data)), names(dim(cal.merge.tmp))) +cal.merge$data <- aperm(cal.merge.tmp, dex.match) +rm(cal.merge.tmp) + +#2. PROCESS THE DATA +#------------------- + +i.dataset <- 1 +i.mbr.obs <- 1 +for(i.mbr in seq(1, amt.mbr)){ + for(i.mon in seq(1, amt.lead.mon)){ + for(i.sdate in seq(1, amt.sdate)){ + i.mbr <- 1 + i.mon = 1 + i.sdate = 24 + date.to.use <- sub.time[ i.sdate, i.mon] + date.an.lst <- sub.time[ , i.mon] + cat("i.mbr = ", i.mbr, ", i.mon =", i.mon, ", i.sdate = ", + i.sdate, "date: ", date.to.use,"\n") + + + #Extract the (calibrated) forecast that you want to downscale: + exp.low.res.tmp <- exp.merge$data[i.dataset, i.mbr, i.sdate, , , i.mon] + cal.low.res.tmp <- cal.merge$data[i.dataset, i.mbr, i.sdate, , , i.mon] + #Extract the large-scale pressure field of that day + exp.msl.eur.tmp <- exp.msl.eur.merge$data[i.dataset, i.mbr, i.sdate, , , i.mon] + + #Extract all observations that will be used to find analogs + obs.msl.eur.tmp <- obs.msl.eur.merge$data[i.dataset, i.mbr.obs, , , , i.mon]#-i.sdate + obs.low.res.tmp <- obs.low.res.merge$data[i.dataset, i.mbr.obs, , , , i.mon] #-i.sdate + obs.high.res.tmp <- obs.high.res.merge$data[i.dataset, i.mbr.obs, , , , i.mon] #-i.sdate + names(dim(obs.high.res.tmp)) <- c("time", "lat", "lon") + names(dim(obs.low.res.tmp)) <- c("time", "lat", "lon") + names(dim(obs.msl.eur.tmp)) <- c("time", "lat", "lon") + if(!is.na(date.to.use) & !all(is.na(cal.low.res.tmp))){ + obs.low.res.tmp[is.na(obs.low.res.tmp)] <- 0 + + res <- Analogs( + expL = exp.msl.eur.tmp, + obsL = obs.msl.eur.tmp, + time_obsL = date.an.lst, + obsVar = obs.low.res.tmp, + expVar = exp.low.res.tmp, + lonVar = lon.low.res, + latVar = lat.low.res, + lonL = lon.eur, + latL = lat.eur, + region = greece.coor.vec, + criteria = "Local_dist", + time_expL = date.to.use, + excludeTime = date.to.use, + AnalogsInfo = T, + nAnalogs = 1000) + } + } + } +} + + -- GitLab