Newer
Older
# -------------------------------------------
# Downscaling precipitation using RainFARM
# -------------------------------------------
# Note 1: The data could be first transformed with QuantileMapping from CSTools
# Note 2: Extra parameters could be use to downscale the data: weights, slope...
# See more information in https://cran.r-project.org/web/packages/CSTools/vignettes/RainFARM_vignette.html or https://drive.google.com/file/d/1ltB2vypHrk1B-x1h2BQF4qBXv5spbXmG/view
# -------------------------------------------
# MORE IMPORTANT NOTE:
# startR is aim to return a result that fits in your local memory. This aim is the oposite of downscale, which output size increase. Therefore, this example save the data in NetCDF files and provides the Mean which has a reduced size.
# Warning!!!!!! Use this example with caution to avoid saving not desired data.
#-------------------------------------------
# Load libraries and functions:
library(startR)
library(CSTools)
library(ncdf4)
library(s2dv) # could be removed when CST_SaveExp is merged in CSTools
# Define the data:
sdates <- paste0(1993:1996, '1101') # starting dates
path <- "/esarchive/exp/ecmwf/system5c3s/daily/$var$_s0-24h/$var$_$sdate$.nc"
data <- Start(dataset = path,
var = 'prlr',
sdate = sdates,
member = 'all',
longitude = values(list(-10, 30)),
longitude_reorder = CircularSort(-180, 180),
latitude = values(list(18, 58)),
time = 'all',
return_vars = list(latitude = 'dataset', longitude = 'dataset',
time = NULL),
synonims = list(latitude = c('lat', 'latitude'),
longitude = c('lon', 'longitude'),
member = c('member', 'ensemble')),
retrieve = FALSE)
# Define the function:
Chunk_RF <- function(x, nf, destination, startdates, nchunks = chunk_indices) {
lon <- as.numeric(attributes(x)$Variables$dat1$longitude)
lat <- as.numeric(attributes(x)$Variables$dat1$latitude)
down_data <- RainFARM(x, lon = lon, lat = lat, drop_realization_dim = TRUE,
nf, lon_dim = 'longitude', lat_dim = 'latitude')
# detect the dates of forecast time for different start dates
time <- attributes(x)$Variables$common$time
dates <- lapply(startdates, function(x) {seq(as.Date(x, format = "%Y-%m-%d"),
dimname <- names(dim(down_data$data))
var_dims <- list(ncdim_def(name = 'lon', units = 'degrees',
vals = as.vector(down_data$lon), longname = 'longitude'),
vals = as.vector(down_data$lat), longname = 'longitude'),
ncdim_def(name = 'ensemble', units = "adim",
vals = 1 : dim(down_data$data)[which(dimname == 'member')],
longname = 'ensemble', create_dimvar = TRUE))
# startdates and dates depends on the chunk:
CSTools:::.saveExp(down_data$data, dims_var = var_dims,
var_name = 'tas', units = 'K', sdate = startdates[nchunks['sdate']],
Dates = dates[[nchunks['sdate']]],
cdo_grid_name = paste0('r', length(lon), 'x', length(lat)),
projection = 'none', destination = destination)
down_data_mean <- MeanDims(down_data$data, c('member', 'longitude', 'latitude'))
return(down_data_mean)
}
step <- Step(Chunk_RF,
target_dims = c('member', 'longitude', 'latitude', 'time'),
output_dims = 'time',
use_libraries = c('CSTools', 'ncdf4'),
use_attributes = list(data = "Variables"))
workflow <- AddStep(data, step, nf = 4,
destination = "/esarchive/scratch/nperez/git/Flor/cstools/test_RF_start/",
startdates = as.Date(sdates, format = "%Y%m%d"))
res <- Compute(workflow,
chunks = list(sdate = 4),
threads_load = 2,
threads_compute = 4)
#-----------modify according to your personal info---------
queue_host = 'nord3' #your own host name for power9
temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/'
ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory
#------------------------------------------------------------
res <- Compute(workflow,
chunks = list(sdate = 4),
threads_load = 1,
threads_compute = 1,
cluster = list(queue_host = queue_host,
queue_type = 'lsf',
extra_queue_params = list('#BSUB -q bsc_es'),
cores_per_job = 1,
temp_dir = temp_dir,
polling_period = 10,
job_wallclock = '00:30',
max_jobs = 4,
bidirectional = FALSE),
ecflow_suite_dir = ecflow_suite_dir,
wait = TRUE)
# Visualize the temporal evolution of the result simultaneously for all sdates:
matplot(1:214, res$output1[,1,1,], type = "l")