Skip to content
GitLab
Projects Groups Topics Snippets
  • /
  • Help
    • Help
    • Support
    • Community forum
    • Submit feedback
  • Sign in
  • CSTools CSTools
  • Project information
    • Project information
    • Activity
    • Labels
    • Members
  • Repository
    • Repository
    • Files
    • Commits
    • Branches
    • Tags
    • Contributor statistics
    • Graph
    • Compare revisions
  • Issues 28
    • Issues 28
    • List
    • Boards
    • Service Desk
    • Milestones
  • Merge requests 2
    • Merge requests 2
  • CI/CD
    • CI/CD
    • Pipelines
    • Jobs
    • Schedules
  • Deployments
    • Deployments
    • Environments
    • Releases
  • Monitor
    • Monitor
    • Metrics
    • Incidents
  • Analytics
    • Analytics
    • Value stream
    • CI/CD
    • Repository
  • Wiki
    • Wiki
  • Snippets
    • Snippets
  • Activity
  • Graph
  • Create a new issue
  • Jobs
  • Commits
  • Issue Boards
Collapse sidebar
  • External
  • CSToolsCSTools
  • Issues
  • #85
Closed
Open
Issue created Feb 18, 2022 by Paloma Trascasa-Castro@ptrascasDeveloper

Issue with Calibration , nans and precipitation

Hi @aho @nperez @lpalma @amartine

I have some news about this code with some additions to it suggested by @cdelgado . It's about finding a way to solve problems derived from data with values of 0 mm/day (where it doesn't rain)

We've found that one of the ensemble members has points where there's no rain. This messes up with the calculation of some metrics and we have used another function inside the code: tryCatch. We are saying: try to compute the calibration, and if there are Nans, return the Nans.

It still doesn't work completely - I'm working on it - as it returns only the raw data and not the rest of the metrics. Another option I had is to edit the Calibration function setting na.rm to FALSE and call that function instead of CSTools_Calibration. I think it could be great if we find a way to overcome this problem as it's a common issue when calibrating precipitation forecasts.

Here's the code updated with the tryCatch function:

Thanks a lot for your help in advance

# Load startR
library(startR)
library(s2dv)
library(multiApply)

ncores <- 1

lons.min <- -20
lons.max <- 41 
lats.min <- -40
lats.max <- 40
# Declaration of data sources
var_name = 'prlr'
retrieve <- TRUE 
methods <- c('raw','bias','evmos','mse_min','crps_min','rpc-based')
exp_path <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_s0-24h/$var$_$sdate$01.nc'
grid <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/prlr_s0-24h/prlr_20000501.nc'
exp <- startR::Start(dat = exp_path,
                     var = var_name,
                     sdate = c("200011","200111","200211","200311"),
                     ensemble = 'all',
                     time = indices(1:6),
                     latitude = values(list(lats.min, lats.max)),
                     latitude_reorder = Sort(decreasing = T),
                     longitude = values(list(lons.min, lons.max)),
                     longitude_reorder = CircularSort(-180,180),
                     transform = CDORemapper,
		     transform_extra_cells = 8,
                     transform_params = list(grid = grid,
                                             method = 'conservative',
                                             crop = c(lons.min, lons.max, lats.min, lats.max)),
                     transform_vars = c('latitude', 'longitude'),
                     synonims = list(latitude = c('lat', 'latitude'),
                                     longitude = c('lon', 'longitude')),
                     return_vars = list(latitude = 'dat',
                                        longitude = 'dat',
                                        time = 'sdate'),
                     retrieve = retrieve)

dates <- attr(exp, 'Variables')$common$time
obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_s0-24h/$var$_$date$.nc'
#dates <- format(dates, '%Y%m')
dates_file <- format(dates, '%Y%m')
dim(dates_file) <- list(sdate = 4,
                  time = 6)
obs <- Start(dat = obs_path,
             var = var_name,
           #  date = unique(format(dates, '%Y%m')),
             date = dates_file,
           #  time = values(dates),
             latitude = values(list(lats.min, lats.max)),
             latitude_reorder = Sort(decreasing = T),
             longitude = values(list(lons.min, lons.max)),
             longitude_reorder = CircularSort(-180,180),
             synonims = list(longitude = c('lon', 'longitude'),
                             latitude = c('lat', 'latitude')),
             transform = CDORemapper,
             transform_extra_cells = 8,
             transform_params = list(grid = grid,
                                     method = 'conservative',
                                     crop = c(lons.min, lons.max, lats.min, lats.max)),
             transform_vars = c('latitude', 'longitude'),
             split_multiselected_dims = TRUE,
         #    time_across = 'date',
          #   merge_across_dims = TRUE,
             return_vars = list(time = 'date',
                                latitude = 'dat',
                                longitude = 'dat'),
             retrieve = retrieve)
#Function
skill_calibration <- function(exp, obs, methods) {
  
  library(s2dv)
  library(CSTools)
  library(multiApply)
  
  # source('/esarchive/scratch/ptrascas/gitlab/pasdfasdf/correlation_eno.R')
  correlation_eno <- function(exp, obs, time_dim = 'sdate', member_dim = 'ensemble', alpha = 0.05){
    
    exp <- s2dv::MeanDims(data = exp, dims = member_dim, na.rm = FALSE)
    
    cor = NULL
    cor$r = cor(exp,obs) # Correlation coefficient
    
    n_eff = s2dv::Eno(data = obs, time_dim = time_dim, na.action = na.pass, ncores = 1)
    t_alpha2_n2 = qt(p=alpha/2, df = n_eff-2, lower.tail = FALSE)
    t = abs(cor$r) * sqrt(n_eff-2) / sqrt(1-cor$r^2)
    
    if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){
      cor$sign = TRUE
    } else {
      cor$sign = FALSE
    }
    
    return(cor)
  }
  
  exp <- s2dv::MeanDims(data = exp, dims = 'time', na.rm = FALSE)
  obs <- s2dv::MeanDims(data = obs, dims = 'time', na.rm = FALSE)
  
  # Create an empty list that will contain the output of the "wrap_cal":
  # One list per calibration method, with the different statistical metrics embedded on it
  metrics  <- list()
  calibrated <- list() #, rpss = NULL)
  print ("line 104")
  # Loop over each calibration method to calculate its metrics
  for (method in methods){
    
    if (!identical(method,'raw')){
      # Calibration function

      calibrated[[method]] <- tryCatch(
		     {	     

      return(CSTools::Calibration(exp = exp, obs = obs, cal.method = method, eval.method = "leave-one-out",
     multi.model = FALSE, na.fill = TRUE, na.rm = FALSE, memb_dim = 'ensemble', sdate_dim = 'sdate', apply_to = 'sign', alpha = 0.1, ncores = 1))
    },
    
    error = function(cond) {
    return(NA*exp)
	        }
    )
         
    
    
    } else {
      calibrated[[method]] <- exp
    }

    # Metrics for each method
    metrics$acc[[method]] <- correlation_eno(exp = calibrated[[method]], obs = obs, time_dim = 'sdate', member_dim = 'ensemble', alpha = 0.05)
  }
  print ("line 118")
  return(list(raw_calib = s2dv::Reorder(data = calibrated$raw, order = c('sdate','ensemble')),
	      acc_raw_r = metrics$acc$raw$r,
              acc_raw_sign = metrics$acc$raw$sign,
              bias_calib = s2dv::Reorder(data = calibrated$bias, order = c('sdate', 'ensemble')),
	      acc_bias_r = metrics$acc$bias$r,
              acc_bias_sign = metrics$acc$bias$sign,
              evmos_calib = s2dv::Reorder(data = calibrated$evmos, order = c('sdate', 'ensemble')),
	      acc_evmos_r = metrics$acc$evmos$r,
              acc_evmos_sign = metrics$acc$evmos$sign,
              mse_min_calib = s2dv::Reorder(data = calibrated$mse_min,order = c('sdate', 'ensemble')),
              acc_mse_min_r = metrics$acc$mse_min$r,
              acc_mse_min_sign = metrics$acc$mse_min$sign,
              crps_min_calib = s2dv::Reorder(data = calibrated$crps_min,order = c('sdate', 'ensemble')),
	      acc_crps_min_r = metrics$acc$crps_min$r,
              acc_crps_min_sign = metrics$acc$crps_min$sign,
              rpc_based_calib = s2dv::Reorder(data = calibrated[['rpc-based']],order = c('sdate', 'ensemble')),
	      acc_rpc_based_r = metrics$acc[['rpc-based']]$r,
              acc_rpc_based_sign = metrics$acc[['rpc-based']]$sign))    
    
}

print ("line 140")    
if (isTRUE(retrieve)){
  
  ## for one grid point
  # metrics <- skill_calibration(exp = drop(exp), obs = drop(obs), methods=methods)
  ## paralelised
  output <- multiApply::Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = c('sdate','ensemble','time'), obs = c('sdate','time')), fun = skill_calibration, methods=methods, ncores = ncores)
  
} else if (isFALSE(retrieve)){
  print ("line 150")
  # Target dimensions are 'years' and ensemble for the forecast and 'year' for observations
  # Chunking will take place, where different latitude and longitude points will be sent as different jobs to Nord3
#  step <- startR::Step(fun = skill_calibration,
#                       target_dims = list(exp = c('year','month','ensemble'), obs = c('year','month')),
#                       output_dims = list(raw_calib = NULL, acc_raw_r = NULL, acc_raw_sign = NULL,
#                                          bias_calib = NULL,acc_bias_r = NULL, acc_bias_sign = NULL,
#                                          evmos_calib = NULL,acc_evmos_r = NULL, acc_evmos_sign = NULL,
#                                          mse_min_calib = NULL,acc_mse_min_r = NULL, acc_mse_min_sign= NULL,
#                                          crps_min_calib = NULL,acc_crps_min_r = NULL, acc_crps_min_sign = NULL,
#                                          rpc_based_calib = NULL,acc_rpc_based_r = NULL, acc_rpc_based_sign = NULL))

  step <- startR::Step(fun = skill_calibration,
                       target_dims = list(exp = c('sdate','ensemble','time'), obs = c('sdate','time')),
			# Especificar output dims:
                       output_dims = list(raw_calib = c('sdate','ensemble'),
                                         acc_raw_r = NULL, acc_raw_sign = NULL,
                                          bias_calib = c('sdate','ensemble'),
                                        acc_bias_r = NULL, acc_bias_sign = NULL,
                                          evmos_calib = c('sdate','ensemble'),
                                        acc_evmos_r = NULL, acc_evmos_sign = NULL,
                                          mse_min_calib = c('sdate','ensemble'),
                                        acc_mse_min_r = NULL, acc_mse_min_sign= NULL,
                                          crps_min_calib = c('sdate','ensemble'),
                                        acc_crps_min_r = NULL, acc_crps_min_sign = NULL,
                                          rpc_based_calib = c('sdate','ensemble'),
                                        acc_rpc_based_r = NULL, acc_rpc_based_sign = NULL))
 

 print ("Line 160")
  wf <- startR::AddStep(inputs = list(exp = exp, obs = obs), 
                        methods=methods, 
                        step_fun = step)[[1]]
  print ("Line 164")


#output <- startR::Compute(workflow = wf,
#                             chunks = list(longitude = 2),
#                             threads_load = 1,
#                             threads_compute = 2)

output <- startR::Compute(workflow = wf,
                             chunks = list(longitude = 2),
                             threads_load = 1,
                             threads_compute = ncores,
                             cluster = list(queue_host = 'nord4',
                                            queue_type = 'slurm',
                                            temp_dir = '/gpfs/scratch/bsc32/bsc32413/startR_hpc/', #############################3
                                            CDO_module = 'CDO/1.9.8-foss-2019b',
                                            cores_per_job = ncores,
                                            job_wallclock = '02:00:00',
                                            max_jobs = 10,
                                            bidirectional = FALSE,
                                            polling_period = 10),
                             ecflow_suite_dir = "/home/Earth/ptrascas/Desktop/startR_local/", ################################
                             wait = TRUE)
  print ("Line 180")
} else {stop('retrieve must be TRUE or FALSE')}

lat <- as.numeric(attr(obs,'Variables')$dat1$latitude)
lon <- as.numeric(attr(obs,'Variables')$dat1$longitude)


saveRDS(object = list(output = output, lat = lat, lon = lon),
        file = 'test_nuria.rds')




# Close device
Assignee
Assign to
Time tracking