From 81552c14e896dc2da74ef24be46028f52dde0678 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Mon, 20 Mar 2023 16:49:05 +0100 Subject: [PATCH 01/10] first version of multimodel --- conf/archive_decadal.yml | 4 +- modules/Calibration/Calibration.R | 11 +- modules/Loading/Loading.R | 33 ++ modules/Loading/Loading_decadal.R | 487 ++++++++++--------- modules/Loading/join_datasets.R | 59 +++ recipes/tests/recipe_model_decadal.yml | 51 ++ recipes/tests/recipe_model_seasonal.yml | 55 +++ recipes/tests/recipe_multimodel_decadal.yml | 51 ++ recipes/tests/recipe_multimodel_seasonal.yml | 55 +++ 9 files changed, 572 insertions(+), 234 deletions(-) create mode 100644 modules/Loading/join_datasets.R create mode 100644 recipes/tests/recipe_model_decadal.yml create mode 100644 recipes/tests/recipe_model_seasonal.yml create mode 100644 recipes/tests/recipe_multimodel_decadal.yml create mode 100644 recipes/tests/recipe_multimodel_seasonal.yml diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 2b74bff8..d6a7e5ea 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -228,8 +228,8 @@ archive: # ---- MIROC6: - name: - institution: + name: "MIRC6" + institution: "MIROC" src: hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" fcst: diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 899b1291..aeae3924 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -8,7 +8,7 @@ calibrate_datasets <- function(recipe, data) { # recipe: object obtained when passing the .yml recipe file to read_yaml() method <- tolower(recipe$Analysis$Workflow$Calibration$method) - + if (method == "raw") { warn(recipe$Run$logger, paste("The Calibration module has been called, but the calibration", @@ -26,7 +26,10 @@ calibrate_datasets <- function(recipe, data) { } else { ## TODO: Calibrate full fields when present # Calibration function params - mm <- recipe$Analysis$Datasets$Multimodel + if (!isFALSE(recipe$Analysis$Datasets$Multimodel)){ + mm <- TRUE + } else {mm <- FALSE} + if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { @@ -42,12 +45,12 @@ calibrate_datasets <- function(recipe, data) { # Replicate observation array for the multi-model case ## TODO: Implement for obs.full_val if (mm) { - obs.mm <- obs$data + obs.mm <- data$obs$data for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { obs.mm <- abind(obs.mm, data$obs$data, along=which(names(dim(data$obs$data)) == 'dat')) } - names(dim(obs.mm)) <- names(dim(obs$data)) + names(dim(obs.mm)) <- names(dim(data$obs$data)) data$obs$data <- obs.mm remove(obs.mm) } diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index e5dafd0d..fd21d2cb 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -3,10 +3,43 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") # Load required libraries/funs source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") +source('modules/Loading/join_datasets.R') ## TODO: Move to prepare_outputs.R source("tools/libs.R") load_datasets <- function(recipe) { + + if (length(recipe$Analysis$Datasets$System$name) == 1){ ## Only one model + + output <- load_single_datasets(recipe = recipe) + + } else { ## Several models and/or Multimodel + + multimodel <- recipe$Analysis$Datasets$Multimodel ## "TRUE", "FALSE" or "both" + + ## Loading each model individually + data <- list() + n_members <- list(hcst = c(), fcst = c()) + for (model in 1:length(recipe$Analysis$Datasets$System$name)){ + recipe_single <- recipe + recipe_single$Analysis$Datasets$System$name <- recipe_single$Analysis$Datasets$System$name[model] + data[[model]] <- load_single_datasets(recipe = recipe_single) + n_members$hcst[model] <- dim(data[[model]]$hcst$data)['ensemble'] + if (!is.null(data[[model]]$fcst)){ + n_members$fcst[model] <- dim(data[[model]]$fcst$data)['ensemble'] + } + + } + + ## Joining models and/or creating multimodel + output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members) + + } + + return(output) # list(hcst = hcst, fcst = fcst, obs = obs) +} + +load_single_datasets <- function(recipe) { # ------------------------------------------- # Set params ----------------------------------------- diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index e3677e1d..885c1062 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -10,6 +10,7 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") source("modules/Loading/helper_loading_decadal.R") source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") +source('modules/Loading/join_datasets.R') source("tools/libs.R") ## TODO: Remove once the fun is included in CSTools source("tools/tmp/as.s2dv_cube.R") @@ -20,12 +21,42 @@ source("tools/tmp/as.s2dv_cube.R") # recipe_file <- "modules/Loading/testing_recipes/recipe_decadal_daily.yml" load_datasets <- function(recipe) { + + if (length(recipe$Analysis$Datasets$System$name) == 1){ ## Only one model + + output <- load_single_datasets(recipe = recipe) + + } else { ## Several models and/or Multimodel + + multimodel <- recipe$Analysis$Datasets$Multimodel ## "TRUE", "FALSE" or "both" + + ## Loading each model individually + data <- list() + n_members <- list(hcst = c(), fcst = c()) + for (model in 1:length(recipe$Analysis$Datasets$System$name)){ + recipe_single <- recipe + recipe_single$Analysis$Datasets$System$name <- recipe_single$Analysis$Datasets$System$name[model] + recipe_single$Analysis$Datasets$System$member <- recipe_single$Analysis$Datasets$System$member[model][[1]] + n_members$hcst[model] <- length(recipe_single$Analysis$Datasets$System$member) + n_members$fcst[model] <- length(recipe_single$Analysis$Datasets$System$member) + data[[model]] <- load_single_datasets(recipe = recipe_single) + } + + ## Joining models and/or creating multimodel + output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members) + + } + + return(output) # list(hcst = hcst, fcst = fcst, obs = obs) +} +load_single_datasets <- function(recipe) { + archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive - + # Print Start() info or not DEBUG <- FALSE - + ## TODO: this should come from the main script # Create output folder and log: @@ -41,14 +72,14 @@ load_datasets <- function(recipe) { lats.max <- as.numeric(recipe$Analysis$Region$latmax) #10 lons.min <- as.numeric(recipe$Analysis$Region$lonmin) #0 lons.max <- as.numeric(recipe$Analysis$Region$lonmax) #10 - + # change to: sdates <- dates2load(recipe, logger) sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015 sdates_fcst <- recipe$Analysis$Time$fcst - + if (store.freq == "monthly_mean") { time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) - + } else if (store.freq == "daily_mean") { time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), @@ -56,10 +87,10 @@ load_datasets <- function(recipe) { sdates = sdates_hcst, calendar = archive$System[[exp.name]]$calendar) } - -#NOTE: May be used in the future -# season <- recipe$Analysis$Time$season - + + #NOTE: May be used in the future + # season <- recipe$Analysis$Time$season + #------------------------- # Read from archive: #------------------------- @@ -73,20 +104,20 @@ load_datasets <- function(recipe) { if (identical(member, 'all')) { member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]] } - + #------------------------- # derived from above: #------------------------- # Check lat and lon and decide CircularSort - circularsort <- check_latlon(latmin = lats.min, latmax = lats.max, lonmin = lons.min, lonmax = lons.max) - + circularsort <- check_latlon(latmin = lats.min, latmax = lats.max, lonmin = lons.min, lonmax = lons.max) + # generate transform params for system and ref regrid_params <- get_regrid_params(recipe, archive) - + # Only if the time length in each chunk may differ that we need largest_dims_length to be TRUE. Otherwise, set FALSE to increase efficiency. need_largest_dims_length <- ifelse(exp.name == 'EC-Earth3-i2', TRUE, FALSE) - - + + #------------------------------------------- # Step 1: Load the hcst #------------------------------------------- @@ -95,11 +126,11 @@ load_datasets <- function(recipe) { version = version, sdates = sdates_hcst) path_list <- tmp$path_list multi_path <- tmp$multi_path -# hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, -# '$ensemble$', table, '$var$', grid, version) -# hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - - + # hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + # '$ensemble$', table, '$var$', grid, version) + # hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + + Start_default_arg_list <- list( dat = path_list, #file.path(hcst.path, hcst.files), var = variable, @@ -127,25 +158,25 @@ load_datasets <- function(recipe) { time = c('syear', 'chunk')), silent = !DEBUG, retrieve = T) - + if (!multi_path) { Start_hcst_arg_list <- Start_default_arg_list hcst <- do.call(Start, Start_hcst_arg_list) - + } else { Start_hcst_arg_list <- Start_default_arg_list Start_hcst_arg_list[['syear']] <- NULL Start_hcst_arg_list[['chunk_depends']] <- NULL remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear') Start_hcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] - + hcst <- do.call(Start, Start_hcst_arg_list) - + # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) - + # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only tmp <- array(dim = c(dim(hcst)[c('syear', 'time')])) @@ -155,284 +186,284 @@ load_datasets <- function(recipe) { tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) } attr(hcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') - + } - + tmp_time_attr <- attr(hcst, 'Variables')$common$time - + # change syear to c(sday, sweek, syear) # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { error(recipe$Run$logger, - "hcst has problem in matching data and time attr dimension.") + "hcst has problem in matching data and time attr dimension.") stop() } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) - + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { attributes(hcst)$Variables$common[[variable]] <- attributes(hcst)$Variables$dat1[[variable]] } - + # Change class from startR_array to s2dv_cube suppressWarnings( - hcst <- as.s2dv_cube(hcst) + hcst <- as.s2dv_cube(hcst) ) - -#------------------------------------------- -# Step 2: Load the fcst -#------------------------------------------- + + #------------------------------------------- + # Step 2: Load the fcst + #------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst)) { - - tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, - version = version, sdates = sdates_fcst) - path_list <- tmp$path_list - multi_path <- tmp$multi_path -# fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, -# '$ensemble$', table, '$var$', grid, version) -# fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - - - # monthly & daily - if (!multi_path) { - #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst - Start_fcst_arg_list <- Start_default_arg_list - Start_fcst_arg_list[['dat']] <- path_list - Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) - fcst <- do.call(Start, Start_fcst_arg_list) - - - } else { # multi_path - - #TODO: time attribute is not correct. Improve Start(). - Start_fcst_arg_list <- Start_default_arg_list - Start_fcst_arg_list[['dat']] <- path_list - Start_fcst_arg_list[['syear']] <- NULL - Start_fcst_arg_list[['chunk_depends']] <- NULL - remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') - Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] - fcst <- do.call(Start, Start_fcst_arg_list) - - # Reshape and reorder dimensions - ## dat should be 1, syear should be length of dat; reorder dimensions - ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) - fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) - - # Manipulate time attr because Start() cannot read it correctly - wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only - tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) - tmp[1, ] <- wrong_time_attr - yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) - for (i_syear in 1:length(yr_diff)) { - tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) + + tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, + version = version, sdates = sdates_fcst) + path_list <- tmp$path_list + multi_path <- tmp$multi_path + # fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, + # '$ensemble$', table, '$var$', grid, version) + # fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + + + # monthly & daily + if (!multi_path) { + #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst + Start_fcst_arg_list <- Start_default_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) + fcst <- do.call(Start, Start_fcst_arg_list) + + + } else { # multi_path + + #TODO: time attribute is not correct. Improve Start(). + Start_fcst_arg_list <- Start_default_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- NULL + Start_fcst_arg_list[['chunk_depends']] <- NULL + remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') + Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] + fcst <- do.call(Start, Start_fcst_arg_list) + + # Reshape and reorder dimensions + ## dat should be 1, syear should be length of dat; reorder dimensions + ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] + dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + + # Manipulate time attr because Start() cannot read it correctly + wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only + tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) + tmp[1, ] <- wrong_time_attr + yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) + for (i_syear in 1:length(yr_diff)) { + tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) + } + attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') + } - attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') - - } - - tmp_time_attr <- attr(fcst, 'Variables')$common$time - - # change syear to c(sday, sweek, syear) - # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) - if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { - error(recipe$Run$logger, - "fcst has problem in matching data and time attr dimension.") - stop() - } - dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) - - #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 - if (multi_path) { - attributes(fcst)$Variables$common[[variable]] <- attributes(fcst)$Variables$dat1[[variable]] - } - - # Change class from startR_array to s2dv_cube - suppressWarnings( - fcst <- as.s2dv_cube(fcst) - ) - - # Only syear could be different - if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { - error(recipe$Run$logger, - "hcst and fcst do not share the same dimension structure.") - stop() - } - + + tmp_time_attr <- attr(fcst, 'Variables')$common$time + + # change syear to c(sday, sweek, syear) + # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] + dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { + error(recipe$Run$logger, + "fcst has problem in matching data and time attr dimension.") + stop() + } + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) + + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 + if (multi_path) { + attributes(fcst)$Variables$common[[variable]] <- attributes(fcst)$Variables$dat1[[variable]] + } + + # Change class from startR_array to s2dv_cube + suppressWarnings( + fcst <- as.s2dv_cube(fcst) + ) + + # Only syear could be different + if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { + error(recipe$Run$logger, + "hcst and fcst do not share the same dimension structure.") + stop() + } + } else { fcst <- NULL } - -#------------------------------------------- -# Step 3. Load the reference -#------------------------------------------- + + #------------------------------------------- + # Step 3. Load the reference + #------------------------------------------- obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, paste0(variable, archive$Reference[[ref.name]][[store.freq]][[variable]])) obs.files <- paste0('$var$_$file_date$.nc') - + # Get from startR_cube -# dates <- attr(hcst, 'Variables')$common$time + # dates <- attr(hcst, 'Variables')$common$time # Get from s2dv_cube dates <- hcst$Dates$start dates_file <- sapply(dates, format, '%Y%m') dim(dates_file) <- dim(dates) - + if (store.freq == "daily_mean") { -#//////////////// -# Method 1: use hcst time attr as obs time selector -#//////////////// - - # Set hour to 12:00 to ensure correct date retrieval for daily data - lubridate::hour(dates) <- 12 - lubridate::minute(dates) <- 00 - # Restore correct dimensions - dim(dates) <- dim(dates_file) - - obs <- Start(dat = file.path(obs.path, obs.files), - var = variable, - file_date = unique(format(dates, '%Y%m')), - time = dates, # [sday, sweek, syear, time] - time_across = 'file_date', - merge_across_dims = TRUE, - split_multiselected_dims = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_extra_cells = 2, - transform_params = list(grid = regrid_params$obs.gridtype, #nc file - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = 'file_date'), - silent = !DEBUG, - retrieve = TRUE) - + #//////////////// + # Method 1: use hcst time attr as obs time selector + #//////////////// + + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = file.path(obs.path, obs.files), + var = variable, + file_date = unique(format(dates, '%Y%m')), + time = dates, # [sday, sweek, syear, time] + time_across = 'file_date', + merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$obs.gridtype, #nc file + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + silent = !DEBUG, + retrieve = TRUE) + } else if (store.freq == "monthly_mean") { -#//////////////// -# Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector -#//////////////// - - obs <- Start(dat = file.path(obs.path, obs.files), - var = variable, - file_date = dates_file, #dates_arr, # [sday, sweek, syear, time] - split_multiselected_dims = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_extra_cells = 2, - transform_params = list(grid = regrid_params$obs.gridtype, #nc file - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = 'file_date'), - silent = !DEBUG, - retrieve = TRUE) + #//////////////// + # Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector + #//////////////// + + obs <- Start(dat = file.path(obs.path, obs.files), + var = variable, + file_date = dates_file, #dates_arr, # [sday, sweek, syear, time] + split_multiselected_dims = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$obs.gridtype, #nc file + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + silent = !DEBUG, + retrieve = TRUE) } - - -#dim(attr(obs, 'Variables')$common$time) -# sday sweek syear time -# 1 1 2 14 - + + + #dim(attr(obs, 'Variables')$common$time) + # sday sweek syear time + # 1 1 2 14 + # Only ensemble dim could be different if (!identical(dim(obs), dim(hcst$data)[-9])) { error(recipe$Run$logger, - "obs and hcst dimensions do not match.") + "obs and hcst dimensions do not match.") stop() } # Add ensemble dim to obs dim(obs) <- c(dim(obs), ensemble = 1) - + # Change class from startR_array to s2dv_cube suppressWarnings( - obs <- as.s2dv_cube(obs) + obs <- as.s2dv_cube(obs) ) - - -#------------------------------------------- -# Step 4. Verify the consistance between data -#------------------------------------------- + + + #------------------------------------------- + # Step 4. Verify the consistance between data + #------------------------------------------- # dimension if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { error(recipe$Run$logger, - "hcst and obs don't share the same dimension names.") + "hcst and obs don't share the same dimension names.") stop() } else { ens_ind <- which(names(dim(obs$data)) == 'ensemble') match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data))) if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) { error(recipe$Run$logger, - "hcst and obs don't share the same dimension length.") + "hcst and obs don't share the same dimension length.") stop() } } - + # time attribute if (!identical(format(hcst$Dates$start, '%Y%m'), format(obs$Dates$start, '%Y%m'))) { error(recipe$Run$logger, - "hcst and obs don't share the same time.") + "hcst and obs don't share the same time.") stop() } - + # lat and lon attributes if (!(recipe$Analysis$Regrid$type == 'none')) { if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") + "Please check the original grids and the", + "regrid parameters in your recipe.") error(recipe$Run$logger, lat_error_msg) hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) info(recipe$Run$logger, hcst_lat_msg) obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], - "; Last obs lat: ", obs$lat[length(obs$lat)]) + "; Last obs lat: ", obs$lat[length(obs$lat)]) info(recipe$Run$logger, obs_lat_msg) stop("hcst and obs don't share the same latitudes.") } - + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") + "Please check the original grids and the", + "regrid parameters in your recipe.") error(recipe$Run$logger, lon_error_msg) hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) info(recipe$Run$logger, hcst_lon_msg) obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], - "; Last obs lon: ", obs$lon[length(obs$lon)]) + "; Last obs lon: ", obs$lon[length(obs$lon)]) info(recipe$Run$logger, obs_lon_msg) stop("hcst and obs don't share the same longitudes.") } - } - + } + # Check fcst if (!is.null(fcst)) { # dimension if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { error(recipe$Run$logger, - "hcst and fcst don't share the same dimension names.") + "hcst and fcst don't share the same dimension names.") stop() } else { ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) { error(recipe$Run$logger, - "hcst and fcst don't share the same dimension length.") + "hcst and fcst don't share the same dimension length.") stop() } } - + # lat and lon attributes if (!(recipe$Analysis$Regrid$type == 'none')) { if (!identical(as.vector(hcst$lat), as.vector(fcst$lat))) { @@ -448,7 +479,7 @@ load_datasets <- function(recipe) { info(recipe$Run$logger, fcst_lat_msg) stop("hcst and fcst don't share the same latitudes.") } - + if (!identical(as.vector(hcst$lon), as.vector(fcst$lon))) { lon_error_msg <- paste("Longitude mismatch between hcst and fcst.", "Please check the original grids and the", @@ -463,18 +494,18 @@ load_datasets <- function(recipe) { stop("hcst and fcst don't share the same longitudes.") } } - + } - - -#------------------------------------------- -# Step 5. Tune data -#------------------------------------------- + + + #------------------------------------------- + # Step 5. Tune data + #------------------------------------------- # Remove negative values in accumulative variables dictionary <- read_yaml("conf/variable-dictionary.yml") if (dictionary$vars[[variable]]$accum) { info(recipe$Run$logger, - " Accumulated variable: setting negative values to zero.") + " Accumulated variable: setting negative values to zero.") obs$data[obs$data < 0] <- 0 hcst$data[hcst$data < 0] <- 0 if (!is.null(fcst)) { @@ -486,12 +517,12 @@ load_datasets <- function(recipe) { if (variable == "prlr") { # Verify that the units are m/s and the same in obs and hcst if (((attr(obs$Variable, "variable")$units == "m s-1") || - (attr(obs$Variable, "variable")$units == "m s**-1")) && - ((attr(hcst$Variable, "variable")$units == "m s-1") || - (attr(hcst$Variable, "variable")$units == "m s**-1"))) { + (attr(obs$Variable, "variable")$units == "m s**-1")) && + ((attr(hcst$Variable, "variable")$units == "m s-1") || + (attr(hcst$Variable, "variable")$units == "m s**-1"))) { info(recipe$Run$logger, - "Converting precipitation from m/s to mm/day.") + "Converting precipitation from m/s to mm/day.") obs$data <- obs$data*86400*1000 attr(obs$Variable, "variable")$units <- "mm/day" hcst$data <- hcst$data*86400*1000 @@ -502,11 +533,11 @@ load_datasets <- function(recipe) { } } } - -#------------------------------------------- -# Step 6. Print summary -#------------------------------------------- - + + #------------------------------------------- + # Step 6. Print summary + #------------------------------------------- + # Print a summary of the loaded data for the user, for each object if (recipe$Run$logger$threshold <= 2) { data_summary(hcst, recipe) @@ -515,10 +546,10 @@ load_datasets <- function(recipe) { data_summary(fcst, recipe) } } - + info(recipe$Run$logger, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") - - + + return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/join_datasets.R b/modules/Loading/join_datasets.R new file mode 100644 index 00000000..78922d15 --- /dev/null +++ b/modules/Loading/join_datasets.R @@ -0,0 +1,59 @@ + +join_datasets <- function(data, multimodel, n_members) { + + ## Taking the first model as "template" + output <- data[[1]] + + if (is.null(output$fcst$data)){ + exps <- 'hcst' + } else {exps <- c('hcst','fcst')} + + for (exp in exps){ + + ## New dimensions of the arrays + if (isFALSE(multimodel)){ ## Individual models + n_models <- length(data) + max_members <- max(n_members[[exp]]) + } else if (isTRUE(multimodel)){ ## Multimodel + n_models <- length(data) + max_members <- sum(n_members[[exp]]) + } else if (identical(multimodel,'both')){ ## Individual models and multimodel + n_models <- length(data)+1 + max_members <- sum(n_members[[exp]]) + } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both"')} + dims <- dim(data[[1]][[exp]]$data) + dims['dat'] <- n_models + dims['ensemble'] <- max_members + stopifnot(identical(names(dims), c('dat','var','sday','sweek','syear','time','latitude','longitude','ensemble'))) + + ## Creating empty models and multimodel + output[[exp]]$data <- array(data = NA, dim = dims) + ## Filling the models + for (model in 1:length(data)){ + output[[exp]]$data[model,,,,,,,,1:n_members[[exp]][model]] <- data[[model]][[exp]]$data + } + + ## Filling the multimodel + if (isFALSE(multimodel)){ ## Individual models + # output[[exp]]$data <- output[[exp]]$data + } else if (isTRUE(multimodel)){ ## Multimodel + output[[exp]]$data <- CSTools::MergeDims(data = output[[exp]]$data, + merge_dims = c('ensemble','dat'), + rename_dim = 'ensemble', + na.rm = TRUE) + output[[exp]]$data <- s2dv::InsertDim(data = output[[exp]]$data, + posdim = 1, + lendim = 1, + name = 'dat') + } else if (identical(multimodel,'both')){ ## Individual models and multimodel + multimodel_data <- CSTools::MergeDims(data = output[[exp]]$data, + merge_dims = c('ensemble','dat'), + rename_dim = 'ensemble', + na.rm = TRUE) + output[[exp]]$data[n_models,,,,,,,,1:max_members] <- multimodel_data + } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both"')} + + } + + return(output) +} diff --git a/recipes/tests/recipe_model_decadal.yml b/recipes/tests/recipe_model_decadal.yml new file mode 100644 index 00000000..d7d5339d --- /dev/null +++ b/recipes/tests/recipe_model_decadal.yml @@ -0,0 +1,51 @@ +Description: + Author: Carlos Delgado Torres + Info: Test for one decadal forecast system (only hindcast) +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 + member: r1i4p1f1,r2i4p1f1 #'all' + Multimodel: no + Reference: + name: JRA-55 + Time: + fcst_year: + hcst_start: 2015 # 2015-2016 in dcppA, 2017-2018 in dcppB + hcst_end: 2018 +# season: 'Annual' + ftime_min: 6 # Apr + ftime_max: 8 + Region: + latmin: 40 + latmax: 55 + lonmin: 0 + lonmax: 10 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: 'bias' + Skill: + metric: EnsCorr RPSS + Probabilities: + percentiles: [[1/3, 2/3]] + Indicators: + index: FALSE + ncores: 8 # Optional, int: number of cores, defaults to 1 + remove_NAs: FALSE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs/ + code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/ + diff --git a/recipes/tests/recipe_model_seasonal.yml b/recipes/tests/recipe_model_seasonal.yml new file mode 100644 index 00000000..a45ff2c8 --- /dev/null +++ b/recipes/tests/recipe_model_seasonal.yml @@ -0,0 +1,55 @@ +Description: + Author: Carlos Delgado Torres + Info: Test for one seasonal forecast system (only hindcast) + +Analysis: + Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal' + Variables: + name: tas # Mandatory, str: variable name in /esarchive/ + freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean' + Datasets: + System: + name: DWD-GCFS2.1 # ECMWF-SEAS5 # Mandatory, str: System name. + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference name. + Time: + sdate: '1101' # Mandatory, int: Start date, 'mmdd' + fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY' + hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY' + ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”. + ftime_max: 2 # Mandatory, int: Last forecast time step in months. Starts at “1”. + Region: + latmin: 0 # Mandatory, int: minimum latitude + latmax: 3 # Mandatory, int: maximum latitude + lonmin: 0 # Mandatory, int: minimum longitude + lonmax: 2 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. + type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. + Workflow: + Calibration: + method: mse_min # Mandatory, str: Calibration method. + Anomalies: + compute: no # Mandatory, bool: Either yes/true or no/false + cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false + Skill: + metric: RPSS CRPSS # Mandatory, str: List of skill metrics. + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds + # for quantiles and probability categories. # Each set of thresholds should be + # enclosed within brackets. + Indicators: + index: no # This feature is not implemented yet + ncores: 10 # Optional, int: number of cores to be used in parallel computation. + # If left empty, defaults to 1. + remove_NAs: TRUE # Optional, bool: Whether to remove NAs. + # If left empty, defaults to FALSE. + Output_format: S2S4E # This feature is not implemented yet +Run: + Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'. + Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'. + output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs # Mandatory, str: output directory + code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/ diff --git a/recipes/tests/recipe_multimodel_decadal.yml b/recipes/tests/recipe_multimodel_decadal.yml new file mode 100644 index 00000000..2ec31fb3 --- /dev/null +++ b/recipes/tests/recipe_multimodel_decadal.yml @@ -0,0 +1,51 @@ +Description: + Author: Carlos Delgado Torres + Info: Test for multi-model decadal forecast (only hindcast) +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: [EC-Earth3-i4, FGOALS-f3-L] + member: [[r1i4p1f1,r2i4p1f1], [r1i1p1f1,r2i1p1f1,r3i1p1f1]] + Multimodel: both ## yes, no, both + Reference: + name: JRA-55 + Time: + fcst_year: + hcst_start: 2015 # 2015-2016 in dcppA, 2017-2018 in dcppB + hcst_end: 2018 +# season: 'Annual' + ftime_min: 6 # Apr + ftime_max: 8 + Region: + latmin: 40 + latmax: 55 + lonmin: 0 + lonmax: 10 + Regrid: + method: bilinear + type: to_reference + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: 'bias' + Skill: + metric: EnsCorr RPSS + Probabilities: + percentiles: [[1/3, 2/3]] + Indicators: + index: FALSE + ncores: 8 # Optional, int: number of cores, defaults to 1 + remove_NAs: FALSE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs/ + code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/ + diff --git a/recipes/tests/recipe_multimodel_seasonal.yml b/recipes/tests/recipe_multimodel_seasonal.yml new file mode 100644 index 00000000..87a5e31e --- /dev/null +++ b/recipes/tests/recipe_multimodel_seasonal.yml @@ -0,0 +1,55 @@ +Description: + Author: Carlos Delgado Torres + Info: Test for one seasonal forecast system (only hindcast) + +Analysis: + Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal' + Variables: + name: tas # Mandatory, str: variable name in /esarchive/ + freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean' + Datasets: + System: + name: [ECMWF-SEAS5, DWD-GCFS2.1] # Mandatory, str: System name. + Multimodel: both # Mandatory, bool: Either yes/true or no/false or both + Reference: + name: ERA5 # Mandatory, str: Reference name. + Time: + sdate: '1101' # Mandatory, int: Start date, 'mmdd' + fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY' + hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY' + ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”. + ftime_max: 2 # Mandatory, int: Last forecast time step in months. Starts at “1”. + Region: + latmin: 0 # Mandatory, int: minimum latitude + latmax: 3 # Mandatory, int: maximum latitude + lonmin: 0 # Mandatory, int: minimum longitude + lonmax: 2 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. + type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. + Workflow: + Calibration: + method: mse_min # Mandatory, str: Calibration method. + Anomalies: + compute: no # Mandatory, bool: Either yes/true or no/false + cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false + Skill: + metric: RPSS CRPSS # Mandatory, str: List of skill metrics. + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds + # for quantiles and probability categories. # Each set of thresholds should be + # enclosed within brackets. + Indicators: + index: no # This feature is not implemented yet + ncores: 10 # Optional, int: number of cores to be used in parallel computation. + # If left empty, defaults to 1. + remove_NAs: TRUE # Optional, bool: Whether to remove NAs. + # If left empty, defaults to FALSE. + Output_format: S2S4E # This feature is not implemented yet +Run: + Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'. + Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'. + output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs # Mandatory, str: output directory + code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/ -- GitLab From 266f0b2b2db0d15ce97abc482beb61828bea3ee8 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 8 Sep 2023 17:09:14 +0200 Subject: [PATCH 02/10] Code adjustment; Add NOTE --- modules/Loading/Loading.R | 16 +++-- modules/Loading/join_datasets.R | 39 ++++++------ .../tests/recipe_multimodel_seasonal_aho.yml | 59 +++++++++++++++++++ 3 files changed, 87 insertions(+), 27 deletions(-) create mode 100644 recipes/tests/recipe_multimodel_seasonal_aho.yml diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index b864b6ff..414f893e 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -10,7 +10,7 @@ source("tools/libs.R") load_datasets <- function(recipe) { - if (length(recipe$Analysis$Datasets$System$name) == 1){ ## Only one model + if (length(recipe$Analysis$Datasets$System$name) == 1) { ## Only one model output <- load_single_datasets(recipe = recipe) @@ -18,16 +18,20 @@ load_datasets <- function(recipe) { multimodel <- recipe$Analysis$Datasets$Multimodel ## "TRUE", "FALSE" or "both" - ## Loading each model individually + # Loading each model individually + + ##NOTE: + ## if retrieve = T --> data is a list of models, each model contains a list of s2dv_cube. Try str(data, max.level=3, give.attr=F). + ## if retrieve = F --> must be a list be startR_cube --> unlist(data, recursive = F). 'dat', 'member' cannot be chunking dims. data <- list() n_members <- list(hcst = c(), fcst = c()) - for (model in 1:length(recipe$Analysis$Datasets$System$name)){ + for (model in 1:length(recipe$Analysis$Datasets$System$name)) { recipe_single <- recipe recipe_single$Analysis$Datasets$System$name <- recipe_single$Analysis$Datasets$System$name[model] data[[model]] <- load_single_datasets(recipe = recipe_single) - n_members$hcst[model] <- dim(data[[model]]$hcst$data)['ensemble'] - if (!is.null(data[[model]]$fcst)){ - n_members$fcst[model] <- dim(data[[model]]$fcst$data)['ensemble'] + n_members$hcst[model] <- data[[model]]$hcst$dims['ensemble'] + if (!is.null(data[[model]]$fcst)) { + n_members$fcst[model] <- data[[model]]$fcst$dims['ensemble'] } } diff --git a/modules/Loading/join_datasets.R b/modules/Loading/join_datasets.R index 78922d15..623cf0f7 100644 --- a/modules/Loading/join_datasets.R +++ b/modules/Loading/join_datasets.R @@ -1,42 +1,40 @@ - join_datasets <- function(data, multimodel, n_members) { ## Taking the first model as "template" + ##TODO: the s2dv_cube has only the first model's info output <- data[[1]] - if (is.null(output$fcst$data)){ + if (is.null(output$fcst$data)) { exps <- 'hcst' } else {exps <- c('hcst','fcst')} - for (exp in exps){ + for (exp in exps) { ## New dimensions of the arrays - if (isFALSE(multimodel)){ ## Individual models + if (isFALSE(multimodel)) { ## Individual models n_models <- length(data) max_members <- max(n_members[[exp]]) - } else if (isTRUE(multimodel)){ ## Multimodel + } else if (isTRUE(multimodel)) { ## Multimodel n_models <- length(data) max_members <- sum(n_members[[exp]]) - } else if (identical(multimodel,'both')){ ## Individual models and multimodel + } else if (identical(multimodel,'both')) { ## Individual models and multimodel n_models <- length(data)+1 max_members <- sum(n_members[[exp]]) - } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both"')} + } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both".')} dims <- dim(data[[1]][[exp]]$data) dims['dat'] <- n_models dims['ensemble'] <- max_members - stopifnot(identical(names(dims), c('dat','var','sday','sweek','syear','time','latitude','longitude','ensemble'))) + stopifnot(identical(names(dims), c('dat', 'var', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble'))) ## Creating empty models and multimodel output[[exp]]$data <- array(data = NA, dim = dims) ## Filling the models - for (model in 1:length(data)){ - output[[exp]]$data[model,,,,,,,,1:n_members[[exp]][model]] <- data[[model]][[exp]]$data + for (model in 1:length(data)) { + output[[exp]]$data[model, , , , , , , , 1:n_members[[exp]][model]] <- data[[model]][[exp]]$data } ## Filling the multimodel - if (isFALSE(multimodel)){ ## Individual models - # output[[exp]]$data <- output[[exp]]$data - } else if (isTRUE(multimodel)){ ## Multimodel + if (isTRUE(multimodel)) { ## Multimodel output[[exp]]$data <- CSTools::MergeDims(data = output[[exp]]$data, merge_dims = c('ensemble','dat'), rename_dim = 'ensemble', @@ -45,14 +43,13 @@ join_datasets <- function(data, multimodel, n_members) { posdim = 1, lendim = 1, name = 'dat') - } else if (identical(multimodel,'both')){ ## Individual models and multimodel - multimodel_data <- CSTools::MergeDims(data = output[[exp]]$data, - merge_dims = c('ensemble','dat'), - rename_dim = 'ensemble', - na.rm = TRUE) - output[[exp]]$data[n_models,,,,,,,,1:max_members] <- multimodel_data - } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both"')} - + } else if (identical(multimodel,'both')) { ## Individual models and multimodel + output[[exp]]$data[n_models, , , , , , , , 1:max_members] <- + CSTools::MergeDims(data = output[[exp]]$data, + merge_dims = c('ensemble','dat'), + rename_dim = 'ensemble', + na.rm = TRUE) + } } return(output) diff --git a/recipes/tests/recipe_multimodel_seasonal_aho.yml b/recipes/tests/recipe_multimodel_seasonal_aho.yml new file mode 100644 index 00000000..2696b2b6 --- /dev/null +++ b/recipes/tests/recipe_multimodel_seasonal_aho.yml @@ -0,0 +1,59 @@ +Description: + Author: An-Chi Ho + Info: Test for one seasonal forecast system (only hindcast) + +Analysis: + Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal' + Variables: + name: tas # Mandatory, str: variable name in /esarchive/ + freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean' + Datasets: + System: + name: [ECMWF-SEAS5, DWD-GCFS2.1] # Mandatory, str: System name. + Multimodel: both # Mandatory, bool: Either yes/true or no/false or both + Reference: + name: ERA5 # Mandatory, str: Reference name. + Time: + sdate: '1101' # Mandatory, int: Start date, 'mmdd' + fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY' + hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY' + ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”. + ftime_max: 2 # Mandatory, int: Last forecast time step in months. Starts at “1”. + Region: + latmin: 0 # Mandatory, int: minimum latitude + latmax: 3 # Mandatory, int: maximum latitude + lonmin: 0 # Mandatory, int: minimum longitude + lonmax: 2 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. + type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. + Workflow: + Calibration: + method: mse_min # Mandatory, str: Calibration method. + save: none + Anomalies: + compute: no # Mandatory, bool: Either yes/true or no/false + cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false + Skill: + metric: RPSS CRPSS # Mandatory, str: List of skill metrics. + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds + # for quantiles and probability categories. # Each set of thresholds should be + # enclosed within brackets. + save: none + Visualization: + plots: forecast_ensemble_mean + Indicators: + index: no # This feature is not implemented yet + ncores: 10 # Optional, int: number of cores to be used in parallel computation. + # If left empty, defaults to 1. + remove_NAs: TRUE # Optional, bool: Whether to remove NAs. + # If left empty, defaults to FALSE. + Output_format: S2S4E # This feature is not implemented yet +Run: + Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'. + Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'. + output_dir: /esarchive/scratch/aho/tmp/auto-s2s_outputs # Mandatory, str: output directory + code_dir: /esarchive/scratch/aho/git/auto-s2s/ -- GitLab From 6b3630371384c00339d9964cad03e8526ed5428d Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Sep 2023 14:39:48 +0200 Subject: [PATCH 03/10] add dat_dim --- modules/Skill/Skill.R | 15 +++++++++++++++ recipes/tests/recipe_multimodel_seasonal_aho.yml | 4 +++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index a6ae6d48..9cf3a1d0 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -84,6 +84,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { # correct.") # } time_dim <- 'syear' + dat_dim <- 'dat' memb_dim <- 'ensemble' metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) if (is.null(recipe$Analysis$ncores)) { @@ -122,6 +123,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { if (metric %in% c('rps', 'frps')) { skill <- RPS(data$hcst$data, data$obs$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, Fair = Fair, cross.val = cross.val, @@ -137,6 +139,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(data$hcst$data, data$obs$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, Fair = Fair, cross.val = cross.val, @@ -149,6 +152,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { } else if (metric == 'bss10') { skill <- RPSS(data$hcst$data, data$obs$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, prob_thresholds = 0.1, cross.val = cross.val, @@ -162,6 +166,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { } else if (metric == 'bss90') { skill <- RPSS(data$hcst$data, data$obs$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, prob_thresholds = 0.9, cross.val = cross.val, @@ -175,6 +180,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { } else if (metric %in% c('crps', 'fcrps')) { skill <- CRPS(data$hcst$data, data$obs$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) @@ -188,6 +194,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { } else if (metric %in% c('crpss', 'fcrpss')) { skill <- CRPSS(data$hcst$data, data$obs$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) @@ -215,11 +222,13 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { (recipe$Analysis$Workflow$Anomalies$compute)) { skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, ncores = ncores) } else { skill <- Bias(data$hcst$data, data$obs$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, ncores = ncores) } @@ -231,11 +240,13 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { (recipe$Analysis$Workflow$Anomalies$compute)) { skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, ncores = ncores) } else { skill <- AbsBiasSS(data$hcst$data, data$obs$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, ncores = ncores) } @@ -251,6 +262,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { dat_dim = 'dat', time_dim = time_dim, method = 'pearson', + dat_dim = dat_dim, memb_dim = memb_dim, memb = memb, conf = F, @@ -267,6 +279,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { skill <- RMSSS(data$hcst$data, data$obs$data, dat_dim = 'dat', time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, pval = FALSE, sign = TRUE, @@ -280,6 +293,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { } else if (metric == 'mse') { skill <- MSE(data$hcst$data, data$obs$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, dat_dim = 'dat', comp_dim = NULL, @@ -292,6 +306,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') { } else if (metric == 'msss') { skill <- MSSS(data$hcst$data, data$obs$data, time_dim = time_dim, + dat_dim = dat_dim, memb_dim = memb_dim, dat_dim = 'dat', sign = TRUE, diff --git a/recipes/tests/recipe_multimodel_seasonal_aho.yml b/recipes/tests/recipe_multimodel_seasonal_aho.yml index 2696b2b6..d87394dd 100644 --- a/recipes/tests/recipe_multimodel_seasonal_aho.yml +++ b/recipes/tests/recipe_multimodel_seasonal_aho.yml @@ -31,13 +31,15 @@ Analysis: # or CDO-accepted grid. Workflow: Calibration: - method: mse_min # Mandatory, str: Calibration method. + method: none #mse_min # Mandatory, str: Calibration method. save: none Anomalies: compute: no # Mandatory, bool: Either yes/true or no/false cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false + save: none Skill: metric: RPSS CRPSS # Mandatory, str: List of skill metrics. + save: none Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds # for quantiles and probability categories. # Each set of thresholds should be -- GitLab From 30ce1e2681e26ba80a0188ecd29225f738710ec4 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 13 Sep 2023 11:17:13 +0200 Subject: [PATCH 04/10] Some fixes and TODO for multimodel --- modules/Anomalies/Anomalies.R | 1 + modules/Loading/Loading.R | 10 ++- modules/Loading/join_datasets.R | 10 +-- modules/Visualization/R/plot_ensemble_mean.R | 1 + modules/Visualization/R/plot_skill_metrics.R | 1 + .../tests/recipe_multimodel_seasonal_aho.yml | 16 ++--- .../recipe_multimodel_seasonal_aho_single.yml | 61 +++++++++++++++++++ 7 files changed, 87 insertions(+), 13 deletions(-) create mode 100644 recipes/tests/recipe_multimodel_seasonal_aho_single.yml diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 23014cbd..4d9cfdc1 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -21,6 +21,7 @@ compute_anomalies <- function(recipe, data) { original_dims <- data$hcst$dim # Compute anomalies + ##TODO: all NAs returned anom <- CST_Anomaly(data$hcst, data$obs, cross = cross, memb = TRUE, diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 6c8a6192..38e84c71 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -38,7 +38,15 @@ load_datasets <- function(recipe) { ## Joining models and/or creating multimodel output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members) - + + ##TODO: Adjust s2dv_cube + ### $dims + output$hcst$dims <- dim(output$hcst$data) + output$obs$dims <- dim(output$obs$data) + if (!is.null(output$fcst)) { + output$fcst$dims <- dim(output$fcst$data) + } + } return(output) # list(hcst = hcst, fcst = fcst, obs = obs) diff --git a/modules/Loading/join_datasets.R b/modules/Loading/join_datasets.R index 623cf0f7..1bacb619 100644 --- a/modules/Loading/join_datasets.R +++ b/modules/Loading/join_datasets.R @@ -18,9 +18,11 @@ join_datasets <- function(data, multimodel, n_members) { n_models <- length(data) max_members <- sum(n_members[[exp]]) } else if (identical(multimodel,'both')) { ## Individual models and multimodel - n_models <- length(data)+1 + n_models <- length(data) + 1 max_members <- sum(n_members[[exp]]) - } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both".')} + } else { + stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both".') + } dims <- dim(data[[1]][[exp]]$data) dims['dat'] <- n_models dims['ensemble'] <- max_members @@ -36,7 +38,7 @@ join_datasets <- function(data, multimodel, n_members) { ## Filling the multimodel if (isTRUE(multimodel)) { ## Multimodel output[[exp]]$data <- CSTools::MergeDims(data = output[[exp]]$data, - merge_dims = c('ensemble','dat'), + merge_dims = c('ensemble', 'dat'), rename_dim = 'ensemble', na.rm = TRUE) output[[exp]]$data <- s2dv::InsertDim(data = output[[exp]]$data, @@ -46,7 +48,7 @@ join_datasets <- function(data, multimodel, n_members) { } else if (identical(multimodel,'both')) { ## Individual models and multimodel output[[exp]]$data[n_models, , , , , , , , 1:max_members] <- CSTools::MergeDims(data = output[[exp]]$data, - merge_dims = c('ensemble','dat'), + merge_dims = c('ensemble', 'dat'), rename_dim = 'ensemble', na.rm = TRUE) } diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 07e35706..37f97ffd 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -60,6 +60,7 @@ plot_ensemble_mean <- function(recipe, fcst, outdir) { for (i_syear in start_date) { if (length(start_date) == 1) { i_var_ens_mean <- var_ens_mean[which(start_date == i_syear), , , ] + ## TODO: Consider multimodel here outfile <- paste0(outdir[[var]], "forecast_ensemble_mean-", start_date) } else { i_var_ens_mean <- var_ens_mean[which(start_date == i_syear), , , ] diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index 76dae774..750e1087 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -112,6 +112,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_sup <- colorbar[length(colorbar)] } # Reorder dimensions +browser(skipCalls=1) skill <- Reorder(skill, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, # retrieve it and reorder its dimensions. diff --git a/recipes/tests/recipe_multimodel_seasonal_aho.yml b/recipes/tests/recipe_multimodel_seasonal_aho.yml index d87394dd..6c2d2965 100644 --- a/recipes/tests/recipe_multimodel_seasonal_aho.yml +++ b/recipes/tests/recipe_multimodel_seasonal_aho.yml @@ -16,10 +16,10 @@ Analysis: Time: sdate: '1101' # Mandatory, int: Start date, 'mmdd' fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY' - hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY' + hcst_start: '2013' # Mandatory, int: Hindcast initialization start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY' ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”. - ftime_max: 2 # Mandatory, int: Last forecast time step in months. Starts at “1”. + ftime_max: 5 # Mandatory, int: Last forecast time step in months. Starts at “1”. Region: latmin: 0 # Mandatory, int: minimum latitude latmax: 3 # Mandatory, int: maximum latitude @@ -31,22 +31,22 @@ Analysis: # or CDO-accepted grid. Workflow: Calibration: - method: none #mse_min # Mandatory, str: Calibration method. - save: none + method: mse_min # Mandatory, str: Calibration method. + save: none #all Anomalies: - compute: no # Mandatory, bool: Either yes/true or no/false + compute: yes # Mandatory, bool: Either yes/true or no/false cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false save: none Skill: metric: RPSS CRPSS # Mandatory, str: List of skill metrics. - save: none + save: none #all Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds # for quantiles and probability categories. # Each set of thresholds should be # enclosed within brackets. - save: none + save: none #all Visualization: - plots: forecast_ensemble_mean + plots: forecast_ensemble_mean most_likely_terciles #skill_metrics Indicators: index: no # This feature is not implemented yet ncores: 10 # Optional, int: number of cores to be used in parallel computation. diff --git a/recipes/tests/recipe_multimodel_seasonal_aho_single.yml b/recipes/tests/recipe_multimodel_seasonal_aho_single.yml new file mode 100644 index 00000000..715da1e0 --- /dev/null +++ b/recipes/tests/recipe_multimodel_seasonal_aho_single.yml @@ -0,0 +1,61 @@ +Description: + Author: An-Chi Ho + Info: Test for one seasonal forecast system (only hindcast) + +Analysis: + Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal' + Variables: + name: tas # Mandatory, str: variable name in /esarchive/ + freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean' + Datasets: + System: + name: ECMWF-SEAS5 # Mandatory, str: System name. + Multimodel: no # Mandatory, bool: Either yes/true or no/false or both + Reference: + name: ERA5 # Mandatory, str: Reference name. + Time: + sdate: '1101' # Mandatory, int: Start date, 'mmdd' + fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY' + hcst_start: '2013' # Mandatory, int: Hindcast initialization start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY' + ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”. + ftime_max: 5 # Mandatory, int: Last forecast time step in months. Starts at “1”. + Region: + latmin: 0 # Mandatory, int: minimum latitude + latmax: 3 # Mandatory, int: maximum latitude + lonmin: 0 # Mandatory, int: minimum longitude + lonmax: 2 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. + type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. + Workflow: + Calibration: + method: mse_min # Mandatory, str: Calibration method. + save: none + Anomalies: + compute: yes # Mandatory, bool: Either yes/true or no/false + cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false + save: none + Skill: + metric: RPSS CRPSS # Mandatory, str: List of skill metrics. + save: none + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds + # for quantiles and probability categories. # Each set of thresholds should be + # enclosed within brackets. + save: none + Visualization: + plots: forecast_ensemble_mean + Indicators: + index: no # This feature is not implemented yet + ncores: 10 # Optional, int: number of cores to be used in parallel computation. + # If left empty, defaults to 1. + remove_NAs: TRUE # Optional, bool: Whether to remove NAs. + # If left empty, defaults to FALSE. + Output_format: S2S4E # This feature is not implemented yet +Run: + Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'. + Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'. + output_dir: /esarchive/scratch/aho/tmp/auto-s2s_outputs # Mandatory, str: output directory + code_dir: /esarchive/scratch/aho/git/auto-s2s/ -- GitLab From 45532d5d547ec84b5b5a01c8f9cb32917b0115e9 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 13 Sep 2023 11:21:41 +0200 Subject: [PATCH 05/10] Move helper functions to R/ --- modules/Loading/Loading.R | 2 +- modules/Loading/Loading_decadal.R | 4 ++-- modules/Loading/{ => R}/helper_loading_decadal.R | 0 modules/Loading/{ => R}/join_datasets.R | 0 modules/Visualization/R/plot_skill_metrics.R | 1 - 5 files changed, 3 insertions(+), 4 deletions(-) rename modules/Loading/{ => R}/helper_loading_decadal.R (100%) rename modules/Loading/{ => R}/join_datasets.R (100%) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 38e84c71..9c259722 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -1,7 +1,7 @@ ## TODO: remove paths to personal scratchs source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") # Load required libraries/funs -source('modules/Loading/join_datasets.R') +source('modules/Loading/R/join_datasets.R') source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index c688545a..8a07cb8f 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -7,8 +7,8 @@ ## TODO: remove paths to personal scratchs source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") # Load required libraries/funs -source("modules/Loading/helper_loading_decadal.R") -source('modules/Loading/join_datasets.R') +source("modules/Loading/R/helper_loading_decadal.R") +source('modules/Loading/R/join_datasets.R') source("modules/Loading/R/dates2load.R") source("modules/Loading/R/check_latlon.R") source("modules/Loading/R/get_timeidx.R") diff --git a/modules/Loading/helper_loading_decadal.R b/modules/Loading/R/helper_loading_decadal.R similarity index 100% rename from modules/Loading/helper_loading_decadal.R rename to modules/Loading/R/helper_loading_decadal.R diff --git a/modules/Loading/join_datasets.R b/modules/Loading/R/join_datasets.R similarity index 100% rename from modules/Loading/join_datasets.R rename to modules/Loading/R/join_datasets.R diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index 750e1087..76dae774 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -112,7 +112,6 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_sup <- colorbar[length(colorbar)] } # Reorder dimensions -browser(skipCalls=1) skill <- Reorder(skill, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, # retrieve it and reorder its dimensions. -- GitLab From ebbd1499f489c75de8415f0bfff3f28e269811fe Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 14 Sep 2023 11:39:40 +0200 Subject: [PATCH 06/10] Consider Compute() case; forecast ensemble mean na.rm = T for multi-model --- modules/Loading/Loading.R | 57 ++++++++++++------- modules/Loading/R/join_datasets.R | 4 ++ modules/Visualization/R/plot_ensemble_mean.R | 7 ++- .../recipe_multimodel_seasonal_aho_single.yml | 2 +- 4 files changed, 47 insertions(+), 23 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 9c259722..04a30f2d 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -8,7 +8,7 @@ source("modules/Loading/R/check_latlon.R") ## TODO: Move to prepare_outputs.R source("tools/libs.R") -load_datasets <- function(recipe) { +load_datasets <- function(recipe, retrieve = T) { if (length(recipe$Analysis$Datasets$System$name) == 1) { ## Only one model @@ -22,29 +22,44 @@ load_datasets <- function(recipe) { ##NOTE: ## if retrieve = T --> data is a list of models, each model contains a list of s2dv_cube. Try str(data, max.level=3, give.attr=F). - ## if retrieve = F --> must be a list be startR_cube --> unlist(data, recursive = F). 'dat', 'member' cannot be chunking dims. + ## if retrieve = F --> must be a list be startR_cube --> unlist(data, recursive = F). 'dat', 'ensemble' need to be target_dims. data <- list() n_members <- list(hcst = c(), fcst = c()) for (model in 1:length(recipe$Analysis$Datasets$System$name)) { recipe_single <- recipe recipe_single$Analysis$Datasets$System$name <- recipe_single$Analysis$Datasets$System$name[model] - data[[model]] <- load_single_datasets(recipe = recipe_single) - n_members$hcst[model] <- data[[model]]$hcst$dims['ensemble'] - if (!is.null(data[[model]]$fcst)) { - n_members$fcst[model] <- data[[model]]$fcst$dims['ensemble'] + data[[model]] <- load_single_datasets(recipe = recipe_single, retrieve = retrieve) + + if (retrieve) { + n_members$hcst[model] <- data[[model]]$hcst$dims['ensemble'] + if (!is.null(data[[model]]$fcst)) { + n_members$fcst[model] <- data[[model]]$fcst$dims['ensemble'] + } + } else { + n_members$hcst[model] <- attr(data[[model]]$hcst, 'Dimensions')['ensemble'] + if (!is.null(data[[model]]$fcst)) { + n_members$fcst[model] <- attr(data[[model]]$fcst, 'Dimensions')['ensemble'] + } } - } - - ## Joining models and/or creating multimodel - output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members) + + if (retrieve) { + ## Joining models and/or creating multimodel + output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members) + + ##TODO: Adjust s2dv_cube + ### $dims + output$hcst$dims <- dim(output$hcst$data) + output$obs$dims <- dim(output$obs$data) + if (!is.null(output$fcst)) { + output$fcst$dims <- dim(output$fcst$data) + } - ##TODO: Adjust s2dv_cube - ### $dims - output$hcst$dims <- dim(output$hcst$data) - output$obs$dims <- dim(output$obs$data) - if (!is.null(output$fcst)) { - output$fcst$dims <- dim(output$fcst$data) + } else { + # Unlist data to be a list of s2dv_cube objects + names(data) <- recipe$Analysis$Datasets$System$name + output <- unlist(data, recursive = F) + output$n_members <- n_members } } @@ -52,7 +67,7 @@ load_datasets <- function(recipe) { return(output) # list(hcst = hcst, fcst = fcst, obs = obs) } -load_single_datasets <- function(recipe) { +load_single_datasets <- function(recipe, retrieve = T) { # ------------------------------------------- # Set params ----------------------------------------- @@ -169,7 +184,7 @@ load_single_datasets <- function(recipe) { longitude = 'dat', time = 'file_date'), split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) + retrieve = retrieve) # Remove var_dir dimension if ("var_dir" %in% names(dim(hcst))) { @@ -233,7 +248,7 @@ load_single_datasets <- function(recipe) { longitude = 'dat', time = 'file_date'), split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) + retrieve = retrieve) if ("var_dir" %in% names(dim(fcst))) { fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") @@ -303,7 +318,7 @@ load_single_datasets <- function(recipe) { longitude = 'dat', time = 'file_date'), split_multiselected_dims = TRUE, - retrieve = TRUE) + retrieve = retrieve) } else if (store.freq %in% c("daily_mean", "daily")) { @@ -341,7 +356,7 @@ load_single_datasets <- function(recipe) { longitude = 'dat', time = 'file_date'), split_multiselected_dims = TRUE, - retrieve = TRUE) + retrieve = retrieve) } diff --git a/modules/Loading/R/join_datasets.R b/modules/Loading/R/join_datasets.R index 1bacb619..3690179e 100644 --- a/modules/Loading/R/join_datasets.R +++ b/modules/Loading/R/join_datasets.R @@ -26,11 +26,14 @@ join_datasets <- function(data, multimodel, n_members) { dims <- dim(data[[1]][[exp]]$data) dims['dat'] <- n_models dims['ensemble'] <- max_members + + #TODO: Consider Compute() case stopifnot(identical(names(dims), c('dat', 'var', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble'))) ## Creating empty models and multimodel output[[exp]]$data <- array(data = NA, dim = dims) ## Filling the models + #TODO: Consider Compute() case for (model in 1:length(data)) { output[[exp]]$data[model, , , , , , , , 1:n_members[[exp]][model]] <- data[[model]][[exp]]$data } @@ -46,6 +49,7 @@ join_datasets <- function(data, multimodel, n_members) { lendim = 1, name = 'dat') } else if (identical(multimodel,'both')) { ## Individual models and multimodel + #TODO: Consider Compute() case output[[exp]]$data[n_models, , , , , , , , 1:max_members] <- CSTools::MergeDims(data = output[[exp]]$data, merge_dims = c('ensemble', 'dat'), diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 37f97ffd..81686c06 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -21,7 +21,12 @@ plot_ensemble_mean <- function(recipe, fcst, outdir) { } # Compute ensemble mean - ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') + if (length(recipe$Analysis$Datasets$System$name) == 1) { + ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') + } else { + # multi-model + ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble', na.rm = TRUE) + } # Loop over variable dimension for (var in 1:fcst$dims[['var']]) { variable <- fcst$attrs$Variable$varName[[var]] diff --git a/recipes/tests/recipe_multimodel_seasonal_aho_single.yml b/recipes/tests/recipe_multimodel_seasonal_aho_single.yml index 715da1e0..5a8d2d3f 100644 --- a/recipes/tests/recipe_multimodel_seasonal_aho_single.yml +++ b/recipes/tests/recipe_multimodel_seasonal_aho_single.yml @@ -16,7 +16,7 @@ Analysis: Time: sdate: '1101' # Mandatory, int: Start date, 'mmdd' fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY' - hcst_start: '2013' # Mandatory, int: Hindcast initialization start year 'YYYY' + hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY' ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”. ftime_max: 5 # Mandatory, int: Last forecast time step in months. Starts at “1”. -- GitLab From a8b7e20d8de47c2d73bfe35f07df743608e37b71 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 15 Sep 2023 09:49:48 +0200 Subject: [PATCH 07/10] TODO: Add dat_dim for loop --- modules/Visualization/R/plot_ensemble_mean.R | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 81686c06..78bd3d05 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -20,6 +20,7 @@ plot_ensemble_mean <- function(recipe, fcst, outdir) { projection <- "cylindrical_equidistant" } +#TODO: Add a for loop for dat # Compute ensemble mean if (length(recipe$Analysis$Datasets$System$name) == 1) { ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') -- GitLab From 3622448bcbc54a96ebb968a074bf13b2a52ac518 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 15 Sep 2023 13:33:42 +0200 Subject: [PATCH 08/10] Correct hcst and hcst s2dv_cube --- modules/Loading/Loading.R | 53 +++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 04a30f2d..db5a41f1 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -44,18 +44,51 @@ load_datasets <- function(recipe, retrieve = T) { } if (retrieve) { - ## Joining models and/or creating multimodel + # Joining models and/or creating multimodel output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members) - ##TODO: Adjust s2dv_cube - ### $dims + # Adjust s2dv_cube + # hcst: + ## $dims output$hcst$dims <- dim(output$hcst$data) - output$obs$dims <- dim(output$obs$data) + ## $coords + if (isFALSE(multimodel)) { + output$hcst$coords$dat <- recipe$Analysis$Datasets$System$name + } else if (isTRUE(multimodel)) { + output$hcst$coords$dat <- 'multimodel' + } else if (multimodel == 'both') { + output$hcst$coords$dat <- c(recipe$Analysis$Datasets$System$name, 'multimodel') + } + output$hcst$coords$ensemble <- 1:dim(output$hcst$data)['ensemble'] + ## $attrs + output$hcst$attrs$Datasets <- recipe$Analysis$Datasets$System$name + tmp <- array(c(sapply(data, "[[", c('hcst', 'attrs', 'source_files'))), + dim = c(dim(output$hcst$attrs$source_files)[-1], + dat = length(recipe$Analysis$Datasets$System$name))) + output$hcst$attrs$source_files <- aperm(tmp, c(length(dim(tmp)), 1:(length(dim(tmp)) - 1))) + + # fcst: if (!is.null(output$fcst)) { + ## $dims output$fcst$dims <- dim(output$fcst$data) + ## $coords + if (isFALSE(multimodel)) { + output$fcst$coords$dat <- recipe$Analysis$Datasets$System$name + } else if (isTRUE(multimodel)) { + output$fcst$coords$dat <- 'multimodel' + } else if (multimodel == 'both') { + output$fcst$coords$dat <- c(recipe$Analysis$Datasets$System$name, 'multimodel') + } + output$fcst$coords$ensemble <- 1:dim(output$fcst$data)['ensemble'] + ## $attrs + output$fcst$attrs$Datasets <- recipe$Analysis$Datasets$System$name + tmp <- array(c(sapply(data, "[[", c('fcst', 'attrs', 'source_files'))), + dim = c(dim(output$fcst$attrs$source_files)[-1], + dat = length(recipe$Analysis$Datasets$System$name))) + output$fcst$attrs$source_files <- aperm(tmp, c(length(dim(tmp)), 1:(length(dim(tmp)) - 1))) } - - } else { + + } else { # retrieve = F # Unlist data to be a list of s2dv_cube objects names(data) <- recipe$Analysis$Datasets$System$name output <- unlist(data, recursive = F) @@ -161,7 +194,7 @@ load_single_datasets <- function(recipe, retrieve = T) { # Load hindcast #------------------------------------------------------------------- - hcst <- Start(dat = hcst.path, + hcst <- Start(dat = list(list(name = recipe$Analysis$Datasets$System$name, path = hcst.path)), var = variable, var_dir = var_dir_exp, file_date = sdates$hcst, @@ -225,7 +258,7 @@ load_single_datasets <- function(recipe, retrieve = T) { # with the daily case and the current version of startR not allowing # multiple dims split - fcst <- Start(dat = fcst.path, + fcst <- Start(dat = list(list(name = recipe$Analysis$Datasets$System$name, path = fcst.path)), var = variable, var_dir = var_dir_exp, var_dir_depends = 'var', @@ -298,7 +331,7 @@ load_single_datasets <- function(recipe, retrieve = T) { dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") dim(dates_file) <- dim(dates) - obs <- Start(dat = obs.path, + obs <- Start(dat = list(list(name = recipe$Analysis$Datasets$Reference$name, path = obs.path)), var = variable, var_dir = var_dir_obs, var_dir_depends = 'var', @@ -331,7 +364,7 @@ load_single_datasets <- function(recipe, retrieve = T) { # Restore correct dimensions dim(dates) <- dim(dates_file) - obs <- Start(dat = obs.path, + obs <- Start(dat = list(list(name = recipe$Analysis$Datasets$Reference$name, path = obs.path)), var = variable, var_dir = var_dir_obs, var_dir_depends = 'var', -- GitLab From ce19ae65ab41adea995f3465213a453b732c7336 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 19 Sep 2023 15:34:45 +0200 Subject: [PATCH 09/10] Make Loading.R and join_datasets() work with Compute cases --- modules/Loading/Loading.R | 2 +- modules/Loading/R/join_datasets.R | 83 +++++++++++++------ .../tests/recipe_multimodel_seasonal_aho.yml | 2 +- 3 files changed, 58 insertions(+), 29 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index db5a41f1..1394eb1e 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -21,7 +21,7 @@ load_datasets <- function(recipe, retrieve = T) { # Loading each model individually ##NOTE: - ## if retrieve = T --> data is a list of models, each model contains a list of s2dv_cube. Try str(data, max.level=3, give.attr=F). + ## if retrieve = T --> data is a list of models, each model contains a list of s2dv_cube. Try str(data, max.level = 3, give.attr = F). ## if retrieve = F --> must be a list be startR_cube --> unlist(data, recursive = F). 'dat', 'ensemble' need to be target_dims. data <- list() n_members <- list(hcst = c(), fcst = c()) diff --git a/modules/Loading/R/join_datasets.R b/modules/Loading/R/join_datasets.R index 3690179e..b7a5d17c 100644 --- a/modules/Loading/R/join_datasets.R +++ b/modules/Loading/R/join_datasets.R @@ -1,23 +1,22 @@ join_datasets <- function(data, multimodel, n_members) { ## Taking the first model as "template" - ##TODO: the s2dv_cube has only the first model's info output <- data[[1]] if (is.null(output$fcst$data)) { exps <- 'hcst' - } else {exps <- c('hcst','fcst')} + } else {exps <- c('hcst', 'fcst')} for (exp in exps) { - ## New dimensions of the arrays + # New dimensions of the arrays if (isFALSE(multimodel)) { ## Individual models n_models <- length(data) max_members <- max(n_members[[exp]]) } else if (isTRUE(multimodel)) { ## Multimodel n_models <- length(data) max_members <- sum(n_members[[exp]]) - } else if (identical(multimodel,'both')) { ## Individual models and multimodel + } else if (identical(multimodel, 'both')) { ## Individual models and multimodel n_models <- length(data) + 1 max_members <- sum(n_members[[exp]]) } else { @@ -27,35 +26,65 @@ join_datasets <- function(data, multimodel, n_members) { dims['dat'] <- n_models dims['ensemble'] <- max_members - #TODO: Consider Compute() case - stopifnot(identical(names(dims), c('dat', 'var', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble'))) - - ## Creating empty models and multimodel + # Creating array for models and/or multimodel output[[exp]]$data <- array(data = NA, dim = dims) + ## Filling the models - #TODO: Consider Compute() case - for (model in 1:length(data)) { - output[[exp]]$data[model, , , , , , , , 1:n_members[[exp]][model]] <- data[[model]][[exp]]$data + fill_data <- function(data, n_members, dims) { + # data: a list of hcst/fcst$data from each model + # n_members: a vector of members of each model + # dims: c(dat = n_models, ensemble = max_members) + + .fill_data <- function(data, n_members, dims) { + output <- rep(NA, length = dims['ensemble']) + output[1:n_members] <- data + return(output) + } + + data_list <- vector('list', length = length(data)) + for (i_model in 1:length(data)) { + data_list[[i_model]] <- multiApply::Apply(data = data[[i_model]], + target_dims = 'ensemble', output_dims = 'ensemble', + fun = .fill_data, n_members = n_members[i_model], + dims = dims)$output1 + } + if (dims['dat'] > length(data)) { # Multimodel = 'both' + data_list[[as.numeric(dims['dat'])]] <- array(as.numeric(NA), dim = dim(data_list[[1]])) + } + output <- array(unlist(data_list), dim = c(dims['ensemble'], dim(data_list[[1]])[-c(1, 2)], dims['dat'])) + return(output) } + + data_exp_data <- lapply(data, '[[', c(exp, 'data')) + output[[exp]]$data <- fill_data(data = (data_exp_data), n_members = n_members[[exp]], + dims = dims[c('dat', 'ensemble')]) ## Filling the multimodel - if (isTRUE(multimodel)) { ## Multimodel - output[[exp]]$data <- CSTools::MergeDims(data = output[[exp]]$data, - merge_dims = c('ensemble', 'dat'), - rename_dim = 'ensemble', - na.rm = TRUE) - output[[exp]]$data <- s2dv::InsertDim(data = output[[exp]]$data, - posdim = 1, - lendim = 1, - name = 'dat') - } else if (identical(multimodel,'both')) { ## Individual models and multimodel - #TODO: Consider Compute() case - output[[exp]]$data[n_models, , , , , , , , 1:max_members] <- - CSTools::MergeDims(data = output[[exp]]$data, - merge_dims = c('ensemble', 'dat'), - rename_dim = 'ensemble', - na.rm = TRUE) + if (!isFALSE(multimodel)) { + data_multi <- CSTools::MergeDims(data = output[[exp]]$data, + merge_dims = c('ensemble', 'dat'), + rename_dim = 'ensemble', + na.rm = TRUE) + + if (isTRUE(multimodel)) { ## Multimodel + output[[exp]]$data <- data_multi + output[[exp]]$data <- s2dv::InsertDim(data = output[[exp]]$data, + posdim = 1, + lendim = 1, + name = 'dat') + + } else if (identical(multimodel, 'both')) { ## Individual models and multimodel + tmp <- asplit(output[[exp]]$data, which(names(dim(output[[exp]]$data)) == 'dat')) + tmp[[n_models]] <- data_multi + output[[exp]]$data <- array(unlist(tmp), dim = c(dim(tmp[[1]]), dat = n_models)) + } } + + # Reorder data back to standard + tmp <- match(c('dat', 'var', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble'), + names(dim(output[[exp]]$data))) + output[[exp]]$data <- aperm(output[[exp]]$data, tmp[!is.na(tmp)]) + } return(output) diff --git a/recipes/tests/recipe_multimodel_seasonal_aho.yml b/recipes/tests/recipe_multimodel_seasonal_aho.yml index 6c2d2965..f56119fc 100644 --- a/recipes/tests/recipe_multimodel_seasonal_aho.yml +++ b/recipes/tests/recipe_multimodel_seasonal_aho.yml @@ -49,7 +49,7 @@ Analysis: plots: forecast_ensemble_mean most_likely_terciles #skill_metrics Indicators: index: no # This feature is not implemented yet - ncores: 10 # Optional, int: number of cores to be used in parallel computation. + ncores: 1 # Optional, int: number of cores to be used in parallel computation. # If left empty, defaults to 1. remove_NAs: TRUE # Optional, bool: Whether to remove NAs. # If left empty, defaults to FALSE. -- GitLab From 4c0cb05d20b7a13747b9e5f52e102684d9d5e03f Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 27 Sep 2023 16:22:34 +0200 Subject: [PATCH 10/10] The loading script for calibrated data. Supposed to be used before join_datasets() --- modules/Loading/R/load_seasonal_calibrated.R | 265 +++++++++++++++++++ 1 file changed, 265 insertions(+) create mode 100644 modules/Loading/R/load_seasonal_calibrated.R diff --git a/modules/Loading/R/load_seasonal_calibrated.R b/modules/Loading/R/load_seasonal_calibrated.R new file mode 100644 index 00000000..f8e3ea63 --- /dev/null +++ b/modules/Loading/R/load_seasonal_calibrated.R @@ -0,0 +1,265 @@ +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") + +#TODO: Get 'output_module' from recipe (e.g., multimodel$readFrom) +load_seasonal_calibrated <- function(recipe, output_module) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + store.freq <- recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", output_module) + hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), + "$var$_$file_date$.nc") + hcst.path <- gsub(variable[1], "$var$", hcst.path) + fcst.path <- obs.path <- hcst.path + obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) + + # Load hindcast + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + ############################### + + # Convert hcst to s2dv_cube object + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- Start(dat = fcst.path, + var = variable, + file_date = sdates$fcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + ############################# + + # Convert fcst to s2dv_cube + fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$attrs$Dates[] <- + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + } else { + fcst <- NULL + } + + + + # Load reference + #------------------------------------------------------------------- + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. + dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + if (store.freq == "monthly_mean") { + + dates_file <- paste0(format(as.Date(dates, '%Y%m%d'), "%Y%m"), "01") + dim(dates_file) <- dim(dates) + + obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq %in% c("daily_mean", "daily")) { + + ############################# + #NOTE: NOT TESTED YET + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + ############################# + + } + + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + + # Check for consistency between hcst and obs grid + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + + # Print a summary of the loaded data for the user, for each object + if (recipe$Run$logger$threshold <= 2) { + data_summary(hcst, recipe) + data_summary(obs, recipe) + if (!is.null(fcst)) { + data_summary(fcst, recipe) + } + } + + info(recipe$Run$logger, + "##### CALIBRATED DATA LOADING COMPLETED SUCCESSFULLY #####") + + + return(list(hcst = hcst, fcst = fcst, obs = obs)) + +} -- GitLab