From ca0675183dbec6563e4c3037e6fe306d68ed50d8 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 20 Mar 2023 12:08:25 +0100
Subject: [PATCH 01/96] Add retrieve = False (WIP)

---
 modules/Loading/Loading.R                     | 230 +++++++++---------
 .../testing_recipes/recipe_seasonal-tests.yml |   4 +-
 2 files changed, 121 insertions(+), 113 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index e5dafd0d..a45dcfdb 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -6,8 +6,9 @@ source("modules/Loading/check_latlon.R")
 ## TODO: Move to prepare_outputs.R
 source("tools/libs.R")
 
-load_datasets <- function(recipe) {
+load_datasets <- function(recipe, retrieve = T) {
 
+  # retrieve <- F
   # -------------------------------------------
   # Set params -----------------------------------------
@@ -98,6 +99,8 @@
   # Load hindcast
   #-------------------------------------------------------------------
 
+  ## PROBLEM 1: Bug in Start() that causes problems if regrid params are NULL
+  ## PROBLEM 2: "Error: Incompatible classes: + " unknown origin
   hcst <- Start(dat = hcst.path,
                 var = variable,
                 file_date = sdates$hcst,
@@ -118,7 +121,7 @@
                               longitude = 'dat',
                               time = 'file_date'),
            split_multiselected_dims = split_multiselected_dims,
-           retrieve = TRUE)
+           retrieve = retrieve)
 
   if (recipe$Analysis$Variables$freq == "daily_mean") {
     # Adjusts dims for daily case, could be removed if startR allows
@@ -138,15 +141,16 @@
     dim(attr(hcst, "Variables")$common$time) <- default_time_dims
   }
 
-  # Convert hcst to s2dv_cube object
-  ## TODO: Give correct dimensions to $Dates$start
-  ## (sday, sweek, syear instead of file_date)
-  hcst <- as.s2dv_cube(hcst)
   # Adjust dates for models where the time stamp goes into the next month
   if (recipe$Analysis$Variables$freq == "monthly_mean") {
-    hcst$Dates$start[] <- hcst$Dates$start - seconds(exp_descrip$time_stamp_lag)
+    attr(hcst, "Variables")$common$time[] <-
+      attr(hcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag)
+  }
+  dates <- attr(hcst, "Variables")$common$time
+  # Convert hcst to s2dv_cube object
+  if (retrieve) {
+    hcst <- as.s2dv_cube(hcst)
   }
-
   # Load forecast
   #-------------------------------------------------------------------
   if (!is.null(recipe$Analysis$Time$fcst_year)) {
@@ -155,26 +159,26 @@
     # multiple dims split
 
     fcst <- Start(dat = fcst.path,
-                var = variable,
-                file_date = sdates$fcst,
-                time = idxs$fcst,
-                latitude = values(list(lats.min, lats.max)),
-                latitude_reorder = Sort(),
-                longitude = values(list(lons.min, lons.max)),
-                longitude_reorder = circularsort,
-                transform = regrid_params$fcst.transform,
-                transform_params = list(grid = regrid_params$fcst.gridtype,
-                                        method = regrid_params$fcst.gridmethod),
-                transform_vars = c('latitude', 'longitude'),
-                synonims = list(latitude = c('lat', 'latitude'),
-                                longitude = c('lon', 'longitude'),
-                                ensemble = c('member', 'ensemble')),
-                ensemble = indices(1:fcst.nmember),
-                return_vars = list(latitude = 'dat',
-                                   longitude = 'dat',
-                                   time = 'file_date'),
-                split_multiselected_dims = split_multiselected_dims,
-                retrieve = TRUE)
+                  var = variable,
+                  file_date = sdates$fcst,
+                  time = idxs$fcst,
+                  latitude = values(list(lats.min, lats.max)),
+                  latitude_reorder = Sort(),
+                  longitude = values(list(lons.min, lons.max)),
+                  longitude_reorder = circularsort,
+                  transform = regrid_params$fcst.transform,
+                  transform_params = list(grid = regrid_params$fcst.gridtype,
+                                          method = regrid_params$fcst.gridmethod),
+                  transform_vars = c('latitude', 'longitude'),
+                  synonims = list(latitude = c('lat', 'latitude'),
+                                  longitude = c('lon', 'longitude'),
+                                  ensemble = c('member', 'ensemble')),
+                  ensemble = indices(1:fcst.nmember),
+                  return_vars = list(latitude = 'dat',
+                                     longitude = 'dat',
+                                     time = 'file_date'),
+                  split_multiselected_dims = split_multiselected_dims,
+                  retrieve = retrieve)
 
     if (recipe$Analysis$Variables$freq == "daily_mean") {
       # Adjusts dims for daily case, could be removed if startR allows
@@ -186,6 +190,7 @@
       default_dims[names(dim(fcst))] <- dim(fcst)
       dim(fcst) <- default_dims
       # Change time attribute dimensions
+      ## TODO: Is this still necessary?
      default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
      names(dim(attr(fcst, "Variables")$common$time))[which(names(
        dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear"
@@ -194,14 +199,15 @@
      dim(attr(fcst, "Variables")$common$time) <- default_time_dims
     }
 
-    # Convert fcst to s2dv_cube
-    fcst <- as.s2dv_cube(fcst)
-    # Adjust dates for models where the time stamp goes into the next month
+    # Change dates in models with wrong timestamp
     if (recipe$Analysis$Variables$freq == "monthly_mean") {
-      fcst$Dates$start[] <-
-        fcst$Dates$start - seconds(exp_descrip$time_stamp_lag)
+      attr(fcst, "Variables")$common$time <-
+        attr(fcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag)
+    }
+    # Convert fcst to s2dv_cube
+    if (retrieve) {
+      fcst <- as.s2dv_cube(fcst)
     }
-
   } else {
     fcst <- NULL
   }
@@ -211,11 +217,7 @@
 
   # Obtain dates and date dimensions from the loaded hcst data to make sure
   # the corresponding observations are loaded correctly.
-  dates <- hcst$Dates$start
-  dim(dates) <- dim(Subset(hcst$data,
-                           along=c('dat', 'var',
-                                   'latitude', 'longitude', 'ensemble'),
-                           list(1,1,1,1,1), drop="selected"))
+  # dates <- hcst$Dates$start
 
   # Separate Start() call for monthly vs daily data
   if (store.freq == "monthly_mean") {
@@ -240,7 +242,7 @@
                            longitude = 'dat',
                            time = 'file_date'),
        split_multiselected_dims = TRUE,
-       retrieve = TRUE)
+       retrieve = retrieve)
 
  } else if (store.freq == "daily_mean") {
 
@@ -275,90 +277,96 @@
                            longitude = 'dat',
                            time = 'file_date'),
        split_multiselected_dims = TRUE,
-       retrieve = TRUE)
+       retrieve = retrieve)
  }
-
+  ## TODO: How to handle this with retrieve = F?
  # Adds ensemble dim to obs (for consistency with hcst/fcst)
  default_dims <- c(dat = 1, var = 1, sday = 1,
                    sweek = 1, syear = 1, time = 1,
                    latitude = 1, longitude = 1, ensemble = 1)
-  default_dims[names(dim(obs))] <- dim(obs)
-  dim(obs) <- default_dims
-
-  # Convert obs to s2dv_cube
-  obs <- as.s2dv_cube(obs)
-
-  # Check for consistency between hcst and obs grid
-  if (!(recipe$Analysis$Regrid$type == 'none')) {
-    if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
-      lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
-                             "Please check the original grids and the",
-                             "regrid parameters in your recipe.")
-      error(recipe$Run$logger, lat_error_msg)
-      hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
-                             "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
-      info(recipe$Run$logger, hcst_lat_msg)
-      obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
-                            "; Last obs lat: ", obs$lat[length(obs$lat)])
-      info(recipe$Run$logger, obs_lat_msg)
-      stop("hcst and obs don't share the same latitudes.")
-    }
-    if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
-      lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
-                             "Please check the original grids and the",
-                             "regrid parameters in your recipe.")
-      error(recipe$Run$logger, lon_error_msg)
-      hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
-                             "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
-      info(recipe$Run$logger, hcst_lon_msg)
-      obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
-                            "; Last obs lon: ", obs$lon[length(obs$lon)])
-      info(recipe$Run$logger, obs_lon_msg)
-      stop("hcst and obs don't share the same longitudes.")
-
-    }
+  if (retrieve) {
+    default_dims[names(dim(obs))] <- dim(obs)
+    dim(obs) <- default_dims
+    # Convert obs to s2dv_cube
+    obs <- as.s2dv_cube(obs)
+  } else {
+    default_dims[names(attr(obs, "Dimensions"))] <- attr(obs, "Dimensions")
+    attr(obs, "Dimensions") <- default_dims
  }
+  # Checks and data summary (only if retrieve = TRUE)
+  if (retrieve) {
+    # Check for consistency between hcst and obs grid
+    if (!(recipe$Analysis$Regrid$type == 'none')) {
+      if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
+        lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
+        error(recipe$Run$logger, lat_error_msg)
+        hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
+                               "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
+        info(recipe$Run$logger, hcst_lat_msg)
+        obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
+                              "; Last obs lat: ", obs$lat[length(obs$lat)])
+        info(recipe$Run$logger, obs_lat_msg)
+        stop("hcst and obs don't share the same latitudes.")
+      }
+      if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
+        lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
+        error(recipe$Run$logger, lon_error_msg)
+        hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
+                               "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
+        info(recipe$Run$logger, hcst_lon_msg)
+        obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
+                              "; Last obs lon: ", obs$lon[length(obs$lon)])
+        info(recipe$Run$logger, obs_lon_msg)
+        stop("hcst and obs don't share the same longitudes.")
 
+      }
+    }
 
-  # Remove negative values in accumulative variables
-  dictionary <- read_yaml("conf/variable-dictionary.yml")
-  if (dictionary$vars[[variable]]$accum) {
-    info(recipe$Run$logger,
-         "Accumulated variable: setting negative values to zero.")
-    obs$data[obs$data < 0] <- 0
-    hcst$data[hcst$data < 0] <- 0
-    if (!is.null(fcst)) {
-      fcst$data[fcst$data < 0] <- 0
+    # Remove negative values in accumulative variables
+    dictionary <- read_yaml("conf/variable-dictionary.yml")
+    if (dictionary$vars[[variable]]$accum) {
+      info(recipe$Run$logger,
+           "Accumulated variable: setting negative values to zero.")
+      obs$data[obs$data < 0] <- 0
+      hcst$data[hcst$data < 0] <- 0
+      if (!is.null(fcst)) {
+        fcst$data[fcst$data < 0] <- 0
+      }
    }
-  }
 
-  # Convert prlr from m/s to mm/day
-  ## TODO: Make a unit conversion function?
-  if (variable == "prlr") {
-    # Verify that the units are m/s and the same in obs and hcst
-    if (((attr(obs$Variable, "variable")$units == "m s-1") ||
-        (attr(obs$Variable, "variable")$units == "m s**-1")) &&
-        ((attr(hcst$Variable, "variable")$units == "m s-1") ||
-        (attr(hcst$Variable, "variable")$units == "m s**-1"))) {
-
-      info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.")
-      obs$data <- obs$data*86400*1000
-      attr(obs$Variable, "variable")$units <- "mm/day"
-      hcst$data <- hcst$data*86400*1000
-      attr(hcst$Variable, "variable")$units <- "mm/day"
+    # Convert prlr from m/s to mm/day
+    ## TODO: Make a unit conversion function?
+    if (variable == "prlr") {
+      # Verify that the units are m/s and the same in obs and hcst
+      if (((attr(obs$Variable, "variable")$units == "m s-1") ||
+          (attr(obs$Variable, "variable")$units == "m s**-1")) &&
+          ((attr(hcst$Variable, "variable")$units == "m s-1") ||
+          (attr(hcst$Variable, "variable")$units == "m s**-1"))) {
+
+        info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.")
+        obs$data <- obs$data*86400*1000
+        attr(obs$Variable, "variable")$units <- "mm/day"
+        hcst$data <- hcst$data*86400*1000
+        attr(hcst$Variable, "variable")$units <- "mm/day"
        if (!is.null(fcst)) {
-        fcst$data <- fcst$data*86400*1000
-        attr(fcst$Variable, "variable")$units <- "mm/day"
+          fcst$data <- fcst$data*86400*1000
+          attr(fcst$Variable, "variable")$units <- "mm/day"
+        }
      }
    }
-  }
 
-  # Compute anomalies if requested
-  # Print a summary of the loaded data for the user, for each object
-  if (recipe$Run$logger$threshold <= 2) {
-    data_summary(hcst, recipe)
-    data_summary(obs, recipe)
-    if (!is.null(fcst)) {
-      data_summary(fcst, recipe)
+    # Compute anomalies if requested
+    # Print a summary of the loaded data for the user, for each object
+    if (recipe$Run$logger$threshold <= 2) {
+      data_summary(hcst, recipe)
+      data_summary(obs, recipe)
+      if (!is.null(fcst)) {
+        data_summary(fcst, recipe)
+      }
    }
  }

diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
index b4e05fc4..5144e98e 100644
--- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
+++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
@@ -25,8 +25,8 @@ Analysis:
     lonmin: -10
     lonmax: 30
   Regrid:
-    method:
-    type: none
+    method: conservative
+    type: 'r360x180'
   Workflow:
     Calibration:
       method: raw

--
GitLab

From f8090af5cecced04e347f71b1bfe16c65f2a7cbb Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 20 Mar 2023 15:38:35 +0100
Subject: [PATCH 02/96] Correct date metadata dimensions

---
 modules/Loading/Loading.R | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index a45dcfdb..a3260f69 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -150,6 +150,7 @@
   # Convert hcst to s2dv_cube object
   if (retrieve) {
     hcst <- as.s2dv_cube(hcst)
+    dim(hcst$Dates$start) <- dim(dates)
   }
   # Load forecast
   #-------------------------------------------------------------------
@@ -197,11 +198,8 @@
      default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <-
        dim(attr(fcst, "Variables")$common$time)
      dim(attr(fcst, "Variables")$common$time) <- default_time_dims
-    }
-
-    # Change dates in models with wrong timestamp
-    if (recipe$Analysis$Variables$freq == "monthly_mean") {
-      attr(fcst, "Variables")$common$time <-
+    } else if (recipe$Analysis$Variables$freq == "monthly_mean") {
+      attr(fcst, "Variables")$common$time[] <-
        attr(fcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag)
    }
    # Convert fcst to s2dv_cube
@@ -221,7 +219,6 @@
  # Separate Start() call for monthly vs daily data
  if (store.freq == "monthly_mean") {
-
    dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m")
    dim(dates_file) <- dim(dates)
@@ -245,7 +242,6 @@
  } else if (store.freq == "daily_mean") {
-
    # Get year and month for file_date
    dates_file <- sapply(dates, format, '%Y%m')
    dim(dates_file) <- dim(dates)
@@ -285,11 +281,11 @@
                    sweek = 1, syear = 1, time = 1,
                    latitude = 1, longitude = 1, ensemble = 1)
  if (retrieve) {
-    default_dims[names(dim(obs))] <- dim(obs)
-    dim(obs) <- default_dims
    # Convert obs to s2dv_cube
+    dim(obs) <- c(dim(obs), "ensemble" = 1)
    obs <- as.s2dv_cube(obs)
  } else {
+    ## TODO: How to handle this with retrieve = F?
    default_dims[names(attr(obs, "Dimensions"))] <- attr(obs, "Dimensions")
    attr(obs, "Dimensions") <- default_dims
  }

--
GitLab
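[Editorial note between patches: the `retrieve` switch introduced above leans on a startR behaviour worth spelling out. With `retrieve = TRUE`, `Start()` reads the values into memory; with `retrieve = FALSE`, it only scans the files and returns a lightweight declaration whose metadata lives in attributes, ready to be fed to `Compute()` later. A minimal sketch of the declarative mode follows; the data path pattern, variable name and date values are hypothetical placeholders, not taken from these recipes.]

  library(startR)
  # Hypothetical file pattern; $var$ and $file_date$ are startR wildcards.
  exp_path <- "/path/to/exp/$var$_$file_date$.nc"
  hcst <- Start(dat = exp_path,
                var = "tas",
                file_date = c("19931101", "19941101"),
                time = "all",
                latitude = "all",
                longitude = "all",
                return_vars = list(time = "file_date"),
                retrieve = FALSE)
  # Metadata is available without retrieving the values, which is what
  # patches 01 and 02 rely on to build the date and dimension information:
  dates <- attr(hcst, "Variables")$common$time
  dims  <- attr(hcst, "Dimensions")

[This is why the loading module can keep producing `dates` and padded dimension vectors in both modes, while only the `retrieve = TRUE` branch can call `as.s2dv_cube()` or touch `$data`.]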
From 429cbc60cfa1317dfbf59a81a478c136a91df0ac Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Tue, 21 Mar 2023 08:55:22 +0100
Subject: [PATCH 03/96] Testing, code cleanup

---
 modules/Loading/Loading.R                     | 36 +++++++++----------
 .../testing_recipes/recipe_seasonal-tests.yml |  2 +-
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index a3260f69..37c1db41 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -91,7 +91,7 @@
   #-------------------------------------------------------------------
 
   circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max)
 
-  if (recipe$Analysis$Variables$freq == "monthly_mean"){
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
     split_multiselected_dims = TRUE
   } else {
     split_multiselected_dims = FALSE
@@ -99,8 +99,6 @@
   # Load hindcast
   #-------------------------------------------------------------------
 
-  ## PROBLEM 1: Bug in Start() that causes problems if regrid params are NULL
-  ## PROBLEM 2: "Error: Incompatible classes: + " unknown origin
   hcst <- Start(dat = hcst.path,
                 var = variable,
                 file_date = sdates$hcst,
@@ -221,7 +219,7 @@
  if (store.freq == "monthly_mean") {
    dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m")
    dim(dates_file) <- dim(dates)
-
+    
    obs <- Start(dat = obs.path,
                 var = variable,
                 file_date = dates_file,
@@ -265,13 +263,13 @@
                 longitude_reorder = circularsort,
                 transform = regrid_params$obs.transform,
                 transform_params = list(grid = regrid_params$obs.gridtype,
-                                        method = regrid_params$obs.gridmethod),
+                                         method = regrid_params$obs.gridmethod),
                 transform_vars = c('latitude', 'longitude'),
                 synonims = list(latitude = c('lat','latitude'),
-                                longitude = c('lon','longitude')),
+                                 longitude = c('lon','longitude')),
                 return_vars = list(latitude = 'dat',
-                                   longitude = 'dat',
-                                   time = 'file_date'),
+                                    longitude = 'dat',
+                                    time = 'file_date'),
                 split_multiselected_dims = TRUE,
                 retrieve = retrieve)
  }
@@ -288,39 +286,39 @@
  }
  # Checks and data summary (only if retrieve = TRUE)
+  ## TODO: See if these checks can be performed on the startR_cube metadata
  if (retrieve) {
    # Check for consistency between hcst and obs grid
    if (!(recipe$Analysis$Regrid$type == 'none')) {
      if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
        lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
-                              "Please check the original grids and the",
-                              "regrid parameters in your recipe.")
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
        error(recipe$Run$logger, lat_error_msg)
        hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
-                              "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
+                               "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
        info(recipe$Run$logger, hcst_lat_msg)
        obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
-                             "; Last obs lat: ", obs$lat[length(obs$lat)])
+                              "; Last obs lat: ", obs$lat[length(obs$lat)])
        info(recipe$Run$logger, obs_lat_msg)
        stop("hcst and obs don't share the same latitudes.")
      }
      if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
        lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
-                              "Please check the original grids and the",
-                              "regrid parameters in your recipe.")
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
        error(recipe$Run$logger, lon_error_msg)
        hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
-                              "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
+                               "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
        info(recipe$Run$logger, hcst_lon_msg)
        obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
-                             "; Last obs lon: ", obs$lon[length(obs$lon)])
+                              "; Last obs lon: ", obs$lon[length(obs$lon)])
        info(recipe$Run$logger, obs_lon_msg)
        stop("hcst and obs don't share the same longitudes.")

      }
    }
-
+    # Unit manipulation and conversion:
    # Remove negative values in accumulative variables
    dictionary <- read_yaml("conf/variable-dictionary.yml")
    if (dictionary$vars[[variable]]$accum) {
      info(recipe$Run$logger,
-          "Accumulated variable: setting negative values to zero.")
+           "Accumulated variable: setting negative values to zero.")
      obs$data[obs$data < 0] <- 0
      hcst$data[hcst$data < 0] <- 0
      if (!is.null(fcst)) {
        fcst$data[fcst$data < 0] <- 0
      }
    }
-
    # Convert prlr from m/s to mm/day
    ## TODO: Make a unit conversion function?
    if (variable == "prlr") {
@@ -354,8 +352,6 @@
        }
      }
    }
-
-    # Compute anomalies if requested
    # Print a summary of the loaded data for the user, for each object
    if (recipe$Run$logger$threshold <= 2) {
      data_summary(hcst, recipe)
      data_summary(obs, recipe)

diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
index 5144e98e..ff6ede09 100644
--- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
+++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
@@ -26,7 +26,7 @@ Analysis:
     lonmax: 30
   Regrid:
     method: conservative
-    type: 'r360x180'
+    type: to_system
   Workflow:
     Calibration:
       method: raw

--
GitLab

From 1d74faaf07b82d02b881aa48e4b9b4d858ce6c4b Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Fri, 24 Mar 2023 15:23:42 +0100
Subject: [PATCH 04/96] Source startR functions

---
 modules/Calibration/calibrate_with_compute.R | 176 +++++++++++++++++++
 modules/Loading/Loading.R                    |   4 +
 2 files changed, 180 insertions(+)
 create mode 100644 modules/Calibration/calibrate_with_compute.R

diff --git a/modules/Calibration/calibrate_with_compute.R b/modules/Calibration/calibrate_with_compute.R
new file mode 100644
index 00000000..f9b3cb4c
--- /dev/null
+++ b/modules/Calibration/calibrate_with_compute.R
@@ -0,0 +1,176 @@
+
+calibrate_with_compute <- function(recipe, hcst, obs, fcst = NULL) {
+  # Function that calibrates the hindcast using the method stated in the
+  # recipe. If the forecast is not null, it calibrates it as well.
+  #
+  # data: list of s2dv_cube objects containing the hcst, obs and fcst.
+  # Optionally, it may also have hcst.full_val and obs.full_val.
+  # recipe: object obtained when passing the .yml recipe file to read_yaml()
+
+  method <- tolower(recipe$Analysis$Workflow$Calibration$method)
+
+  if (method == "raw") {
+    warn(recipe$Run$logger,
+         paste("The Calibration module has been called, but the calibration",
+               "method in the recipe is 'raw'. The hcst and fcst will not be",
+               "calibrated."))
+    fcst_calibrated <- fcst
+    hcst_calibrated <- hcst
+    if (!is.null(hcst.full_val)) {
+      hcst_full_calibrated <- hcst.full_val
+    } else {
+      hcst_full_calibrated <- NULL
+    }
+    CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####"
+
+  } else {
+    ## TODO: Calibrate full fields when present
+    # Calibration function params
+    mm <- recipe$Analysis$Datasets$Multimodel
+    if (is.null(recipe$Analysis$ncores)) {
+      ncores <- 1
+    } else {
+      ncores <- recipe$Analysis$ncores
+    }
+    if (is.null(recipe$Analysis$remove_NAs)) {
+      na.rm = F
+    } else {
+      na.rm = recipe$Analysis$remove_NAs
+    }
+
+    CALIB_MSG <- "##### CALIBRATION COMPLETE #####"
+    # Replicate observation array for the multi-model case
+    ## TODO: Implement for obs.full_val
+    if (mm) {
+      obs.mm <- obs
+      for(dat in 1:(dim(hcst)['dat'][[1]]-1)) {
+        obs.mm <- abind(obs.mm, obs,
+                        along=which(names(dim(obs)) == 'dat'))
+      }
+      names(dim(obs.mm)) <- names(dim(obs))
+      obs$data <- obs.mm
+      remove(obs.mm)
+    }
+
+    if (recipe$Analysis$Variables$freq == "monthly_mean") {
+
+      CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based")
+      ## TODO: implement other calibration methods
+      if (!(method %in% CST_CALIB_METHODS)) {
+        error(recipe$Run$logger,
+              paste("Calibration method in the recipe is not available for",
+                    "monthly data."))
+        stop()
+      } else {
+        # Calibrate the hindcast
+        hcst_calibrated <- CST_Calibration(hcst, obs,
+                                           cal.method = method,
+                                           eval.method = "leave-one-out",
+                                           multi.model = mm,
+                                           na.fill = TRUE,
+                                           na.rm = na.rm,
+                                           apply_to = NULL,
+                                           alpha = NULL,
+                                           memb_dim = "ensemble",
+                                           sdate_dim = "syear",
+                                           ncores = ncores)
+        # In the case where anomalies have been computed, calibrate full values
+        if (!is.null(hcst.full_val)) {
+          hcst_full_calibrated <- CST_Calibration(hcst.full_val,
+                                                  obs.full_val,
+                                                  cal.method = method,
+                                                  eval.method = "leave-one-out",
+                                                  multi.model = mm,
+                                                  na.fill = TRUE,
+                                                  na.rm = na.rm,
+                                                  apply_to = NULL,
+                                                  memb_dim = "ensemble",
+                                                  sdate_dim = "syear",
+                                                  ncores = ncores)
+        } else {
+          hcst_full_calibrated <- NULL
+        }
+
+        # Calibrate the forecast
+        if (!is.null(fcst)) {
+          fcst_calibrated <- CST_Calibration(hcst, obs, fcst,
+                                             cal.method = method,
+                                             eval.method = "leave-one-out",
+                                             multi.model = mm,
+                                             na.fill = TRUE,
+                                             na.rm = na.rm,
+                                             apply_to = NULL,
+                                             alpha = NULL,
+                                             memb_dim = "ensemble",
+                                             sdate_dim = "syear",
+                                             ncores = ncores)
+        } else {
+          fcst_calibrated <- NULL
+        }
+      }
+    } else if (recipe$Analysis$Variables$freq == "daily_mean") {
+      # Daily data calibration using Quantile Mapping
+      if (!(method %in% c("qmap"))) {
+        error(recipe$Run$logger,
+              paste("Calibration method in the recipe is not available for",
+                    "daily data. Only quantile mapping 'qmap' is implemented."))
+        stop()
+      }
+      # Calibrate the hindcast
+      dim_order <- names(dim(hcst$data))
+      hcst_calibrated <- CST_QuantileMapping(hcst, obs,
+                                             exp_cor = NULL,
+                                             sdate_dim = "syear",
+                                             memb_dim = "ensemble",
+                                             # window_dim = "time",
+                                             method = "QUANT",
+                                             ncores = ncores,
+                                             na.rm = na.rm,
+                                             wet.day = F)
+      # Restore dimension order
+      hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
+      # In the case where anomalies have been computed, calibrate full values
+      if (!is.null(hcst.full_val)) {
+        hcst_full_calibrated <- CST_QuantileMapping(hcst.full_val,
+                                                    obs.full_val,
+                                                    exp_cor = NULL,
+                                                    sdate_dim = "syear",
+                                                    memb_dim = "ensemble",
+                                                    method = "QUANT",
+                                                    ncores = ncores,
+                                                    na.rm = na.rm,
+                                                    wet.day = F)
+      } else {
+        hcst_full_calibrated <- NULL
+      }
+
+      if (!is.null(fcst)) {
+        # Calibrate the forecast
+        fcst_calibrated <- CST_QuantileMapping(hcst, obs,
+                                               exp_cor = fcst,
+                                               sdate_dim = "syear",
+                                               memb_dim = "ensemble",
+                                               # window_dim = "time",
+                                               method = "QUANT",
+                                               ncores = ncores,
+                                               na.rm = na.rm,
+                                               wet.day = F)
+        # Restore dimension order
+        fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order)
+      } else {
+        fcst_calibrated <- NULL
+      }
+    }
+  }
+  info(recipe$Run$logger, CALIB_MSG)
+  ## TODO: Sort out returns
+  return_list <- list(hcst = hcst_calibrated,
+                      obs = obs,
+                      fcst = fcst_calibrated)
+  if (!is.null(hcst_full_calibrated)) {
+    return_list <- append(return_list,
+                          list(hcst.full_val = hcst_full_calibrated,
+                               obs.full_val = obs.full_val))
+  }
+  return(return_list)
+}
diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 94e16493..14e99b32 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -8,6 +8,10 @@ source("tools/libs.R")
 #TODO: remove these two lines when new as.s2dv_cube() is in CSTools
 source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R')
 source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R')
+## TODO: remove these lines when new version of startR is released
+source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Start.R")
+source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Utils.R")
+source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/zzz.R")
 
 load_datasets <- function(recipe, retrieve = T) {
 
--
GitLab

From c478ba09cce25ec2703e867bb9b2811551e72042 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 27 Mar 2023 17:02:44 +0200
Subject: [PATCH 05/96] Remove startR sourced functions

---
 modules/Loading/Loading.R | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 14e99b32..94e16493 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -8,10 +8,6 @@ source("tools/libs.R")
 #TODO: remove these two lines when new as.s2dv_cube() is in CSTools
 source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R')
 source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R')
-## TODO: remove these lines when new version of startR is released
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Start.R")
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Utils.R")
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/zzz.R")
 
 load_datasets <- function(recipe, retrieve = T) {
 
--
GitLab
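[Editorial note between patches: the next two commits converge on one idea that recurs through the rest of the series. Because CSTools offers each method both as an s2dv_cube function (`CST_Calibration`, `CST_QuantileMapping`) and as a plain-array function (`Calibration`, `QuantileMapping`), the module can build the argument list once and dispatch on the class of the input, so the same code path works for `retrieve = TRUE` data and for plain arrays inside `Compute()`. Reduced to its core (the method name here is illustrative):]

  method <- "bias"    # illustrative; taken from the recipe in the real code
  arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL,
                    cal.method = method, eval.method = "leave-one-out",
                    memb_dim = "ensemble", sdate_dim = "syear")
  if (inherits(data$hcst, "s2dv_cube")) {
    fun <- CST_Calibration   # s2dv_cube path, used when retrieve = TRUE
  } else {
    fun <- Calibration       # plain-array path, usable inside Compute()
  }
  hcst_calibrated <- do.call(fun, arguments)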
From 11bcc01c015b687a642232b45a8992a9a3e02802 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 26 Apr 2023 14:51:38 +0200
Subject: [PATCH 06/96] Testing Calibration with Compute()

---
 modules/Calibration/calibrate_with_compute.R  | 98 +++++++++----------
 modules/Loading/Loading.R                     |  3 -
 .../testing_recipes/recipe_seasonal-tests.yml |  2 +-
 modules/test_seasonal.R                       |  2 +-
 4 files changed, 51 insertions(+), 54 deletions(-)

diff --git a/modules/Calibration/calibrate_with_compute.R b/modules/Calibration/calibrate_with_compute.R
index f9b3cb4c..c44d31a3 100644
--- a/modules/Calibration/calibrate_with_compute.R
+++ b/modules/Calibration/calibrate_with_compute.R
@@ -10,17 +10,17 @@ calibrate_with_compute <- function(recipe, hcst, obs, fcst = NULL) {
   method <- tolower(recipe$Analysis$Workflow$Calibration$method)
 
   if (method == "raw") {
-    warn(recipe$Run$logger,
-         paste("The Calibration module has been called, but the calibration",
-               "method in the recipe is 'raw'. The hcst and fcst will not be",
-               "calibrated."))
+#    warn(recipe$Run$logger,
+#         paste("The Calibration module has been called, but the calibration",
+#               "method in the recipe is 'raw'. The hcst and fcst will not be",
+#               "calibrated."))
     fcst_calibrated <- fcst
     hcst_calibrated <- hcst
-    if (!is.null(hcst.full_val)) {
-      hcst_full_calibrated <- hcst.full_val
-    } else {
-      hcst_full_calibrated <- NULL
-    }
+    # if (!is.null(data$hcst.full_val)) {
+    #   hcst_full_calibrated <- data$hcst.full_val
+    # } else {
+    #   hcst_full_calibrated <- NULL
+    # }
     CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####"
 
   } else {
@@ -47,7 +47,7 @@
        obs.mm <- abind(obs.mm, obs,
                        along=which(names(dim(obs)) == 'dat'))
      }
-      names(dim(obs.mm)) <- names(dim(obs))
+      names(dim(obs.mm)) <- names(dim(obs$data))
      obs$data <- obs.mm
      remove(obs.mm)
    }
@@ -74,22 +74,22 @@
                                           memb_dim = "ensemble",
                                           sdate_dim = "syear",
                                           ncores = ncores)
-        # In the case where anomalies have been computed, calibrate full values
-        if (!is.null(hcst.full_val)) {
-          hcst_full_calibrated <- CST_Calibration(hcst.full_val,
-                                                  obs.full_val,
-                                                  cal.method = method,
-                                                  eval.method = "leave-one-out",
-                                                  multi.model = mm,
-                                                  na.fill = TRUE,
-                                                  na.rm = na.rm,
-                                                  apply_to = NULL,
-                                                  memb_dim = "ensemble",
-                                                  sdate_dim = "syear",
-                                                  ncores = ncores)
-        } else {
-          hcst_full_calibrated <- NULL
-        }
+        # # In the case where anomalies have been computed, calibrate full values
+        # if (!is.null(data$hcst.full_val)) {
+        #   hcst_full_calibrated <- CST_Calibration(data$hcst.full_val,
+        #                                           data$obs.full_val,
+        #                                           cal.method = method,
+        #                                           eval.method = "leave-one-out",
+        #                                           multi.model = mm,
+        #                                           na.fill = TRUE,
+        #                                           na.rm = na.rm,
+        #                                           apply_to = NULL,
+        #                                           memb_dim = "ensemble",
+        #                                           sdate_dim = "syear",
+        #                                           ncores = ncores)
+        # } else {
+        #   hcst_full_calibrated <- NULL
+        # }
 
        # Calibrate the forecast
        if (!is.null(fcst)) {
@@ -128,21 +128,21 @@
                                             na.rm = na.rm,
                                             wet.day = F)
      # Restore dimension order
-      hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
+      hcst_calibrated <- Reorder(hcst_calibrated, dim_order)
      # In the case where anomalies have been computed, calibrate full values
-      if (!is.null(hcst.full_val)) {
-        hcst_full_calibrated <- CST_QuantileMapping(hcst.full_val,
-                                                    obs.full_val,
-                                                    exp_cor = NULL,
-                                                    sdate_dim = "syear",
-                                                    memb_dim = "ensemble",
-                                                    method = "QUANT",
-                                                    ncores = ncores,
-                                                    na.rm = na.rm,
-                                                    wet.day = F)
-      } else {
-        hcst_full_calibrated <- NULL
-      }
+      # if (!is.null(data$hcst.full_val)) {
+      #   hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
+      #                                               data$obs.full_val,
+      #                                               exp_cor = NULL,
+      #                                               sdate_dim = "syear",
+      #                                               memb_dim = "ensemble",
+      #                                               method = "QUANT",
+      #                                               ncores = ncores,
+      #                                               na.rm = na.rm,
+      #                                               wet.day = F)
+      # } else {
+      #   hcst_full_calibrated <- NULL
+      # }
 
      if (!is.null(fcst)) {
        # Calibrate the forecast
@@ -156,21 +156,21 @@
                                               na.rm = na.rm,
                                               wet.day = F)
        # Restore dimension order
-        fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order)
+        fcst_calibrated <- Reorder(fcst_calibrated, dim_order)
      } else {
        fcst_calibrated <- NULL
      }
    }
  }
-  info(recipe$Run$logger, CALIB_MSG)
+  # info(recipe$Run$logger, CALIB_MSG)
  ## TODO: Sort out returns
  return_list <- list(hcst = hcst_calibrated,
-                      obs = obs,
-                      fcst = fcst_calibrated)
-  if (!is.null(hcst_full_calibrated)) {
-    return_list <- append(return_list,
-                          list(hcst.full_val = hcst_full_calibrated,
-                               obs.full_val = obs.full_val))
-  }
+                      obs = obs)#,
+                      # fcst = fcst_calibrated)
+  # if (!is.null(hcst_full_calibrated)) {
+  #   return_list <- append(return_list,
+  #                         list(hcst.full_val = hcst_full_calibrated,
+  #                              obs.full_val = data$obs.full_val))
+  # }
  return(return_list)
 }
diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 94e16493..81f8c4a4 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -5,9 +5,6 @@ source("modules/Loading/dates2load.R")
 source("modules/Loading/check_latlon.R")
 ## TODO: Move to prepare_outputs.R
 source("tools/libs.R")
-#TODO: remove these two lines when new as.s2dv_cube() is in CSTools
-source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R')
-source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R')
 
 load_datasets <- function(recipe, retrieve = T) {
 
diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
index ff6ede09..d1279fb0 100644
--- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
+++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
@@ -45,5 +45,5 @@
 Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R
index b8541488..a6a15b62 100644
--- a/modules/test_seasonal.R
+++ b/modules/test_seasonal.R
@@ -9,7 +9,7 @@
 recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml"
 recipe <- prepare_outputs(recipe_file)
 
 # Load datasets
-data <- load_datasets(recipe)
+data <- load_datasets(recipe, retrieve = F)
 # Calibrate datasets
 calibrated_data <- calibrate_datasets(recipe, data)
 # Compute anomalies

--
GitLab

From 3933ff3fdd5ecfe8aaabbd89fc0c709345560601 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Fri, 26 May 2023 16:48:59 +0200
Subject: [PATCH 07/96] Modify Calibration module to work with Compute() (WIP)

---
 modules/Calibration/Calibration.R | 226 +++++++++++++++---------------
 1 file changed, 113 insertions(+), 113 deletions(-)

diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 899b1291..b8c4c3d5 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -1,30 +1,32 @@
-calibrate_datasets <- function(recipe, data) {
+calibrate_datasets <- function(recipe, hcst, obs, fcst = NULL) {
   # Function that calibrates the hindcast using the method stated in the
   # recipe. If the forecast is not null, it calibrates it as well.
   #
   # data: list of s2dv_cube objects containing the hcst, obs and fcst.
   # Optionally, it may also have hcst.full_val and obs.full_val.
   # recipe: object obtained when passing the .yml recipe file to read_yaml()
-
+  
   method <- tolower(recipe$Analysis$Workflow$Calibration$method)
 
   if (method == "raw") {
-    warn(recipe$Run$logger,
-         paste("The Calibration module has been called, but the calibration",
-               "method in the recipe is 'raw'. The hcst and fcst will not be",
-               "calibrated."))
-    fcst_calibrated <- data$fcst
-    hcst_calibrated <- data$hcst
-    if (!is.null(data$hcst.full_val)) {
-      hcst_full_calibrated <- data$hcst.full_val
-    } else {
-      hcst_full_calibrated <- NULL
+    if (inherits(hcst, "s2dv_cube")) {
+      warn(recipe$Run$logger,
+           paste("The Calibration module has been called, but the calibration",
+                 "method in the recipe is 'raw'. The hcst and fcst will not be",
+                 "calibrated."))
     }
+    ## TODO: Improve efficiency
+    fcst_calibrated <- fcst
+    hcst_calibrated <- hcst
+    # if (!is.null(data$hcst.full_val)) {
+    #   hcst_full_calibrated <- data$hcst.full_val
+    # } else {
+    #   hcst_full_calibrated <- NULL
+    # }
     CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####"
 
   } else {
-    ## TODO: Calibrate full fields when present
     # Calibration function params
     mm <- recipe$Analysis$Datasets$Multimodel
     if (is.null(recipe$Analysis$ncores)) {
@@ -40,137 +42,135 @@
 
     CALIB_MSG <- "##### CALIBRATION COMPLETE #####"
     # Replicate observation array for the multi-model case
-    ## TODO: Implement for obs.full_val
     if (mm) {
       obs.mm <- obs$data
-      for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) {
-        obs.mm <- abind(obs.mm, data$obs$data,
-                        along=which(names(dim(data$obs$data)) == 'dat'))
+      for(dat in 1:(dim(hcst$data)['dat'][[1]]-1)) {
+        obs.mm <- abind(obs.mm, obs$data,
+                        along=which(names(dim(obs$data)) == 'dat'))
       }
       names(dim(obs.mm)) <- names(dim(obs$data))
-      data$obs$data <- obs.mm
+      obs$data <- obs.mm
       remove(obs.mm)
     }
 
     if (recipe$Analysis$Variables$freq == "monthly_mean") {
 
       CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based")
-      ## TODO: implement other calibration methods
+      ## TODO: this belongs in the recipe checker
       if (!(method %in% CST_CALIB_METHODS)) {
        error(recipe$Run$logger,
              paste("Calibration method in the recipe is not available for",
                    "monthly data."))
        stop()
      } else {
-        # Calibrate the hindcast
-        hcst_calibrated <- CST_Calibration(data$hcst, data$obs,
-                                           cal.method = method,
-                                           eval.method = "leave-one-out",
-                                           multi.model = mm,
-                                           na.fill = TRUE,
-                                           na.rm = na.rm,
-                                           apply_to = NULL,
-                                           alpha = NULL,
-                                           memb_dim = "ensemble",
-                                           sdate_dim = "syear",
-                                           ncores = ncores)
-        # In the case where anomalies have been computed, calibrate full values
-        if (!is.null(data$hcst.full_val)) {
-          hcst_full_calibrated <- CST_Calibration(data$hcst.full_val,
-                                                  data$obs.full_val,
-                                                  cal.method = method,
-                                                  eval.method = "leave-one-out",
-                                                  multi.model = mm,
-                                                  na.fill = TRUE,
-                                                  na.rm = na.rm,
-                                                  apply_to = NULL,
-                                                  memb_dim = "ensemble",
-                                                  sdate_dim = "syear",
-                                                  ncores = ncores)
-        } else {
-          hcst_full_calibrated <- NULL
-        }
+        arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
+                          cal.method = method, eval.method = "leave-one-out",
+                          multi.model = mm, na.fill = TRUE, na.rm = na.rm,
+                          apply_to = NULL, alpha = NULL,
+                          memb_dim = "ensemble", sdate_dim = "syear",
+                          dat_dim = "dat", ncores = ncores)
+        if (inherits(hcst, "s2dv_cube")) {
+          fun <- CST_Calibration
+        } else {
+          fun <- Calibration
+        }
+        hcst_calibrated <- do.call(fun, arguments)
+        # # In the case where anomalies have been computed, calibrate full values
+        # if (!is.null(data$hcst.full_val)) {
+        #   hcst_full_calibrated <- CST_Calibration(data$hcst.full_val,
+        #                                           data$obs.full_val,
+        #                                           cal.method = method,
+        #                                           eval.method = "leave-one-out",
+        #                                           multi.model = mm,
+        #                                           na.fill = TRUE,
+        #                                           na.rm = na.rm,
+        #                                           apply_to = NULL,
+        #                                           memb_dim = "ensemble",
+        #                                           sdate_dim = "syear",
+        #                                           ncores = ncores)
+        # } else {
+        #   hcst_full_calibrated <- NULL
+        # }
 
-        # Calibrate the forecast
-        if (!is.null(data$fcst)) {
-          fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst,
-                                             cal.method = method,
-                                             eval.method = "leave-one-out",
-                                             multi.model = mm,
-                                             na.fill = TRUE,
-                                             na.rm = na.rm,
-                                             apply_to = NULL,
-                                             alpha = NULL,
-                                             memb_dim = "ensemble",
-                                             sdate_dim = "syear",
-                                             ncores = ncores)
+        # Calibrate the forecast
+        if (!is.null(fcst)) {
+          arguments <- list(exp = hcst, obs = obs, exp_cor = fcst,
+                            cal.method = method, eval.method = "leave-one-out",
+                            multi.model = mm, na.fill = TRUE, na.rm = na.rm,
+                            apply_to = NULL, alpha = NULL,
+                            memb_dim = "ensemble", sdate_dim = "syear",
+                            dat_dim = "dat", ncores = ncores)
+          fcst_calibrated <- do.call(fun, arguments)
        } else {
          fcst_calibrated <- NULL
        }
      }
    } else if (recipe$Analysis$Variables$freq == "daily_mean") {
+      ## TODO: Move to recipe checker
      # Daily data calibration using Quantile Mapping
-      if (!(method %in% c("qmap"))) {
-        error(recipe$Run$logger,
-              paste("Calibration method in the recipe is not available for",
-                    "daily data. Only quantile mapping 'qmap' is implemented."))
-        stop()
-      }
+      # if (!(method %in% c("qmap"))) {
+      #   error(recipe$Run$logger,
+      #         paste("Calibration method in the recipe is not available for",
+      #               "daily data. Only quantile mapping 'qmap' is implemented."))
+      #   stop()
+      # }
      # Calibrate the hindcast
-      dim_order <- names(dim(data$hcst$data))
-      hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs,
-                                             exp_cor = NULL,
-                                             sdate_dim = "syear",
-                                             memb_dim = "ensemble",
-                                             # window_dim = "time",
-                                             method = "QUANT",
-                                             ncores = ncores,
-                                             na.rm = na.rm,
-                                             wet.day = F)
-      # Restore dimension order
-      hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
-      # In the case where anomalies have been computed, calibrate full values
-      if (!is.null(data$hcst.full_val)) {
-        hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
-                                                    data$obs.full_val,
-                                                    exp_cor = NULL,
-                                                    sdate_dim = "syear",
-                                                    memb_dim = "ensemble",
-                                                    method = "QUANT",
-                                                    ncores = ncores,
-                                                    na.rm = na.rm,
-                                                    wet.day = F)
+      arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
+                        sdate_dim = "syear", memb_dim = "ensemble",
+                        method = "QUANT", ncores = ncores,
+                        na.rm = na.rm, wet.day = F)
+      if (inherits(hcst, "s2dv_cube")) {
+        ## TODO: Modify QuantileMapping to reorder dims?
+        dim_order <- names(hcst$dims)
+        fun <- CST_QuantileMapping
+        arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
+                          sdate_dim = "syear", memb_dim = "ensemble",
+                          method = "QUANT", ncores = ncores,
+                          na.rm = na.rm, wet.day = F)
      } else {
-        hcst_full_calibrated <- NULL
+        dim_order <- names(attr(data$hcst, "Dimensions"))
+        fun <- QuantileMapping
      }
-
-      if (!is.null(data$fcst)) {
+      hcst_calibrated <- do.call(fun, arguments)
+      # # Restore dimension order
+      # hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
+      # # In the case where anomalies have been computed, calibrate full values
+      # if (!is.null(data$hcst.full_val)) {
+      #   hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
+      #                                               data$obs.full_val,
+      #                                               exp_cor = NULL,
+      #                                               sdate_dim = "syear",
+      #                                               memb_dim = "ensemble",
+      #                                               method = "QUANT",
+      #                                               ncores = ncores,
+      #                                               na.rm = na.rm,
+      #                                               wet.day = F)
+      # } else {
+      #   hcst_full_calibrated <- NULL
+      # }
+      if (!is.null(fcst)) {
+        arguments <- list(exp = hcst, obs = obs, exp_cor = fcst,
+                          sdate_dim = "syear", memb_dim = "ensemble",
+                          method = "QUANT", ncores = ncores,
+                          na.rm = na.rm, wet.day = F)
        # Calibrate the forecast
-        fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs,
-                                               exp_cor = data$fcst,
-                                               sdate_dim = "syear",
-                                               memb_dim = "ensemble",
-                                               # window_dim = "time",
-                                               method = "QUANT",
-                                               ncores = ncores,
-                                               na.rm = na.rm,
-                                               wet.day = F)
+        fcst_calibrated <- do.call(fun, arguments)
        # Restore dimension order
-        fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order)
+        # fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order)
      } else {
        fcst_calibrated <- NULL
      }
    }
  }
-  info(recipe$Run$logger, CALIB_MSG)
+  # info(recipe$Run$logger, CALIB_MSG)
  ## TODO: Sort out returns
  return_list <- list(hcst = hcst_calibrated,
-                      obs = data$obs,
-                      fcst = fcst_calibrated)
-  if (!is.null(hcst_full_calibrated)) {
-    return_list <- append(return_list,
-                          list(hcst.full_val = hcst_full_calibrated,
-                               obs.full_val = data$obs.full_val))
-  }
+                      obs = obs) # ,
+                      # fcst = fcst_calibrated)
+  # if (!is.null(hcst_full_calibrated)) {
+  #   return_list <- append(return_list,
+  #                         list(hcst.full_val = hcst_full_calibrated,
+  #                              obs.full_val = data$obs.full_val))
+  # }
  return(return_list)
 }

--
GitLab

From f4ed1fd0f9803a17c90325d4c155e687b1b388a0 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Fri, 26 May 2023 16:49:31 +0200
Subject: [PATCH 08/96] Add functions to build the compute function, run the
 compute workflow and convert output to s2dv_cube (WIP)

---
 build_compute_workflow.R | 56 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 build_compute_workflow.R

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
new file mode 100644
index 00000000..c495b76f
--- /dev/null
+++ b/build_compute_workflow.R
@@ -0,0 +1,56 @@
+# Function to build a function to put inside compute
+build_compute_workflow <- function(recipe) {
+  ## TODO: Sort out system and order
+  ## Alternatively, this function could just be defined by the user
+  # Define function
+  my_compute_function = function(recipe, hcst, obs, fcst = NULL) {
+    add_calibration <- TRUE
+    if (add_calibration) {
+      data <- calibrate_datasets(recipe, hcst, obs, fcst)
+    }
+    return(data)
+  }
+  info(recipe$Run$logger, "##### COMPUTE FUNCTION DEFINED #####")
+  return(my_compute_function)
+}
+
+convert_to_s2dv_cube <- function(new_cube, original_cube) {
+  ## TODO: Make sure all the metadata is created correctly
+  attr(new_cube, "Variables") <- attr(original_cube, "Variables")
+  attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors")
+  class(new_cube) <- "startR_array"
+  new_cube <- as.s2dv_cube(new_cube)
+  return(new_cube)
+}
+
+run_compute_workflow <- function(recipe, data) {
+
+  # Step 1: Define the function
+  ## TODO: Sort out order
+  my_compute_function <- build_compute_workflow(recipe)
+  # Step 2: Define the call to compute()
+  ## TODO: Define the way the dimensions are split
+
+  step <- Step(fun = my_compute_function,
+               target_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
+                                  c('sday', 'sweek', 'syear')),
+               output_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
+                                  c('sday', 'sweek', 'syear')))
+  wf <- AddStep(inputs = list(hcst = data$hcst, obs = data$obs),
+                step,
+                recipe = recipe)
+  res <- Compute(wf$output2,
+                 chunks = list(latitude = 2,
+                               longitude = 2))
+
+  info(recipe$Run$logger,
+       "##### COMPUTE SECTION ENDED, CONVERTING TO s2dv_cube #####")
+  # Step 3: Convert result to s2dv_cube and return it
+  hcst <- convert_to_s2dv_cube(res$output1, data$hcst)
+  obs <- convert_to_s2dv_cube(res$output2, data$obs)
+  ## TODO: fcst
+  info(recipe$Run$logger,
+       "##### DATA RETURNED AS A LIST OF s2dv_cube OBJECTS #####")
+  return(list(hcst = hcst, obs = obs))
+}
+
--
GitLab
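[Editorial note between patches: for orientation, the startR pattern that `run_compute_workflow()` wraps — as the later patches in this series refine it — is, schematically, the one below. The dimension names follow the patches; the chunk counts and the use of named target/output dimension lists are illustrative of where the series ends up, not a fixed API recommendation.]

  step <- Step(fun = my_compute_function,
               target_dims = list(hcst = c('sday', 'sweek', 'syear', 'ensemble'),
                                  obs = c('sday', 'sweek', 'syear')),
               output_dims = list(hcst = c('sday', 'sweek', 'syear', 'ensemble'),
                                  obs = c('sday', 'sweek', 'syear')))
  wf <- AddStep(inputs = list(hcst = data$hcst, obs = data$obs),
                step, recipe = recipe)
  # Chunk along dimensions the function does not operate on; two chunks per
  # spatial dimension here is purely illustrative.
  res <- Compute(wf$hcst, chunks = list(latitude = 2, longitude = 2))

[Each chunk is processed independently with only its slice of the data in memory, which is what makes the `retrieve = FALSE` loading mode introduced in patch 01 pay off.]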
From 54b40e17627c5276f247d7321619323b516daec8 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Fri, 26 May 2023 16:57:09 +0200
Subject: [PATCH 09/96] Add TODOs

---
 build_compute_workflow.R | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index c495b76f..00a37e23 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -8,6 +8,9 @@ build_compute_workflow <- function(recipe) {
     if (add_calibration) {
       data <- calibrate_datasets(recipe, hcst, obs, fcst)
     }
+    ## TODO: Define what to return depending on functions.
+    ## Also, maybe return an additional variable to determine if the output
+    ## needs to be converted back to s2dv_cube or what.
     return(data)
   }
   info(recipe$Run$logger, "##### COMPUTE FUNCTION DEFINED #####")
   return(my_compute_function)
 }
 
 convert_to_s2dv_cube <- function(new_cube, original_cube) {
+  ## TODO: Make function more general?
   ## TODO: Make sure all the metadata is created correctly
   attr(new_cube, "Variables") <- attr(original_cube, "Variables")
   attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors")

--
GitLab

From 01ff5c16c9910e850ee00602326ac94d0631a613 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 29 May 2023 12:52:20 +0200
Subject: [PATCH 10/96] Try to use 'data' as argument

---
 build_compute_workflow.R          | 18 ++++++++++---
 modules/Calibration/Calibration.R | 42 ++++++++++++++-----------------
 2 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 00a37e23..2168f23b 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -3,10 +3,19 @@ build_compute_workflow <- function(recipe) {
   ## TODO: Sort out system and order
   ## Alternatively, this function could just be defined by the user
   # Define function
+  ## TODO: Define returns
   my_compute_function = function(recipe, hcst, obs, fcst = NULL) {
-    add_calibration <- TRUE
-    if (add_calibration) {
-      data <- calibrate_datasets(recipe, hcst, obs, fcst)
+    modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
+                                ", | |,")[[1]])
+    data <- list(hcst = hcst, obs = obs, fcst = fcst)
+    for (module in modules) {
+      if (module == "calibration") {
+        data <- calibrate_datasets(recipe, data)
+      } else if (module == "anomalies") {
+        data <- compute_anomalies(recipe, data)
+      } else if (module == "skill") {
+        skill_metrics <- compute_skill_metrics(recipe, data)
+      }
     }
     ## TODO: Define what to return depending on functions.
     ## Also, maybe return an additional variable to determine if the output
     ## needs to be converted back to s2dv_cube or what.
diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index b8c4c3d5..b8bf782a 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -1,5 +1,4 @@
-calibrate_datasets <- function(recipe, hcst, obs, fcst = NULL) {
+calibrate_datasets <- function(recipe, data) {
   # Function that calibrates the hindcast using the method stated in the
   # recipe. If the forecast is not null, it calibrates it as well.
   #
   # data: list of s2dv_cube objects containing the hcst, obs and fcst.
   # Optionally, it may also have hcst.full_val and obs.full_val.
   # recipe: object obtained when passing the .yml recipe file to read_yaml()
-
+  
   method <- tolower(recipe$Analysis$Workflow$Calibration$method)
 
   if (method == "raw") {
-    if (inherits(hcst, "s2dv_cube")) {
+    if (inherits(data$hcst, "s2dv_cube")) {
       warn(recipe$Run$logger,
            paste("The Calibration module has been called, but the calibration",
                  "method in the recipe is 'raw'. The hcst and fcst will not be",
                  "calibrated."))
     }
     ## TODO: Improve efficiency
-    fcst_calibrated <- fcst
-    hcst_calibrated <- hcst
+    fcst_calibrated <- data$fcst
+    hcst_calibrated <- data$hcst
     # if (!is.null(data$hcst.full_val)) {
     #   hcst_full_calibrated <- data$hcst.full_val
     # } else {
     #   hcst_full_calibrated <- NULL
     # }
     CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####"
 
   } else {
@@ -42,13 +41,14 @@
 
     CALIB_MSG <- "##### CALIBRATION COMPLETE #####"
     # Replicate observation array for the multi-model case
+    ## TODO: Adapt for Compute()
    if (mm) {
-      obs.mm <- obs$data
-      for(dat in 1:(dim(hcst$data)['dat'][[1]]-1)) {
-        obs.mm <- abind(obs.mm, obs$data,
-                        along=which(names(dim(obs$data)) == 'dat'))
+      obs.mm <- data$obs$data
+      for (dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) {
+        obs.mm <- abind(obs.mm, data$obs$data,
+                        along=which(names(dim(data$obs$data)) == 'dat'))
      }
-      names(dim(obs.mm)) <- names(dim(obs$data))
+      names(dim(obs.mm)) <- names(dim(data$obs$data))
      obs$data <- obs.mm
      remove(obs.mm)
    }
@@ -63,13 +63,13 @@
            paste("Calibration method in the recipe is not available for",
                  "monthly data."))
        stop()
      } else {
-        arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
+        arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL,
                          cal.method = method, eval.method = "leave-one-out",
                          multi.model = mm, na.fill = TRUE, na.rm = na.rm,
                          apply_to = NULL, alpha = NULL,
                          memb_dim = "ensemble", sdate_dim = "syear",
                          dat_dim = "dat", ncores = ncores)
-        if (inherits(hcst, "s2dv_cube")) {
+        if (inherits(data$hcst, "s2dv_cube")) {
          fun <- CST_Calibration
        } else {
          fun <- Calibration
        }
        hcst_calibrated <- do.call(fun, arguments)
@@ -93,8 +93,8 @@
 
        # Calibrate the forecast
-        if (!is.null(fcst)) {
-          arguments <- list(exp = hcst, obs = obs, exp_cor = fcst,
+        if (!is.null(data$fcst)) {
+          arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst,
                            cal.method = method, eval.method = "leave-one-out",
                            multi.model = mm, na.fill = TRUE, na.rm = na.rm,
                            apply_to = NULL, alpha = NULL,
                            memb_dim = "ensemble", sdate_dim = "syear",
                            dat_dim = "dat", ncores = ncores)
          fcst_calibrated <- do.call(fun, arguments)
        } else {
          fcst_calibrated <- NULL
        }
      }
    } else if (recipe$Analysis$Variables$freq == "daily_mean") {
      ## TODO: Move to recipe checker
      # Daily data calibration using Quantile Mapping
      # if (!(method %in% c("qmap"))) {
      #   error(recipe$Run$logger,
      #         paste("Calibration method in the recipe is not available for",
      #               "daily data. Only quantile mapping 'qmap' is implemented."))
      #   stop()
      # }
      # Calibrate the hindcast
-      arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
+      arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL,
                        sdate_dim = "syear", memb_dim = "ensemble",
                        method = "QUANT", ncores = ncores,
                        na.rm = na.rm, wet.day = F)
-      if (inherits(hcst, "s2dv_cube")) {
+      if (inherits(data$hcst, "s2dv_cube")) {
        ## TODO: Modify QuantileMapping to reorder dims?
        dim_order <- names(hcst$dims)
        fun <- CST_QuantileMapping
-        arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
-                          sdate_dim = "syear", memb_dim = "ensemble",
-                          method = "QUANT", ncores = ncores,
-                          na.rm = na.rm, wet.day = F)
      } else {
        dim_order <- names(attr(data$hcst, "Dimensions"))
        fun <- QuantileMapping
      }
      hcst_calibrated <- do.call(fun, arguments)
      # # Restore dimension order
      # hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
      # # In the case where anomalies have been computed, calibrate full values
      # if (!is.null(data$hcst.full_val)) {
      #   hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
      #                                               data$obs.full_val,
      #                                               exp_cor = NULL,
      #                                               sdate_dim = "syear",
      #                                               memb_dim = "ensemble",
      #                                               method = "QUANT",
      #                                               ncores = ncores,
      #                                               na.rm = na.rm,
      #                                               wet.day = F)
      # } else {
      #   hcst_full_calibrated <- NULL
      # }
-      if (!is.null(fcst)) {
-        arguments <- list(exp = hcst, obs = obs, exp_cor = fcst,
+      if (!is.null(data$fcst)) {
+        arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst,
                          sdate_dim = "syear", memb_dim = "ensemble",
                          method = "QUANT", ncores = ncores,
                          na.rm = na.rm, wet.day = F)
        # Calibrate the forecast
        fcst_calibrated <- do.call(fun, arguments)
        # Restore dimension order
        # fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order)
      } else {
        fcst_calibrated <- NULL
      }
    }
  }
  ## TODO: Sort out returns
  return_list <- list(hcst = hcst_calibrated,
-                      obs = obs) # ,
+                      obs = data$obs) # ,
                      # fcst = fcst_calibrated)
  # if (!is.null(hcst_full_calibrated)) {
  #   return_list <- append(return_list,
  #                         list(hcst.full_val = hcst_full_calibrated,
  #                              obs.full_val = data$obs.full_val))
  # }
  return(return_list)
 }

--
GitLab

From 278a994cb7ceb5b20f6d775ced0f7e14cb8cec20 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Tue, 30 May 2023 12:24:00 +0200
Subject: [PATCH 11/96] Test to retrieve metrics with Compute() (WIP)

---
 build_compute_workflow.R | 19 ++++++++++++++-----
 modules/Skill/Skill.R    | 23 ++++++++++++++++-------
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 2168f23b..b3e6ed0d 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -6,8 +6,11 @@ build_compute_workflow <- function(recipe) {
   my_compute_function = function(recipe, hcst, obs, fcst = NULL) {
     modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
                                 ", | |,")[[1]])
     data <- list(hcst = hcst, obs = obs, fcst = fcst)
+    skill_metrics <- NULL
+    ## TODO: Define returns?
+    return_list <- data
     for (module in modules) {
       if (module == "calibration") {
         data <- calibrate_datasets(recipe, data)
@@ -15,12 +18,13 @@
       } else if (module == "anomalies") {
         data <- compute_anomalies(recipe, data)
       } else if (module == "skill") {
         skill_metrics <- compute_skill_metrics(recipe, data)
+        # return_list <- c(return_list, skill_metrics = skill_metrics)
       }
     }
     ## TODO: Define what to return depending on functions.
     ## Also, maybe return an additional variable to determine if the output
     ## needs to be converted back to s2dv_cube or what.
-    return(data)
+    return(list(data$hcst, data$obs, skill_metrics))
   }
   info(recipe$Run$logger, "##### COMPUTE FUNCTION DEFINED #####")
   return(my_compute_function)
 }
@@ -31,6 +35,7 @@ convert_to_s2dv_cube <- function(new_cube, original_cube) {
   attr(new_cube, "Variables") <- attr(original_cube, "Variables")
   attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors")
+
   class(new_cube) <- "startR_array"
   new_cube <- as.s2dv_cube(new_cube)
   return(new_cube)
 }
@@ -44,15 +49,18 @@ run_compute_workflow <- function(recipe, data) {
   # Step 2: Define the call to compute()
   ## TODO: Define the way the dimensions are split
+  ## TODO: Add use attributes
+  ## TODO: Handle skill output
   step <- Step(fun = my_compute_function,
                target_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
                                   c('sday', 'sweek', 'syear')),
                output_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
-                                  c('sday', 'sweek', 'syear')))
+                                  c('sday', 'sweek', 'syear'),
+                                  c('metric', 'syear'))) #,
+                                  # c('sday', 'sweek', 'syear')))
   wf <- AddStep(inputs = list(hcst = data$hcst, obs = data$obs),
                 step,
                 recipe = recipe)
-  res <- Compute(wf$output2,
+  res <- Compute(wf$output1, #wf$output1
                  chunks = list(latitude = 2,
                                longitude = 2))
 
@@ -61,10 +69,11 @@
   # Step 3: Convert result to s2dv_cube and return it
   hcst <- convert_to_s2dv_cube(res$output1, data$hcst)
   obs <- convert_to_s2dv_cube(res$output2, data$obs)
+  skill_metrics <- res$output3
   ## TODO: fcst
+  ## TODO: skill
   info(recipe$Run$logger,
        "##### DATA RETURNED AS A LIST OF s2dv_cube OBJECTS #####")
-  return(list(hcst = hcst, obs = obs))
+  return(list(hcst = hcst, obs = obs, skill_metrics = skill_metrics))
 }
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 9f97e688..767cf7f7 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -118,7 +118,8 @@ compute_skill_metrics <- function(recipe, data) {
       skill_metrics[[ metric ]] <- skill
     # Ranked Probability Skill Score and Fair version
     } else if (metric %in% c('rpss', 'frpss')) {
-      skill <- RPSS(data$hcst$data, data$obs$data,
+      # skill <- RPSS(data$hcst$data, data$obs$data,
+      skill <- RPSS(data$hcst, data$obs,
                     time_dim = time_dim,
                     memb_dim = memb_dim,
                     Fair = Fair,
@@ -262,11 +263,12 @@
       # Compute SpecsVerification version of the metrics
       ## Retain _specs in metric name for clarity
       metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name
-      if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr',
-                               'rpss'))) {
-        warn(recipe$Run$logger,
-             "Some of the requested SpecsVerification metrics are not available.")
-      }
+      ## TODO: Move this to the recipe checker
+      # if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr',
+      #                          'rpss'))) {
+      #   warn(recipe$Run$logger,
+      #        "Some of the requested SpecsVerification metrics are not available.")
+      # }
       capture.output(
         skill <- Compute_verif_metrics(data$hcst$data, data$obs$data,
                                        skill_metrics = metric_name,
@@ -282,7 +284,14 @@
       skill_metrics[[ metric ]] <- skill
     }
   }
-  info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####")
+  if (inherits(data$hcst, "s2dv_cube")) {
+    info(recipe$Run$logger,
+         "##### SKILL METRIC COMPUTATION COMPLETE #####")
+  } else {
+    dimensions <- names(dim(skill_metrics[[1]]))
+    skill_metrics <- abind(skill_metrics, along = 0)
+    names(dim(skill_metrics)) <- c("metric", dimensions)
+  }
   return(skill_metrics)
 }

--
GitLab

From 52e003c1998f489572b158a060ca60bc24947cde Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Thu, 1 Jun 2023 17:15:52 +0200
Subject: [PATCH 12/96] Update modules and create input and output dims
 dynamically

---
 build_compute_workflow.R                      | 53 +++++++++++++------
 modules/Calibration/Calibration.R             |  4 +-
 modules/Skill/Skill.R                         | 53 +++------------
 modules/test_compute.R                        | 30 +++++++++++
 modules/test_seasonal.R                       |  3 +-
 .../atomic_recipes/recipe_system7c3s-tas.yml  |  5 +-
 6 files changed, 82 insertions(+), 66 deletions(-)
 create mode 100644 modules/test_compute.R

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index b3e6ed0d..83b26c77 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -9,8 +9,6 @@ build_compute_workflow <- function(recipe) {
                                 ", | |,")[[1]])
     data <- list(hcst = hcst, obs = obs, fcst = fcst)
     skill_metrics <- NULL
-    ## TODO: Define returns?
-    return_list <- data
     for (module in modules) {
       if (module == "calibration") {
         data <- calibrate_datasets(recipe, data)
@@ -43,37 +41,58 @@ run_compute_workflow <- function(recipe, data) {
 
+  modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
+                              ", | |,")[[1]])
   # Step 1: Define the function
   ## TODO: Sort out order
   my_compute_function <- build_compute_workflow(recipe)
   # Step 2: Define the call to compute()
-  ## TODO: Define the way the dimensions are split
+  # Create target dimensions
+  ## TODO: Refine this part
+  target_dims <- list(hcst = c('sday', 'sweek', 'syear', 'ensemble'),
+                      obs = c('sday', 'sweek', 'syear'))
+  if (!is.null(data$fcst)) {
+    target_dims <- c(target_dims,
+                     list(fcst = c('sday', 'sweek', 'syear', 'ensemble')))
+  }
+
+  # Create output dimensions
+  ## TODO: Add several conditions depending on user requests
+  output_dims <- list(hcst = c('sday', 'sweek', 'syear', 'ensemble'),
+                      obs = c('sday', 'sweek', 'syear'))
+  if (!is.null(data$fcst)) {
+    output_dims <- c(output_dims,
+                     list(fcst = c('sday', 'sweek', 'syear', 'ensemble')))
+  }
+  if ("skill" %in% modules) {
+    output_dims <- c(output_dims,
+                     list(skill = c('metric', 'syear')))
+  }
+  ## TODO: Create output dimensions dynamically
   ## TODO: Add use attributes
   ## TODO: Handle skill output
   step <- Step(fun = my_compute_function,
-               target_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
-                                  c('sday', 'sweek', 'syear')),
-               output_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
-                                  c('sday', 'sweek', 'syear'),
-                                  c('metric', 'syear'))) #,
-                                  # c('sday', 'sweek', 'syear')))
+               target_dims = target_dims,
+               output_dims = output_dims)
   wf <- AddStep(inputs = list(hcst = data$hcst, obs = data$obs),
                 step,
                 recipe = recipe)
-  res <- Compute(wf$output1, #wf$output1
-                 chunks = list(latitude = 2,
-                               longitude = 2))
+  res <- Compute(wf$hcst,
+                 chunks = recipe$Run$startR_workflow$chunk_along)
 
   info(recipe$Run$logger,
        "##### COMPUTE SECTION ENDED, CONVERTING TO s2dv_cube #####")
   # Step 3: Convert result to s2dv_cube and return it
-  hcst <- convert_to_s2dv_cube(res$output1, data$hcst)
-  obs <- convert_to_s2dv_cube(res$output2, data$obs)
-  skill_metrics <- res$output3
   ## TODO: fcst
-  ## TODO: skill
+  hcst <- convert_to_s2dv_cube(res$hcst, data$hcst)
+  obs <- convert_to_s2dv_cube(res$obs, data$obs)
+  ## TODO: Transform skill metrics back into a list
+  metric_list <- c("rpss", "rpss_significance") ## to be removed
+  skill <- .drop_dims(res$skill)
+  # skill_metrics <- apply(seq(dim(skill)[1]), function(x) {res$skill[x, , , ]})
+
   info(recipe$Run$logger,
-       "##### DATA RETURNED AS A LIST OF s2dv_cube OBJECTS #####")
+       "##### DATA RETURNED AS A NAMED LIST #####")
   return(list(hcst = hcst, obs = obs, skill_metrics = skill_metrics))
 }
diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 2222c2b1..4b6e00ca 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -68,7 +68,8 @@
                           multi.model = mm, na.fill = TRUE, na.rm = na.rm,
                           apply_to = NULL, alpha = NULL,
                           memb_dim = "ensemble", sdate_dim = "syear",
-                          dat_dim = "dat", ncores = ncores)
+                          ncores = ncores)
+                          # dat_dim = "dat", ncores = ncores)
         if (inherits(data$hcst, "s2dv_cube")) {
           fun <- CST_Calibration
         } else {
@@ -158,7 +159,6 @@
     }
   }
-  # info(recipe$Run$logger, CALIB_MSG)
   ## TODO: Sort out returns
   return_list <- list(hcst = hcst_calibrated,
                       obs = data$obs) # ,
                       # fcst = fcst_calibrated)
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index bcf3689b..619dc91b 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -17,45 +17,7 @@
 source("modules/Skill/R/tmp/GetProbs.R")
 source("modules/Skill/R/tmp/RandomWalkTest.R")
 source("modules/Skill/R/RPS_clim.R")
 source("modules/Skill/R/CRPS_clim.R")
-## TODO: Implement this in the future
-## Which parameter are required?
-# if (!("obs" %in% ls()) || is.null(obs)) {
-#   error(logger,
-#         "There is no object 'obs' in the global environment or it is NULL")
-# }
-# if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) {
-#   error(logger,
-#         "There is no object 'fcst' in the global environment or it is NULL")
-# }
-# if (!("hcst" %in% ls()) || is.null(hcst)) {
-#   error(logger,
-#         "There is no object 'hcst' in the global environment or it is NULL")
-# }
-# if (!("metric" %in% ls()) || is.null(metric)) {
-#   warn(logger,
-#        "Verification metric not found and it is set as 'EnsCorr'.")
-#   metric <- 'EnsCorr'
-# }
-# if (metric %in% c('FRPSS', 'RPSS')) {
-#   metric_fun <- "veriApply"
-#   metric_method <- "FairRpss"
-# } else if (metric %in% c("FCRPSS", "CRPSS")) {
-#   metric_fun <- "veriApply"
-# } else if (metric %in% c("EnsCorr", "EnsCor")) {
-#   metric_fun <- "veriApply"
-#   metric_method <- "EnsCorr"
-#   #...
-# } else { -# error(logger, "Unknown verification metric defined in the recipe.") -# metric_fun <- 'NotFound' -# } -# info(logger, paste("#-------------------------- ", "\n", -# " running Skill module ", "\n", -# " it can call ", metric_fun )) -# compute_skill_metrics <- function(recipe, data$hcst, obs, -# clim_data$hcst = NULL, -# clim_obs = NULL) { compute_skill_metrics <- function(recipe, data) { # data$hcst: s2dv_cube containing the hindcast @@ -92,13 +54,14 @@ compute_skill_metrics <- function(recipe, data) { } else { na.rm = recipe$Analysis$remove_NAs } - if (is.null(recipe$Analysis$Workflow$Skill$cross_validation)) { - warn(recipe$Run$logger, - "cross_validation parameter not defined, setting it to FALSE.") - cross.val <- FALSE - } else { - cross.val <- recipe$Analysis$Workflow$Skill$cross_validation - } + ## TODO: Move this to the recipe checker + # if (is.null(recipe$Analysis$Workflow$Skill$cross_validation)) { + # warn(recipe$Run$logger, + # "cross_validation parameter not defined, setting it to FALSE.") + # cross.val <- FALSE + # } else { + # cross.val <- recipe$Analysis$Workflow$Skill$cross_validation + # } skill_metrics <- list() for (metric in strsplit(metrics, ", | |,")[[1]]) { # Whether the fair version of the metric is to be computed diff --git a/modules/test_compute.R b/modules/test_compute.R new file mode 100644 index 00000000..16246069 --- /dev/null +++ b/modules/test_compute.R @@ -0,0 +1,30 @@ +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("build_compute_workflow.R") + +recipe_file <- "recipes/atomic_recipes/recipe_system7c3s-tas.yml" +# recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml" +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- load_datasets(recipe, retrieve = F) + +new_data <- run_compute_workflow(recipe, data) +# Calibrate datasets +data <- calibrate_datasets(recipe, data) +# Compute anomalies +data <- compute_anomalies(recipe, data) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(recipe, data) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(recipe, data) +# Export all data to netCDF +## TODO: Fix plotting +# save_data(recipe, data, skill_metrics, probabilities) +# Plot data +# plot_data(recipe, calibrated_data, skill_metrics, probabilities, +# significance = T) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 75b27b53..394e2e1a 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -5,7 +5,8 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml" +recipe_file <- "recipes/atomic_recipes/recipe_system7c3s-tas.yml" +# recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml" recipe <- prepare_outputs(recipe_file) # Load datasets diff --git a/recipes/atomic_recipes/recipe_system7c3s-tas.yml b/recipes/atomic_recipes/recipe_system7c3s-tas.yml index e5cd2aba..a57ca5af 100644 --- a/recipes/atomic_recipes/recipe_system7c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-tas.yml @@ -36,7 +36,7 @@ Analysis: method: mse_min save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' Skill: - metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + metric: RPSS # RPS RPSS CRPS CRPSS FRPSS BSS10 
BSS90 EnsCorr Corr mean_bias mean_bias_SS save: 'all' # 'all'/'none' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] @@ -53,3 +53,6 @@ Run: Terminal: yes output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + startR_workflow: + modules: calibration skill # Modules to run inside Compute(), in order + chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks...} -- GitLab From 3e36d723de9d19e011f002cae7d57531253892a4 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 7 Jun 2023 17:16:17 +0200 Subject: [PATCH 13/96] Modify test, add pre-processing module and forecast, define all inputs and outputs dynamically --- build_compute_workflow.R | 69 ++++++++++++++++----------- modules/Calibration/Calibration.R | 7 +-- modules/Preprocessing/Preprocessing.R | 7 +++ modules/Skill/Skill.R | 1 + modules/test_compute.R | 16 ++----- 5 files changed, 57 insertions(+), 43 deletions(-) create mode 100644 modules/Preprocessing/Preprocessing.R diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 83b26c77..aa46c99b 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -8,6 +8,7 @@ build_compute_workflow <- function(recipe) { modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) data <- list(hcst = hcst, obs = obs, fcst = fcst) + data <- preprocess_datasets(recipe, data) skill_metrics <- NULL for (module in modules) { if (module == "calibration") { @@ -16,20 +17,22 @@ build_compute_workflow <- function(recipe) { data <- compute_anomalies(recipe, data) } else if (module == "skill") { skill_metrics <- compute_skill_metrics(recipe, data) - # return_list <- c(return_list, skill_metrics = skill_metrics) } } - ## TODO: Define what to return depending on functions. - ## Also, maybe return an additional variable to determine if the output - ## needs to be converted back to s2dv_cube or what. - return(list(data$hcst, data$obs, skill_metrics)) + ## TODO: Define what to return depending the modules called + the recipe + # Eliminate NULL elements from the return list + return_list <- list(hcst = data$hcst, + obs = data$obs, + fcst = data$fcst, + skill = skill_metrics) + return_list <- return_list[!sapply(return_list, is.null)] + return(return_list) } info(recipe$Run$logger, "##### COMPUTE FUNCTION DEFINED #####") return(my_compute_function) } convert_to_s2dv_cube <- function(new_cube, original_cube) { - ## TODO: Make function more general? 
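## Sketch of the round trip this helper performs (variable names follow
## run_compute_workflow() below; illustrative, not part of the patch):
## Compute() hands back plain arrays, so the startR metadata attributes are
## grafted back on and the class is restored, which is what as.s2dv_cube()
## dispatches on:
# out <- res$hcst                                  # plain array from Compute()
# attr(out, "Variables") <- attr(data$hcst, "Variables")
# attr(out, "FileSelectors") <- attr(data$hcst, "FileSelectors")
# class(out) <- "startR_array"
# cube <- as.s2dv_cube(out)                        # now a regular s2dv_cube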
## TODO: Make sure all the metadata is created correctly attr(new_cube, "Variables") <- attr(original_cube, "Variables") attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors") @@ -44,22 +47,23 @@ run_compute_workflow <- function(recipe, data) { modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Step 1: Define the function - ## TODO: Sort out order my_compute_function <- build_compute_workflow(recipe) - # Step 2: Define the call to compute() + + # --------------------------------------------------------------------------- + # Step 2: Define the inputs and outputs for Compute() # Create target dimensions - ## TODO: Refine this part target_dims <- list(hcst = c('sday', 'sweek', 'syear', 'ensemble'), obs = c('sday', 'sweek', 'syear')) + inputs <- list(hcst = data$hcst, obs = data$obs) if (!is.null(data$fcst)) { target_dims <- c(target_dims, list(fcst = c('sday', 'sweek', 'syear', 'ensemble'))) + inputs <- c(inputs, list(fcst = data$fcst)) } - # Create output dimensions ## TODO: Add several conditions depending on user requests output_dims <- list(hcst = c('sday', 'sweek', 'syear', 'ensemble'), - obs = c('sday', 'sweek', 'syear')) + obs = c('sday', 'sweek', 'syear', 'ensemble')) if (!is.null(data$fcst)) { output_dims <- c(output_dims, list(fcst = c('sday', 'sweek', 'syear', 'ensemble'))) @@ -68,31 +72,42 @@ run_compute_workflow <- function(recipe, data) { output_dims <- c(output_dims, list(skill = c('metric', 'syear'))) } - ## TODO: Create output dimensions dynamically + + # --------------------------------------------------------------------------- + # Step 3: Generate the Step and call Compute() ## TODO: Add use attributes - ## TODO: Handle skill output step <- Step(fun = my_compute_function, target_dims = target_dims, output_dims = output_dims) - wf <- AddStep(inputs = list(hcst = data$hcst, obs = data$obs), - step, - recipe = recipe) + wf <- AddStep(inputs = inputs, + step = step, + recipe = recipe) res <- Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along) info(recipe$Run$logger, - "##### COMPUTE SECTION ENDED, CONVERTING TO s2dv_cube #####") - # Step 3: Convert result to s2dv_cube and return it - ## TODO: fcst - hcst <- convert_to_s2dv_cube(res$hcst, data$hcst) - obs <- convert_to_s2dv_cube(res$obs, data$obs) - ## TODO: Transform skill metrics back into a list - metric_list <- c("rpss", "rpss_significance") ## to be removed - skill <- .drop_dims(res$skill) - # skill_metrics <- apply(seq(dim(skill)[1]), function(x) {res$skill[x, , , ]}) - + "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####") + + # --------------------------------------------------------------------------- + # Step 4: Convert results to s2dv_cube/list format + result <- list() + for (cube in c("hcst", "obs", "fcst")) { + if (!is.null(res[[cube]])) { + result[[cube]] <- convert_to_s2dv_cube(res[[cube]], data[[cube]]) + } else { + result[[cube]] <- NULL + } + } + ## TODO: Transform skill metrics back into a NAMED list. To do this, Apply() + ## should return the attributes of the skill array so that we have the name + ## of each metric, or we should find an alternative way to retrieve them. 
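## One conceivable fix (sketch): if the 'metric' dimnames did survive the
## Compute() call -- which, as the TODO above notes, is not guaranteed -- the
## array could be split back into a named list with ClimProjDiags::Subset().
## A later patch in this series sidesteps the problem by writing the metric
## names to a temporary .Rds file instead.
# metric_names <- dimnames(res$skill)[['metric']]
# skill_list <- lapply(seq_along(metric_names), function(i) {
#   Subset(res$skill, along = 'metric', indices = i, drop = 'selected')
# })
# names(skill_list) <- metric_names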
+ if (!is.null(res$skill)) { + metric_list <- c("rpss", "rpss_significance") ## to be removed + result$skill <- .drop_dims(res$skill) + # result$skill_metrics <- apply(seq(dim(skill)[1]), function(x) {res$skill[x, , , ]}) + } info(recipe$Run$logger, "##### DATA RETURNED AS A NAMED LIST #####") - return(list(hcst = hcst, obs = obs, skill_metrics = skill_metrics)) + return(result) } diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 4b6e00ca..6c586f54 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -100,7 +100,8 @@ calibrate_datasets <- function(recipe, data) { multi.model = mm, na.fill = TRUE, na.rm = na.rm, apply_to = NULL, alpha = NULL, memb_dim = "ensemble", sdate_dim = "syear", - dat_dim = "dat", ncores = ncores) + ncores = ncores) + # dat_dim = "dat", ncores = ncores) fcst_calibrated <- do.call(fun, arguments) } else { fcst_calibrated <- NULL @@ -161,8 +162,8 @@ calibrate_datasets <- function(recipe, data) { } ## TODO: Sort out returns return_list <- list(hcst = hcst_calibrated, - obs = data$obs) # , - # fcst = fcst_calibrated) + obs = data$obs, + fcst = fcst_calibrated) # if (!is.null(hcst_full_calibrated)) { # return_list <- append(return_list, # list(hcst.full_val = hcst_full_calibrated, diff --git a/modules/Preprocessing/Preprocessing.R b/modules/Preprocessing/Preprocessing.R new file mode 100644 index 00000000..8e86a38d --- /dev/null +++ b/modules/Preprocessing/Preprocessing.R @@ -0,0 +1,7 @@ +preprocess_datasets <- function(recipe, data) { + # Remove 'var_dir' dimension + + # Add 'ensemble' dimension to obs + dim(data$obs) <- c(dim(data$obs), ensemble = 1) + return(data) +} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 619dc91b..b048b2c6 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -309,6 +309,7 @@ compute_skill_metrics <- function(recipe, data) { dimensions <- names(dim(skill_metrics[[1]])) skill_metrics <- abind(skill_metrics, along = 0) names(dim(skill_metrics)) <- c("metric", dimensions) + skill_metrics <- sticky(skill_metrics) } # Return results return(skill_metrics) diff --git a/modules/test_compute.R b/modules/test_compute.R index 16246069..4076c51d 100644 --- a/modules/test_compute.R +++ b/modules/test_compute.R @@ -1,4 +1,5 @@ source("modules/Loading/Loading.R") +source("modules/Preprocessing/Preprocessing.R") source("modules/Calibration/Calibration.R") source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") @@ -14,17 +15,6 @@ recipe <- prepare_outputs(recipe_file) data <- load_datasets(recipe, retrieve = F) new_data <- run_compute_workflow(recipe, data) -# Calibrate datasets -data <- calibrate_datasets(recipe, data) -# Compute anomalies -data <- compute_anomalies(recipe, data) -# Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, data) -# Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, data) -# Export all data to netCDF -## TODO: Fix plotting -# save_data(recipe, data, skill_metrics, probabilities) # Plot data -# plot_data(recipe, calibrated_data, skill_metrics, probabilities, -# significance = T) +plot_data(recipe, calibrated_data, skill_metrics, + significance = T) -- GitLab From 2dbba621b99acb1260f73561a2650494d36f0d58 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 12:25:59 +0200 Subject: [PATCH 14/96] Source Start() from master branch --- modules/Loading/Loading.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 295d3e87..4563a0a8 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -6,9 +6,10 @@ source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") ## TODO: Move to prepare_outputs.R source("tools/libs.R") -## TODO: remove these two lines when new as.s2dv_cube() is in CSTools -source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R') -source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R') +## TODO: Remove when new startR is released: +source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Start.R") +source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/zzz.R") +source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/Utils.R") load_datasets <- function(recipe, retrieve = T) { -- GitLab From f80a5708740ac7798326b02bded50c16734ae0d8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 15:39:08 +0200 Subject: [PATCH 15/96] Make long name retrieval more flexible ('longname' or 'long_name') --- modules/Visualization/R/plot_ensemble_mean.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index e3d75138..a3f1fe3c 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -39,7 +39,7 @@ plot_ensemble_mean <- function(recipe, fcst, outdir) { # Define brks, centered on in the case of anomalies ## if (grepl("anomaly", - fcst$attrs$Variable$metadata[[variable]]$long_name)) { + fcst$attrs$Variable$metadata[[variable]]$long)) { variable <- paste(variable, "anomaly") max_value <- max(abs(var_ens_mean)) ugly_intervals <- seq(-max_value, max_value, max_value/20) -- GitLab From 12899a3c00b161f7a29979ba82f42cf8e2bf24bb Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 15:39:22 +0200 Subject: [PATCH 16/96] Remove 'var_dir' dimension --- modules/Preprocessing/Preprocessing.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/Preprocessing/Preprocessing.R b/modules/Preprocessing/Preprocessing.R index 8e86a38d..6f118670 100644 --- a/modules/Preprocessing/Preprocessing.R +++ b/modules/Preprocessing/Preprocessing.R @@ -1,6 +1,11 @@ preprocess_datasets <- function(recipe, data) { # Remove 'var_dir' dimension - + for (element in names(data)) { + data[[element]] <- Subset(x = data[[element]], + along = c('var_dir'), + indices = list(1), + drop = 'selected') + } # Add 'ensemble' dimension to obs dim(data$obs) <- c(dim(data$obs), ensemble = 1) return(data) -- GitLab From b8218ab9f9a3d60c3c5f74e87bf7616353348d22 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 15:39:54 +0200 Subject: [PATCH 17/96] Adapt Anomalies for Compute() (WIP) --- modules/Anomalies/Anomalies.R | 164 +++++++++++++++++++--------------- 1 file changed, 94 insertions(+), 70 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 28092625..8fc38a15 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -3,11 +3,15 @@ compute_anomalies <- function(recipe, data) { - if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, - paste("The anomaly module has been called, but the element", - "'Workflow:Anomalies:compute' is missing from the recipe.")) - stop() + ## TODO: Is this necessary? 
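## Note the pattern introduced here and used across the following patches
## (sketch, with an invented body): each module must work both on s2dv_cube
## objects (data loaded with retrieve = TRUE) and on the bare arrays passed
## in by Compute(), so metadata-dependent steps are gated on the class, and
## later on an explicit 'retrieve' argument:
# if (inherits(data$hcst, "s2dv_cube")) {
#   # full metadata available: recipe checks, logging, saving to netCDF
# } else {
#   # inside Compute(): numeric work on plain arrays only
# }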
+ # Check + if (inherits(data$hcst, "s2dv_cube")) { + if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { + error(recipe$Run$logger, + paste("The anomaly module has been called, but the element", + "'Workflow:Anomalies:compute' is missing from the recipe.")) + stop() + } } if (recipe$Analysis$Workflow$Anomalies$compute) { @@ -18,89 +22,109 @@ compute_anomalies <- function(recipe, data) { cross <- FALSE cross_msg <- "without" } - original_dims <- data$hcst$dim + original_dims <- dim(data$hcst) # Compute anomalies - anom <- CST_Anomaly(data$hcst, data$obs, - cross = cross, - memb = TRUE, - memb_dim = 'ensemble', - dim_anom = 'syear', - dat_dim = c('dat', 'ensemble'), - ftime_dim = 'time', - ncores = recipe$Analysis$ncores) - # Reorder dims - anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) - anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) - + ## TODO: No cross-validation case + arguments <- list(exp = data$hcst, obs = data$obs, + memb = TRUE, memb_dim = 'ensemble', + dat_dim = c('dat', 'ensemble'), + ncores = recipe$Analysis$ncores) + if (inherits(data$hcst, "s2dv_cube")) { + anom <- do.call(what = CST_Anomaly, + args = c(arguments, list(cross = cross, + dim_anom = 'syear', + ftime_dim = 'time'))) + anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) + anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) + # Change variable metadata + for (var in data$hcst$attrs$Variable$varName) { + # Change hcst longname + data$hcst$attrs$Variable$metadata[[var]]$long_name <- + paste(data$hcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") + # Change obs longname + data$obs$attrs$Variable$metadata[[var]]$long_name <- + paste(data$obs$attrs$Variable$metadata[[var]]$long_name, "anomaly") + } + } else { + anom <- do.call(what = Ano_CrossValid, + args = c(arguments, list(time_dim = 'syear'))) + ## TODO: Check if dim reorder is really necessary in all cases + anom$exp <- Reorder(anom$exp, names(original_dims)) + anom$obs <- Reorder(anom$obs, names(original_dims)) + } # Save full fields hcst_fullvalue <- data$hcst obs_fullvalue <- data$obs - - # Hindcast climatology - + # Save anomalies data$hcst <- anom$exp data$obs <- anom$obs remove(anom) - # Change variable metadata - for (var in data$hcst$attrs$Variable$varName) { - # Change hcst longname - data$hcst$attrs$Variable$metadata[[var]]$long_name <- - paste(data$hcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") - # Change obs longname - data$obs$attrs$Variable$metadata[[var]]$long_name <- - paste(data$obs$attrs$Variable$metadata[[var]]$long_name, "anomaly") - } + # Compute forecast anomaly field if (!is.null(data$fcst)) { - # Compute hindcast climatology ensemble mean - clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, - time_dim = "syear", - dat_dim = c("dat", "ensemble"), - memb = FALSE, - memb_dim = "ensemble", - ftime_dim = "time", - ncores = recipe$Analysis$ncores) - clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, - name = "syear") + arguments <- list(time_dim = 'syear', dat_dim = c('dat', 'ensemble'), + memb = FALSE, memb_dim = 'ensemble', ftime_dim = 'time', + ncores = recipe$Analysis$ncores) + if (inherits(data$fcst, "s2dv_cube")) { + clim <- do.call(what = s2dv::Clim, + args = c(list(exp = hcst_fullvalue$data, + obs = obs_fullvalue$data), + arguments)) + } else { + clim <- do.call(what = s2dv::Clim, + args = c(list(exp = hcst_fullvalue, + obs = obs_fullvalue), + arguments)) + } + clim$clim_exp <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, + name = "syear") # 
Store original dimensions - dims <- dim(clim_hcst) + dims <- dim(clim$clim_exp) + ## Is there a better way to do this? # Repeat the array as many times as ensemble members - clim_hcst <- rep(clim_hcst, data$fcst$dim[['ensemble']]) + clim$clim_exp <- rep(clim$clim_exp, dim(data$fcst)[['ensemble']]) # Rename and reorder dimensions - dim(clim_hcst) <- c(dims, ensemble = data$fcst$dim[['ensemble']]) - clim_hcst <- Reorder(clim_hcst, order = names(data$fcst$dim)) + dim(clim$clim_exp) <- c(dims, ensemble = dim(data$fcst)[['ensemble']]) + clim$clim_exp <- Reorder(clim$clim_exp, order = names(dim(data$fcst))) # Get fcst anomalies - data$fcst$data <- data$fcst$data - clim_hcst - # Change metadata - for (var in data$fcst$attrs$Variable$varName) { - data$fcst$attrs$Variable$metadata[[var]]$long_name <- - paste(data$fcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") + if (inherits(data$fcst, "s2dv_cube")) { + data$fcst$data <- data$fcst$data - clim$clim_exp + # Change metadata + for (var in data$fcst$attrs$Variable$varName) { + data$fcst$attrs$Variable$metadata[[var]]$long_name <- + paste(data$fcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") + } + } else { + data$fcst <- data$fcst - clim$clim_exp } } - info(recipe$Run$logger, - paste("The anomalies have been computed,", cross_msg, - "cross-validation. The original full fields are returned as", - "$hcst.full_val and $obs.full_val.")) + # Display success messages and save data + if (inherits(data$hcst, "s2dv_cube")) { + info(recipe$Run$logger, + paste("The anomalies have been computed,", cross_msg, + "cross-validation. The original full fields are returned as", + "$hcst.full_val and $obs.full_val.")) - info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") - # Save outputs - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Anomalies/") - # Save forecast - if (recipe$Analysis$Workflow$Anomalies$save %in% - c('all', 'exp_only', 'fcst_only')) { - save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') - } - # Save hindcast - if (recipe$Analysis$Workflow$Anomalies$save %in% - c('all', 'exp_only')) { - save_forecast(recipe = recipe, data_cube = data$hcst, type = 'hcst') - } - # Save observation - if (recipe$Analysis$Workflow$Anomalies$save == 'all') { - save_observations(recipe = recipe, data_cube = data$obs) + info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") + # Save outputs + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = data$hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = data$obs) + } } } else { warn(recipe$Run$logger, paste("The Anomalies module has been called, but", -- GitLab From fbe54e7ec68d7b9bb3d6b7acae8ff87e93fbaf4f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 15:41:13 +0200 Subject: [PATCH 18/96] Fix broken link --- modules/Calibration/Calibration.R | 6 ++---- modules/Loading/Loading.R | 2 +- modules/Skill/Skill.R | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 6c586f54..7308f0a3 
100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -68,8 +68,7 @@ calibrate_datasets <- function(recipe, data) { multi.model = mm, na.fill = TRUE, na.rm = na.rm, apply_to = NULL, alpha = NULL, memb_dim = "ensemble", sdate_dim = "syear", - ncores = ncores) - # dat_dim = "dat", ncores = ncores) + dat_dim = NULL, ncores = ncores) if (inherits(data$hcst, "s2dv_cube")) { fun <- CST_Calibration } else { @@ -100,8 +99,7 @@ calibrate_datasets <- function(recipe, data) { multi.model = mm, na.fill = TRUE, na.rm = na.rm, apply_to = NULL, alpha = NULL, memb_dim = "ensemble", sdate_dim = "syear", - ncores = ncores) - # dat_dim = "dat", ncores = ncores) + dat_dim = NULL, ncores = ncores) fcst_calibrated <- do.call(fun, arguments) } else { fcst_calibrated <- NULL diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 4563a0a8..abd01333 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -9,7 +9,7 @@ source("tools/libs.R") ## TODO: Remove when new startR is released: source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Start.R") source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/zzz.R") -source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/Utils.R") +source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Utils.R") load_datasets <- function(recipe, retrieve = T) { diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index b048b2c6..619dc91b 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -309,7 +309,6 @@ compute_skill_metrics <- function(recipe, data) { dimensions <- names(dim(skill_metrics[[1]])) skill_metrics <- abind(skill_metrics, along = 0) names(dim(skill_metrics)) <- c("metric", dimensions) - skill_metrics <- sticky(skill_metrics) } # Return results return(skill_metrics) -- GitLab From e71e474931496821bd987ea1ea74ffd31d66dd0e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 15:42:33 +0200 Subject: [PATCH 19/96] Changes to testing script and recipe --- modules/test_compute.R | 6 ++++-- recipes/atomic_recipes/recipe_system7c3s-tas.yml | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/test_compute.R b/modules/test_compute.R index 4076c51d..4b000122 100644 --- a/modules/test_compute.R +++ b/modules/test_compute.R @@ -14,7 +14,9 @@ recipe <- prepare_outputs(recipe_file) # Load datasets data <- load_datasets(recipe, retrieve = F) -new_data <- run_compute_workflow(recipe, data) +# Run workflow with compute +result <- run_compute_workflow(recipe, data) + # Plot data -plot_data(recipe, calibrated_data, skill_metrics, +plot_data(recipe, result$data, result$skill, significance = T) diff --git a/recipes/atomic_recipes/recipe_system7c3s-tas.yml b/recipes/atomic_recipes/recipe_system7c3s-tas.yml index a57ca5af..b3f15fbf 100644 --- a/recipes/atomic_recipes/recipe_system7c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-tas.yml @@ -42,7 +42,7 @@ Analysis: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' Visualization: - plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + plots: skill_metrics, forecast_ensemble_mean Indicators: index: no ncores: 10 @@ -54,5 +54,5 @@ Run: output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ startR_workflow: - modules: calibration skill # Modules to run inside Compute(), in order - chunk_along: {latitude: 2, longitude: 2} # list: 
{dimension_1: # of chunks, dimension_2, # of chunks...} + modules: calibration, anomalies, skill # Modules to run inside Compute(), in order + chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2: # of chunks, ...} -- GitLab From 0b42b7690cd0649c7e554a59abba6e9a8e312cb3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 16:14:53 +0200 Subject: [PATCH 20/96] Improve input and output dims and their flexibility, fix bugs in skill outputs --- build_compute_workflow.R | 48 ++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index aa46c99b..47aedccb 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -20,11 +20,11 @@ build_compute_workflow <- function(recipe) { } } ## TODO: Define what to return depending the modules called + the recipe - # Eliminate NULL elements from the return list return_list <- list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, skill = skill_metrics) + # Eliminate NULL elements from the return list return_list <- return_list[!sapply(return_list, is.null)] return(return_list) } @@ -42,35 +42,50 @@ convert_to_s2dv_cube <- function(new_cube, original_cube) { return(new_cube) } -run_compute_workflow <- function(recipe, data) { +run_compute_workflow <- function(recipe, data, my_compute_function = NULL) { modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Step 1: Define the function - my_compute_function <- build_compute_workflow(recipe) + if (is.null(my_compute_function)) { + my_compute_function <- build_compute_workflow(recipe) + } # --------------------------------------------------------------------------- # Step 2: Define the inputs and outputs for Compute() # Create target dimensions original_dims <- c('dat', 'var', 'var_dir', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble') # Remove the chunking dimensions from the original dimensions in the arrays + exp_target_dims <- + original_dims[!original_dims %in% names(recipe$Run$startR_workflow$chunk_along)] + obs_target_dims <- exp_target_dims[!exp_target_dims == 'ensemble'] + # Default inputs: hindcast and observations target_dims <- list(hcst = exp_target_dims, obs = obs_target_dims) inputs <- list(hcst = data$hcst, obs = data$obs) + # Add forecast if not empty if (!is.null(data$fcst)) { target_dims <- c(target_dims, - list(fcst = c('sday', 'sweek', 'syear', 'ensemble'))) + list(fcst = exp_target_dims)) inputs <- c(inputs, list(fcst = data$fcst)) } + # Create output dimensions ## TODO: Add several conditions depending on module/user requests? 
+ exp_output_dims <- exp_target_dims[!exp_target_dims == 'var_dir'] + # Default outputs: hindcast and observations + output_dims <- list(hcst = exp_output_dims, + obs = exp_output_dims) + # Add forecast if not empty if (!is.null(data$fcst)) { output_dims <- c(output_dims, - list(fcst = c('sday', 'sweek', 'syear', 'ensemble'))) + list(fcst = exp_output_dims)) } + # Add skill metrics if skill module is called if ("skill" %in% modules) { output_dims <- c(output_dims, - list(skill = c('metric', 'syear'))) + list(skill = c('metric', 'var', 'time'))) } # --------------------------------------------------------------------------- @@ -93,9 +108,9 @@ run_compute_workflow <- function(recipe, data) { result <- list() for (cube in c("hcst", "obs", "fcst")) { if (!is.null(res[[cube]])) { - result[[cube]] <- convert_to_s2dv_cube(res[[cube]], data[[cube]]) + res$data[[cube]] <- convert_to_s2dv_cube(res[[cube]], data[[cube]]) } else { - result[[cube]] <- NULL + res$data[[cube]] <- NULL } } ## TODO: Transform skill metrics back into a NAMED list. To do this, Apply() @@ -103,11 +118,12 @@ run_compute_workflow <- function(recipe, data) { ## of each metric, or we should find an alternative way to retrieve them. if (!is.null(res$skill)) { metric_list <- c("rpss", "rpss_significance") ## to be removed - result$skill <- .drop_dims(res$skill) - # result$skill_metrics <- apply(seq(dim(skill)[1]), function(x) {res$skill[x, , , ]}) + res$skill <- lapply(seq(dim(res$skill)[1]), function(x) {res$skill[x, , , , , drop = F]}) + ## TODO: Subset 'metric' dimension + names(res$skill) <- metric_list } info(recipe$Run$logger, "##### DATA RETURNED AS A NAMED LIST #####") - return(result) + return(res) } -- GitLab From 729177bbcae38eb1771b5256c74d901023746d25 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 19 Jun 2023 10:55:41 +0200 Subject: [PATCH 21/96] Simplify compute workflow --- build_compute_workflow.R | 63 +++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 47aedccb..b25444fd 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -1,35 +1,27 @@ -# Function to build a function to put inside compute -build_compute_workflow <- function(recipe) { - ## TODO: Sort out system and order - ## Alternatively, this function could just be defined by the user - # Define function - ## TODO: Define returns - my_compute_function = function(recipe, hcst, obs, fcst = NULL) { - modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, - ", | |,")[[1]]) - data <- list(hcst = hcst, obs = obs, fcst = fcst) - data <- preprocess_datasets(recipe, data) - skill_metrics <- NULL - for (module in modules) { - if (module == "calibration") { - data <- calibrate_datasets(recipe, data) - } else if (module == "anomalies") { - data <- compute_anomalies(recipe, data) - } else if (module == "skill") { - skill_metrics <- compute_skill_metrics(recipe, data) - } +compute_workflow <- function(recipe, hcst, obs, fcst = NULL) { + modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, + ", | |,")[[1]]) + data <- list(hcst = hcst, obs = obs, fcst = fcst) + rm(hcst, obs, fcst) + data <- preprocess_datasets(recipe, data) + skill_metrics <- NULL + for (module in modules) { + if (module == "calibration") { + data <- calibrate_datasets(recipe, data) + } else if (module == "anomalies") { + data <- compute_anomalies(recipe, data) + } else if (module == "skill") { + skill_metrics <- compute_skill_metrics(recipe, 
data) } - ## TODO: Define what to return depending the modules called + the recipe - return_list <- list(hcst = data$hcst, - obs = data$obs, - fcst = data$fcst, - skill = skill_metrics) - # Eliminate NULL elements from the return list - return_list <- return_list[!sapply(return_list, is.null)] - return(return_list) } - info(recipe$Run$logger, "##### COMPUTE FUNCTION DEFINED #####") - return(my_compute_function) + ## TODO: Define what to return depending the modules called + the recipe + return_list <- list(hcst = data$hcst, + obs = data$obs, + fcst = data$fcst, + skill = skill_metrics) + # Eliminate NULL elements from the return list + return_list <- return_list[!sapply(return_list, is.null)] + return(return_list) } convert_to_s2dv_cube <- function(new_cube, original_cube) { @@ -42,14 +34,13 @@ convert_to_s2dv_cube <- function(new_cube, original_cube) { return(new_cube) } -run_compute_workflow <- function(recipe, data, my_compute_function = NULL) { +run_compute_workflow <- function(recipe, data) { + + # --------------------------------------------------------------------------- + # Step 1: Retrieve the modules that will be called inside the workflow modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) - # Step 1: Define the function - if (is.null(my_compute_function)) { - my_compute_function <- build_compute_workflow(recipe) - } # --------------------------------------------------------------------------- # Step 2: Define the inputs and outputs for Compute() @@ -91,7 +82,7 @@ run_compute_workflow <- function(recipe, data, my_compute_function = NULL) { # --------------------------------------------------------------------------- # Step 3: Generate the Step and call Compute() ## TODO: Add use attributes - step <- Step(fun = my_compute_function, + step <- Step(fun = compute_workflow, target_dims = target_dims, output_dims = output_dims) wf <- AddStep(inputs = inputs, -- GitLab From 084ef11ba3c87515e42380e86bfadcf5ebef939e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 19 Jun 2023 16:47:30 +0200 Subject: [PATCH 22/96] Remove hardcoded array dimensions; do not add 'ensemble' dim if retrieve = F --- build_compute_workflow.R | 14 ++++++++------ modules/Loading/Loading.R | 7 ++----- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index b25444fd..47147997 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -44,13 +44,15 @@ run_compute_workflow <- function(recipe, data) { # --------------------------------------------------------------------------- # Step 2: Define the inputs and outputs for Compute() - # Create target dimensions - original_dims <- c('dat', 'var', 'var_dir', 'sday', 'sweek', 'syear', 'time', - 'latitude', 'longitude', 'ensemble') - # Remove the chunking dimensions from the original dimensions in the arrays + # Retrieve original dimensions + exp_dims <- names(attr(data$hcst, "Dimensions")) + obs_dims <- names(attr(data$obs, "Dimensions")) + # Remove the chunking dimensions from the original dimensions in the arrays, + # to create the target dimensions exp_target_dims <- - original_dims[!original_dims %in% names(recipe$Run$startR_workflow$chunk_along)] - obs_target_dims <- exp_target_dims[!exp_target_dims == 'ensemble'] + exp_dims[!exp_dims %in% names(recipe$Run$startR_workflow$chunk_along)] + obs_target_dims <- + obs_dims[!obs_dims %in% names(recipe$Run$startR_workflow$chunk_along)] # Default inputs: hindcast and observations target_dims <- list(hcst 
= exp_target_dims, obs = obs_target_dims) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index abd01333..c3e124d5 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -297,7 +297,8 @@ load_datasets <- function(recipe, retrieve = T) { split_multiselected_dims = TRUE, retrieve = retrieve) } - + + ## TODO: This part belongs to the pre-processing module # Adds ensemble dim to obs (for consistency with hcst/fcst) default_dims <- c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 1, @@ -310,10 +311,6 @@ load_datasets <- function(recipe, retrieve = T) { } # Convert obs to s2dv_cube obs <- as.s2dv_cube(obs) - } else { - ## TODO: How to handle this with retrieve = F? - default_dims[names(attr(obs, "Dimensions"))] <- attr(obs, "Dimensions") - attr(obs, "Dimensions") <- default_dims } # Checks and data summary (only if retrieve = TRUE) -- GitLab From b3f60efacffe5b8c84a0c0ced0dc8eff5f2b3f8c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 20 Jun 2023 11:42:08 +0200 Subject: [PATCH 23/96] Fix bug related to dimname attributes, flexibilize skill output dimensions in compute workflow --- build_compute_workflow.R | 9 +++++++-- modules/Skill/Skill.R | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 47147997..7897ed7b 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -66,6 +66,8 @@ run_compute_workflow <- function(recipe, data) { # Create output dimensions ## TODO: Add several conditions depending on module/user requests? + ## For example: if 'downscaling' or 'Indices' in modules, lat and lon cannot + ## be used as chunking dimensions exp_output_dims <- exp_target_dims[!exp_target_dims == 'var_dir'] # Default outputs: hindcast and observations output_dims <- list(hcst = exp_output_dims, @@ -77,8 +79,11 @@ run_compute_workflow <- function(recipe, data) { } # Add skill metrics if skill module is called if ("skill" %in% modules) { + skill_dims <- c('metric', 'var', 'time', 'latitude', 'longitude') + skill_output_dims <- + skill_dims[!skill_dims %in% names(recipe$Run$startR_workflow$chunk_along)] output_dims <- c(output_dims, - list(skill = c('metric', 'var', 'time'))) + list(skill = skill_output_dims)) } # --------------------------------------------------------------------------- @@ -93,6 +98,7 @@ run_compute_workflow <- function(recipe, data) { res <- Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along) + browser() info(recipe$Run$logger, "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####") @@ -119,4 +125,3 @@ run_compute_workflow <- function(recipe, data) { "##### DATA RETURNED AS A NAMED LIST #####") return(res) } - diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 619dc91b..f9a92158 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -306,9 +306,17 @@ compute_skill_metrics <- function(recipe, data) { } } } else { + # Retrieve original dimensions dimensions <- names(dim(skill_metrics[[1]])) + # Bind list into an array skill_metrics <- abind(skill_metrics, along = 0) + # Retrieve dimname attributes + dimension_names <- dimnames(skill_metrics) + metric_names <- dimension_names[[1]] + dim(metric_names) <- c(metric = dim(skill_metrics)[[1]]) + # Add 'metric' dimension to array and restore attributes names(dim(skill_metrics)) <- c("metric", dimensions) + dimnames(skill_metrics) <- dimension_names } # Return results return(skill_metrics) -- GitLab From a1fcbed0eeb868652e7473da2743029a12a3feef 
Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 3 Jul 2023 09:11:38 +0200 Subject: [PATCH 24/96] Changes to structure of Compute() funs --- build_compute_workflow.R | 17 +++++++++++++---- {modules => example_scripts}/test_compute.R | 0 .../test_parallel_workflow.R | 0 modules/Calibration/Calibration.R | 2 +- 4 files changed, 14 insertions(+), 5 deletions(-) rename {modules => example_scripts}/test_compute.R (100%) rename {modules => example_scripts}/test_parallel_workflow.R (100%) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 7897ed7b..b096874f 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -1,10 +1,19 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL) { modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) - data <- list(hcst = hcst, obs = obs, fcst = fcst) + # Create data list + data <- list(hcst = list(), obs = list(), fcst = list()) + # Fill list following the structure of the s2dv_cube + data$hcst$data <- hcst + data$obs$data <- obs + data$fcst$data <- fcst + # Remove duplicated objects rm(hcst, obs, fcst) + # Pre-process datasets data <- preprocess_datasets(recipe, data) + # Define other outputs skill_metrics <- NULL + # Loop over the modules for (module in modules) { if (module == "calibration") { data <- calibrate_datasets(recipe, data) @@ -15,9 +24,9 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL) { } } ## TODO: Define what to return depending the modules called + the recipe - return_list <- list(hcst = data$hcst, - obs = data$obs, - fcst = data$fcst, + return_list <- list(hcst = data$hcst$data, + obs = data$obs$data, + fcst = data$fcst$data, skill = skill_metrics) # Eliminate NULL elements from the return list return_list <- return_list[!sapply(return_list, is.null)] diff --git a/modules/test_compute.R b/example_scripts/test_compute.R similarity index 100% rename from modules/test_compute.R rename to example_scripts/test_compute.R diff --git a/modules/test_parallel_workflow.R b/example_scripts/test_parallel_workflow.R similarity index 100% rename from modules/test_parallel_workflow.R rename to example_scripts/test_parallel_workflow.R diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 7308f0a3..117cb5f0 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -41,7 +41,6 @@ calibrate_datasets <- function(recipe, data) { CALIB_MSG <- "##### CALIBRATION COMPLETE #####" # Replicate observation array for the multi-model case - ## TODO: Adapt for Compute() if (mm) { obs.mm <- data$obs$data for (dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { @@ -51,6 +50,7 @@ calibrate_datasets <- function(recipe, data) { names(dim(obs.mm)) <- names(dim(data$obs$data)) obs$data <- obs.mm remove(obs.mm) + gc() } if (recipe$Analysis$Variables$freq == "monthly_mean") { -- GitLab From f3f50432afafe2075a24423e1cf61ce858a54c76 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 4 Jul 2023 17:03:04 +0200 Subject: [PATCH 25/96] Use s2dv_cubes inside self-defined Compute() fun --- build_compute_workflow.R | 18 ++++---- modules/Anomalies/Anomalies.R | 14 +++--- modules/Calibration/Calibration.R | 72 ++++++++++++++++--------------- modules/Skill/Skill.R | 6 +-- 4 files changed, 57 insertions(+), 53 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index b096874f..02062b3e 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -2,25 +2,25 @@ compute_workflow <- 
function(recipe, hcst, obs, fcst = NULL) { modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Create data list - data <- list(hcst = list(), obs = list(), fcst = list()) - # Fill list following the structure of the s2dv_cube - data$hcst$data <- hcst - data$obs$data <- obs - data$fcst$data <- fcst + data <- list(hcst = hcst, obs = obs, fcst = fcst) # Remove duplicated objects rm(hcst, obs, fcst) # Pre-process datasets data <- preprocess_datasets(recipe, data) + # Transform data to s2dv_cube + data <- lapply(data, function(x) { + class(x) <- "startR_array" + as.s2dv_cube(x)}) # Define other outputs skill_metrics <- NULL # Loop over the modules for (module in modules) { if (module == "calibration") { - data <- calibrate_datasets(recipe, data) + data <- calibrate_datasets(recipe, data, retrieve = F) } else if (module == "anomalies") { - data <- compute_anomalies(recipe, data) + data <- compute_anomalies(recipe, data, retrieve = F) } else if (module == "skill") { - skill_metrics <- compute_skill_metrics(recipe, data) + skill_metrics <- compute_skill_metrics(recipe, data, retrieve = F) } } ## TODO: Define what to return depending the modules called + the recipe @@ -107,7 +107,7 @@ run_compute_workflow <- function(recipe, data) { res <- Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along) - browser() + # browser() info(recipe$Run$logger, "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####") diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 8fc38a15..41a125ab 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -1,11 +1,11 @@ # Compute the hcst, obs and fcst anomalies with or without cross-validation # and return them, along with the hcst and obs climatologies. -compute_anomalies <- function(recipe, data) { +compute_anomalies <- function(recipe, data, retrieve = TRUE) { ## TODO: Is this necessary? # Check - if (inherits(data$hcst, "s2dv_cube")) { + if (retrieve) { if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, paste("The anomaly module has been called, but the element", @@ -22,7 +22,7 @@ compute_anomalies <- function(recipe, data) { cross <- FALSE cross_msg <- "without" } - original_dims <- dim(data$hcst) + original_dims <- dim(data$hcst$data) # Compute anomalies ## TODO: No cross-validation case @@ -83,10 +83,10 @@ compute_anomalies <- function(recipe, data) { dims <- dim(clim$clim_exp) ## Is there a better way to do this? # Repeat the array as many times as ensemble members - clim$clim_exp <- rep(clim$clim_exp, dim(data$fcst)[['ensemble']]) + clim$clim_exp <- rep(clim$clim_exp, data$fcst$dims[['ensemble']]) # Rename and reorder dimensions - dim(clim$clim_exp) <- c(dims, ensemble = dim(data$fcst)[['ensemble']]) - clim$clim_exp <- Reorder(clim$clim_exp, order = names(dim(data$fcst))) + dim(clim$clim_exp) <- c(dims, ensemble = data$fcst$dims[['ensemble']]) + clim$clim_exp <- Reorder(clim$clim_exp, order = names(dim(data$fcst$data))) # Get fcst anomalies if (inherits(data$fcst, "s2dv_cube")) { data$fcst$data <- data$fcst$data - clim$clim_exp @@ -101,7 +101,7 @@ compute_anomalies <- function(recipe, data) { } # Display success messages and save data - if (inherits(data$hcst, "s2dv_cube")) { + if (retrieve) { info(recipe$Run$logger, paste("The anomalies have been computed,", cross_msg, "cross-validation. 
The original full fields are returned as",
diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 117cb5f0..1436e401 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -1,4 +1,4 @@
-calibrate_datasets <- function(recipe, data) {
+calibrate_datasets <- function(recipe, data, retrieve = TRUE) {
   # Function that calibrates the hindcast using the method stated in the
   # recipe. If the forecast is not null, it calibrates it as well.
   #
@@ -9,7 +9,7 @@ calibrate_datasets <- function(recipe, data) {
   method <- tolower(recipe$Analysis$Workflow$Calibration$method)
 
   if (method == "raw") {
-    if (inherits(data$hcst, "s2dv_cube")) {
+    if (retrieve) {
      warn(recipe$Run$logger,
           paste("The Calibration module has been called, but the calibration",
                 "method in the recipe is 'raw'. The hcst and fcst will not be",
@@ -63,6 +63,7 @@ calibrate_datasets <- function(recipe, data) {
                     "monthly data."))
        stop()
      } else {
+        ## TODO: Change again if all data ends up being an s2dv_cube
        arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL,
                          cal.method = method, eval.method = "leave-one-out",
                          multi.model = mm, na.fill = TRUE, na.rm = na.rm,
@@ -75,24 +76,25 @@ calibrate_datasets <- function(recipe, data) {
        fun <- Calibration
      }
      hcst_calibrated <- do.call(fun, arguments)
-      # # In the case where anomalies have been computed, calibrate full values
-      # if (!is.null(data$hcst.full_val)) {
-      #   hcst_full_calibrated <- CST_Calibration(data$hcst.full_val,
-      #                                           data$obs.full_val,
-      #                                           cal.method = method,
-      #                                           eval.method = "leave-one-out",
-      #                                           multi.model = mm,
-      #                                           na.fill = TRUE,
-      #                                           na.rm = na.rm,
-      #                                           apply_to = NULL,
-      #                                           memb_dim = "ensemble",
-      #                                           sdate_dim = "syear",
-      #                                           ncores = ncores)
-      # } else {
-      #   hcst_full_calibrated <- NULL
-      # }
+      # In the case where anomalies have been computed, calibrate full values
+      if (!is.null(data$hcst.full_val)) {
+        hcst_full_calibrated <- CST_Calibration(data$hcst.full_val,
+                                                data$obs.full_val,
+                                                cal.method = method,
+                                                eval.method = "leave-one-out",
+                                                multi.model = mm,
+                                                na.fill = TRUE,
+                                                na.rm = na.rm,
+                                                apply_to = NULL,
+                                                memb_dim = "ensemble",
+                                                sdate_dim = "syear",
+                                                ncores = ncores)
+      } else {
+        hcst_full_calibrated <- NULL
+      }
      # Calibrate the forecast
+      ## TODO: Change again if all data ends up being an s2dv_cube
      if (!is.null(data$fcst)) {
        arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst,
                          cal.method = method, eval.method = "leave-one-out",
@@ -115,6 +117,7 @@ calibrate_datasets <- function(recipe, data) {
      #   stop()
      # }
      # Calibrate the hindcast
+      ## TODO: Change again if all data ends up being an s2dv_cube
      arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL,
                        sdate_dim = "syear", memb_dim = "ensemble",
                        method = "QUANT", ncores = ncores,
@@ -129,22 +132,23 @@ calibrate_datasets <- function(recipe, data) {
      }
      hcst_calibrated <- do.call(fun, arguments)
      # # Restore dimension order
-      # hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
-      # # In the case where anomalies have been computed, calibrate full values
-      # if (!is.null(data$hcst.full_val)) {
-      #   hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
-      #                                               data$obs.full_val,
-      #                                               exp_cor = NULL,
-      #                                               sdate_dim = "syear",
-      #                                               memb_dim = "ensemble",
-      #                                               method = "QUANT",
-      #                                               ncores = ncores,
-      #                                               na.rm = na.rm,
-      #                                               wet.day = F)
-      # } else {
-      #   hcst_full_calibrated <- NULL
-      # }
+      hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
+      # In the case where anomalies have been computed, calibrate full values
+      if (!is.null(data$hcst.full_val)) {
+        hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
+                                                    data$obs.full_val,
+                                                    exp_cor = NULL,
+                                                    sdate_dim = "syear",
+                                                    memb_dim = "ensemble",
+                                                    method = "QUANT",
+                                                    ncores = ncores,
+                                                    na.rm = na.rm,
+                                                    wet.day = F)
+      } else {
+        hcst_full_calibrated <- NULL
+      }
      if (!is.null(data$fcst)) {
+        ## TODO: Change again if all data ends up being an s2dv_cube
        arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst,
                          sdate_dim = "syear", memb_dim = "ensemble",
                          method = "QUANT", ncores = ncores,
@@ -167,7 +171,7 @@ calibrate_datasets <- function(recipe, data) {
  #                 list(hcst.full_val = hcst_full_calibrated,
  #                      obs.full_val = data$obs.full_val))
  # }
-  if (inherits(hcst_calibrated, "sd2v_cube")) {
+  if (retrieve) {
    info(recipe$Run$logger, CALIB_MSG)
    ## TODO: What do we do with the full values?
    recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index a686b35e..87b951ed 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -20,7 +20,7 @@ source("modules/Skill/R/RPS_clim.R")
 source("modules/Skill/R/CRPS_clim.R")
 
 ## TODO: Should agg be a function parameter...?
-compute_skill_metrics <- function(recipe, data, agg = 'global') {
+compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE) {
   # data$hcst: s2dv_cube containing the hindcast
   # obs: s2dv_cube containing the observations
@@ -97,7 +97,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
     # Ranked Probability Skill Score and Fair version
     } else if (metric %in% c('rpss', 'frpss')) {
       # skill <- RPSS(data$hcst$data, data$obs$data,
-      skill <- RPSS(data$hcst, data$obs,
+      skill <- RPSS(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
                     memb_dim = memb_dim,
                     Fair = Fair,
@@ -281,7 +281,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
       skill_metrics[[ metric ]] <- skill
     }
   }
-  if (inherits(data$hcst, "s2dv_cube")) {
+  if (retrieve) {
    info(recipe$Run$logger,
         "##### SKILL METRIC COMPUTATION COMPLETE #####")
-- 
GitLab
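
Before the next patch, it is worth spelling out the dispatch pattern the hunks above converge on: every module now accepts a retrieve argument and uses it, rather than the class of the data, to decide whether it is handling full s2dv_cube objects or the bare arrays that Compute() passes around. A minimal sketch of that pattern, with a toy computation standing in for a real module (the function name and the scaling are illustrative, not part of the repository):

    # Sketch: one code path for both execution modes; assumes 'recipe' carries
    # a log4r logger under recipe$Run$logger, as elsewhere in this codebase.
    toy_module <- function(recipe, data, retrieve = TRUE) {
      if (retrieve) {
        # s2dv_cube objects: the array lives in data$hcst$data
        data$hcst$data <- data$hcst$data * 2
        info(recipe$Run$logger, "##### TOY MODULE COMPLETE #####")
      } else {
        # bare arrays inside Compute(): no metadata, no logging
        data$hcst <- data$hcst * 2
      }
      return(data)
    }
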
From f080c5f141589d7eccc17e6fd701de120cfc46b4 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 5 Jul 2023 14:53:29 +0200
Subject: [PATCH 26/96] Create recipe for testing; add creation of tmp
 directory to store metadata generated inside Compute()

---
 build_compute_workflow.R                       | 27 ++++++++-
 example_scripts/test_compute.R                 |  2 +-
 modules/Skill/Skill.R                          | 26 ++++----
 .../atomic_recipes/recipe_test_compute.yml     | 60 +++++++++++++++++++
 4 files changed, 100 insertions(+), 15 deletions(-)
 create mode 100644 recipes/atomic_recipes/recipe_test_compute.yml

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 02062b3e..a7952036 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -94,6 +94,10 @@ run_compute_workflow <- function(recipe, data) {
     output_dims <- c(output_dims, list(skill = skill_output_dims))
   }
 
+  ## TODO: Add Indices (Niño1+2, Niño3, Niño3.4, Niño4, NAO)
+  if ("indices" %in% modules) {
+    idx_dims <- c('index', 'var', 'syear', 'time', 'region')
+  }
   # ---------------------------------------------------------------------------
   # Step 3: Generate the Step and call Compute()
@@ -113,23 +117,40 @@ run_compute_workflow <- function(recipe, data) {
 
   # ---------------------------------------------------------------------------
   # Step 4: Convert results to s2dv_cube/list format
-  result <- list()
   for (cube in c("hcst", "obs", "fcst")) {
     if (!is.null(res[[cube]])) {
       res$data[[cube]] <- convert_to_s2dv_cube(res[[cube]],
                                                data[[cube]])
     } else {
       res$data[[cube]] <- NULL
     }
+    res[[cube]] <- NULL
   }
   ## TODO: Transform skill metrics back into a NAMED list. To do this, Apply()
   ## should return the attributes of the skill array so that we have the name
   ## of each metric, or we should find an alternative way to retrieve them.
   if (!is.null(res$skill)) {
-    metric_list <- c("rpss", "rpss_significance") ## to be removed
-    res$skill <- lapply(seq(dim(res$skill)[1]), function(x) {res$skill[x, , , , , drop = F]})
+    metric_list <- readRDS(paste0(recipe$Run$output_dir,
+                                  "/outputs/tmp/Skill/metric_names.Rds"))
+    res$skill <- lapply(seq(dim(res$skill)[1]),
+                        function(x) {res$skill[x, , , , , drop = F]})
+    res$skill <- lapply(res$skill,
+                        function(x) {
+                          Subset(x,
+                                 along = 'metric',
+                                 indices = 1,
+                                 drop = 'selected')
+                        })
     ## TODO: Subset 'metric' dimension
     names(res$skill) <- metric_list
   }
+
+  # ---------------------------------------------------------------------------
+  # Step 5: Remove temporary files
+  unlink(paste0(recipe$Run$output_dir, "/outputs/tmp/"), recursive = TRUE)
+
+  # ---------------------------------------------------------------------------
+  # Step 6: Return outputs
+  res <- res[!sapply(res, is.null)]
   info(recipe$Run$logger,
        "##### DATA RETURNED AS A NAMED LIST #####")
   return(res)
diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R
index 4b000122..02cd3d45 100644
--- a/example_scripts/test_compute.R
+++ b/example_scripts/test_compute.R
@@ -7,7 +7,7 @@ source("modules/Saving/Saving.R")
 source("modules/Visualization/Visualization.R")
 source("build_compute_workflow.R")
 
-recipe_file <- "recipes/atomic_recipes/recipe_system7c3s-tas.yml"
+recipe_file <- "recipes/atomic_recipes/recipe_test_compute.yml"
 # recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml"
 
 recipe <- prepare_outputs(recipe_file)
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 87b951ed..aeae3b01 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -57,13 +57,13 @@ compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE)
     na.rm = recipe$Analysis$remove_NAs
   }
   ## TODO: Move this to the recipe checker
-  # if (is.null(recipe$Analysis$Workflow$Skill$cross_validation)) {
-  #   warn(recipe$Run$logger,
-  #        "cross_validation parameter not defined, setting it to FALSE.")
-  #   cross.val <- FALSE
-  # } else {
-  #   cross.val <- recipe$Analysis$Workflow$Skill$cross_validation
-  # }
+  if (is.null(recipe$Analysis$Workflow$Skill$cross_validation)) {
+    # warn(recipe$Run$logger,
+    #      "cross_validation parameter not defined, setting it to FALSE.")
+    cross.val <- FALSE
+  } else {
+    cross.val <- recipe$Analysis$Workflow$Skill$cross_validation
+  }
   skill_metrics <- list()
   for (metric in strsplit(metrics, ", | |,")[[1]]) {
     # Whether the fair version of the metric is to be computed
@@ -314,12 +314,16 @@ compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE)
     # Bind list into an array
     skill_metrics <- abind(skill_metrics, along = 0)
     # Retrieve dimname attributes
-    dimension_names <- dimnames(skill_metrics)
-    metric_names <- dimension_names[[1]]
-    dim(metric_names) <- c(metric = dim(skill_metrics)[[1]])
+    metric_names <- dimnames(skill_metrics)[[1]]
+    tmp_file <- paste0(recipe$Run$output_dir,
+                       "/outputs/tmp/Skill/metric_names.Rds")
+    if (!file.exists(tmp_file)) {
+      dir.create(paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/"),
+                 recursive = TRUE)
+      saveRDS(metric_names, tmp_file)
+    }
     # Add 'metric' dimension to array and restore attributes
     names(dim(skill_metrics)) <- c("metric", dimensions)
-    dimnames(skill_metrics) <- dimension_names
   }
   # Return results
   return(skill_metrics)
diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
new file mode 100644
index 00000000..22bc1410
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -0,0 +1,60 @@
+Description:
+  Author: V. Agudetse
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: Meteo-France-System7
+    Multimodel: False
+    Reference:
+      name: ERA5
+  Time:
+    sdate: '1101'
+    fcst_year: '2020'
+    hcst_start: '2000'
+    hcst_end: '2016'
+    ftime_min: 1
+    ftime_max: 3
+  Region:
+    latmin: -10
+    latmax: 10
+    lonmin: 0
+    lonmax: 20
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    Anomalies:
+      compute: yes # yes/no, default yes
+      cross_validation: yes # yes/no, default yes
+      save: 'all' # 'all'/'none'/'exp_only'/'fcst_only'
+    Calibration:
+      method: mse_min
+      save: 'none' # 'all'/'none'/'exp_only'/'fcst_only'
+    Skill:
+      metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS
+      save: 'all' # 'all'/'none'
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10]]
+      save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only'
+    Visualization:
+      plots: skill_metrics, forecast_ensemble_mean
+      multi_panel: no
+      projection: cylindrical_equidistant
+    Indicators:
+      index: no
+  ncores: 10
+  remove_NAs: yes
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
+  startR_workflow:
+    modules: calibration, anomalies, skill # Modules to run inside Compute(), in order
+    chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
-- 
GitLab
From a1df866c18ffed0f1310189d9f20da6bf5951763 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Thu, 13 Jul 2023 12:59:53 +0200
Subject: [PATCH 27/96] Save s2dv_cube metadata after skill, change recipe to
 test chunking by var

---
 build_compute_workflow.R                   | 25 +++++++++++++------
 modules/Skill/Skill.R                      | 13 +++++-----
 .../atomic_recipes/recipe_test_compute.yml |  2 +-
 3 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index a7952036..457c71d9 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -3,14 +3,20 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL) {
                               ", | |,")[[1]])
   # Create data list
   data <- list(hcst = hcst, obs = obs, fcst = fcst)
+  var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]])
   # Remove duplicated objects
   rm(hcst, obs, fcst)
   # Pre-process datasets
   data <- preprocess_datasets(recipe, data)
   # Transform data to s2dv_cube
-  data <- lapply(data, function(x) {
+  data <- lapply(data,
+                 function(x) {
                    class(x) <- "startR_array"
-                   as.s2dv_cube(x)})
+                   as.s2dv_cube(x)
+                 })
+  for (cube_idx in seq(1:length(data))) {
+    data[[cube_idx]]$attrs$Variable$varName <- var_name
+  }
   # Define other outputs
   skill_metrics <- NULL
   # Loop over the modules
@@ -66,11 +72,15 @@ run_compute_workflow <- function(recipe, data) {
 
   target_dims <- list(hcst = exp_target_dims, obs = obs_target_dims)
   inputs <- list(hcst = data$hcst, obs = data$obs)
+  # Define input attributes from Start()
+  input_attributes <- list(hcst = c("Variables", "FileSelectors"),
+                           obs = c("Variables", "FileSelectors"))
   # Add forecast if not empty
   if (!is.null(data$fcst)) {
     target_dims <- c(target_dims, list(exp_target_dims))
     inputs <- c(inputs, list(fcst = data$fcst))
+    input_attributes <- c(input_attributes, list(fcst = c("Variables", "FileSelectors")))
   }
 
   # Create output dimensions
@@ -104,14 +114,14 @@ run_compute_workflow <- function(recipe, data) {
   ## TODO: Add use attributes
   step <- Step(fun = compute_workflow,
                target_dims = target_dims,
-               output_dims = output_dims)
+               output_dims = output_dims,
+               use_attributes = input_attributes)
   wf <- AddStep(inputs = inputs,
                 step = step,
                 recipe = recipe)
 
   res <- Compute(wf$hcst,
                  chunks = recipe$Run$startR_workflow$chunk_along)
-  # browser()
 
   info(recipe$Run$logger,
        "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####")
@@ -129,8 +139,8 @@ run_compute_workflow <- function(recipe, data) {
   }
   ## TODO: Transform skill metrics back into a NAMED list. To do this, Apply()
   ## should return the attributes of the skill array so that we have the name
   ## of each metric, or we should find an alternative way to retrieve them.
   if (!is.null(res$skill)) {
-    metric_list <- readRDS(paste0(recipe$Run$output_dir,
-                                  "/outputs/tmp/Skill/metric_names.Rds"))
+    tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/")
+    metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds"))
     res$skill <- lapply(seq(dim(res$skill)[1]),
                         function(x) {res$skill[x, , , , , drop = F]})
     res$skill <- lapply(res$skill,
@@ -140,8 +150,9 @@ run_compute_workflow <- function(recipe, data) {
                            indices = 1,
                            drop = 'selected')
                         })
-    ## TODO: Subset 'metric' dimension
     names(res$skill) <- metric_list
+    ## TODO: Chunked metadata needs to be put back together!!!
+    res$metadata <- readRDS(paste0(tmp_dir, "skill_metadata.Rds"))
   }
 
   # ---------------------------------------------------------------------------
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index aeae3b01..583c1534 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -315,12 +315,13 @@ compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE)
     skill_metrics <- abind(skill_metrics, along = 0)
     # Retrieve dimname attributes
     metric_names <- dimnames(skill_metrics)[[1]]
-    tmp_file <- paste0(recipe$Run$output_dir,
-                       "/outputs/tmp/Skill/metric_names.Rds")
-    if (!file.exists(tmp_file)) {
-      dir.create(paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/"),
-                 recursive = TRUE)
-      saveRDS(metric_names, tmp_file)
+    tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/")
+
+    if (!dir.exists(tmp_dir)) {
+      dir.create(tmp_dir, recursive = TRUE)
+      saveRDS(metric_names, paste0(tmp_dir, "metric_names.Rds"))
+      data$hcst$data <- NULL
+      saveRDS(data$hcst, paste0(tmp_dir, "skill_metadata.Rds"))
     }
     # Add 'metric' dimension to array and restore attributes
     names(dim(skill_metrics)) <- c("metric", dimensions)
diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
index 22bc1410..38374870 100644
--- a/recipes/atomic_recipes/recipe_test_compute.yml
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -4,7 +4,7 @@ Description:
 Analysis:
   Horizon: Seasonal
   Variables:
-    name: tas
+    name: tas prlr
     freq: monthly_mean
   Datasets:
     System:
-- 
GitLab
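
The use_attributes argument added to Step() above is what lets the chunk function read the Variables and FileSelectors attributes that Start() attaches to its output. What compute_workflow() then does with them can be sketched on a mock object that imitates the FileSelectors structure (the mock array and values are illustrative):

    # Sketch: recovering the variable names from the FileSelectors attribute,
    # as compute_workflow() does; the attribute layout mimics Start() output.
    hcst <- array(rnorm(8), dim = c(var = 2, time = 4))
    attr(hcst, "FileSelectors") <- list(dat1 = list(var = list(c("tas", "prlr"))))
    var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]])
    var_name  # "tas" "prlr"
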
From 7f500d6588479ce2a52fde0c0db985e3d83c5888 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 26 Jul 2023 16:32:06 +0200
Subject: [PATCH 28/96] Update

---
 build_compute_workflow.R                       | 12 ++++++------
 modules/Skill/Skill.R                          |  3 ++-
 recipes/atomic_recipes/recipe_test_compute.yml |  2 +-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 457c71d9..97ddd2f9 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -1,4 +1,5 @@
-compute_workflow <- function(recipe, hcst, obs, fcst = NULL) {
+compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
+                             nchunks = chunk_indices) {
   modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
                               ", | |,")[[1]])
   # Create data list
@@ -19,6 +20,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL) {
   }
   # Define other outputs
   skill_metrics <- NULL
+  probabilities <- NULL
   # Loop over the modules
   for (module in modules) {
     if (module == "calibration") {
@@ -26,14 +28,15 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL) {
     } else if (module == "anomalies") {
       data <- compute_anomalies(recipe, data, retrieve = F)
     } else if (module == "skill") {
-      skill_metrics <- compute_skill_metrics(recipe, data, retrieve = F)
+      skill_metrics <- compute_skill_metrics(recipe, data, retrieve = F, nchunks = nchunks)
     }
   }
   ## TODO: Define what to return depending the modules called + the recipe
   return_list <- list(hcst = data$hcst$data,
                       obs = data$obs$data,
                       fcst = data$fcst$data,
-                      skill = skill_metrics)
+                      skill = skill_metrics,
+                      probabilities = probabilities)
   # Eliminate NULL elements from the return list
   return_list <- return_list[!sapply(return_list, is.null)]
   return(return_list)
@@ -135,9 +138,6 @@ run_compute_workflow <- function(recipe, data) {
     }
     res[[cube]] <- NULL
   }
-  ## TODO: Transform skill metrics back into a NAMED list. To do this, Apply()
-  ## should return the attributes of the skill array so that we have the name
-  ## of each metric, or we should find an alternative way to retrieve them.
   if (!is.null(res$skill)) {
     tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/")
     metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds"))
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 583c1534..ec6e8be6 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -20,7 +20,8 @@ source("modules/Skill/R/RPS_clim.R")
 source("modules/Skill/R/CRPS_clim.R")
 
 ## TODO: Should agg be a function parameter...?
-compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE) {
+compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE,
+                                  nchunks = nchunks) {
   # data$hcst: s2dv_cube containing the hindcast
   # obs: s2dv_cube containing the observations
diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
index 38374870..e0ffe5d3 100644
--- a/recipes/atomic_recipes/recipe_test_compute.yml
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -57,4 +57,4 @@ Run:
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
   startR_workflow:
     modules: calibration, anomalies, skill # Modules to run inside Compute(), in order
-    chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    chunk_along: {var: 2,time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
-- 
GitLab

From 7adc3beab1d7c0aa0670d903bee6aa62e60d822d Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Thu, 27 Jul 2023 09:29:35 +0200
Subject: [PATCH 29/96] Remove commented recipe

---
 example_scripts/test_compute.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R
index 02cd3d45..963ed021 100644
--- a/example_scripts/test_compute.R
+++ b/example_scripts/test_compute.R
@@ -8,7 +8,6 @@ source("modules/Visualization/Visualization.R")
 source("build_compute_workflow.R")
 
 recipe_file <- "recipes/atomic_recipes/recipe_test_compute.yml"
-# recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml"
 
 recipe <- prepare_outputs(recipe_file)
-- 
GitLab
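
The nchunks argument threaded through the functions above defaults to chunk_indices, an object startR exposes inside Compute() that identifies which chunk is being processed along each chunked dimension. The next patch uses it to give each chunk its own metadata file; the naming scheme can be sketched with a hand-written nchunks standing in for the real chunk indices (the values are illustrative):

    # Sketch: per-chunk file names built from the chunk indices.
    nchunks <- c(var = 1, time = 2)  # imitates startR's chunk_indices
    metadata_filename <- "skill_metadata"
    for (chunk in names(nchunks)) {
      metadata_filename <- paste0(metadata_filename, "_", chunk, "_", nchunks[[chunk]])
    }
    paste0(metadata_filename, ".Rds")  # "skill_metadata_var_1_time_2.Rds"
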
From 5bc9ae4becd0d9c9bb628abed7bbdd11bcc53b58 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Tue, 8 Aug 2023 12:13:04 +0200
Subject: [PATCH 30/96] Add function to retrieve s2dv_cube metadata after
 Compute() (WIP)

---
 build_compute_workflow.R                       |  5 +-
 modules/Skill/Skill.R                          | 12 ++-
 .../atomic_recipes/recipe_test_compute.yml     |  4 +-
 tools/libs.R                                   |  1 -
 tools/retrieve_metadata.R                      | 78 +++++++++++++++++++
 5 files changed, 93 insertions(+), 7 deletions(-)
 create mode 100644 tools/retrieve_metadata.R

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 97ddd2f9..00ef2bc3 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -1,5 +1,6 @@
 compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
                              nchunks = chunk_indices) {
+  setwd(recipe$Run$code_dir)
   modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
                               ", | |,")[[1]])
   # Create data list
@@ -152,7 +153,9 @@ run_compute_workflow <- function(recipe, data) {
                         })
     names(res$skill) <- metric_list
     ## TODO: Chunked metadata needs to be put back together!!!
-    res$metadata <- readRDS(paste0(tmp_dir, "skill_metadata.Rds"))
+    source("tools/retrieve_metadata.R")
+    res$metadata <- retrieve_metadata(tmp_dir = tmp_dir,
+                                      chunks = recipe$Run$startR_workflow$chunk_along)
   }
 
   # ---------------------------------------------------------------------------
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index ec6e8be6..8ee96357 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -317,15 +317,21 @@ compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE,
     # Retrieve dimname attributes
     metric_names <- dimnames(skill_metrics)[[1]]
     tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/")
-
+    # Save metric names (only done once)
     if (!dir.exists(tmp_dir)) {
       dir.create(tmp_dir, recursive = TRUE)
       saveRDS(metric_names, paste0(tmp_dir, "metric_names.Rds"))
-      data$hcst$data <- NULL
-      saveRDS(data$hcst, paste0(tmp_dir, "skill_metadata.Rds"))
     }
     # Add 'metric' dimension to array and restore attributes
     names(dim(skill_metrics)) <- c("metric", dimensions)
+    # Save s2dv_cube metadata
+    metadata_filename <- "skill_metadata"
+    for (chunk in names(nchunks)) {
+      metadata_filename <- paste0(metadata_filename, "_", chunk, "_", nchunks[[chunk]])
+    }
+    metadata_filename <- paste0(metadata_filename, ".Rds")
+    data$hcst$data <- NULL
+    saveRDS(data$hcst, paste0(tmp_dir, metadata_filename))
   }
   # Return results
   return(skill_metrics)
diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
index e0ffe5d3..20550bdc 100644
--- a/recipes/atomic_recipes/recipe_test_compute.yml
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -56,5 +56,5 @@ Run:
   output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
   startR_workflow:
-    modules: calibration, anomalies, skill # Modules to run inside Compute(), in order
-    chunk_along: {var: 2,time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    modules: skill # Modules to run inside Compute(), in order
+    chunk_along: {longitude: 2, latitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
diff --git a/tools/libs.R b/tools/libs.R
index a67f9549..4d151db1 100644
--- a/tools/libs.R
+++ b/tools/libs.R
@@ -29,7 +29,6 @@ source("tools/data_summary.R")
 source("tools/read_atomic_recipe.R")
 source("tools/write_autosubmit_conf.R")
 source("tools/get_archive.R")
-# source("tools/add_dims.R") # Not sure if necessary yet
 
 # Settings
 options(bitmapType = 'cairo')
diff --git a/tools/retrieve_metadata.R b/tools/retrieve_metadata.R
new file mode 100644
index 00000000..a29528bb
--- /dev/null
+++ b/tools/retrieve_metadata.R
@@ -0,0 +1,78 @@
+retrieve_metadata <- function(tmp_dir, chunks) {
+  # Build metadata file pattern:
+  metadata_file_pattern <- "skill_metadata"
+  for (chunk in sort(names(chunks))) {
+    metadata_file_pattern <- paste0(metadata_file_pattern, "_", chunk, "_*")
+  }
+  metadata_file_pattern <- paste0(metadata_file_pattern, ".Rds")
+  metadata_files <- list.files(path = tmp_dir, pattern = glob2rx(metadata_file_pattern))
+  metadata <- readRDS(paste0(tmp_dir, metadata_files[1]))
+  # Piece together variable info
+  if ("var" %in% names(chunks)) {
+    # $attrs
+    for (i in 2:chunks[["var"]]) {
+      metadata_chunk <- readRDS(paste0(tmp_dir,
+                                       gsub("var_1", paste0("var_", i),
+                                            metadata_files[1])))
+      var_name <- metadata_chunk$attrs$Variable$varName
+      metadata$attrs$Variable$varName <- c(metadata$attrs$Variable$varName,
+                                           var_name)
+      metadata$attrs$Variable$metadata <- c(metadata$attrs$Variable$metadata,
+                                            metadata_chunk$attrs$Variable$metadata[var_name])
+    }
+    # $coords
+    metadata$coords$var <- metadata$attrs$Variable$varName
+    dim(metadata$coords$var) <- c(var = length(metadata$coords$var))
+    attr(metadata$coords$var, "values") <- TRUE
+    attr(metadata$coords$var, "indices") <- FALSE
+    ## TODO: $dims
+  }
+  # Piece together time info
+  if ("time" %in% names(chunks)) {
+    dates_dims <- dim(metadata$attrs$Dates)
+    time_dim_idx <- which(names(dates_dims) == "time")
+    for (i in 2:chunks[["time"]]) {
+      metadata_chunk <- readRDS(paste0(tmp_dir,
+                                       gsub("time_1", paste0("time_", i),
+                                            metadata_files[1])))
+      metadata$attrs$Dates <- c(metadata$attrs$Dates, metadata_chunk$attrs$Dates)
+    }
+    time_dim_length <- length(metadata$attrs$Dates)/prod(dates_dims[-time_dim_idx])
+    dim(metadata$attrs$Dates) <- c(dates_dims[-time_dim_idx],
+                                   time = time_dim_length)
+    metadata$coords$time <- seq(1, time_dim_length)
+    dim(metadata$coords$time) <- c(time = time_dim_length)
+    attr(metadata$coords$time, "indices") <- TRUE
+    ## TODO: Add to 'dims'
+  }
+  # Piece together lon/lat info
+  if ("latitude" %in% names(chunks)) {
+    # $attrs
+    ## TODO: Preserve attributes
+    for (i in 2:chunks[["latitude"]]) {
+      metadata_chunk <- readRDS(paste0(tmp_dir,
+                                       gsub("latitude_1", paste0("latitude_", i),
+                                            metadata_files[1])))
+      metadata$attrs$Variable$metadata$latitude <- c(metadata$attrs$Variable$metadata$latitude,
+                                                     metadata_chunk$attrs$Variable$metadata$latitude)
+    }
+    # $coords
+    metadata$coords$latitude <- metadata$attrs$Variable$metadata$latitude
+    dim(metadata$coords$latitude) <- c(latitude = length(metadata$coords$latitude))
+  }
+  if ("longitude" %in% names(chunks)) {
+    # $attrs
+    ## TODO: Preserve attributes
+    for (i in 2:chunks[["longitude"]]) {
+      metadata_chunk <- readRDS(paste0(tmp_dir,
+                                       gsub("longitude_1", paste0("longitude_", i),
+                                            metadata_files[1])))
+      metadata$attrs$Variable$metadata$longitude <- c(metadata$attrs$Variable$metadata$longitude,
+                                                      metadata_chunk$attrs$Variable$metadata$longitude)
+    }
+    # $coords
+    metadata$coords$longitude <- metadata$attrs$Variable$metadata$longitude
+    dim(metadata$coords$longitude) <- c(longitude = length(metadata$coords$longitude))
+  }
+  return(metadata)
+}
-- 
GitLab
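
retrieve_metadata() reassembles a single s2dv_cube metadata skeleton from the per-chunk RDS files, concatenating varName, Dates, latitude and longitude along whichever dimensions were chunked. A minimal usage sketch, assuming a recipe object is in scope and the per-chunk files already exist under tmp_dir:

    # Sketch: piece the chunked metadata back together after Compute().
    source("tools/retrieve_metadata.R")
    tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/")
    metadata <- retrieve_metadata(tmp_dir = tmp_dir,
                                  chunks = recipe$Run$startR_workflow$chunk_along)
    str(metadata$coords)  # reassembled var/time/latitude/longitude coordinates
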
From 0e20aea909d2856135428200bb30a3589dad6f58 Mon Sep 17 00:00:00 2001
From: aho
Date: Tue, 8 Aug 2023 13:12:15 +0200
Subject: [PATCH 31/96] Compute() with autosubmit

---
 .gitignore                                     |  2 +
 build_compute_workflow.R                       | 60 ++++++++++++++++++-
 recipes/atomic_recipes/recipe_test_compute.yml |  8 +--
 3 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index e11ba7d3..1ce7a002 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,5 @@ out-logs/
 modules/Loading/testing_recipes/recipe_decadal_calendartest.yml
 modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml
 conf/vitigeoss-vars-dict.yml
+STARTR_CHUNKING_*/
+# TODO: ignore bigmemory object like a68e_1_2 and a68e_1_2.desc
diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 97ddd2f9..15fabcb3 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -1,5 +1,12 @@
 compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
                              nchunks = chunk_indices) {
+
+  # Load modules
+  source("modules/Preprocessing/Preprocessing.R")
+  source("modules/Calibration/Calibration.R")
+  source("modules/Anomalies/Anomalies.R")
+  source("modules/Skill/Skill.R")
+
   modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
                               ", | |,")[[1]])
   # Create data list
@@ -15,6 +22,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
                    class(x) <- "startR_array"
                    as.s2dv_cube(x)
                  })
+
   for (cube_idx in seq(1:length(data))) {
     data[[cube_idx]]$attrs$Variable$varName <- var_name
   }
@@ -115,15 +123,63 @@ run_compute_workflow <- function(recipe, data) {
   # ---------------------------------------------------------------------------
   # Step 3: Generate the Step and call Compute()
   ## TODO: Add use attributes
+  # Libraries to be loaded when running Compute()
+  libs_load <- c("log4r", "docopt", "ClimProjDiags", "multiApply", "yaml", "s2dv",
+                 "abind", "easyNCDF", "CSTools", "lubridate", "PCICt",
+                 "RColorBrewer", "configr", "sf", "ggplot2", "rnaturalearth",
+                 "cowplot", "stringr", "pryr")
+
   step <- Step(fun = compute_workflow,
                target_dims = target_dims,
-               output_dims = output_dims,
-               use_attributes = input_attributes)
+               output_dims = output_dims,
+               use_attributes = input_attributes, use_libraries = libs_load)
   wf <- AddStep(inputs = inputs,
                 step = step,
                 recipe = recipe)
+
+  #---------------------------------------
+  # Compute locally
+#  res <- Compute(wf$hcst,
+#                 chunks = recipe$Run$startR_workflow$chunk_along)
+
+  #---------------------------------------
+  # Compute with autosubmit (NOTE: ssh to AS machine to run this!)
+  run_on <- 'local'
+  if (run_on == 'local') {
+    r_module_ver <- "R/4.1.2-foss-2015a-bare"
+    cdo_module_ver <- "CDO/1.9.8-foss-2015a"
+  } else if (run_on == 'nord3') {
+    r_module_ver <- "R/4.1.2-foss-2019b"
+    cdo_module_ver <- "CDO/1.9.8-foss-2019b"
+  }
+  #NOTE: autosubmit_suite_dir can be anywhere
+  autosubmit_suite_dir <- recipe$Run$code_dir #"/home/Earth/aho/startR_local_autosubmit/"
+  #TODO: Review the core/thread setting
+  #TODO: Add more parameters in recipe, e.g., expid, hpc_user, wallclock
   res <- Compute(wf$hcst,
-                 chunks = recipe$Run$startR_workflow$chunk_along)
+                 chunks = recipe$Run$startR_workflow$chunk_along,
+                 threads_compute = recipe$Analysis$Workflow$ncores,
+                 threads_load = 2,
+                 cluster = list(
+                   queue_host = run_on, # name in platforms.yml
+                   r_module = r_module_ver,
+                   CDO_module = cdo_module_ver,
+                   autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3',
+                   cores_per_job = recipe$Analysis$Workflow$ncores,
+                   job_wallclock = '01:00:00',
+                   max_jobs = prod(unlist(recipe$Run$startR_workflow$chunk_along)),
+                   polling_period = 10,
+                   extra_queue_params = list('#SBATCH --constraint=medmem', '#SBATCH --exclusive'),
+                   expid = "a68e", # to be changed!
+                   hpc_user = "bsc32734", # to be changed!
+                   run_dir = recipe$Run$code_dir
+                 ),
+                 workflow_manager = 'autosubmit', # 'ecFlow'
+                 autosubmit_suite_dir = autosubmit_suite_dir,
+                 autosubmit_server = NULL, #or 'bscesautosubmit02',
+                 wait = TRUE
+                 )
+#---------------------------------------
 
   info(recipe$Run$logger,
        "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####")
diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
index e0ffe5d3..2bd19f6e 100644
--- a/recipes/atomic_recipes/recipe_test_compute.yml
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -26,7 +26,7 @@ Analysis:
     lonmax: 20
   Regrid:
     method: bilinear
-    type: to_system
+    type: to_system #to_reference #'r360x181'
   Workflow:
@@ -53,8 +53,8 @@ Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
-  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
+  output_dir: /esarchive/scratch/aho/tmp/
+  code_dir: /esarchive/scratch/aho/git/auto-s2s/
   startR_workflow:
     modules: calibration, anomalies, skill # Modules to run inside Compute(), in order
-    chunk_along: {var: 2,time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    chunk_along: {var: 1,time: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
-- 
GitLab
paste0("latitude and longitude cannot be chunking dimensions if", + "any of the following modules is in the startR workflow: ", + paste(MODULES_USING_LATLON, collapse = ", "), ".")) + error_status <- T + } + if ("time" %in% names(recipe$Run$startR_workflow$chunk_along) && + any(modules %in% MODULES_USING_TIME)) { + error(recipe$Run$logger, + paste0("time cannot be a chunking dimension if any of the ", + "following modules is in the startR workflow: ", + paste(MODULES_USING_TIME, collapse = ", "), ".")) + error_status <- T + } + if ("var" %in% names(recipe$Run$startR_workflow$chunk_along) && + any(modules %in% MODULES_USING_VAR)) { + error(recipe$Run$logger, + paste0("var cannot be a chunking dimension if any of the ", + "following modules is in the startR workflow: ", + paste(MODULES_USING_VAR, collapse = ", "), ".")) + error_status <- T + } + } + } + # --------------------------------------------------------------------- # WORKFLOW CHECKS # --------------------------------------------------------------------- -- GitLab From f78738b0ea691eb2de6f218b4a88e2b51210ecc2 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 16:02:03 +0200 Subject: [PATCH 33/96] Correct ncores; add init_commands --- build_compute_workflow.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 15fabcb3..e60688ba 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -143,8 +143,9 @@ run_compute_workflow <- function(recipe, data) { # chunks = recipe$Run$startR_workflow$chunk_along) #--------------------------------------- - # Compute with autosubmit (NOTE: ssh to AS machine to run this!) - run_on <- 'local' + # Compute with autosubmit + #NOTE: 'local' is not recommended; only use it for small tests. 
From f78738b0ea691eb2de6f218b4a88e2b51210ecc2 Mon Sep 17 00:00:00 2001
From: aho
Date: Tue, 8 Aug 2023 16:02:03 +0200
Subject: [PATCH 33/96] Correct ncores; add init_commands

---
 build_compute_workflow.R | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 15fabcb3..e60688ba 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -143,8 +143,9 @@ run_compute_workflow <- function(recipe, data) {
 #                 chunks = recipe$Run$startR_workflow$chunk_along)
 
   #---------------------------------------
-  # Compute with autosubmit (NOTE: ssh to AS machine to run this!)
-  run_on <- 'local'
+  # Compute with autosubmit
+  #NOTE: 'local' is not recommended; only use it for small tests.
+  run_on <- 'nord3'
   if (run_on == 'local') {
     r_module_ver <- "R/4.1.2-foss-2015a-bare"
     cdo_module_ver <- "CDO/1.9.8-foss-2015a"
@@ -158,14 +159,15 @@ run_compute_workflow <- function(recipe, data) {
   #TODO: Add more parameters in recipe, e.g., expid, hpc_user, wallclock
   res <- Compute(wf$hcst,
                  chunks = recipe$Run$startR_workflow$chunk_along,
-                 threads_compute = recipe$Analysis$Workflow$ncores,
+                 threads_compute = recipe$Analysis$ncores,
                  threads_load = 2,
                  cluster = list(
                    queue_host = run_on, # name in platforms.yml
                    r_module = r_module_ver,
                    CDO_module = cdo_module_ver,
+                   init_commands = list('module load GDAL PROJ GEOS'),
                    autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3',
-                   cores_per_job = recipe$Analysis$Workflow$ncores,
+                   cores_per_job = recipe$Analysis$ncores,
                    job_wallclock = '01:00:00',
                    max_jobs = prod(unlist(recipe$Run$startR_workflow$chunk_along)),
                    polling_period = 10,
-- 
GitLab

From 66f2d31e25670fe31b5fd6b393d1f4f76569b968 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 9 Aug 2023 12:43:29 +0200
Subject: [PATCH 34/96] missing parenthesis

---
 build_compute_workflow.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 936c0bff..ac882507 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -152,7 +152,7 @@ run_compute_workflow <- function(recipe, data) {
                    chunks = recipe$Run$startR_workflow$chunk_along,
                    threads_compute = recipe$Analysis$ncores,
                    threads_load = 2)
-  } else if (run_on %in% c('nord3', 'as_machine') {
+  } else if (run_on %in% c('nord3', 'as_machine')) {
     #NOTE: autosubmit_suite_dir can be anywhere
     ## TODO: Change code_dir to autosubmit dir when necessary
     autosubmit_suite_dir <- recipe$Run$code_dir
-- 
GitLab

From 1e692b81ecb29ad78468a6a40f8dc3880ae56033 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 9 Aug 2023 12:47:22 +0200
Subject: [PATCH 35/96] Remove threads_compute from local call

---
 build_compute_workflow.R | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index ac882507..b12b9b52 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -149,9 +149,7 @@ run_compute_workflow <- function(recipe, data) {
   if (run_on == 'local') {
     # Compute locally, in serial
     res <- Compute(wf$hcst,
-                   chunks = recipe$Run$startR_workflow$chunk_along,
-                   threads_compute = recipe$Analysis$ncores,
-                   threads_load = 2)
+                   chunks = recipe$Run$startR_workflow$chunk_along)
   } else if (run_on %in% c('nord3', 'as_machine')) {
     #NOTE: autosubmit_suite_dir can be anywhere
     ## TODO: Change code_dir to autosubmit dir when necessary
     autosubmit_suite_dir <- recipe$Run$code_dir
-- 
GitLab

From 372e4a1c13a7f1a36dad9bbfd2eea31638629f3e Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Thu, 10 Aug 2023 11:42:55 +0200
Subject: [PATCH 36/96] test locally

---
 recipes/atomic_recipes/recipe_test_compute.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
index 93dbcf8b..10b7b875 100644
--- a/recipes/atomic_recipes/recipe_test_compute.yml
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -56,7 +56,7 @@ Run:
   output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
   startR_workflow:
-    modules: anomalies calibration skill # Modules to run inside Compute(), in order
+    modules: calibration skill # Modules to run inside Compute(), in order
     chunk_along: {time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
     run_on: local # options: local, as_machine, nord3
     chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local'
-- 
GitLab

From 321ba66cba506a47cc5e5c49ca3dc1ed547d33f6 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 6 Sep 2023 14:27:53 +0200
Subject: [PATCH 37/96] Remove sourced startR functions after release of
 v2.3.0

---
 modules/Loading/Loading.R | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 04e8ac38..7f59669e 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -6,10 +6,6 @@ source("modules/Loading/R/get_timeidx.R")
 source("modules/Loading/R/check_latlon.R")
 ## TODO: Move to prepare_outputs.R
 source("tools/libs.R")
-## TODO: Remove when new startR is released:
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Start.R")
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/zzz.R")
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Utils.R")
 
 load_datasets <- function(recipe, retrieve = T) {
 
-- 
GitLab
From 9e8d499c10ca6c2002e89242c534eef76c5f5f22 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 2 Oct 2023 09:48:51 +0200
Subject: [PATCH 38/96] Copy seasonal loading function

---
 modules/Loading/R/load_seasonal.R | 452 ++++++++++++++++++++++++++++++
 1 file changed, 452 insertions(+)
 create mode 100644 modules/Loading/R/load_seasonal.R

diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R
new file mode 100644
index 00000000..7f59669e
--- /dev/null
+++ b/modules/Loading/R/load_seasonal.R
@@ -0,0 +1,452 @@
+## TODO: remove paths to personal scratchs
+source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R")
+# Load required libraries/funs
+source("modules/Loading/R/dates2load.R")
+source("modules/Loading/R/get_timeidx.R")
+source("modules/Loading/R/check_latlon.R")
+## TODO: Move to prepare_outputs.R
+source("tools/libs.R")
+
+load_datasets <- function(recipe, retrieve = T) {
+
+  # retrieve <- F
+  # -------------------------------------------
+  # Set params -----------------------------------------
+
+  hcst.inityear <- recipe$Analysis$Time$hcst_start
+  hcst.endyear <- recipe$Analysis$Time$hcst_end
+  lats.min <- recipe$Analysis$Region$latmin
+  lats.max <- recipe$Analysis$Region$latmax
+  lons.min <- recipe$Analysis$Region$lonmin
+  lons.max <- recipe$Analysis$Region$lonmax
+  ref.name <- recipe$Analysis$Datasets$Reference$name
+  exp.name <- recipe$Analysis$Datasets$System$name
+
+  variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
+  store.freq <- recipe$Analysis$Variables$freq
+
+  # get sdates array
+  ## LOGGER: Change dates2load to extract logger from recipe?
+  sdates <- dates2load(recipe, recipe$Run$logger)
+
+  idxs <- NULL
+  idxs$hcst <- get_timeidx(sdates$hcst,
+                           recipe$Analysis$Time$ftime_min,
+                           recipe$Analysis$Time$ftime_max,
+                           time_freq=store.freq)
+
+  if (!(is.null(sdates$fcst))) {
+    idxs$fcst <- get_timeidx(sdates$fcst,
+                             recipe$Analysis$Time$ftime_min,
+                             recipe$Analysis$Time$ftime_max,
+                             time_freq=store.freq)
+  }
+
+  ## TODO: Examine this verifications part, verify if it's necessary
+  # stream <- verifications$stream
+  # sdates <- verifications$fcst.sdate
+
+  ## TODO: define fcst.name
+  ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name
+
+  # get esarchive datasets dict:
+  ## TODO: Adapt to 'filesystem' option in recipe
+  archive <- read_yaml("conf/archive.yml")$esarchive
+  exp_descrip <- archive$System[[exp.name]]
+
+  freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]])
+  reference_descrip <- archive$Reference[[ref.name]]
+  freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]])
+  obs.dir <- reference_descrip$src
+  fcst.dir <- exp_descrip$src
+  hcst.dir <- exp_descrip$src
+  fcst.nmember <- exp_descrip$nmember$fcst
+  hcst.nmember <- exp_descrip$nmember$hcst
+
+  ## TODO: it is necessary?
+  ##if ("accum" %in% names(reference_descrip)) {
+  ##  accum <- unlist(reference_descrip$accum[store.freq][[1]])
+  ##} else {
+  ##  accum <- FALSE
+  ##}
+
+  var_dir_obs <- reference_descrip[[store.freq]][variable]
+  var_dir_exp <- exp_descrip[[store.freq]][variable]
+
+  # -----------
+  obs.path <- paste0(archive$src,
+                     obs.dir, store.freq, "/$var$", "$var_dir$",
+                     "/$var$_$file_date$.nc")
+
+  hcst.path <- paste0(archive$src,
+                      hcst.dir, store.freq, "/$var$", "$var_dir$",
+                      "$var$_$file_date$.nc")
+
+  fcst.path <- paste0(archive$src,
+                      hcst.dir, store.freq, "/$var$", "$var_dir$",
+                      "/$var$_$file_date$.nc")
+
+  # Define regrid parameters:
+  #-------------------------------------------------------------------
+  regrid_params <- get_regrid_params(recipe, archive)
+
+  # Longitude circular sort and latitude check
+  #-------------------------------------------------------------------
+  circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max)
+
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
+    split_multiselected_dims = TRUE
+  } else {
+    split_multiselected_dims = FALSE
+  }
+
+  # Load hindcast
+  #-------------------------------------------------------------------
+  hcst <- Start(dat = hcst.path,
+                var = variable,
+                var_dir = var_dir_exp,
+                file_date = sdates$hcst,
+                time = idxs$hcst,
+                var_dir_depends = 'var',
+                latitude = values(list(lats.min, lats.max)),
+                latitude_reorder = Sort(),
+                longitude = values(list(lons.min, lons.max)),
+                longitude_reorder = circularsort,
+                transform = regrid_params$fcst.transform,
+                transform_params = list(grid = regrid_params$fcst.gridtype,
+                                        method = regrid_params$fcst.gridmethod),
+                transform_vars = c('latitude', 'longitude'),
+                synonims = list(latitude = c('lat', 'latitude'),
+                                longitude = c('lon', 'longitude'),
+                                ensemble = c('member', 'ensemble')),
+                ensemble = indices(1:hcst.nmember),
+                metadata_dims = 'var', # change to just 'var'?
+                return_vars = list(latitude = 'dat',
+                                   longitude = 'dat',
+                                   time = 'file_date'),
+                split_multiselected_dims = split_multiselected_dims,
+                retrieve = retrieve)
+
+  if (store.freq %in% c("daily_mean", "daily")) {
+    # Adjusts dims for daily case, could be removed if startR allows
+    # multidim split
+    names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear"
+    default_dims <- c(dat = 1, var = 1, sday = 1,
+                      sweek = 1, syear = 1, time = 1,
+                      latitude = 1, longitude = 1, ensemble = 1)
+    default_dims[names(dim(hcst))] <- dim(hcst)
+    dim(hcst) <- default_dims
+    # Change time attribute dimensions
+    default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
+    names(dim(attr(hcst, "Variables")$common$time))[which(names(
+      dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear"
+    default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <-
+      dim(attr(hcst, "Variables")$common$time)
+    dim(attr(hcst, "Variables")$common$time) <- default_time_dims
+  }
+
+  # Adjust dates for models where the time stamp goes into the next month
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
+    attr(hcst, "Variables")$common$time[] <-
+      attr(hcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag)
+  }
+  dates <- attr(hcst, "Variables")$common$time
+  # Convert hcst to s2dv_cube object
+  if (retrieve) {
+    if ("var_dir" %in% names(dim(hcst))) {
+      hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected")
+    }
+    hcst <- as.s2dv_cube(hcst)
+    dim(hcst$attrs$Dates) <- dim(dates)
+  }
+  # Load forecast
+  #-------------------------------------------------------------------
+  if (!is.null(recipe$Analysis$Time$fcst_year)) {
+    # the call uses file_date instead of fcst_syear so that it can work
+    # with the daily case and the current version of startR not allowing
+    # multiple dims split
+
+    fcst <- Start(dat = fcst.path,
+                  var = variable,
+                  var_dir = var_dir_exp,
+                  var_dir_depends = 'var',
+                  file_date = sdates$fcst,
+                  time = idxs$fcst,
+                  latitude = values(list(lats.min, lats.max)),
+                  latitude_reorder = Sort(),
+                  longitude = values(list(lons.min, lons.max)),
+                  longitude_reorder = circularsort,
+                  transform = regrid_params$fcst.transform,
+                  transform_params = list(grid = regrid_params$fcst.gridtype,
+                                          method = regrid_params$fcst.gridmethod),
+                  transform_vars = c('latitude', 'longitude'),
+                  synonims = list(latitude = c('lat', 'latitude'),
+                                  longitude = c('lon', 'longitude'),
+                                  ensemble = c('member', 'ensemble')),
+                  ensemble = indices(1:fcst.nmember),
+                  metadata_dims = 'var',
+                  return_vars = list(latitude = 'dat',
+                                     longitude = 'dat',
+                                     time = 'file_date'),
+                  split_multiselected_dims = split_multiselected_dims,
+                  retrieve = retrieve)
+
+    if (store.freq %in% c("daily_mean", "daily")) {
+      # Adjusts dims for daily case, could be removed if startR allows
+      # multidim split
+      names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear"
+      default_dims <- c(dat = 1, var = 1, sday = 1,
+                        sweek = 1, syear = 1, time = 1,
+                        latitude = 1, longitude = 1, ensemble = 1)
+      default_dims[names(dim(fcst))] <- dim(fcst)
+      dim(fcst) <- default_dims
+      # Change time attribute dimensions
+      ## TODO: Is this still necessary?
+      default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
+      names(dim(attr(fcst, "Variables")$common$time))[which(names(
+        dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear"
+      default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <-
+        dim(attr(fcst, "Variables")$common$time)
+      dim(attr(fcst, "Variables")$common$time) <- default_time_dims
+    } else if (recipe$Analysis$Variables$freq == "monthly_mean") {
+      attr(fcst, "Variables")$common$time[] <-
+        attr(fcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag)
+    }
+    # Convert fcst to s2dv_cube
+    if (retrieve) {
+      if ("var_dir" %in% names(dim(fcst))) {
+        fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected")
+      }
+      fcst <- as.s2dv_cube(fcst)
+    }
+  } else {
+    fcst <- NULL
+  }
+
+  # Load reference
+  #-------------------------------------------------------------------
+
+  # Obtain dates and date dimensions from the loaded hcst data to make sure
+  # the corresponding observations are loaded correctly.
+
+  # Separate Start() call for monthly vs daily data
+  if (store.freq == "monthly_mean") {
+    dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m")
+    dim(dates_file) <- dim(dates)
+
+    obs <- Start(dat = obs.path,
+                 var = variable,
+                 var_dir = var_dir_obs,
+                 var_dir_depends = 'var',
+                 file_date = dates_file,
+                 latitude = values(list(lats.min, lats.max)),
+                 latitude_reorder = Sort(),
+                 longitude = values(list(lons.min, lons.max)),
+                 longitude_reorder = circularsort,
+                 transform = regrid_params$obs.transform,
+                 transform_params = list(grid = regrid_params$obs.gridtype,
+                                         method = regrid_params$obs.gridmethod),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = 'var',
+                 return_vars = list(latitude = 'dat',
+                                    longitude = 'dat',
+                                    time = 'file_date'),
+                 split_multiselected_dims = TRUE,
+                 retrieve = retrieve)
+
+  } else if (store.freq %in% c("daily_mean", "daily")) {
+
+    # Get year and month for file_date
+    dates_file <- sapply(dates, format, '%Y%m')
+    dim(dates_file) <- dim(dates)
+    # Set hour to 12:00 to ensure correct date retrieval for daily data
+    lubridate::hour(dates) <- 12
+    lubridate::minute(dates) <- 00
+    # Restore correct dimensions
+    dim(dates) <- dim(dates_file)
+
+    obs <- Start(dat = obs.path,
+                 var = variable,
+                 var_dir = var_dir_obs,
+                 var_dir_depends = 'var',
+                 file_date = sort(unique(dates_file)),
+                 time = dates,
+                 time_var = 'time',
+                 time_across = 'file_date',
+                 merge_across_dims = TRUE,
+                 merge_across_dims_narm = TRUE,
+                 latitude = values(list(lats.min, lats.max)),
+                 latitude_reorder = Sort(),
+                 longitude = values(list(lons.min, lons.max)),
+                 longitude_reorder = circularsort,
+                 transform = regrid_params$obs.transform,
+                 transform_params = list(grid = regrid_params$obs.gridtype,
+                                         method = regrid_params$obs.gridmethod),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = 'var',
+                 return_vars = list(latitude = 'dat',
+                                    longitude = 'dat',
+                                    time = 'file_date'),
+                 split_multiselected_dims = TRUE,
+                 retrieve = retrieve)
+  }
+
+  ## TODO: This part belongs to the pre-processing module
+  # Adds ensemble dim to obs (for consistency with hcst/fcst)
+  default_dims <- c(dat = 1, var = 1, sday = 1,
+                    sweek = 1, syear = 1, time = 1,
+                    latitude = 1, longitude = 1, ensemble = 1)
+  if (retrieve) {
+    dim(obs) <- c(dim(obs), "ensemble" = 1)
+    # Remove var_dir dimension
+    if ("var_dir" %in% names(dim(obs))) {
+      obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected")
+    }
+    # Convert obs to s2dv_cube
+    obs <- as.s2dv_cube(obs)
+  }
+
+  # Checks and data summary (only if retrieve = TRUE)
+  # ---------------------------------------------------------------------------
+  ## TODO: See if these checks can be performed on the startR_cube metadata
+  if (retrieve) {
+    # Check for consistency between hcst and obs grid
+    if (!(recipe$Analysis$Regrid$type == 'none')) {
+      if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
+        lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
+        error(recipe$Run$logger, lat_error_msg)
+        hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
+                               "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
+        info(recipe$Run$logger, hcst_lat_msg)
+        obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
+                              "; Last obs lat: ", obs$lat[length(obs$lat)])
+        info(recipe$Run$logger, obs_lat_msg)
+        stop("hcst and obs don't share the same latitudes.")
+      }
+      if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
+        lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
+        error(recipe$Run$logger, lon_error_msg)
+        hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
+                               "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
+        info(recipe$Run$logger, hcst_lon_msg)
+        obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
+                              "; Last obs lon: ", obs$lon[length(obs$lon)])
+        info(recipe$Run$logger, obs_lon_msg)
+        stop("hcst and obs don't share the same longitudes.")
+      }
+    }
+    # Unit manipulation and conversion:
+    # Remove negative values in accumulative variables
+    dictionary <- read_yaml("conf/variable-dictionary.yml")
+    for (var_idx in 1:length(variable)) {
+      var_name <- variable[var_idx]
+      if (dictionary$vars[[var_name]]$accum) {
+        info(recipe$Run$logger,
+             paste0("Accumulated variable ", var_name,
+                    ": setting negative values to zero."))
+        # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data,
+        #                                                  along = "var",
+        #                                                  indices = var_idx, F), 0)
+        obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0
+        hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0
+        if (!is.null(fcst)) {
+          fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0
+        }
+      }
+
+      # Convert prlr from m/s to mm/day
+      ## TODO: Make a unit conversion function
+      if (variable[[var_idx]] == "prlr") {
+        # Verify that the units are m/s and the same in obs and hcst
+        if (((obs$attrs$Variable$metadata[[var_name]]$units == "m s-1") ||
+             (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) &&
+            ((hcst$attrs$Variable$metadata[[var_name]]$units == "m s-1") ||
+             (hcst$attrs$Variable$metadata[[var_name]]$units == "m s**-1"))) {
+          info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.")
+          obs$data[, var_idx, , , , , , , ] <-
+            obs$data[, var_idx, , , , , , , ]*86400*1000
+          obs$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+          hcst$data[, var_idx, , , , , , , ] <-
+            hcst$data[, var_idx, , , , , , , ]*86400*1000
+          hcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+          if (!is.null(fcst)) {
+            fcst$data[, var_idx, , , , , , , ] <-
+              fcst$data[, var_idx, , , , , , , ]*86400*1000
+            fcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+          }
+        }
+      }
+    }
+    # Compute anomalies if requested
+    # Print a summary of the loaded data for the user, for each object
+    if (recipe$Run$logger$threshold <= 2) {
+      data_summary(hcst, recipe)
+      data_summary(obs, recipe)
+      if (!is.null(fcst)) {
+        data_summary(fcst, recipe)
+      }
+    }
+  }
+
+  info(recipe$Run$logger,
+       "##### DATA LOADING COMPLETED SUCCESSFULLY #####")
+
+  ############################################################################
+  #
+  # CHECKS ON MISSING FILES
+  #
+  ############################################################################
+
+  #obs.NA_dates.ind <- Apply(obs,
+  #                          fun=(function(x){ all(is.na(x))}),
+  #                          target_dims=c('time', 'latitude', 'longitude'))[[1]]
+  #obs.NA_dates <- dates_file[obs.NA_dates.ind]
+  #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)]
+  #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_",
+  #                       freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc")
+  #
+  #if (any(is.na(hcst))){
+  #  fatal(recipe$Run$logger,
+  #        paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ",
+  #              " ################################################# ",
+  #              " ###### MISSING FILES #### ",
+  #              " ################################################# ",
+  #              "hcst files:",
+  #              hcst.NA_files,
+  #              " ################################################# ",
+  #              " ################################################# ",
+  #              sep="\n"))
+  #  quit(status = 1)
+  #}
+  #
+  #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){
+  #  fatal(recipe$logger,
+  #        paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ",
+  #              " ################################################# ",
+  #              " ###### MISSING FILES #### ",
+  #              " ################################################# ",
+  #              "obs files:",
+  #              obs.NA_files,
+  #              " ################################################# ",
+  #              " ################################################# ",
+  #              sep="\n"))
+  #  quit(status=1)
+  #}
+  #
+  #info(recipe$logger,
+  #     "######### DATA LOADING COMPLETED SUCCESFULLY ##############")
+
+  ############################################################################
+  ############################################################################
+
+  return(list(hcst = hcst, fcst = fcst, obs = obs))
+
+}
-- 
GitLab

From 21d46fae21c8cd09b941a9dfa27712560bc77de7 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 2 Oct 2023 13:11:34 +0200
Subject: [PATCH 39/96] Remove sourcing of startR dev branch

---
 tools/libs.R | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/libs.R b/tools/libs.R
index c8dc6e13..c3de4208 100644
--- a/tools/libs.R
+++ b/tools/libs.R
@@ -1,15 +1,15 @@
 # Libraries
 library(log4r)
 library(docopt)
-# library(startR)
+library(startR)
 #source all the .R files
-path <- "/esarchive/scratch/vagudets/repos/startR/R/"
-ff <- lapply(list.files(path), function(x) paste0(path, x))
-invisible(lapply(ff, source))
+# path <- "/esarchive/scratch/vagudets/repos/startR/R/"
+# ff <- lapply(list.files(path), function(x) paste0(path, x))
+# invisible(lapply(ff, source))
 # load all the libraries
-lib <- c('parallel', 'abind', 'bigmemory', 'future', 'multiApply',
-         'PCICt', 'ClimProjDiags', 'ncdf4', 'plyr')
-invisible(lapply(lib, library, character.only = TRUE))
+# lib <- c('parallel', 'abind', 'bigmemory', 'future', 'multiApply',
+#          'PCICt', 'ClimProjDiags', 'ncdf4', 'plyr')
+# invisible(lapply(lib, library, character.only = TRUE))
 library(ClimProjDiags)
 library(multiApply)
 library(yaml)
-- 
GitLab
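
With the loader accepting retrieve, the full retrieve = FALSE round trip now reads: Start() returns lightweight pointers, Compute() feeds bare array chunks to compute_workflow(), and each chunk is promoted back to an s2dv_cube before the modules run. A condensed sketch of that conversion step, assuming a recipe object and the functions defined in the patches above:

    # Sketch: the retrieve = FALSE round trip in miniature.
    data <- load_datasets(recipe, retrieve = F)  # startR pointers, no data loaded
    # ...inside the Compute() chunk function, each bare array is rebuilt:
    chunk_to_cube <- function(x) {
      class(x) <- "startR_array"  # restore the class that Compute() strips
      as.s2dv_cube(x)             # same conversion compute_workflow() applies
    }
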
40/96] Adjust Units to add retrieve, plus other changes --- build_compute_workflow.R | 9 ++++++--- example_scripts/test_compute.R | 7 ++++--- modules/Calibration/Calibration.R | 1 - modules/Loading/Loading.R | 2 +- modules/Units/Units.R | 13 +++++++++---- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index b12b9b52..d0806a4a 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -2,6 +2,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, nchunks = chunk_indices) { # Load modules source("modules/Preprocessing/Preprocessing.R") + source("modules/Units/Units.R") source("modules/Calibration/Calibration.R") source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") @@ -28,14 +29,16 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, # Define other outputs skill_metrics <- NULL probabilities <- NULL + # Change units + data <- Units(recipe, data, retrieve = F) # Loop over the modules for (module in modules) { if (module == "calibration") { - data <- calibrate_datasets(recipe, data, retrieve = F) + data <- Calibration(recipe, data, retrieve = F) } else if (module == "anomalies") { - data <- compute_anomalies(recipe, data, retrieve = F) + data <- Anomalies(recipe, data, retrieve = F) } else if (module == "skill") { - skill_metrics <- compute_skill_metrics(recipe, data, retrieve = F, nchunks = nchunks) + skill_metrics <- Skill(recipe, data, retrieve = F, nchunks = nchunks) } } ## TODO: Define what to return depending the modules called + the recipe diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R index 963ed021..01aba3fc 100644 --- a/example_scripts/test_compute.R +++ b/example_scripts/test_compute.R @@ -1,4 +1,5 @@ source("modules/Loading/Loading.R") +source("modules/Units/Units.R") source("modules/Preprocessing/Preprocessing.R") source("modules/Calibration/Calibration.R") source("modules/Anomalies/Anomalies.R") @@ -11,11 +12,11 @@ recipe_file <- "recipes/atomic_recipes/recipe_test_compute.yml" recipe <- prepare_outputs(recipe_file) # Load datasets -data <- load_datasets(recipe, retrieve = F) +data <- Loading(recipe, retrieve = F) # Run workflow with compute result <- run_compute_workflow(recipe, data) # Plot data -plot_data(recipe, result$data, result$skill, - significance = T) +Visualization(recipe, result$data, result$skill, + significance = T) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index c974939a..06c2f6a6 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -11,7 +11,6 @@ Calibration <- function(recipe, data, retrieve = TRUE) { method <- tolower(recipe$Analysis$Workflow$Calibration$method) - browser() if (method == "raw") { if (retrieve) { warn(recipe$Run$logger, diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index b90887d7..5453e9ae 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -7,7 +7,7 @@ Loading <- function(recipe, retrieve = TRUE) { time_horizon <- tolower(recipe$Analysis$Horizon) if (time_horizon == "seasonal") { source("modules/Loading/R/load_seasonal.R") - data <- load_seasonal(recipe, retrieve = TRUE) + data <- load_seasonal(recipe, retrieve = retrieve) } else if (time_horizon == "decadal") { source("modules/Loading/R/load_decadal.R") data <- load_decadal(recipe) diff --git a/modules/Units/Units.R b/modules/Units/Units.R index a143c0a2..eb527e9e 100644 --- a/modules/Units/Units.R +++ b/modules/Units/Units.R 
@@ -9,7 +9,7 @@ source("modules/Units/R/transform_units_pressure.R") # $freq # $flux (if precipitation) # $units (optional) -Units <- function(recipe, data) { +Units <- function(recipe, data, retrieve = T) { # from recipe read the user defined units # from data the original units # deaccumulate option for CDS accumulated variables? @@ -59,12 +59,15 @@ Units <- function(recipe, data) { ## if "/" appears substitute by -1 in at the end of next unit. How to know? # Check if all units are equal (if so, no conversion needed; else convert) if (length(unique(c(unlist(orig_units), user_units))) == length(user_units)) { - info(recipe$Run$logger, "##### NO UNIT CONVERSION NEEDED #####") + if (retrieve) { + info(recipe$Run$logger, "##### NO UNIT CONVERSION NEEDED #####") + } res <- data } else { if (recipe$Run$filesystem == 'esarchive' && (sum(!sapply(unique(orig_units), is.null)) != 1)) { - warn(recipe$Run$logger, + ## TODO: Warning + warn(recipe$Run$logger, paste("The units in", paste(names(orig_units), collapse = ', '), "were not all equal and will be uniformized.", "If this is not expected, please contact the ES data team.")) @@ -86,7 +89,9 @@ Units <- function(recipe, data) { } return(result) }, simplify = TRUE) # instead of lapply to get the named list directly - info(recipe$Run$logger, "##### UNIT CONVERSION COMPLETE #####") + if (retrieve) { + info(recipe$Run$logger, "##### UNIT CONVERSION COMPLETE #####") + } } return(res) } -- GitLab From c362ef35d7e212cb16044e2d7c7095d37ff0b4ed Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 6 Oct 2023 16:12:05 +0200 Subject: [PATCH 41/96] Use asplit or Apply to make dimensions flexible --- modules/Units/R/transform_units_temperature.R | 53 ++++++++++++++++--- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/modules/Units/R/transform_units_temperature.R b/modules/Units/R/transform_units_temperature.R index 366f0d34..1735984a 100644 --- a/modules/Units/R/transform_units_temperature.R +++ b/modules/Units/R/transform_units_temperature.R @@ -1,17 +1,54 @@ transform_units_temperature <- function(data, original_units, new_units, var_name, var_index = 1, var_dim = "var") { - ## TODO: Hard-coded subsetting + + # Method 1: use asplit + data_arr <- data[[1]]$data + data_list <- asplit(data_arr, which(names(dim(data_arr)) == var_dim)) if (original_units == 'c' & new_units == 'k') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] + 273.15 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'K' + data_list[[var_index]] <- data_list[[var_index]] + 273.15 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "K" } if (original_units == 'k' & new_units == 'c') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] - 273.15 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "C" - + data_list[[var_index]] <- data_list[[var_index]] - 273.15 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "C" } + + # Combine list back to array + data_arr <- array(unlist(data_list), + dim = c(dim(data_list[[1]]), var = length(data_list))) + data[[1]]$data <- aperm(data_arr, match(names(dim(data[[1]]$data)), + names(dim(data_arr)))) + return(data) } + + # Method 2: use Apply + if (original_units == 'c' & new_units == 'k') { + + data[[1]]$data <- multiApply::Apply(data[[1]]$data, + target_dims = var_dim, output_dims = var_dim, + fun = .transform_units_temp, + type = "c_to_k", var_index = var_index)$output1 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "K" + + } + if 
(original_units == 'k' & new_units == 'c') { + data[[1]]$data <- multiApply::Apply(data[[1]]$data, + target_dims = var_dim, output_dims = var_dim, + fun = .transform_units_temp, + type = "k_to_c", var_index = var_index)$output1 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "C" + + } + + .transform_units_temp <- function(data, type, var_index) { + # data: 'var' + if (type == "k_to_c") { + data[var_index] <- data[var_index] - 273.15 + } else if (type == "c_to_k") { + data[var_index] <- data[var_index] + 273.15 + } + return(data) + } + -- GitLab From 1af44013b0b6c2636d95212f5b34c8a1aa335d82 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 9 Oct 2023 12:39:45 +0200 Subject: [PATCH 42/96] remove Apply() approach --- modules/Units/R/transform_units_temperature.R | 31 ------------------- 1 file changed, 31 deletions(-) diff --git a/modules/Units/R/transform_units_temperature.R b/modules/Units/R/transform_units_temperature.R index 1735984a..eebd30bc 100644 --- a/modules/Units/R/transform_units_temperature.R +++ b/modules/Units/R/transform_units_temperature.R @@ -2,7 +2,6 @@ transform_units_temperature <- function(data, original_units, new_units, var_name, var_index = 1, var_dim = "var") { - # Method 1: use asplit data_arr <- data[[1]]$data data_list <- asplit(data_arr, which(names(dim(data_arr)) == var_dim)) if (original_units == 'c' & new_units == 'k') { @@ -22,33 +21,3 @@ transform_units_temperature <- function(data, original_units, new_units, return(data) } - - # Method 2: use Apply - if (original_units == 'c' & new_units == 'k') { - - data[[1]]$data <- multiApply::Apply(data[[1]]$data, - target_dims = var_dim, output_dims = var_dim, - fun = .transform_units_temp, - type = "c_to_k", var_index = var_index)$output1 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "K" - - } - if (original_units == 'k' & new_units == 'c') { - data[[1]]$data <- multiApply::Apply(data[[1]]$data, - target_dims = var_dim, output_dims = var_dim, - fun = .transform_units_temp, - type = "k_to_c", var_index = var_index)$output1 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "C" - - } - - .transform_units_temp <- function(data, type, var_index) { - # data: 'var' - if (type == "k_to_c") { - data[var_index] <- data[var_index] - 273.15 - } else if (type == "c_to_k") { - data[var_index] <- data[var_index] + 273.15 - } - return(data) - } - -- GitLab From 230ea62084eb9b6676daa65b715d8cccbc63db75 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 10 Oct 2023 12:07:35 +0200 Subject: [PATCH 43/96] Make dimensions flexible --- .../Units/R/transform_units_precipitation.R | 75 +++++++++---------- modules/Units/R/transform_units_pressure.R | 24 +++--- modules/Units/R/transform_units_temperature.R | 5 +- 3 files changed, 52 insertions(+), 52 deletions(-) diff --git a/modules/Units/R/transform_units_precipitation.R b/modules/Units/R/transform_units_precipitation.R index a24ef851..5cc1e812 100644 --- a/modules/Units/R/transform_units_precipitation.R +++ b/modules/Units/R/transform_units_precipitation.R @@ -1,80 +1,70 @@ transform_units_precipitation <- function(data, original_units, new_units, var_name, freq, flux = FALSE, ncores = NULL, var_index = 1) { - ## TODO: Hard-coded subsetting + ## TODO consider higher frequencies (e.g. 
6hourly) ## could create a constant hours <- 24 or hours <- 6 and use the same code + + data_arr <- data[[1]]$data + data_list <- asplit(data_arr, which(names(dim(data_arr)) == "var")) + + if (original_units == "ms-1") { if (new_units == "mm") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 3600 * 24 * 1000 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'mm' + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 * 1000 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "mm" } else if (new_units == "m") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 3600 * 24 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm' + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm' } else if (new_units == "kgm-2s-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 1000 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'kg m-2 s-1' + data_list[[var_index]] <- data_list[[var_index]] * 1000 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'kg m-2 s-1' } else { - stop(paste("Unknown transformation from", original_units, "to", - new_units)) + stop(paste("Unknown transformation from", original_units, "to", new_units)) } } else if (original_units == "mm") { if (new_units == "ms-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] / (3600 * 24 * 1000) + data_list[[var_index]] <- data_list[[var_index]] / (3600 * 24 * 1000) data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm s-1' } else if (new_units == "m") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] / 1000 + data_list[[var_index]] <- data_list[[var_index]] / 1000 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm' } else if (new_units == "kgm-2s-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] / (3600 * 24 ) + data_list[[var_index]] <- data_list[[var_index]] / (3600 * 24) data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'kg m-2 s-1' } else { - stop(paste("Unknown transformation from", original_units, "to", - new_units)) + stop(paste("Unknown transformation from", original_units, "to", new_units)) } } else if (original_units == "m") { if (new_units == "ms-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] / (3600 * 24) + data_list[[var_index]] <- data_list[[var_index]] / (3600 * 24) data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm s-1' } else if (new_units == "mm") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 1000 + data_list[[var_index]] <- data_list[[var_index]] * 1000 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'mm' } else if (new_units == "kgm-2s-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 1000 / (3600 * 24) + data_list[[var_index]] <- data_list[[var_index]] * 1000 / (3600 * 24) data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'kg m-2 s-1' } else { - stop(paste("Unknown transformation from", original_units, "to", - new_units)) + stop(paste("Unknown transformation from", original_units, "to", new_units)) } } else if (original_units == "kgm-2s-1") { if (new_units == "ms-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , 
, ] / 1000 + data_list[[var_index]] <- data_list[[var_index]] / 1000 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm s-1' } else if (new_units == "mm") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 3600 * 24 + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'mm' } else if (new_units == "m") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 3600 * 24 / 1000 + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 / 1000 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm' } else { - stop(paste("Unknown transformation from", original_units, "to", - new_units)) + stop(paste("Unknown transformation from", original_units, "to", new_units)) } } else { stop("Unknown precipitation units transformation") } + if (flux) { data[[1]]$attrs$Variable$metadata[[var_name]]$units <- paste0( data[[1]]$attrs$Variable$metadata[[var_name]]$units, "/day") @@ -84,9 +74,8 @@ transform_units_precipitation <- function(data, original_units, new_units, time_pos <- which(lapply(data[[1]]$attrs$Variable$metadata[[var_name]]$dim, function(x) {x$name}) == 'time') cal <- tolower(data[[1]]$attrs$Variable$metadata[[var_name]]$dim[[time_pos]]$calendar) - data_subset <- Subset(data[[1]]$data, along = "var", indices = var_index, drop = 'selected') - data[[1]]$data[ , var_index, , , , , , , ] <- - Apply(list(data_subset, data[[1]]$attrs$Dates), + data_list[[var_index]] <- + Apply(list(data_list[[var_index]], data[[1]]$attrs$Dates), target_dim = list(c('syear'), c('syear')), extra_info = list(cal = cal, days_in_month = .days_in_month), fun = function(x, y) { @@ -96,9 +85,17 @@ transform_units_precipitation <- function(data, original_units, new_units, }, ncores = ncores)$output1 } } + + # Combine list back to array + data_arr <- array(unlist(data_list), + dim = c(dim(data_list[[1]]), var = length(data_list))) + data[[1]]$data <- aperm(data_arr, match(names(dim(data[[1]]$data)), + names(dim(data_arr)))) + return(data) } + .days_in_month <- function(x, cal) { if (cal %in% c('gregorian', 'standard', 'proleptic_gregorian')) { N_DAYS_IN_MONTHS <- lubridate:::N_DAYS_IN_MONTHS diff --git a/modules/Units/R/transform_units_pressure.R b/modules/Units/R/transform_units_pressure.R index 9712e9fe..58d51b1e 100644 --- a/modules/Units/R/transform_units_pressure.R +++ b/modules/Units/R/transform_units_pressure.R @@ -1,33 +1,37 @@ transform_units_pressure <- function(data, original_units, new_units, var_name, var_index = 1) { - ## TODO: Hard-coded subsetting + data_arr <- data[[1]]$data + data_list <- asplit(data_arr, which(names(dim(data_arr)) == "var")) + if (original_units == 'pa') { + data_list[[var_index]] <- data_list[[var_index]] / 100 if (new_units == 'hpa') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] /100 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'hPa' } else if (new_units == 'mb') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] /100 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'mb' } } else if (original_units == 'hpa') { if (new_units == 'pa') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 100 + data_list[[var_index]] <- data_list[[var_index]] * 100 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "Pa" } else if (new_units == "mb") { 
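## A minimal sketch of the asplit()/aperm() round trip these patches adopt in
## place of hard-coded `[ , var_index, , , , , , , ]` subsetting. The toy
## array and all `ex_` names below are hypothetical, not part of the patch:
ex_arr <- array(1:24, dim = c(dat = 1, var = 2, latitude = 3, longitude = 4))
ex_slices <- asplit(ex_arr, which(names(dim(ex_arr)) == "var")) # one slice per variable
ex_slices[[1]] <- ex_slices[[1]] + 273.15                       # transform one variable only
ex_new <- array(unlist(ex_slices),
                dim = c(dim(ex_slices[[1]]), var = length(ex_slices)))
ex_new <- aperm(ex_new, match(names(dim(ex_arr)), names(dim(ex_new)))) # original dim order
stopifnot(identical(dim(ex_new), dim(ex_arr)))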
data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "mb" } } else if (original_units == "mb") { if (new_units == 'pa') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 100 + data_list[[var_index]] <- data_list[[var_index]] * 100 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "Pa" } else if (new_units == "hPa") { data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "hPa" - } + } } + + # Combine list back to array + data_arr <- array(unlist(data_list), + dim = c(dim(data_list[[1]]), var = length(data_list))) + data[[1]]$data <- aperm(data_arr, match(names(dim(data[[1]]$data)), + names(dim(data_arr)))) + return(data) } diff --git a/modules/Units/R/transform_units_temperature.R b/modules/Units/R/transform_units_temperature.R index eebd30bc..985483f0 100644 --- a/modules/Units/R/transform_units_temperature.R +++ b/modules/Units/R/transform_units_temperature.R @@ -1,9 +1,8 @@ transform_units_temperature <- function(data, original_units, new_units, - var_name, var_index = 1, - var_dim = "var") { + var_name, var_index = 1) { data_arr <- data[[1]]$data - data_list <- asplit(data_arr, which(names(dim(data_arr)) == var_dim)) + data_list <- asplit(data_arr, which(names(dim(data_arr)) == "var")) if (original_units == 'c' & new_units == 'k') { data_list[[var_index]] <- data_list[[var_index]] + 273.15 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "K" -- GitLab From 36d4bb6ab9d0ff4ebb20353e0961e34baf9e0dfa Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 17 Jan 2024 17:00:35 +0100 Subject: [PATCH 44/96] Add TODOs, fix typo --- build_compute_workflow.R | 1 + tools/retrieve_metadata.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index afa17c05..5a5a59df 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -29,6 +29,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, # Define other outputs skill_metrics <- NULL probabilities <- NULL + ## TODO: Not working # Change units # data <- Units(recipe, data, retrieve = F) # Loop over the modules diff --git a/tools/retrieve_metadata.R b/tools/retrieve_metadata.R index a29528bb..7b922103 100644 --- a/tools/retrieve_metadata.R +++ b/tools/retrieve_metadata.R @@ -38,7 +38,7 @@ retrieve_metadata <- function(tmp_dir, chunks) { metadata$attrs$Dates <- c(metadata$attrs$Dates, metadata_chunk$attrs$Dates) } time_dim_length <- length(metadata$attrs$Dates)/prod(dates_dims[-time_dim_idx]) - dim(metadata$attr$Dates) <- c(dates_dims[-time_dim_idx], + dim(metadata$attrs$Dates) <- c(dates_dims[-time_dim_idx], time = time_dim_length) metadata$coords$time <- seq(1, time_dim_length) dim(metadata$coords$time) <- c(time = time_dim_length) -- GitLab From e6cb27f93ce73b4b9d279dcac81414079cb44f8f Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 18 Jan 2024 16:59:14 +0100 Subject: [PATCH 45/96] Attach metadata to skill metrics and adapt plot_skill_metrics() and Visualization() --- build_compute_workflow.R | 4 ++-- example_scripts/test_compute.R | 2 +- modules/Visualization/R/plot_skill_metrics.R | 23 +++++++++++--------- modules/Visualization/Visualization.R | 15 +++++++++---- 4 files changed, 27 insertions(+), 17 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 5a5a59df..2a55282c 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -218,8 +218,8 @@ run_compute_workflow <- function(recipe, data) { names(res$skill) <- metric_list ## TODO: Chunked metadata needs 
to be put back together!!! source("tools/retrieve_metadata.R") - res$metadata <- retrieve_metadata(tmp_dir = tmp_dir, - chunks = recipe$Run$startR_workflow$chunk_along) + res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, + chunks = recipe$Run$startR_workflow$chunk_along) } # --------------------------------------------------------------------------- diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R index 01aba3fc..c82a1719 100644 --- a/example_scripts/test_compute.R +++ b/example_scripts/test_compute.R @@ -18,5 +18,5 @@ data <- Loading(recipe, retrieve = F) result <- run_compute_workflow(recipe, data) # Plot data -Visualization(recipe, result$data, result$skill, +Visualization(recipe, data = result$data, skill_metrics = result$skill, significance = T) diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index b4c2b273..edf3df54 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -1,10 +1,10 @@ library(stringr) -plot_skill_metrics <- function(recipe, data_cube, skill_metrics, +plot_skill_metrics <- function(recipe, skill_metrics, outdir, significance = F, output_conf) { # recipe: Auto-S2S recipe # archive: Auto-S2S archive - # data_cube: s2dv_cube object with the corresponding hindcast data + # cube_info: s2dv_cube object with the corresponding hindcast data # skill_metrics: list of named skill metrics arrays # outdir: output directory # significance: T/F, whether to display the significance dots in the plots @@ -19,9 +19,12 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { stop("The element 'skill_metrics' must be a list of named arrays.") } - - latitude <- data_cube$coords$lat - longitude <- data_cube$coords$lon + + cube_info <- skill_metrics[["metadata"]] + skill_metrics[["metadata"]] <- NULL + + latitude <- cube_info$coords$lat + longitude <- cube_info$coords$lon archive <- get_archive(recipe) system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", @@ -36,7 +39,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, month_label <- tolower(month.name[init_month]) month_abbreviation <- month.abb[init_month] # Get months - months <- lubridate::month(Subset(data_cube$attrs$Dates, + months <- lubridate::month(Subset(cube_info$attrs$Dates, "syear", indices = 1), label = T, abb = F,locale = "en_GB") if (!is.null(recipe$Analysis$Workflow$Visualization$projection)) { @@ -59,7 +62,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, "enscorr_specs", "rmsss", "msss") scores <- c("rps", "frps", "crps", "frps_specs", "mse") # Loop over variables and assign colorbar and plot parameters to each metric - for (var in 1:data_cube$dims[['var']]) { + for (var in 1:cube_info$dims[['var']]) { var_skill <- lapply(skill_metrics, function(x) { ClimProjDiags::Subset(x, along = 'var', indices = var, @@ -119,7 +122,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, cols <- colorbar[2:(length(colorbar) - 1)] col_inf <- colorbar[1] col_sup <- colorbar[length(colorbar)] - units <- data_cube$attrs$Variable$metadata[[var_name]]$units + units <- cube_info$attrs$Variable$metadata[[var_name]]$units } # Reorder dimensions skill <- Reorder(skill, c("time", "longitude", "latitude")) @@ -147,8 +150,8 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, 
outfile <- paste0(outdir[var], name) } # Get variable name and long name - var_name <- data_cube$attrs$Variable$varName[[var]] - var_long_name <- data_cube$attrs$Variable$metadata[[var_name]]$long_name + var_name <- cube_info$attrs$Variable$varName[[var]] + var_long_name <- cube_info$attrs$Variable$metadata[[var_name]]$long_name # Multi-panel or single-panel plots if (recipe$Analysis$Workflow$Visualization$multi_panel) { # Define titles diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index ae94ed3d..d0c1d3c8 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -14,7 +14,7 @@ source("modules/Visualization/R/plot_ensemble_mean.R") source("modules/Visualization/plot_data.R") Visualization <- function(recipe, - data, + data = NULL, skill_metrics = NULL, probabilities = NULL, significance = F, @@ -56,8 +56,15 @@ Visualization <- function(recipe, # Get plot types and create output directories plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/plots/") - outdir <- get_dir(recipe = recipe, - variable = data$hcst$attrs$Variable$varName) + ## TODO: Improve this? + if (!is.null(data$hcst)) { + variable <- data$hcst$attrs$Variable$varName + } else { + variable <- skill_metrics$metadata$attrs$Variable$varName + } + + outdir <- get_dir(recipe = recipe, variable = variable) + browser() for (directory in outdir) { dir.create(directory, showWarnings = FALSE, recursive = TRUE) } @@ -75,7 +82,7 @@ Visualization <- function(recipe, # Plot skill metrics if ("skill_metrics" %in% plots) { if (!is.null(skill_metrics)) { - plot_skill_metrics(recipe, data$hcst, skill_metrics, outdir, + plot_skill_metrics(recipe, skill_metrics, outdir, significance, output_conf = output_conf) } else { error(recipe$Run$logger, -- GitLab From d27ac023478789db659a2ceac5daab129e4c0785 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 19 Jan 2024 15:23:25 +0100 Subject: [PATCH 46/96] test units module --- build_compute_workflow.R | 4 ++-- modules/Units/R/transform_units_precipitation.R | 4 +++- recipes/atomic_recipes/recipe_test_compute.yml | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 2a55282c..0b60b6c3 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -22,7 +22,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, class(x) <- "startR_array" as.s2dv_cube(x) }) - + for (cube_idx in seq(1:length(data))) { data[[cube_idx]]$attrs$Variable$varName <- var_name } @@ -31,7 +31,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, probabilities <- NULL ## TODO: Not working # Change units - # data <- Units(recipe, data, retrieve = F) + data <- Units(recipe, data, retrieve = F) # Loop over the modules for (module in modules) { if (module == "calibration") { diff --git a/modules/Units/R/transform_units_precipitation.R b/modules/Units/R/transform_units_precipitation.R index 5cc1e812..52e1e0bd 100644 --- a/modules/Units/R/transform_units_precipitation.R +++ b/modules/Units/R/transform_units_precipitation.R @@ -76,10 +76,11 @@ transform_units_precipitation <- function(data, original_units, new_units, cal <- tolower(data[[1]]$attrs$Variable$metadata[[var_name]]$dim[[time_pos]]$calendar) data_list[[var_index]] <- Apply(list(data_list[[var_index]], data[[1]]$attrs$Dates), - target_dim = list(c('syear'), c('syear')), + target_dim = list(c("time"), 
c("time")), extra_info = list(cal = cal, days_in_month = .days_in_month), fun = function(x, y) { date <- as.Date(y, "%Y-%m-%d") + dim(date) <- dim(y) num_days <- .days_in_month(date, cal = .cal) res <- x * num_days }, ncores = ncores)$output1 @@ -97,6 +98,7 @@ transform_units_precipitation <- function(data, original_units, new_units, .days_in_month <- function(x, cal) { + print(dim(x)) if (cal %in% c('gregorian', 'standard', 'proleptic_gregorian')) { N_DAYS_IN_MONTHS <- lubridate:::N_DAYS_IN_MONTHS if (leap_year(year(x))) { diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml index 4d47d944..53b5eee1 100644 --- a/recipes/atomic_recipes/recipe_test_compute.yml +++ b/recipes/atomic_recipes/recipe_test_compute.yml @@ -4,9 +4,9 @@ Description: Analysis: Horizon: Seasonal Variables: - name: tas prlr + name: tas freq: monthly_mean - units: {tas: C, prlr: mm} + units: {tas: C} Datasets: System: name: Meteo-France-System7 -- GitLab From 8e96e19410fafb107597905a74617a0f294864f7 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 22 Jan 2024 17:01:22 +0100 Subject: [PATCH 47/96] COmments and TODOs --- build_compute_workflow.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 0b60b6c3..53ae4fe1 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -29,7 +29,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, # Define other outputs skill_metrics <- NULL probabilities <- NULL - ## TODO: Not working # Change units data <- Units(recipe, data, retrieve = F) # Loop over the modules @@ -38,9 +37,15 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, data <- Calibration(recipe, data, retrieve = F) } else if (module == "anomalies") { data <- Anomalies(recipe, data, retrieve = F) + } else if (module == "downscaling") { + data <- Downscaling(recipe, data, retrieve = F) } else if (module == "skill") { skill_metrics <- Skill(recipe, data, retrieve = F, nchunks = nchunks) - } + } else if (module = "probabilities") { + probabilities <- Probabilities(recipe, data, retrieve = F, nchunks = nchunks) + } else if (module = "indices") { + ## TODO: Won't work with plots... + data <- Indices(recipe, data, retrieve = F) } ## TODO: Define what to return depending the modules called + the recipe return_list <- list(hcst = data$hcst$data, @@ -54,7 +59,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, } convert_to_s2dv_cube <- function(new_cube, original_cube) { - ## TODO: Make sure all the metadata is created correctly attr(new_cube, "Variables") <- attr(original_cube, "Variables") attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors") @@ -203,6 +207,7 @@ run_compute_workflow <- function(recipe, data) { } res[[cube]] <- NULL } + ## TODO: Replicate for probabilities and other modules if (!is.null(res$skill)) { tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/") metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds")) @@ -216,7 +221,7 @@ run_compute_workflow <- function(recipe, data) { drop = 'selected') }) names(res$skill) <- metric_list - ## TODO: Chunked metadata needs to be put back together!!! 
+ # Put chunked metadata back together source("tools/retrieve_metadata.R") res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, chunks = recipe$Run$startR_workflow$chunk_along) @@ -231,5 +236,8 @@ run_compute_workflow <- function(recipe, data) { res <- res[!sapply(res, is.null)] info(recipe$Run$logger, "##### DATA RETURNED AS A NAMED LIST #####") + info(recipe$Run$logger, + paste0("Steps performed: ", recipe$Run$startR_workflow$modules)) + return(res) } -- GitLab From 3daf27463c0926573caed760a10c671c9937dc52 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 25 Jan 2024 09:02:18 +0100 Subject: [PATCH 48/96] Adapt logger calls to customized appender (WIP) --- build_compute_workflow.R | 13 +- modules/Anomalies/Anomalies.R | 37 +++--- modules/Calibration/Calibration.R | 14 +- modules/Loading/R/load_seasonal.R | 4 +- modules/Skill/Skill.R | 63 ++++----- modules/Units/Units.R | 10 +- modules/Visualization/Visualization.R | 31 ++--- tools/check_recipe.R | 184 +++++++++++++------------- tools/prepare_outputs.R | 24 +++- 9 files changed, 197 insertions(+), 183 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 53ae4fe1..8bc40fde 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -7,6 +7,8 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") + recipe$Run$logger <- log4r::logger(threshold = recipe$Run$Loglevel, + appenders = list(console_appender(layout = .custom_log_layout()))) modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Create data list @@ -41,11 +43,12 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, data <- Downscaling(recipe, data, retrieve = F) } else if (module == "skill") { skill_metrics <- Skill(recipe, data, retrieve = F, nchunks = nchunks) - } else if (module = "probabilities") { + } else if (module == "probabilities") { probabilities <- Probabilities(recipe, data, retrieve = F, nchunks = nchunks) - } else if (module = "indices") { + } else if (module == "indices") { ## TODO: Won't work with plots... data <- Indices(recipe, data, retrieve = F) + } } ## TODO: Define what to return depending the modules called + the recipe return_list <- list(hcst = data$hcst$data, @@ -194,7 +197,7 @@ run_compute_workflow <- function(recipe, data) { wait = TRUE ) } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####") # --------------------------------------------------------------------------- @@ -234,9 +237,9 @@ run_compute_workflow <- function(recipe, data) { # --------------------------------------------------------------------------- # Step 6: Return outputs res <- res[!sapply(res, is.null)] - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### DATA RETURNED AS A NAMED LIST #####") - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste0("Steps performed: ", recipe$Run$startR_workflow$modules)) return(res) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 06d16927..2f738bdc 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -8,13 +8,11 @@ Anomalies <- function(recipe, data, retrieve = TRUE) { ## TODO: Is this necessary? 
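## The compute_workflow() hunk above re-creates the log4r logger from the
## recipe settings inside the chunk function: logger objects wrap closures
## and connections that do not serialize cleanly when shipped to Compute()
## workers. `.custom_log_layout()` is this repository's own helper; a rough
## standalone illustration with stock log4r (the `ex_` name is hypothetical):
ex_make_chunk_logger <- function(threshold = "INFO") {
  log4r::logger(threshold = threshold,
                appenders = list(log4r::console_appender(
                  layout = log4r::default_log_layout())))
}
## e.g. ex_lg <- ex_make_chunk_logger(); log4r::info(ex_lg, "inside a chunk")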
# Check - if (retrieve) { - if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, - paste("The anomaly module has been called, but the element", - "'Workflow:Anomalies:compute' is missing from the recipe.")) - stop() - } + if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { + error(recipe$Run$logger, retrieve = retrieve, + paste("The anomaly module has been called, but the element", + "'Workflow:Anomalies:compute' is missing from the recipe.")) + stop() } if (recipe$Analysis$Workflow$Anomalies$compute) { if (recipe$Analysis$Workflow$Anomalies$cross_validation) { @@ -120,13 +118,14 @@ Anomalies <- function(recipe, data, retrieve = TRUE) { } # Display success messages and save data - if (retrieve) { - info(recipe$Run$logger, - paste("The anomalies have been computed,", cross_msg, - "cross-validation. The original full fields are returned as", - "$hcst.full_val and $obs.full_val.")) + info(recipe$Run$logger, retrieve = retrieve, + paste("The anomalies have been computed,", cross_msg, + "cross-validation. The original full fields are returned as", + "$hcst.full_val and $obs.full_val.")) - info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### ANOMALIES COMPUTED SUCCESSFULLY #####") + if (retrieve) { if (recipe$Analysis$Workflow$Anomalies$save != 'none') { info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") } @@ -148,19 +147,19 @@ Anomalies <- function(recipe, data, retrieve = TRUE) { save_observations(recipe = recipe, data_cube = data$obs) } } - } else { - warn(recipe$Run$logger, paste("The Anomalies module has been called, but", - "recipe parameter Workflow:anomalies:compute is set to FALSE.", - "The full fields will be returned.")) + warn(recipe$Run$logger, retrieve = retrieve, + paste("The Anomalies module has been called, but recipe parameter", + "Workflow:anomalies:compute is set to FALSE.", + "The full fields will be returned.")) hcst_fullvalue <- NULL obs_fullvalue <- NULL - info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### ANOMALIES NOT COMPUTED #####") } ## TODO: Return fcst full value? .log_memory_usage(recipe$Run$logger, "After computing anomalies") - return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) } diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 20042e11..1639e459 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -12,12 +12,10 @@ Calibration <- function(recipe, data, retrieve = TRUE) { method <- tolower(recipe$Analysis$Workflow$Calibration$method) if (method == "raw") { - if (retrieve) { - warn(recipe$Run$logger, - paste("The Calibration module has been called, but the calibration", - "method in the recipe is 'raw'. The hcst and fcst will not be", - "calibrated.")) - } + warn(recipe$Run$logger, retrieve = retrieve, + paste("The Calibration module has been called, but the calibration", + "method in the recipe is 'raw'. 
The hcst and fcst will not be", + "calibrated.")) ## TODO: Improve efficiency fcst_calibrated <- data$fcst hcst_calibrated <- data$hcst @@ -61,7 +59,7 @@ Calibration <- function(recipe, data, retrieve = TRUE) { CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") ## TODO: this belongs in the recipe checker if (!(method %in% CST_CALIB_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = retrieve, paste("Calibration method in the recipe is not available for", "monthly data.")) stop() @@ -174,7 +172,6 @@ Calibration <- function(recipe, data, retrieve = TRUE) { # obs.full_val = data$obs.full_val)) # } if (retrieve) { - info(recipe$Run$logger, CALIB_MSG) # Saving if (recipe$Analysis$Workflow$Calibration$save != 'none') { info(recipe$Run$logger, "##### START SAVING CALIBRATED DATA #####") @@ -194,5 +191,6 @@ Calibration <- function(recipe, data, retrieve = TRUE) { save_observations(recipe = recipe, data_cube = data$obs) } } + info(recipe$Run$logger, retrieve = retrieve, CALIB_MSG) return(return_list) } diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index 4babacd1..1f953b20 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -311,10 +311,10 @@ load_seasonal <- function(recipe, retrieve = T) { if (!(recipe$Analysis$Regrid$type == 'none')) { compare_exp_obs_grids(hcst, obs) } - info(recipe$Run$logger, - "##### DATA LOADING COMPLETED SUCCESSFULLY #####") .log_memory_usage(recipe$Run$logger, when = "After loading") } + info(recipe$Run$logger, retrieve = retrieve, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") ############################################################################ # diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index b5b7c1b9..403043b8 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -307,14 +307,15 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, skill_metrics[[ metric ]] <- skill } } + info(recipe$Run$logger, retrieve = retrieve, + "##### SKILL METRIC COMPUTATION COMPLETE #####") + .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") + # Save outputs if (retrieve) { - info(recipe$Run$logger, - "##### SKILL METRIC COMPUTATION COMPLETE #####") - .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") - # Save outputs ## TODO: Handle the output differently to avoid changing the recipe? 
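## The `retrieve` argument added to these logger calls is forwarded to the
## suite's customized appender; its exact semantics live in
## `.custom_log_layout()`. A minimal stand-in with the same call shape,
## assuming the intent is simply to suppress console logging inside Compute()
## chunks (the `ex_` wrapper below is hypothetical):
ex_info <- function(logger, retrieve = TRUE, msg) {
  if (retrieve) log4r::info(logger, msg)
  invisible(NULL)
}
## e.g. ex_info(recipe$Run$logger, retrieve = retrieve, "##### ... #####")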
if (recipe$Analysis$Workflow$Skill$save != 'none') { - info(recipe$Run$logger, "##### START SAVING SKILL METRIC #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING SKILL METRIC #####") } recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/Skill/") @@ -376,7 +377,7 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, return(skill_metrics) } -Probabilities <- function(recipe, data) { +Probabilities <- function(recipe, data, retrieve = TRUE) { ## TODO: Do hcst and fcst at the same time if (is.null(recipe$Analysis$ncores)) { ncores <- 1 @@ -395,8 +396,9 @@ Probabilities <- function(recipe, data) { named_quantiles <- list() if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { - error(recipe$Run$logger, "Quantiles and probability bins have been - requested, but no thresholds are provided in the recipe.") + error(recipe$Run$logger, retrieve = retrieve, + paste("Quantiles and probability bins have been requested, but no", + "thresholds are provided in the recipe.")) stop() } else { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { @@ -471,34 +473,35 @@ Probabilities <- function(recipe, data) { percentiles = named_quantiles) } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") .log_memory_usage(recipe$Run$logger, when = "After anomaly computation") # Save outputs - if (recipe$Analysis$Workflow$Probabilities$save != 'none') { - info(recipe$Run$logger, - "##### START SAVING PERCENTILES AND PROBABILITY CATEGORIES #####") - - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Skill/") - # Save percentiles - if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 'percentiles_only')) { - save_percentiles(recipe = recipe, percentiles = results$percentiles, - data_cube = data$hcst) - } - # Save probability bins - if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 'bins_only')) { - save_probabilities(recipe = recipe, probs = results$probs, - data_cube = data$hcst, type = "hcst") - if (!is.null(results$probs_fcst)) { - save_probabilities(recipe = recipe, probs = results$probs_fcst, - data_cube = data$fcst, type = "fcst") + if (retrieve) { + if (recipe$Analysis$Workflow$Probabilities$save != 'none') { + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING PERCENTILES AND PROBABILITY CATEGORIES #####") + + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Skill/") + # Save percentiles + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'percentiles_only')) { + save_percentiles(recipe = recipe, percentiles = results$percentiles, + data_cube = data$hcst) + } + # Save probability bins + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = results$probs, + data_cube = data$hcst, type = "hcst") + if (!is.null(results$probs_fcst)) { + save_probabilities(recipe = recipe, probs = results$probs_fcst, + data_cube = data$fcst, type = "fcst") + } } } } - # Return results return(results) } diff --git a/modules/Units/Units.R b/modules/Units/Units.R index eb527e9e..83964195 100644 --- a/modules/Units/Units.R +++ b/modules/Units/Units.R @@ -59,15 +59,13 @@ Units <- function(recipe, data, retrieve = T) { ## if "/" appears substitute by -1 in at the end of next unit. How to know? 
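## The check below is a set-size comparison: pooling the original units with
## the user-requested units adds no new values only when every dataset
## already matches. A toy run with hypothetical values (`ex_` names are not
## part of the patch):
ex_orig <- list(hcst = "k", obs = "k")
ex_user <- "k"
length(unique(c(unlist(ex_orig), ex_user))) == length(ex_user) # TRUE -> skip conversion
ex_orig$obs <- "c"
length(unique(c(unlist(ex_orig), ex_user))) == length(ex_user) # FALSE -> convert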
# Check if all units are equal (if so, no conversion needed; else convert) if (length(unique(c(unlist(orig_units), user_units))) == length(user_units)) { - if (retrieve) { - info(recipe$Run$logger, "##### NO UNIT CONVERSION NEEDED #####") - } + info(recipe$Run$logger, retrieve = retrieve, "##### NO UNIT CONVERSION NEEDED #####") res <- data } else { if (recipe$Run$filesystem == 'esarchive' && (sum(!sapply(unique(orig_units), is.null)) != 1)) { ## TODO: Warning - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = retrieve, paste("The units in", paste(names(orig_units), collapse = ', '), "were not all equal and will be uniformized.", "If this is not expected, please contact the ES data team.")) @@ -89,9 +87,7 @@ Units <- function(recipe, data, retrieve = T) { } return(result) }, simplify = TRUE) # instead of lapply to get the named list directly - if (retrieve) { - info(recipe$Run$logger, "##### UNIT CONVERSION COMPLETE #####") - } + info(recipe$Run$logger, retrieve = retrieve, "##### UNIT CONVERSION COMPLETE #####") } return(res) } diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index d0c1d3c8..55944e16 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -34,7 +34,7 @@ Visualization <- function(recipe, if (recipe$Analysis$Region$name %in% names(output_conf)) { output_conf <- output_conf[[recipe$Analysis$Region$name]] } else { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("The region name is not found in the 'output_conf' file. ", "The default plot settings will be used.")) output_conf <- NULL @@ -70,9 +70,10 @@ Visualization <- function(recipe, } if ((is.null(skill_metrics)) && (is.null(data$fcst))) { - error(recipe$Run$logger, "The Visualization module has been called, - but there is no fcst in 'data', and 'skill_metrics' is NULL - so there is no data that can be plotted.") + error(recipe$Run$logger, retrieve = TRUE, + paste("The Visualization module has been called,", + "but there is no fcst in 'data', and 'skill_metrics' is NULL", + "so there is no data that can be plotted.")) stop() } # Set default single-panel plots if not specified @@ -85,7 +86,7 @@ Visualization <- function(recipe, plot_skill_metrics(recipe, skill_metrics, outdir, significance, output_conf = output_conf) } else { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The skill metric plots have been requested, but the ", "parameter 'skill_metrics' is NULL")) } @@ -113,11 +114,11 @@ Visualization <- function(recipe, if (recipe$Analysis$Workflow$Visualization$mask_ens %in% c('both', TRUE)) { if (is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, skill_metrics ", "need to be provided to be masked.")) } else if (!('enscorr' %in% names(skill_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, enscor metric ", "need to be provided to be masked")) } else { @@ -130,11 +131,11 @@ Visualization <- function(recipe, # Plots with dotted negative correlated in ens-mean-fcst if (recipe$Analysis$Workflow$Visualization$dots %in% c('both', TRUE)) { if (is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, skill_metrics ", "need to be provided for the dots")) } else if (!('enscorr' %in% names(skill_metrics))) { - 
error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, enscor metric ", "needs to be provided for the dots")) } else { @@ -146,7 +147,7 @@ Visualization <- function(recipe, } } else { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The forecast ensemble mean plot has been requested, but ", "there is no fcst element in 'data'")) } @@ -177,11 +178,11 @@ Visualization <- function(recipe, if (recipe$Analysis$Workflow$Visualization$mask_terciles %in% c('both', TRUE)) { if (is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, skill_metrics ", "need to be provided to be masked.")) } else if (!('rpss' %in% names(skill_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, rpss metric ", "need to be provided to be masked")) } else { @@ -195,11 +196,11 @@ Visualization <- function(recipe, # Plots with dotted terciles if (recipe$Analysis$Workflow$Visualization$dots %in% c('both', TRUE)) { if (is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, skill_metrics ", "need to be provided for the dots")) } else if (!('rpss' %in% names(skill_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, rpss metric ", "needs to be provided for the dots")) } else { @@ -211,7 +212,7 @@ Visualization <- function(recipe, } } } else { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, both the fcst and the ", "probabilities must be provided.")) } diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 2fc9f72d..b6c7ba00 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,7 +1,7 @@ check_recipe <- function(recipe) { # recipe: yaml recipe already read it ## TODO: set up logger-less case - info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) + info(recipe$Run$logger, retrieve = TRUE, paste("Checking recipe:", recipe$recipe_path)) # --------------------------------------------------------------------- # ANALYSIS CHECKS @@ -21,20 +21,20 @@ check_recipe <- function(recipe) { # Check basic elements in recipe:Analysis: if (!("Analysis" %in% names(recipe))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The recipe must contain an element called 'Analysis'.") error_status <- T } if (!all(PARAMS %in% names(recipe$Analysis))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Analysis' in the recipe must contain all of ", "the following: ", paste(PARAMS, collapse = ", "), ".")) error_status <- T } if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Horizon' in the recipe must be one of the ", "following: ", paste(HORIZONS, collapse = ", "), ".")) error_status <- T @@ -44,7 +44,7 @@ check_recipe <- function(recipe) { ## TODO: Specify filesystem archive <- read_yaml(ARCHIVE_SEASONAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Time' in the recipe must contain all of the ", "following: ", paste(TIME_SETTINGS_SEASONAL, collapse = 
", "), ".")) @@ -53,7 +53,7 @@ check_recipe <- function(recipe) { } else if (tolower(recipe$Analysis$Horizon) == "decadal") { archive <- read_yaml(ARCHIVE_DECADAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Time' in the recipe must contain all of the ", "following: ", paste(TIME_SETTINGS_DECADAL, collapse = ", "), ".")) @@ -74,7 +74,7 @@ check_recipe <- function(recipe) { (!is.null(recipe$Analysis$Variables$sic_threshold))) { if (!is.numeric(recipe$Analysis$Variables$sic_threshold) || dplyr::between(recipe$Analysis$Variables$sic_threshold, 0, 1)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The element Analysis:Variables:sic_threshold must be a", "numeric value between 0 and 1.")) } @@ -82,14 +82,14 @@ check_recipe <- function(recipe) { # Check system names if (!is.null(archive)) { if (!all(recipe$Analysis$Datasets$System$name %in% names(archive$System))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The specified System name was not found in the archive.") error_status <- T } # Check reference names if (!all(recipe$Analysis$Datasets$Reference$name %in% names(archive$Reference))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The specified Reference name was not found in the archive.") error_status <- T } @@ -97,36 +97,36 @@ check_recipe <- function(recipe) { # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || (!is.integer(recipe$Analysis$Time$ftime_min))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'ftime_min' must be an integer larger than 0.") error_status <- T } if ((!(recipe$Analysis$Time$ftime_max > 0)) || (!is.integer(recipe$Analysis$Time$ftime_max))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'ftime_max' must be an integer larger than 0.") error_status <- T } if (recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "'ftime_max' cannot be smaller than 'ftime_min'.") error_status <- T } # Check consistency of hindcast years if (!(as.numeric(recipe$Analysis$Time$hcst_start) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_start > 0))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'hcst_start' must be a valid year.") error_status <- T } if (!(as.numeric(recipe$Analysis$Time$hcst_end) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_end > 0))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'hcst_end' must be a valid year.") error_status <- T } if (recipe$Analysis$Time$hcst_end < recipe$Analysis$Time$hcst_start) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "'hcst_end' cannot be smaller than 'hcst_start'.") error_status <- T } @@ -142,19 +142,19 @@ check_recipe <- function(recipe) { ## TODO: To be implemented in the future # if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && # tolower(recipe$Analysis$Horizon) != "subseasonal") { - # warn(recipe$Run$logger, + # warn(recipe$Run$logger, retrieve = TRUE, # paste("Only subseasonal verification allows multiple forecast days."), # "Element fcst_day in recipe set as 1.") # recipe$Analysis$Time$sdate$fcst_day <- '01' # } ## TODO: Delete, this parameter was deprecated # if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) { - # 
error(recipe$Run$logger, + # error(recipe$Run$logger, retrieve = TRUE, # paste("The element 'fcst_sday' in the recipe should be defined.")) # } if (is.null(recipe$Analysis$Time$fcst_year)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste("The element 'fcst_year' is not defined in the recipe.", "No forecast year will be used.")) } @@ -171,7 +171,7 @@ check_recipe <- function(recipe) { # Regrid checks: if (length(recipe$Analysis$Regrid) != 2) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'Regrid' element must specify the 'method' and 'type'.") error_status <- T } @@ -179,7 +179,7 @@ check_recipe <- function(recipe) { # ... # calculate number of workflows to create for each variable and if (length(recipe$Analysis$Horizon) > 1) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Only one single Horizon can be specified in the recipe") error_status <- T } @@ -187,7 +187,7 @@ check_recipe <- function(recipe) { ## TODO: Refine this # nvar <- length(recipe$Analysis$Variables) # if (nvar > 2) { - # error(recipe$Run$logger, + # error(recipe$Run$logger, retrieve = TRUE, # "Only two type of Variables can be listed: ECVs and Indicators.") # stop("EXECUTION FAILED") # } @@ -210,7 +210,7 @@ check_recipe <- function(recipe) { if (is.null(names(recipe$Analysis$Region))) { for (region in recipe$Analysis$Region) { if (!all(LIMITS %in% names(region))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) error_status <- T @@ -219,7 +219,7 @@ check_recipe <- function(recipe) { if (length(recipe$Analysis$Region) > 1) { for (region in recipe$Analysis$Region) { if (!("name" %in% names(region)) || (is.null(region$name))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("If more than one region has been defined, every region", "must have a unique name.")) } @@ -227,7 +227,7 @@ check_recipe <- function(recipe) { } # Atomic recipe } else if (!all(LIMITS %in% names(recipe$Analysis$Region))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) error_status <- T @@ -237,7 +237,7 @@ check_recipe <- function(recipe) { # for (i in 1:length(recipe$Analysis$Region)) { # if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { # limits <- paste(limits, collapse = " ") - # error(recipe$Run$logger, + # error(recipe$Run$logger, retrieve = TRUE, # paste0("Each region defined in element 'Region' ", # "should have 4 elements: ", # paste(limits, collapse = ", "), ".")) @@ -245,7 +245,7 @@ check_recipe <- function(recipe) { # } # if (length(recipe$Analysis$Region) > 1) { # if (!("name" %in% names(recipe$Analysis$Region[[i]]))) { - # error(recipe$Run$logger, + # error(recipe$Run$logger, retrieve = TRUE, # paste("If multiple regions are requested, each region must", # "have a 'name'".) # # are numeric? 
class list mode list @@ -261,19 +261,19 @@ check_recipe <- function(recipe) { recipe$Analysis$Workflow$Calibration$method == FALSE) || tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || is.null(recipe$Analysis$Workflow$Calibration$method)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, "No Calibration method was specified, raw data verification.") recipe$Analysis$Workflow$Calibration$method <- 'raw' } else { if (is.null(recipe$Analysis$Workflow$Calibration$method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'Calibration' element 'method' must be specified.") error_status <- T } SAVING_OPTIONS_CALIB <- c("all", "none", "exp_only", "fcst_only") if ((is.null(recipe$Analysis$Workflow$Calibration$save)) || (!(recipe$Analysis$Workflow$Calibration$save %in% SAVING_OPTIONS_CALIB))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify which Calibration module outputs you want ", "to save with the 'save' parameter. The options are: ", paste(SAVING_OPTIONS_CALIB, collapse = ", "), ".")) @@ -284,18 +284,18 @@ check_recipe <- function(recipe) { if ("Anomalies" %in% names(recipe$Analysis$Workflow)) { # Computation and cross-validation checks if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'compute' must be defined under 'Anomalies'.") error_status <- T } else if (!(is.logical(recipe$Analysis$Workflow$Anomalies$compute))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Parameter 'Anomalies:compute' must be a logical value", "(True/False or yes/no).")) error_status <- T } else if ((recipe$Analysis$Workflow$Anomalies$compute)) { # Cross-validation check if (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("If anomaly computation is requested, parameter", "'cross_validation' must be defined under 'Anomalies', and it must be a logical value (True/False or yes/no).")) @@ -305,7 +305,7 @@ check_recipe <- function(recipe) { SAVING_OPTIONS_ANOM <- c("all", "none", "exp_only", "fcst_only") if ((is.null(recipe$Analysis$Workflow$Anomalies$save)) || (!(recipe$Analysis$Workflow$Anomalies$save %in% SAVING_OPTIONS_ANOM))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify which Anomalies module outputs you want ", "to save with the 'save' parameter. The options are: ", paste(SAVING_OPTIONS_ANOM, collapse = ", "), ".")) @@ -327,12 +327,12 @@ check_recipe <- function(recipe) { if ("type" %in% names(downscal_params)) { if (length(downscal_params$type) == 0) { downscal_params$type <- "none" - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste("Downscaling 'type' is empty in the recipe, setting it to", "'none'.")) } if (!(downscal_params$type %in% DOWNSCAL_TYPES)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The type of Downscaling request in the recipe is not ", "available. 
It must be one of the following: ", paste(DOWNSCAL_TYPES, collapse = ", "), ".")) @@ -340,14 +340,14 @@ check_recipe <- function(recipe) { } if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && (is.null(downscal_params$target_grid))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("A target grid is required for the downscaling method", "requested in the recipe.")) error_status <- T } if (downscal_params$type == "int") { if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'int' was requested, but no", "interpolation method is provided in the recipe.")) error_status <- T @@ -355,7 +355,7 @@ check_recipe <- function(recipe) { } else if (downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) { if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type", downscal_params$type, "was requested in the recipe, but no", "interpolation method is provided.")) @@ -363,24 +363,24 @@ check_recipe <- function(recipe) { } } else if (downscal_params$type == "intbc") { if (is.null(downscal_params$bc_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'intbc' was requested in the recipe, but", "no bias correction method is provided.")) error_status <- T } else if (!(downscal_params$bc_method %in% BC_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The accepted Bias Correction methods for the downscaling", " module are: ", paste(BC_METHODS, collapse = ", "), ".")) error_status <- T } } else if (downscal_params$type == "intlr") { if (length(downscal_params$lr_method) == 0) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'intlr' was requested in the recipe, but", "no linear regression method was provided.")) error_status <- T } else if (!(downscal_params$lr_method %in% LR_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The accepted linear regression methods for the", " downscaling module are: ", paste(LR_METHODS, collapse = ", "), ".")) @@ -388,24 +388,24 @@ check_recipe <- function(recipe) { } } else if (downscal_params$type == "analogs") { if (is.null(downscal_params$nanalogs)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type is 'analogs, but the number of analogs", "has not been provided in the recipe. 
The default is 3.")) } } else if (downscal_params$type == "logreg") { if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'logreg' was requested in the recipe, but", "no interpolation method was provided.")) error_status <- T } if (is.null(downscal_params$log_reg_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'logreg' was requested in the recipe,", "but no logistic regression method is provided.")) error_status <- T } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The accepted logistic regression methods for the ", "downscaling module are: ", paste(LOGREG_METHODS, collapse = ", "), ".")) @@ -420,19 +420,19 @@ check_recipe <- function(recipe) { nino_indices <- paste0("nino", c("1+2", "3", "3.4", "4")) indices <- c("nao", nino_indices) if (!("anomalies" %in% tolower(names(recipe$Analysis$Workflow)))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Indices uses Anomalies as input, but Anomalies are missing", "in the recipe.")) error_status <- T } else if (!(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Indices uses Anomalies as input, but the parameter", "'Anomalies:compute' is set as no/False.")) error_status <- T } recipe_indices <- tolower(names(recipe$Analysis$Workflow$Indices)) if (!all(recipe_indices %in% indices)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Some of the indices under 'Indices' are not available.", "The available Indices are: 'NAO', 'Nino1+2', 'Nino3', ", "'Nino3.4' and 'Nino4'.")) @@ -441,7 +441,7 @@ check_recipe <- function(recipe) { # Check that variables correspond with indices requested if (("nao" %in% recipe_indices) && (!all(recipe_variables %in% c("psl", "z500")))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("It is not possible to compute the NAO with some of the ", "variables requested. To compute the NAO, please make sure", "your recipe requests only psl and/or z500.")) @@ -449,7 +449,7 @@ check_recipe <- function(recipe) { } if ((any(nino_indices %in% recipe_indices)) && (!all(recipe_variables %in% c("tos", "sst")))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("It is not possible to compute El Nino indices with some ", "of the variables requested. To compute El Nino, please ", "make sure your recipe requests only tos.")) @@ -465,14 +465,14 @@ check_recipe <- function(recipe) { "frpss_specs", "bss10_specs", "bss90_specs") if ("Skill" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Skill$metric)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'metric' must be defined under 'Skill'.") error_status <- T } else { requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] if (!all(tolower(requested_metrics) %in% AVAILABLE_METRICS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Some of the metrics requested under 'Skill' are not ", "available in SUNSET. 
Check the documentation to see the ", "full list of accepted skill metrics.")) @@ -483,7 +483,7 @@ check_recipe <- function(recipe) { SAVING_OPTIONS_SKILL <- c("all", "none") if ((is.null(recipe$Analysis$Workflow$Skill$save)) || (!(recipe$Analysis$Workflow$Skill$save %in% SAVING_OPTIONS_SKILL))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify whether you want to save the Skill metrics ", "with the 'save' parameter. The options are: ", paste(SAVING_OPTIONS_SKILL, collapse = ", "), ".")) @@ -494,11 +494,11 @@ check_recipe <- function(recipe) { # Probabilities if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'percentiles' must be defined under 'Probabilities'.") error_status <- T } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Parameter 'Probabilities:percentiles' expects a list.", "See documentation in the wiki for examples.")) error_status <- T @@ -507,7 +507,7 @@ check_recipe <- function(recipe) { SAVING_OPTIONS_PROBS <- c("all", "none", "bins_only", "percentiles_only") if ((is.null(recipe$Analysis$Workflow$Probabilities$save)) || (!(recipe$Analysis$Workflow$Probabilities$save %in% SAVING_OPTIONS_PROBS))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify whether you want to save the percentiles ", "and probability bins with the 'save' parameter. The ", "options are: ", @@ -522,14 +522,14 @@ check_recipe <- function(recipe) { "most_likely_terciles") # Separate plots parameter and check if all elements are in PLOT_OPTIONS if (is.null(recipe$Analysis$Workflow$Visualization$plots)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'plots' element must be defined under 'Visualization'.") error_status <- T } else { plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] if (!all(plots %in% PLOT_OPTIONS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The options available for the plots are: ", paste(PLOT_OPTIONS, collapse = ", "), ".")) error_status <- T @@ -538,41 +538,41 @@ check_recipe <- function(recipe) { # Check multi_panel option ## TODO: Multi-panel if (is.null(recipe$Analysis$Workflow$Visualization$multi_panel)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:multi_panel not specified for the plots, the", " default is 'no/False'.")) } else if (!is.logical(recipe$Analysis$Workflow$Visualization$multi_panel)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Parameter 'Visualization:multi_panel' must be a logical ", "value: either 'yes/True' or 'no/False'")) error_status <- T } # Check projection if (is.null(recipe$Analysis$Workflow$Visualization$projection)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:projection not specified for the plots, the ", "default projection is cylindrical equidistant.")) } ## TODO: Add significance? 
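# To make the Visualization checks in this section concrete, here is a
# minimal sketch, with hypothetical values, of the fields being validated
# once read_yaml() has parsed the recipe into a nested list:
visualization_example <- list(plots = "most_likely_terciles",
                              multi_panel = FALSE,    # must be logical
                              projection = NULL,      # NULL falls back to the default
                              mask_terciles = "both") # TRUE, FALSE or "both"
is.logical(visualization_example$multi_panel)                   # TRUE
visualization_example$mask_terciles %in% c(TRUE, FALSE, "both") # TRUE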
if ("most_likely_terciles" %in% plots) { if (is.null(recipe$Analysis$Workflow$Visualization$mask_terciles)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:mask_terciles not set for tercile plots,", " the default setting is: 'no/False'.")) } else if (!(recipe$Analysis$Workflow$Visualization$mask_terciles %in% c(TRUE, FALSE, "both"))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Parameter Visualization:mask_terciles must be one of: ", "yes/True, no/False, 'both'")) error_status <- T } if (is.null(recipe$Analysis$Workflow$Visualization$dots)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:dots not set for tercile plots, the default", " setting is: 'no/False'.")) } else if (!(recipe$Analysis$Workflow$Visualization$dots %in% c(TRUE, FALSE, "both"))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Parameter Visualization:plots must be one of: ", "yes/True, no/False, 'both'")) } @@ -581,14 +581,14 @@ check_recipe <- function(recipe) { # Scorecards if ("Scorecards" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'metric' must be defined under 'Scorecards'.") error_status <- T } else { sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, ", | |,")[[1]] if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("All of the metrics requested under 'Scorecards' must ", "be requested in the 'Skill' section.")) error_status <- T @@ -607,27 +607,27 @@ check_recipe <- function(recipe) { stop("The recipe must contain an element named 'Run'.") } if (!all(RUN_FIELDS %in% names(recipe$Run))) { - error(recipe$Run$logger, paste("Recipe element 'Run' must contain", + error(recipe$Run$logger, retrieve = TRUE, paste("Recipe element 'Run' must contain", "all of the following fields:", paste(RUN_FIELDS, collapse=", "), ".")) error_status <- T } if (!is.character(recipe$Run$output_dir)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Run element 'output_dir' in", recipe$name, "file", "should be a character string indicating the path where", "the outputs should be saved.")) error_status <- T } if (!is.character(recipe$Run$code_dir)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Run element 'code_dir' in", recipe$name, "file ", "should be a character string indicating the path", "where the code is.")) error_status <- T } if (!is.logical(recipe$Run$Terminal)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Run element 'Terminal' in", recipe$name, "file ", "should be a boolean value indicating whether or not to", "print the logs in the terminal.")) @@ -660,39 +660,39 @@ check_recipe <- function(recipe) { auto_specs <- read_yaml("conf/autosubmit.yml")[[recipe$Run$filesystem]] # Check that the autosubmit configuration parameters are present if (!("auto_conf" %in% names(recipe$Run))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'auto_conf' is missing from the 'Run' section of the recipe.") error_status <- T } else if (!all(AUTO_PARAMS %in% names(recipe$Run$auto_conf))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Run:auto_conf' must contain all of the 
", "following: ", paste(AUTO_PARAMS, collapse = ", "), ".")) error_status <- T } # Check that the script is not NULL and exists if (is.null(recipe$Run$auto_conf$script)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "A script must be provided to run the recipe with autosubmit.") error_status <- T } else if (!file.exists(recipe$Run$auto_conf$script)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Could not find the file for the script in 'auto_conf'.") error_status <- T } # Check that the experiment ID exists if (is.null(recipe$Run$auto_conf$expid)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Autosubmit EXPID is missing. You can create one by", "running the following commands on the autosubmit machine:")) - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("module load", auto_specs$module_version)) - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("autosubmit expid -H", auto_specs$platform, "-d ")) error_status <- T } else if (!dir.exists(paste0(auto_specs$experiment_dir, recipe$Run$auto_conf$expid))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("No folder in ", auto_specs$experiment_dir, " for the EXPID", recipe$Run$auto_conf$expid, ". Please make sure it is correct.")) @@ -700,17 +700,17 @@ check_recipe <- function(recipe) { } if ((recipe$Run$auto_conf$email_notifications) && (is.null(recipe$Run$auto_conf$email_address))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Autosubmit notifications are enabled but email address is empty!") error_status <- T } if (is.null(recipe$Run$auto_conf$hpc_user)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'Run:auto_conf:hpc_user' field can not be empty.") error_status <- T } else if ((recipe$Run$filesystem == "esarchive") && (!substr(recipe$Run$auto_conf$hpc_user, 1, 5) == "bsc32")) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Please check your hpc_user ID. 
It should look like: 'bsc32xxx'") error_status <- T } @@ -729,7 +729,7 @@ check_recipe <- function(recipe) { # Check that all required fields are present if ("startR_workflow" %in% names(recipe$Run)) { if (!all(STARTR_PARAMS %in% names(recipe$Run$startR_workflow))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Run:startR_workflow' must contain all of the ", "following: ", paste(STARTR_PARAMS, collapse = ", "), ".")) error_status <- T @@ -738,7 +738,7 @@ check_recipe <- function(recipe) { ", | |,")[[1]]) # Check modules if (!any(modules %in% STARTR_MODULES)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Run:startR_workflow:modules' can only ", "contain the following modules: ", paste(STARTR_MODULES, collapse = ", "), ".")) @@ -746,7 +746,7 @@ check_recipe <- function(recipe) { } # Check chunking dims if (!(any(names(recipe$Run$startR_workflow$chunk_along) %in% CHUNK_DIMS))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The dimensions in 'Run:startR_workflow:chunk_along' can ", "only be: ", paste(CHUNK_DIMS, collapse = ", "), ".")) error_status <- T @@ -754,7 +754,7 @@ check_recipe <- function(recipe) { # Check compatibility between module selection and chunking dimensions if (any(c("latitude", "longitude") %in% names(recipe$Run$startR_workflow$chunk_along)) && any(modules %in% MODULES_USING_LATLON)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("latitude and longitude cannot be chunking dimensions if", "any of the following modules is in the startR workflow: ", paste(MODULES_USING_LATLON, collapse = ", "), ".")) @@ -762,7 +762,7 @@ check_recipe <- function(recipe) { } if ("time" %in% names(recipe$Run$startR_workflow$chunk_along) && any(modules %in% MODULES_USING_TIME)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("time cannot be a chunking dimension if any of the ", "following modules is in the startR workflow: ", paste(MODULES_USING_TIME, collapse = ", "), ".")) @@ -770,7 +770,7 @@ check_recipe <- function(recipe) { } if ("var" %in% names(recipe$Run$startR_workflow$chunk_along) && any(modules %in% MODULES_USING_VAR)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("var cannot be a chunking dimension if any of the ", "following modules is in the startR workflow: ", paste(MODULES_USING_VAR, collapse = ", "), ".")) @@ -786,15 +786,15 @@ check_recipe <- function(recipe) { # e.g. only one calibration method ## TODO: Implement number of dependent verifications #nverifications <- check_number_of_dependent_verifications(recipe) - # info(recipe$Run$logger, paste("Start Dates:", + # info(recipe$Run$logger, retrieve = TRUE, paste("Start Dates:", # paste(fcst.sdate, collapse = " "))) # Return error if any check has failed if (error_status) { - error(recipe$Run$logger, "RECIPE CHECK FAILED.") + error(recipe$Run$logger, retrieve = TRUE, "RECIPE CHECK FAILED.") stop("The recipe contains some errors. 
Find the full list in the", " main.log file.") } else { - info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFULL #####") + info(recipe$Run$logger, retrieve = TRUE, "##### RECIPE CHECK SUCCESSFULL #####") } } diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index f9942525..0540fec9 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -72,13 +72,13 @@ prepare_outputs <- function(recipe_file, # logger set-up if (recipe$Run$Terminal) { logger <- log4r::logger(threshold = recipe$Run$Loglevel, - appenders = list(console_appender(layout = default_log_layout()), + appenders = list(console_appender(layout = .custom_log_layout()), file_appender(logfile, append = TRUE, - layout = default_log_layout()))) + layout = .custom_log_layout()))) } else { logger <- log4r::logger(threshold = recipe$Run$Loglevel, appenders = list(file_appender(logfile, append = TRUE, - layout = default_log_layout()))) + layout = .custom_log_layout()))) } recipe$Run$logger <- logger recipe$Run$logfile <- logfile @@ -86,7 +86,7 @@ prepare_outputs <- function(recipe_file, # Set up default filesystem if (is.null(recipe$Run$filesystem)) { recipe$Run$filesystem <- "esarchive" - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, "Filesystem not specified in the recipe. Setting it to 'esarchive'.") } # Restructure the recipe to make the atomic recipe more readable @@ -95,10 +95,24 @@ prepare_outputs <- function(recipe_file, } # Run recipe checker if (disable_checks) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, "Recipe checks disabled. The recipe will not be checked for errors.") } else { check_recipe(recipe) } return(recipe) } + +.custom_log_layout <- function() { + # Custom modification of the default log4r log layout to remove ANSI + # formatting from log messages when outputting to files. + time_format <- "%Y-%m-%d %H:%M:%S" + function(level, retrieve, ...) { + if (retrieve) { + msg <- paste0(..., collapse = "") + # Strip msg of ANSI formatting + # msg <- ansi_strip(msg) + sprintf("%-5s [%s] %s\n", level, Sys.time(), msg) + } + } +} -- GitLab From c6256dd3f5d35806959f40b8d27ef12da2fc2bd7 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 14 Feb 2024 16:24:13 +0100 Subject: [PATCH 49/96] Fix bug when no forecast in self-defined Compute(=) function --- build_compute_workflow.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 8bc40fde..22881188 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -5,14 +5,21 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Units/Units.R") source("modules/Calibration/Calibration.R") source("modules/Anomalies/Anomalies.R") + source("modules/Downscaling/Downscaling.R") source("modules/Skill/Skill.R") + # Define appender with custom log layout so that it knows not to append + # within Compute(). 
recipe$Run$logger <- log4r::logger(threshold = recipe$Run$Loglevel, appenders = list(console_appender(layout = .custom_log_layout()))) modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Create data list - data <- list(hcst = hcst, obs = obs, fcst = fcst) + data <- list(hcst = hcst, + obs = obs, + fcst = fcst) + data <- data[!sapply(data, is.null)] + print(names(data)) var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]]) # Remove duplicated objects rm(hcst, obs, fcst) @@ -40,10 +47,12 @@ } else if (module == "anomalies") { data <- Anomalies(recipe, data, retrieve = F) } else if (module == "downscaling") { + ## TODO: Test, adapt if necessary data <- Downscaling(recipe, data, retrieve = F) } else if (module == "skill") { skill_metrics <- Skill(recipe, data, retrieve = F, nchunks = nchunks) } else if (module == "probabilities") { + ## TODO: Adapt probabilities output probabilities <- Probabilities(recipe, data, retrieve = F, nchunks = nchunks) } else if (module == "indices") { ## TODO: Won't work with plots... -- GitLab From 9422ab3bf530854ca60cec6f6aceeecd441359e4 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 14 Feb 2024 16:27:44 +0100 Subject: [PATCH 50/96] Uncomment lines --- modules/Calibration/Calibration.R | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 1639e459..d1e4bc88 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -110,26 +110,20 @@ Calibration <- function(recipe, data, retrieve = TRUE) { } } else if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { # Daily data calibration using Quantile Mapping - # if (!(method %in% c("qmap"))) { - # error(recipe$Run$logger, - # paste("Calibration method in the recipe is not available for", - # "daily data. Only quantile mapping 'qmap is implemented.")) - # stop() - # } + if (!(method %in% c("qmap"))) { + error(recipe$Run$logger, retrieve = retrieve, + paste("Calibration method in the recipe is not available for", + "daily data. Only quantile mapping 'qmap' is implemented.")) + stop() + } # Calibrate the hindcast ## TODO: Change again if all data ends up being an s2dv_cube arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL, sdate_dim = "syear", memb_dim = "ensemble", method = "QUANT", ncores = ncores, na.rm = na.rm, wet.day = F) - if (inherits(data$hcst, "s2dv_cube")) { - ## TODO: Modify QuantileMapping to reorder dims?
- dim_order <- names(data$hcst$dims) - fun <- CST_QuantileMapping - } else { - dim_order <- names(attr(data$hcst, "Dimensions")) - fun <- QuantileMapping - } + dim_order <- names(data$hcst$dims) + fun <- CST_QuantileMapping hcst_calibrated <- do.call(fun, arguments) # # Restore dimension order hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) @@ -156,7 +150,7 @@ Calibration <- function(recipe, data, retrieve = TRUE) { # Calibrate the forecast fcst_calibrated <- do.call(fun, arguments) # Restore dimension order - # fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) + fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) } else { fcst_calibrated <- NULL } @@ -174,7 +168,8 @@ Calibration <- function(recipe, data, retrieve = TRUE) { if (retrieve) { # Saving if (recipe$Analysis$Workflow$Calibration$save != 'none') { - info(recipe$Run$logger, "##### START SAVING CALIBRATED DATA #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING CALIBRATED DATA #####") } ## TODO: What do we do with the full values? recipe$Run$output_dir <- paste0(recipe$Run$output_dir, -- GitLab From a5196e9315de9524efe55ca270ac2ffb4210e2f8 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 14 Feb 2024 16:41:26 +0100 Subject: [PATCH 51/96] Adapt Downscaling module to work within Compute() --- modules/Downscaling/Downscaling.R | 42 ++++++++++--------- modules/Downscaling/tmp/Intbc.R | 14 +++++-- .../atomic_recipes/recipe_test_compute.yml | 19 +++++++-- tools/check_recipe.R | 5 ++- 4 files changed, 51 insertions(+), 29 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 59233dc2..f1911cfc 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -6,13 +6,12 @@ source('modules/Downscaling/tmp/Analogs.R') source('modules/Downscaling/tmp/LogisticReg.R') source('modules/Downscaling/tmp/Utils.R') -Downscaling <- function(recipe, data) { +Downscaling <- function(recipe, data, retrieve = TRUE) { # Function that downscale the hindcast using the method stated in the # recipe. For the moment, forecast must be null. # # data: list of s2dv_cube objects containing the hcst, obs and fcst. # recipe: object obtained when passing the .yml recipe file to read_yaml() - type <- tolower(recipe$Analysis$Workflow$Downscaling$type) if (type == "none") { @@ -22,7 +21,7 @@ Downscaling <- function(recipe, data) { } else { if (!is.null(data$fcst)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = retrieve, "The downscaling will be only performed to the hindcast data") data$fcst <- NULL } @@ -271,24 +270,29 @@ Downscaling <- function(recipe, data) { DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } } - print(DOWNSCAL_MSG) + info(recipe$Run$logger, retrieve = retrieve, DOWNSCAL_MSG) # Saving - if (recipe$Analysis$Workflow$Downscaling$save != 'none') { - info(recipe$Run$logger, "##### START SAVING DOWNSCALED DATA #####") - } - ## TODO: What do we do with the full values? 
- recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Downscaling/") - # if ((recipe$Analysis$Workflow$Downscaling$save %in% - # c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { - # save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') - # } - if (recipe$Analysis$Workflow$Downscaling$save %in% c('all', 'exp_only')) { - save_forecast(recipe = recipe, data_cube = hcst_downscal$exp, type = 'hcst') - } - if (recipe$Analysis$Workflow$Downscaling$save == 'all') { - save_observations(recipe = recipe, data_cube = hcst_downscal$obs) + if (retrieve) { + if (recipe$Analysis$Workflow$Downscaling$save != 'none') { + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING DOWNSCALED DATA #####") + } + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Downscaling/") + # if ((recipe$Analysis$Workflow$Downscaling$save %in% + # c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { + # save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + # } + if (recipe$Analysis$Workflow$Downscaling$save %in% c('all', 'exp_only')) { + save_forecast(recipe = recipe, + data_cube = hcst_downscal$exp, + type = 'hcst') + } + if (recipe$Analysis$Workflow$Downscaling$save == 'all') { + save_observations(recipe = recipe, + data_cube = hcst_downscal$obs) + } } return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index dc5d050b..63c8be7c 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ b/modules/Downscaling/tmp/Intbc.R @@ -306,8 +306,11 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, if (bc_method == 'qm' | bc_method == 'quantile_mapping') { - res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE, - memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, ...) + res <- CSTools::QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, + na.rm = TRUE, + memb_dim = member_dim, + sdate_dim = sdate_dim, + ncores = ncores, ...) } else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { # the temporal dimension must be only one dimension called "time" @@ -328,8 +331,11 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, if (dim(obs_ref)[sdate_dim] == 1) { warning('Simple Bias Correction should not be used with only one observation. 
Returning NA.') } - res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, - sdate_dim = sdate_dim, ncores = ncores, cal.method = bc_method) + res <- CSTools::Calibration(exp = exp_interpolated$data, obs = obs_ref, + memb_dim = member_dim, + sdate_dim = sdate_dim, + ncores = ncores, + cal.method = bc_method) } # Return a list of three elements diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml index 53b5eee1..eaca75d7 100644 --- a/recipes/atomic_recipes/recipe_test_compute.yml +++ b/recipes/atomic_recipes/recipe_test_compute.yml @@ -15,7 +15,7 @@ Analysis: name: ERA5 Time: sdate: '1101' - fcst_year: '2020' + fcst_year: # '2020' hcst_start: '2000' hcst_end: '2016' ftime_min: 1 @@ -30,12 +30,23 @@ Analysis: type: to_system #to_reference #'r360x181' Workflow: Anomalies: - compute: yes # yes/no, default yes + compute: no # yes/no, default yes cross_validation: yes # yes/no, default yes save: 'all' # 'all'/'none'/'exp_only'/'fcst_only' Calibration: - method: mse_min + method: raw save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + int_method: conservative # regridding method accepted by CDO. + bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn' + log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + nanalogs: # If type analogs. Number of analogs to be searched + save: 'all' # 'all'/'none'/'exp_only' Skill: metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS save: 'all' # 'all'/'none' @@ -57,7 +68,7 @@ Run: output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ startR_workflow: - modules: calibration skill # Modules to run inside Compute(), in order + modules: downscaling skill # Modules to run inside Compute(), in order chunk_along: {time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} run_on: local # options: local, as_machine, nord3 chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss.
Only if run_on is not 'local' diff --git a/tools/check_recipe.R b/tools/check_recipe.R index b6c7ba00..6e41e1d3 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -721,7 +721,8 @@ check_recipe <- function(recipe) { # --------------------------------------------------------------------- STARTR_PARAMS <- c("modules", "chunk_along") - STARTR_MODULES <- c("calibration", "anomalies", "skill") + STARTR_MODULES <- c("calibration", "anomalies", "downscaling", + "skill", "probabilities") CHUNK_DIMS <- c("var", "time", "latitude", "longitude") MODULES_USING_LATLON <- c("downscaling", "indices") MODULES_USING_TIME <- c("indicators") @@ -755,7 +756,7 @@ check_recipe <- function(recipe) { if (any(c("latitude", "longitude") %in% names(recipe$Run$startR_workflow$chunk_along)) && any(modules %in% MODULES_USING_LATLON)) { error(recipe$Run$logger, retrieve = TRUE, - paste0("latitude and longitude cannot be chunking dimensions if", + paste0("latitude and longitude cannot be chunking dimensions if ", "any of the following modules is in the startR workflow: ", paste(MODULES_USING_LATLON, collapse = ", "), ".")) error_status <- T -- GitLab From 81cac495d9b452e133bad3026863d3202d646084 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 14 Feb 2024 16:41:39 +0100 Subject: [PATCH 52/96] Fix bug in logger call --- modules/Skill/Skill.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 403043b8..75c2e689 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -307,7 +307,7 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, skill_metrics[[ metric ]] <- skill } } - info(recipe$Run$logger, retrieve = retrieve) + info(recipe$Run$logger, retrieve = retrieve, "##### SKILL METRIC COMPUTATION COMPLETE #####") .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") # Save outputs -- GitLab From ce9d468cd40681bbadef3e985df04d85a4d3d3da Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 15 Feb 2024 12:14:50 +0100 Subject: [PATCH 53/96] Adapt logger calls and fix obs dimensions --- modules/Calibration/Calibration.R | 168 +++++++++++++------------- modules/Loading/Loading.R | 2 +- modules/Loading/R/dates2load.R | 16 +-- modules/Loading/R/load_decadal.R | 2 +- modules/Loading/R/load_seasonal.R | 7 +- modules/Loading/R/load_tas_tos.R | 7 +- modules/Saving/R/save_corr.R | 2 +- modules/Saving/R/save_forecast.R | 4 +- modules/Saving/R/save_metrics.R | 3 +- modules/Saving/R/save_observations.R | 3 +- modules/Saving/R/save_percentiles.R | 3 +- modules/Saving/R/save_probabilities.R | 2 +- modules/Saving/Saving.R | 7 +- tools/data_summary.R | 24 ++-- 14 files changed, 129 insertions(+), 121 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index d1e4bc88..2dab5eba 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -8,7 +8,7 @@ Calibration <- function(recipe, data, retrieve = TRUE) { # data: list of s2dv_cube objects containing the hcst, obs and fcst. # Optionally, it may also have hcst.full_val and obs.full_val. # recipe: object obtained when passing the .yml recipe file to read_yaml() - + method <- tolower(recipe$Analysis$Workflow$Calibration$method) if (method == "raw") { @@ -16,19 +16,20 @@ Calibration <- function(recipe, data, retrieve = TRUE) { paste("The Calibration module has been called, but the calibration", "method in the recipe is 'raw'. 
The hcst and fcst will not be", "calibrated.")) - ## TODO: Improve efficiency fcst_calibrated <- data$fcst hcst_calibrated <- data$hcst - # if (!is.null(data$hcst.full_val)) { - # hcst_full_calibrated <- data$hcst.full_val - # } else { - # hcst_full_calibrated <- NULL - # } + if (!is.null(data$hcst.full_val)) { + hcst_full_calibrated <- data$hcst.full_val + } else { + hcst_full_calibrated <- NULL + } CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####" } else { + ## TODO: Calibrate full fields when present # Calibration function params - mm <- recipe$Analysis$Datasets$Multimodel + mm <- !is.null(recipe$Analysis$Datasets$Multimodel) && + !tolower(recipe$Analysis$Datasets$Multimodel) %in% c('no','false') if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { @@ -42,41 +43,29 @@ Calibration <- function(recipe, data, retrieve = TRUE) { CALIB_MSG <- "##### CALIBRATION COMPLETE #####" # Replicate observation array for the multi-model case - if (mm) { - obs.mm <- data$obs$data - for (dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { - obs.mm <- abind(obs.mm, data$obs$data, - along=which(names(dim(data$obs$data)) == 'dat')) - } - names(dim(obs.mm)) <- names(dim(data$obs$data)) - obs$data <- obs.mm - remove(obs.mm) - gc() - } - + if (recipe$Analysis$Variables$freq == "monthly_mean") { CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") - ## TODO: this belongs in the recipe checker + ## TODO: implement other calibration methods if (!(method %in% CST_CALIB_METHODS)) { error(recipe$Run$logger, retrieve = retrieve, paste("Calibration method in the recipe is not available for", "monthly data.")) stop() } else { - ## TODO: Change again if all data ends up being an s2dv_cube - arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL, - cal.method = method, eval.method = "leave-one-out", - multi.model = mm, na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, alpha = NULL, - memb_dim = "ensemble", sdate_dim = "syear", - dat_dim = NULL, ncores = ncores) - if (inherits(data$hcst, "s2dv_cube")) { - fun <- CST_Calibration - } else { - fun <- Calibration - } - hcst_calibrated <- do.call(fun, arguments) + # Calibrate the hindcast + hcst_calibrated <- CST_Calibration(data$hcst, data$obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) # In the case where anomalies have been computed, calibrate full values if (!is.null(data$hcst.full_val)) { hcst_full_calibrated <- CST_Calibration(data$hcst.full_val, @@ -95,15 +84,18 @@ Calibration <- function(recipe, data, retrieve = TRUE) { } # Calibrate the forecast - ## TODO: Change again if all data ends up being an s2dv_cube if (!is.null(data$fcst)) { - arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst, - cal.method = method, eval.method = "leave-one-out", - multi.model = mm, na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, alpha = NULL, - memb_dim = "ensemble", sdate_dim = "syear", - dat_dim = NULL, ncores = ncores) - fcst_calibrated <- do.call(fun, arguments) + fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) } else { fcst_calibrated <- NULL } @@ -111,21 +103,24 @@ Calibration <- function(recipe, data, retrieve = TRUE) { } else if 
(recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { - error(recipe$Run$logger, retrieve = retrieve, + error(recipe$Run$logger, retrieve = retrieve, paste("Calibration method in the recipe is not available for", "daily data. Only quantile mapping 'qmap' is implemented.")) stop() } # Calibrate the hindcast - ## TODO: Change again if all data ends up being an s2dv_cube - arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL, - sdate_dim = "syear", memb_dim = "ensemble", - method = "QUANT", ncores = ncores, - na.rm = na.rm, wet.day = F) - dim_order <- names(data$hcst$dims) - fun <- CST_QuantileMapping - hcst_calibrated <- do.call(fun, arguments) - # # Restore dimension order + dim_order <- names(dim(data$hcst$data)) + hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) + # Restore dimension order hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) # In the case where anomalies have been computed, calibrate full values if (!is.null(data$hcst.full_val)) { @@ -141,14 +136,18 @@ } else { hcst_full_calibrated <- NULL } + if (!is.null(data$fcst)) { - ## TODO: Change again if all data ends up being an s2dv_cube - arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst, - sdate_dim = "syear", memb_dim = "ensemble", - method = "QUANT", ncores = ncores, - na.rm = na.rm, wet.day = F) # Calibrate the forecast - fcst_calibrated <- do.call(fun, arguments) + fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, + exp_cor = data$fcst, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) # Restore dimension order fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) } else { fcst_calibrated <- NULL } } } - ## TODO: Sort out returns - return_list <- list(hcst = hcst_calibrated, - obs = data$obs, - fcst = fcst_calibrated) - # if (!is.null(hcst_full_calibrated)) { - # return_list <- append(return_list, - # list(hcst.full_val = hcst_full_calibrated, - # obs.full_val = data$obs.full_val)) - # } + info(recipe$Run$logger, retrieve = retrieve, CALIB_MSG) + .log_memory_usage(recipe$Run$logger, when = "After calibration") + # Saving if (retrieve) { if (recipe$Analysis$Workflow$Calibration$save != 'none') { info(recipe$Run$logger, retrieve = retrieve, "##### START SAVING CALIBRATED DATA #####") + + ## TODO: What do we do with the full values?
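# The 'retrieve' gating used above, reduced to its essence: writing files
# only ever happens on the retrieving (main) process, so the same module
# body can also run inside Compute() chunks, which must not save anything.
# Minimal sketch with a hypothetical stand-in save function:
save_if_retrieving <- function(result, retrieve = TRUE,
                               save_fun = function(x) message("saving ", length(x), " values")) {
  if (retrieve) {
    save_fun(result)  # only the main process writes output
  }
  invisible(result)
}
save_if_retrieving(1:10, retrieve = FALSE)  # inside a chunk: nothing is written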
+ recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Calibration/") + if ((recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { + save_forecast(recipe = recipe, data_cube = fcst_calibrated, type = 'fcst') + } + if (recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = hcst_calibrated, type = 'hcst') + } + if (recipe$Analysis$Workflow$Calibration$save == 'all') { + save_observations(recipe = recipe, data_cube = data$obs) + } } } - info(recipe$Run$logger, retrieve = retrieve, CALIB_MSG) + + ## TODO: Sort out returns + return_list <- list(hcst = hcst_calibrated, + obs = data$obs, + fcst = fcst_calibrated) + if (!is.null(hcst_full_calibrated)) { + return_list <- append(return_list, + list(hcst.full_val = hcst_full_calibrated, + obs.full_val = data$obs.full_val)) + } return(return_list) } diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 59c22baa..4e3a8870 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -24,7 +24,7 @@ Loading <- function(recipe, retrieve = TRUE) { } } else if (time_horizon == "decadal") { source("modules/Loading/R/load_decadal.R") - data <- load_decadal(recipe) + data <- load_decadal(recipe, retrieve = retrieve) } else { stop("Incorrect time horizon.") } diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index f084ce62..2458231e 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -15,27 +15,27 @@ library(lubridate) -dates2load <- function(recipe, logger) { +dates2load <- function(recipe) { temp_freq <- recipe$Analysis$Variables$freq - recipe <- recipe$Analysis$Time + Time <- recipe$Analysis$Time # hcst dates - file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), - recipe$sdate) + file_dates <- paste0(strtoi(Time$hcst_start):strtoi(Time$hcst_end), + Time$sdate) if (temp_freq == "monthly_mean") { file_dates <- .add_dims(file_dates) } # fcst dates (if fcst_year empty it creates an empty object) - if (! 
is.null(recipe$fcst_year)) { - file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) + if (!is.null(Time$fcst_year)) { + file_dates.fcst <- paste0(Time$fcst_year, Time$sdate) if (temp_freq == "monthly_mean") { file_dates.fcst <- .add_dims(file_dates.fcst) } } else { file_dates.fcst <- NULL - info(logger, - paste("fcst_year empty in the recipe, creating empty fcst object...")) + info(recipe$Run$logger, retrieve = TRUE, + "fcst_year empty in the recipe, creating empty fcst object...") } return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 9268b090..4d92d4e9 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -11,7 +11,7 @@ source("modules/Loading/R/compare_exp_obs_grids.R") # recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" # recipe_file <- "recipes/atomic_recipes/recipe_decadal_daily.yml" -load_decadal <- function(recipe) { +load_decadal <- function(recipe, retrieve = TRUE) { ## archive <- read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index 1f953b20..6e8d40a9 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -5,7 +5,7 @@ source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") source("modules/Loading/R/compare_exp_obs_grids.R") -load_seasonal <- function(recipe, retrieve = T) { +load_seasonal <- function(recipe, retrieve = TRUE) { # ------------------------------------------- # Set params ----------------------------------------- @@ -22,8 +22,7 @@ load_seasonal <- function(recipe, retrieve = T) { store.freq <- recipe$Analysis$Variables$freq # get sdates array - ## LOGGER: Change dates2load to extract logger from recipe? - sdates <- dates2load(recipe, recipe$Run$logger) + sdates <- dates2load(recipe) idxs <- NULL idxs$hcst <- get_timeidx(sdates$hcst, @@ -297,10 +296,10 @@ load_seasonal <- function(recipe, retrieve = T) { # Adjust obs dimensions and compare exp and obs grids, only for retrieve = TRUE if (retrieve) { # Adds ensemble dim to obs (for consistency with hcst/fcst) + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") default_dims <- c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 1, latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(obs))] <- dim(obs) dim(obs) <- default_dims diff --git a/modules/Loading/R/load_tas_tos.R b/modules/Loading/R/load_tas_tos.R index ea231b56..e3972ba8 100644 --- a/modules/Loading/R/load_tas_tos.R +++ b/modules/Loading/R/load_tas_tos.R @@ -6,7 +6,7 @@ source("modules/Loading/R/check_latlon.R") source('modules/Loading/R/mask_tas_tos.R') source("modules/Loading/R/compare_exp_obs_grids.R") -load_tas_tos <- function(recipe) { +load_tas_tos <- function(recipe, retrieve = TRUE) { # ------------------------------------------- # Set params ----------------------------------------- @@ -33,8 +33,7 @@ load_tas_tos <- function(recipe) { # get sdates array - ## LOGGER: Change dates2load to extract logger from recipe? 
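# Traced with illustrative values, the date-building logic of the rewritten
# dates2load() above yields one file_date string per hindcast start year,
# e.g. for hcst_start = "2000", hcst_end = "2002" and sdate = "1101":
paste0(strtoi("2000"):strtoi("2002"), "1101")
# [1] "20001101" "20011101" "20021101"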
- sdates <- dates2load(recipe, recipe$Run$logger) + sdates <- dates2load(recipe) idxs <- NULL idxs$hcst <- get_timeidx(sdates$hcst, @@ -480,7 +479,7 @@ load_tas_tos <- function(recipe) { compare_exp_obs_grids(exp = hcst, obs = obs) } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") return(list(hcst = hcst, fcst = fcst, obs = obs)) diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R index 050fe68d..46dac25f 100644 --- a/modules/Saving/R/save_corr.R +++ b/modules/Saving/R/save_corr.R @@ -122,6 +122,6 @@ save_corr <- function(recipe, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R index 00a22850..23b16836 100644 --- a/modules/Saving/R/save_forecast.R +++ b/modules/Saving/R/save_forecast.R @@ -142,6 +142,6 @@ save_forecast <- function(recipe, } } } - info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) + info(recipe$Run$logger, retrieve = TRUE, + paste("#####", toupper(type), "SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index cd4252ab..060addfd 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -131,6 +131,7 @@ save_metrics <- function(recipe, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, retrieve = TRUE, + "##### SKILL METRICS SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R index 127e9890..fd7c0832 100644 --- a/modules/Saving/R/save_observations.R +++ b/modules/Saving/R/save_observations.R @@ -141,5 +141,6 @@ save_observations <- function(recipe, } } } - info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, retrieve = TRUE, + "##### OBS SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index 862ed5ff..96603110 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -113,5 +113,6 @@ save_percentiles <- function(recipe, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, retrieve = TRUE, + "##### PERCENTILES SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index b7da0449..56acf158 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -127,7 +127,7 @@ save_probabilities <- function(recipe, } } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste("#####", toupper(type), "PROBABILITIES SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index fc9fe4ee..cd5a0a47 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -31,12 +31,13 @@ Saving <- function(recipe, data, # Sanity checks if (is.null(recipe)) { - error(recipe$Run$logger, "The 'recipe' parameter is mandatory.") + error(recipe$Run$logger, retrieve = TRUE, + "The 'recipe' parameter is mandatory.") stop() } - + ## TODO: Modify this if (is.null(data)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The 'data' parameter is mandatory. 
It should be a list", "of at least two s2dv_cubes containing the hcst and obs.")) stop() diff --git a/tools/data_summary.R b/tools/data_summary.R index b76101ba..4c2c8e21 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -20,32 +20,36 @@ data_summary <- function(data_cube, recipe) { sdate_max <- format(max(as.Date(data_cube$attrs$Dates)), format = date_format) # Log the summary - info(recipe$Run$logger, "DATA SUMMARY:") - info(recipe$Run$logger, paste(object_name, "months:", months)) - info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to", + info(recipe$Run$logger, retrieve = TRUE, "DATA SUMMARY:") + info(recipe$Run$logger, retrieve = TRUE, + paste(object_name, "months:", months)) + info(recipe$Run$logger, retrieve = TRUE, + paste(object_name, "range:", sdate_min, "to", sdate_max)) - info(recipe$Run$logger, paste(object_name, "dimensions:")) + info(recipe$Run$logger, retrieve = TRUE, + paste(object_name, "dimensions:")) # Use capture.output() and for loop to display results neatly output_string <- capture.output(dim(data_cube$data)) for (i in output_string) { - info(recipe$Run$logger, i) + info(recipe$Run$logger, retrieve = TRUE, i) } # Print statistical summary of the data for every variable - info(recipe$Run$logger, paste0("Statistical summary of the data in ", - object_name, ":")) + info(recipe$Run$logger, retrieve = TRUE, + paste0("Statistical summary of the data in ", object_name, ":")) for (var_index in 1:data_cube$dims[['var']]) { variable_name <- data_cube$attrs$Variable$varName[var_index] variable_units <- data_cube$attrs$Variable$metadata[[variable_name]]$units - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste0("Variable: ", variable_name, " (units: ", variable_units, ")")) output_string <- capture.output(summary(Subset(data_cube$data, along = "var", indices = var_index))) for (i in output_string) { - info(recipe$Run$logger, i) + info(recipe$Run$logger, retrieve = TRUE, i) } } - info(recipe$Run$logger, "---------------------------------------------") + info(recipe$Run$logger, retrieve = TRUE, + "---------------------------------------------") invisible(gc()) } -- GitLab From d60679cacb1925147cd51954a8e2c62b2367b304 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 20 Feb 2024 12:53:00 +0100 Subject: [PATCH 54/96] Add startR workflow checks for Indices module --- tools/check_recipe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 6e41e1d3..d39a784c 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -722,7 +722,7 @@ check_recipe <- function(recipe) { STARTR_PARAMS <- c("modules", "chunk_along") STARTR_MODULES <- c("calibration", "anomalies", "downscaling", - "skill", "probabilities") + "skill", "probabilities", "indices") CHUNK_DIMS <- c("var", "time", "latitude", "longitude") MODULES_USING_LATLON <- c("downscaling", "indices") MODULES_USING_TIME <- c("indicators") -- GitLab From a41ca0e40872e24e4e6374172679a9216d353604 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 20 Feb 2024 12:53:42 +0100 Subject: [PATCH 55/96] Add 'retrieve' parameter to logger calls --- modules/Anomalies/Anomalies.R | 6 ++++-- tools/Utils.R | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 2f738bdc..d1163b57 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -127,7 +127,8 @@ Anomalies <- function(recipe, data, retrieve = TRUE) { "##### ANOMALIES 
COMPUTED SUCCESSFULLY #####") if (retrieve) { if (recipe$Analysis$Workflow$Anomalies$save != 'none') { - info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING ANOMALIES #####") } # Save outputs recipe$Run$output_dir <- paste0(recipe$Run$output_dir, @@ -159,7 +160,8 @@ Anomalies <- function(recipe, data, retrieve = TRUE) { } ## TODO: Return fcst full value? - .log_memory_usage(recipe$Run$logger, "After computing anomalies") + .log_memory_usage(recipe$Run$logger, retrieve = retrieve, + "After computing anomalies") return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) } diff --git a/tools/Utils.R b/tools/Utils.R index c0acf374..a5cc84c2 100644 --- a/tools/Utils.R +++ b/tools/Utils.R @@ -1,13 +1,13 @@ ## TODO: Write header ## TODO: Add if 'DEBUG' -.log_memory_usage <- function(logger, when) { - debug(logger, paste0(when, ":")) +.log_memory_usage <- function(logger, retrieve = TRUE, when = NULL) { + debug(logger, retrieve = retrieve, paste0(when, ":")) mem_info <- capture.output(memuse::Sys.meminfo()) for (i in mem_info) { - debug(recipe$Run$logger, i) + debug(logger, retrieve = retrieve, i) } proc_mem <- capture.output(memuse::Sys.procmem()) for (i in proc_mem) { - debug(recipe$Run$logger, i) + debug(logger, retrieve = retrieve, i) } } -- GitLab From ffe655738912ae65bef405d24484442a11a5e5d5 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 20 Feb 2024 12:54:46 +0100 Subject: [PATCH 56/96] Add Files attribute to Step() --- build_compute_workflow.R | 37 ++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 22881188..df05dc0a 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -7,6 +7,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Anomalies/Anomalies.R") source("modules/Downscaling/Downscaling.R") source("modules/Skill/Skill.R") + source("modules/Indices/Indices.R") # Define appender with custom log layout so that it knows not to append # within Compute(). @@ -19,7 +20,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, obs = obs, fcst = fcst) data <- data[!sapply(data, is.null)] - print(names(data)) var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]]) # Remove duplicated objects rm(hcst, obs, fcst) @@ -31,7 +31,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, class(x) <- "startR_array" as.s2dv_cube(x) }) - for (cube_idx in seq(1:length(data))) { data[[cube_idx]]$attrs$Variable$varName <- var_name } @@ -55,7 +54,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, ## TODO: Adapt probabilities output probabilities <- Probabilities(recipe, data, retrieve = F, nchunks = nchunks) } else if (module == "indices") { - ## TODO: Won't work with plots... + ## TODO: Won't work with plots... OR WILL IT?
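A note on the info() and debug() calls rewritten throughout this series: stock log4r::info() and log4r::debug() take no 'retrieve' argument, so these patches evidently rely on project-level wrappers. Below is a minimal, hypothetical sketch of such a wrapper; the fallback behaviour inside Compute() chunks is an assumption, not the repo's confirmed definition.

# Hypothetical wrapper; the real definition lives elsewhere in the repo.
info <- function(logger, retrieve = TRUE, ...) {
  if (retrieve) {
    log4r::info(logger, ...)  # normal run: append to the recipe log file
  } else {
    message("[INFO] ", ...)   # inside a Compute() chunk: print to stdout
  }
}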
data <- Indices(recipe, data, retrieve = F) } } @@ -103,8 +102,8 @@ run_compute_workflow <- function(recipe, data) { obs = obs_target_dims) inputs <- list(hcst = data$hcst, obs = data$obs) # Define input attributes from Start() - input_attributes <- list(hcst = c("Variables", "FileSelectors"), - obs = c("Variables", "FileSelectors")) + input_attributes <- list(hcst = c("Variables", "Dimensions", "FileSelectors", "Files"), + obs = c("Variables", "Dimensions", "FileSelectors", "Files")) # Add forecast if not empty if (!is.null(data$fcst)) { target_dims <- c(target_dims, @@ -114,7 +113,14 @@ run_compute_workflow <- function(recipe, data) { } # Create output dimensions - exp_output_dims <- exp_target_dims[!exp_target_dims == 'var_dir'] + ## TODO: Improve this conditions + if ("indices" %in% modules) { + spatial_output_dims <- c("region") + } else { + spatial_output_dims <- c("latitude", "longitude") + } + exp_output_dims <- c(exp_target_dims[!exp_target_dims %in% c('var_dir', 'latitude', 'longitude')], + spatial_output_dims) # Default outputs: hindcast and observations output_dims <- list(hcst = exp_output_dims, obs = exp_output_dims) @@ -125,16 +131,16 @@ run_compute_workflow <- function(recipe, data) { } # Add skill metrics if skill module is called if ("skill" %in% modules) { - skill_dims <- c('metric', 'var', 'time', 'latitude', 'longitude') + skill_dims <- c('metric', 'var', 'time', spatial_output_dims) skill_output_dims <- skill_dims[!skill_dims %in% names(recipe$Run$startR_workflow$chunk_along)] output_dims <- c(output_dims, list(skill = skill_output_dims)) } ## TODO: Add Indices (Niño1+2, Niño3, Niño3.4, Niño4, NAO) - if ("indices" %in% modules) { - idx_dims <- c('index', 'var', 'syear', 'time', 'region') - } + # if ("indices" %in% modules) { + # idx_dims <- c('index', 'var', 'syear', 'time', 'region') + # } # --------------------------------------------------------------------------- # Step 3: Generate the Step and call Compute() @@ -243,8 +249,17 @@ run_compute_workflow <- function(recipe, data) { # Step 5: Remove temporary files unlink(paste0(recipe$Run$output_dir, "/outputs/tmp/"), recursive = TRUE) + + # --------------------------------------------------------------------------- + # Step 6: Save data + ## PROBLEM: Defining the output dir + # Saving(recipe = recipe, data = res$data, skill_metrics = res$skill) + if (recipe$Analysis$Workflow$Skill$save == 'all') { + save_metrics(recipe = recipe, skill = res$skill, + agg = "global") + } # --------------------------------------------------------------------------- - # Step 6: Return outputs + # Step 7: Return outputs res <- res[!sapply(res, is.null)] info(recipe$Run$logger, retrieve = TRUE, "##### DATA RETURNED AS A NAMED LIST #####") -- GitLab From 9d2bb54f5128da9e5eb4d480ab57a1b01ba10826 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 20 Feb 2024 12:55:33 +0100 Subject: [PATCH 57/96] Modify indices module to work in Compute() (WIP) --- modules/Indices/Indices.R | 14 +++++--- modules/Indices/R/compute_nao.R | 62 ++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/modules/Indices/Indices.R b/modules/Indices/Indices.R index fb9a7277..b767e983 100644 --- a/modules/Indices/Indices.R +++ b/modules/Indices/Indices.R @@ -2,7 +2,13 @@ source("modules/Indices/R/compute_nao.R") source("modules/Indices/R/compute_nino.R") source("modules/Indices/R/drop_indices_dims.R") source("modules/Saving/Saving.R") -Indices <- function(recipe, data) { +## TODO: Remove later 
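For context on the STARTR_CUBE_ATTRS change above: inside the step function each chunk arrives as a bare array, and only the attributes forwarded through Step()'s 'use_attributes' argument survive. A condensed sketch of the attribute round trip performed by the convert_to_s2dv_cube() helper follows; the helper name is the repo's, but the body and the convert_chunk() name here are illustrative.

# Re-attach the forwarded Start() attributes, then rebuild the s2dv_cube.
convert_chunk <- function(chunk, attrs) {
  for (a in names(attrs)) {
    attr(chunk, a) <- attrs[[a]]
  }
  class(chunk) <- "startR_array"
  CSTools::as.s2dv_cube(chunk)
}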
+source("/home/shared/Earth/vagudets/git/s2dv/R/NAO.R") +source("/home/shared/Earth/vagudets/git/s2dv/R/Utils.R") +source("/home/shared/Earth/vagudets/git/s2dv/R/EOF.R") +source("/home/shared/Earth/vagudets/git/s2dv/R/ProjectField.R") + +Indices <- function(recipe, data, retrieve = TRUE) { # Define parameters nao <- NULL if ("nao" %in% tolower(names(recipe$Analysis$Workflow$Indices))) { @@ -29,7 +35,7 @@ Indices <- function(recipe, data) { nao <- compute_nao(data = data, recipe = recipe, obsproj = obsproj, plot_ts = plot_ts, plot_sp = plot_sp, - alpha = alpha) + alpha = alpha, retrieve = retrieve) } ninos <- list() num_ninos <- sum(tolower(substr(names(recipe$Analysis$Workflow$Indices), @@ -101,11 +107,11 @@ Indices <- function(recipe, data) { } if (is.null(nao)) { return(ninos) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### EL NINO INDICES COMPUTED SUCCESSFULLY #####") } else { return(nao) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### NAO INDEX COMPUTED SUCCESSFULLY #####") } } diff --git a/modules/Indices/R/compute_nao.R b/modules/Indices/R/compute_nao.R index 5bb76e4e..c24aebe7 100644 --- a/modules/Indices/R/compute_nao.R +++ b/modules/Indices/R/compute_nao.R @@ -1,6 +1,6 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, - alpha, logo = NULL) { + alpha, logo = NULL, retrieve = TRUE) { ## TODO: if fcst object in data compute the nao too if (!is.null(data$fcst)) { warning("NAO computed only for hindcast data.") @@ -117,6 +117,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, Dates = data$obs$attrs$Dates, Dataset = recipe$Analysis$Datasets$Reference$name)) if (recipe$Analysis$Workflow$Indices$NAO$save == 'all') { + ## TODO: Fix output directory file_dest <- paste0(recipe$Run$output_dir, "/outputs/Indices/") if (tolower(recipe$Analysis$Horizon) == "seasonal") { # Use startdates param from SaveExp to correctly name the files: @@ -134,23 +135,25 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, # need to recover original dimensions after saving to make Skill module work nao_original_dims_hcst <- nao$hcst$data nao$hcst$data <- .drop_indices_dims(nao$hcst$data) - CST_SaveExp(data = nao$hcst, - destination = file_dest, - startdates = as.vector(file_dates), - dat_dim = NULL, sdate_dim = 'syear', - ftime_dim = 'time', var_dim = NULL, - memb_dim = 'ensemble') - nao_original_dims_obs <- nao$obs$data - nao$obs$data <- .drop_indices_dims(nao$obs$data) - CST_SaveExp(data = nao$obs, #res, - destination = file_dest, - startdates = as.vector(file_dates), - dat_dim = NULL, sdate_dim = 'syear', - ftime_dim = 'time', var_dim = NULL, - memb_dim = NULL) - nao$hcst$data <- nao_original_dims_hcst - nao$obs$data <- nao_original_dims_obs - nao_original_dims_hcst <- nao_original_dims_obs <- NULL + if (retrieve) { + CST_SaveExp(data = nao$hcst, + destination = file_dest, + startdates = as.vector(file_dates), + dat_dim = NULL, sdate_dim = 'syear', + ftime_dim = 'time', var_dim = NULL, + memb_dim = 'ensemble') + nao_original_dims_obs <- nao$obs$data + nao$obs$data <- .drop_indices_dims(nao$obs$data) + CST_SaveExp(data = nao$obs, #res, + destination = file_dest, + startdates = as.vector(file_dates), + dat_dim = NULL, sdate_dim = 'syear', + ftime_dim = 'time', var_dim = NULL, + memb_dim = NULL) + nao$hcst$data <- nao_original_dims_hcst + nao$obs$data <- nao_original_dims_obs + nao_original_dims_hcst <- nao_original_dims_obs <- NULL + } gc() } # Read variable long_name to plot it @@ -162,13 +165,24 
@@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), showWarnings = F, recursive = T) source("modules/Indices/R/plot_deterministic_forecast.R") - for (tstep in 1:dim(nao$hcst$data)['time']) { - mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + - (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) - mes <- ifelse(mes > 12, mes - 12, mes) + ## TODO: is there a better way to define the time steps? Probably yes. + time_step_dates <- Subset(data$hcst$attrs$Dates, + along = "syear", + indices = 1, + drop = T) + for (tstep in 1:length(time_step_dates)) { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) - obs <- Subset(nao$obs$data, along = 'time', ind = tstep) - exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) + ## TODO: What to do if there is no 'time' dimension?... + if ('time' %in% dim(nao$obs$data)) { + obs <- Subset(nao$obs$data, along = 'time', ind = tstep) + exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) + } else { + obs <- nao$obs$data + exp <- nao$hcst$data + } if (gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { system <- recipe$Analysis$Datasets$System$name } else { -- GitLab From a4b76000b030de691b2715197108a95bf4d7dcc3 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 20 Feb 2024 16:59:49 +0100 Subject: [PATCH 58/96] Adapt compute_nao() to work within Compute() --- modules/Indices/R/compute_nao.R | 60 ++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/modules/Indices/R/compute_nao.R b/modules/Indices/R/compute_nao.R index c24aebe7..0499c147 100644 --- a/modules/Indices/R/compute_nao.R +++ b/modules/Indices/R/compute_nao.R @@ -133,9 +133,9 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, recipe$Analysis$Time$hcst_start : recipe$Analysis$Time$hcst_end) } # need to recover original dimensions after saving to make Skill module work - nao_original_dims_hcst <- nao$hcst$data - nao$hcst$data <- .drop_indices_dims(nao$hcst$data) if (retrieve) { + nao_original_dims_hcst <- nao$hcst$data + nao$hcst$data <- .drop_indices_dims(nao$hcst$data) CST_SaveExp(data = nao$hcst, destination = file_dest, startdates = as.vector(file_dates), @@ -171,12 +171,11 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, indices = 1, drop = T) for (tstep in 1:length(time_step_dates)) { - mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + - (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) - mes <- ifelse(mes > 12, mes - 12, mes) - fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + mes <- lubridate::month(time_step_dates[tstep]) + fmonth <- mes - as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + 1 + fmonth <- sprintf("%02d", ifelse(fmonth < 0, fmonth + 12, fmonth)) ## TODO: What to do if there is no 'time' dimension?... 
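A self-contained illustration of the dimension guard introduced just below: subset along 'time' only when the array actually carries that dimension, since ClimProjDiags::Subset() cannot select along an absent dimension.

library(ClimProjDiags)
x <- array(rnorm(6), dim = c(syear = 2, time = 3))
if ('time' %in% names(dim(x))) {
  x1 <- Subset(x, along = 'time', indices = 1)  # keeps time = 1 (drop = FALSE)
} else {
  x1 <- x                                       # no time dimension: use as-is
}
dim(x1)  # syear = 2, time = 1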
- if ('time' %in% dim(nao$obs$data)) { + if ('time' %in% names(dim(nao$obs$data))) { obs <- Subset(nao$obs$data, along = 'time', ind = tstep) exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) } else { @@ -196,7 +195,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", system, "_", recipe$Analysis$Datasets$Reference$name, "_s", recipe$Analysis$Time$sdate, "_ftime", - sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".png") + fmonth, ".png") caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -212,8 +211,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, recipe$Analysis$Time$hcst_end) plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", system, "_", recipe$Analysis$Datasets$Reference$name, - "_ftime", - sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".png") + "_ftime", fmonth, ".png") caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -272,12 +270,26 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, time_dim = 'syear', method = 'pearson', alpha = alpha, test.type = 'two-sided', pval = FALSE)}, ncores = recipe$Analysis$ncores) - - for (tstep in 1:dim(nao$obs$data)['time']) { - fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + + + time_step_dates <- Subset(data$hcst$attrs$Dates, + along = "syear", + indices = 1, + drop = T) + for (tstep in 1:length(time_step_dates)) { + mes <- lubridate::month(time_step_dates[tstep]) + fmonth <- mes - as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + 1 + fmonth <- sprintf("%02d", ifelse(fmonth < 0, fmonth + 12, fmonth)) + # for (tstep in 1:dim(nao$obs$data)['time']) { + # fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) ## Observations - map <- drop(Subset(correl_obs$r, along = 'time', ind = tstep)) - sig <- drop(Subset(correl_obs$sign, along = 'time', ind = tstep)) + if ("time" %in% names(dim(data$hcst$data))) { + map <- drop(Subset(correl_obs$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_obs$sign, along = 'time', ind = tstep)) + } else { + map <- drop(correl_obs$r) + sig <- drop(correl_obs$sign) + } if (tolower(recipe$Analysis$Horizon) == "seasonal") { mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) @@ -337,8 +349,13 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, fileout = plotfile, width = 8, height = 6, brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) ## Ensemble-mean - map <- drop(Subset(correl_hcst$r, along = 'time', ind = tstep)) - sig <- drop(Subset(correl_hcst$sign, along = 'time', ind = tstep)) + if ("time" %in% names(dim(data$hcst$data))) { + map <- drop(Subset(correl_hcst$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst$sign, along = 'time', ind = tstep)) + } else { + map <- drop(correl_hcst$r) + sig <- drop(correl_hcst$sign) + } if (tolower(recipe$Analysis$Horizon) == "seasonal") { toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", "NAO Index -", var_name, "\n", @@ -394,8 +411,13 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) # Full hcst corr - map <- drop(Subset(correl_hcst_full$r, along = 'time', ind = tstep)) - sig <- drop(Subset(correl_hcst_full$sign, 
along = 'time', ind = tstep)) + if ("time" %in% names(dim(correl_hcst_full$r))) { + map <- drop(Subset(correl_hcst_full$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst_full$sign, along = 'time', ind = tstep)) + } else { + map <- drop(correl_hcst_full$r) + sig <- drop(correl_hcst_full$sign) + } if (tolower(recipe$Analysis$Horizon) == "seasonal") { toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", "NAO Index -",var_name, "\n", -- GitLab From 818cb7f99f63af664167606820f0c09112e43053 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 27 Feb 2024 10:15:24 +0100 Subject: [PATCH 59/96] Fully adapt compute_nao() to work within Compute() --- modules/Indices/R/compute_nao.R | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/modules/Indices/R/compute_nao.R b/modules/Indices/R/compute_nao.R index 0499c147..b566f2e2 100644 --- a/modules/Indices/R/compute_nao.R +++ b/modules/Indices/R/compute_nao.R @@ -49,10 +49,11 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, Variable = c(data$hcst$Variable[1], level = 'surface'), data$hcst$Va, Datasets = data$hcst$Datasets, time_dims = c('syear', 'time'), - Dates = data$hcst$Dates) + Dates = data$hcst$attrs$Dates) obs <- s2dv_cube(data = obs1$data, lat = obs1$lat, lon = obs1$lon, Variable = c(data$obs$Variable[1], level = 'surface'), - Datasets = data$obs$Datasets, time_dims = c('syear', 'time')) + Datasets = data$obs$Datasets, time_dims = c('syear', 'time'), + Dates = data$obs$attrs$Dates) # TODO check and create data object for the next steps data <- list(hcst = hcst, obs = obs) lons <- data$hcst$coords$longitude @@ -83,15 +84,8 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, nao$exp <- InsertDim(nao$exp, posdim = 1, lendim = 1, name = 'region') nao$obs <- InsertDim(nao$obs, posdim = 1, lendim = 1, name = 'region') hcst_dates <- data$hcst$attrs$Dates - hcst_dates <- drop(data$hcst$attrs$Dates) - - if (!("time" %in% names(dim(hcst_dates)))) { - if (is.null(dim(hcst_dates))) { - hcst_dates <- array(hcst_dates, c(syear = length(hcst_dates))) - } - hcst_dates <- InsertDim(hcst_dates, pos = 1, len = 1, name = 'time') - hcst_dates <- as.POSIXct(hcst_dates, origin = '1970-01-01', tz = 'UTC') - } + dim(hcst_dates) <- dim(hcst_dates)[intersect(names(dim(hcst_dates)), + names(dim(nao$exp)))] nao <- list(hcst = s2dv_cube( data = nao$exp, varName = "nao", @@ -165,7 +159,6 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), showWarnings = F, recursive = T) source("modules/Indices/R/plot_deterministic_forecast.R") - ## TODO: is there a better way to define the time steps? Probably yes. time_step_dates <- Subset(data$hcst$attrs$Dates, along = "syear", indices = 1, @@ -174,7 +167,6 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, mes <- lubridate::month(time_step_dates[tstep]) fmonth <- mes - as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + 1 fmonth <- sprintf("%02d", ifelse(fmonth < 0, fmonth + 12, fmonth)) - ## TODO: What to do if there is no 'time' dimension?... 
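The one-character pitfall behind the names(dim()) correction above, shown with mock values: testing membership against dim(x) compares the string with the dimension lengths, not the dimension names.

x <- array(1:6, dim = c(syear = 2, time = 3))
'time' %in% dim(x)         # FALSE: "time" is compared against c(2, 3)
'time' %in% names(dim(x))  # TRUE: compared against c("syear", "time")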
if ('time' %in% names(dim(nao$obs$data))) { obs <- Subset(nao$obs$data, along = 'time', ind = tstep) exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) -- GitLab From 1de88788b32ce3a9c9d7cafc1ec43ab39c8c0e9b Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 27 Feb 2024 15:30:19 +0100 Subject: [PATCH 60/96] Adapt saving data functions and module output (WIP) --- modules/Saving/R/save_metrics.R | 20 +++++++++++-------- modules/Saving/Saving.R | 9 +++++---- modules/Skill/Skill.R | 35 +++++++++++++++------------------ 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 060addfd..9e217b95 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -1,7 +1,7 @@ save_metrics <- function(recipe, skill, + module, dictionary = NULL, - data_cube, agg = "global", outdir = NULL) { # This function adds metadata to the skill metrics in 'skill' @@ -12,7 +12,9 @@ save_metrics <- function(recipe, dictionary <- read_yaml("conf/variable-dictionary.yml") global_attributes <- .get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided + cube_info <- skill[["metadata"]] + skill[["metadata"]] <- NULL + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), @@ -27,7 +29,7 @@ save_metrics <- function(recipe, calendar <- archive$System[[global_attributes$system]]$calendar # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + dates <- as.PCICt(ClimProjDiags::Subset(cube_info$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { @@ -66,14 +68,16 @@ save_metrics <- function(recipe, times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) time <- times$time # Loop over variable dimension - for (var in 1:data_cube$dims[['var']]) { + for (var in 1:cube_info$dims[['var']]) { # Subset skill arrays subset_skill <- lapply(skill, function(x) { ClimProjDiags::Subset(x, along = 'var', indices = var, drop = 'selected')}) # Generate name of output file - variable <- data_cube$attrs$Variable$varName[[var]] + variable <- cube_info$attrs$Variable$varName[[var]] + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/", + module, "/") outdir <- get_dir(recipe = recipe, variable = variable) if (!dir.exists(outdir)) { dir.create(outdir, recursive = T) @@ -116,14 +120,14 @@ save_metrics <- function(recipe, } else if (tolower(agg) == "region") { region <- array(1:dim(skill[[1]])['region'], c(dim(skill[[1]])['region'])) # TODO: check metadata when more than 1 region is store in the data array - metadata <- list(region = list(long_name = data_cube$attrs$Variable$metadata$region$name)) + metadata <- list(region = list(long_name = cube_info$attrs$Variable$metadata$region$name)) attr(region, 'variables') <- metadata vars <- list(region, time) vars <- c(vars, subset_skill) ArrayToNc(vars, outfile) } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latitude <- cube_info$coords$lat[1:length(cube_info$coords$lat)] + longitude <- cube_info$coords$lon[1:length(cube_info$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) diff --git a/modules/Saving/Saving.R 
b/modules/Saving/Saving.R index cd5a0a47..17f6492e 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -18,11 +18,9 @@ source("modules/Saving/R/drop_dims.R") Saving <- function(recipe, data, skill_metrics = NULL, probabilities = NULL, - agg = 'global', - archive = NULL) { + agg = 'global') { # Wrapper for the saving functions. # recipe: The auto-s2s recipe - # archive: The auto-s2s archive # data: output of load_datasets() # data: output of calibrate_datasets() # skill_metrics: output of compute_skill_metrics() @@ -57,6 +55,7 @@ Saving <- function(recipe, data, } # Iterate over variables to subset s2dv_cubes and save outputs + ## leave as-is save_forecast(recipe = recipe, data_cube = data$hcst, outdir = outdir[var], @@ -72,13 +71,15 @@ Saving <- function(recipe, data, outdir = outdir[var]) # Export skill metrics + ## TODO: change if (!is.null(skill_metrics)) { save_metrics(recipe = recipe, skill = skill_metrics, - data_cube = data$hcst, agg = agg, + # data_cube = data$hcst, agg = agg, outdir = outdir[var]) } if (!is.null(corr_metrics)) { + ## TODO: change (low priority) save_corr(recipe = recipe, skill = corr_metrics, data_cube = data$hcst, diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 75c2e689..b89e415c 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -307,6 +307,7 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, skill_metrics[[ metric ]] <- skill } } + ## TODO: Create list of returns with data and metadata info(recipe$Run$logger, retrieve = retrieve, "##### SKILL METRIC COMPUTATION COMPLETE #####") .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") @@ -317,35 +318,31 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, info(recipe$Run$logger, retrieve = retrieve, "##### START SAVING SKILL METRIC #####") } - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Skill/") + # recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + # "/outputs/Skill/") # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim if (recipe$Analysis$Workflow$Skill$save == 'all') { - corr_metric_names <- grep("^corr_individual_members", names(skill_metrics)) + corr_metric_names <- grep("^corr_individual_members", + names(skill_metrics$metrics)) if (length(corr_metric_names) == 0) { save_metrics(recipe = recipe, skill = skill_metrics, - data_cube = data$hcst, agg = agg) + module = "Skill", agg = agg) } else { # Save corr if (length(skill_metrics[corr_metric_names]) > 0) { - save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names], - data_cube = data$hcst) + save_corr(recipe = recipe, + skill = list(metrics = skill_metrics$metrics[corr_metric_names], + metadata = skill_metrics$metadata), + module = "Skill", + agg = agg) } # Save other skill metrics if (length(skill_metrics[-corr_metric_names]) > 0) { - save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], - data_cube = data$hcst, agg = agg) - } else { - # Save corr - if (length(skill_metrics[corr_metric_names]) > 0) { - save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names], - data_cube = data$hcst) - } - # Save other skill metrics - if (length(skill_metrics[-corr_metric_names]) > 0) { - save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], - data_cube = data$hcst, agg = agg) - } + save_metrics(recipe = recipe, + skill = list(metrics = skill_metrics$metrics[-corr_metric_names], + metadata = skill_metrics$metadata), + module = "Skill", + agg = agg) } } } -- 
GitLab From d8fd75d4336bdab3c688a9eb501a0b0faafb3b37 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 27 Feb 2024 15:30:45 +0100 Subject: [PATCH 61/96] Fix bug in retrieve_metadata() related to incorrect output dimensions --- tools/retrieve_metadata.R | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/tools/retrieve_metadata.R b/tools/retrieve_metadata.R index 7b922103..3550b19a 100644 --- a/tools/retrieve_metadata.R +++ b/tools/retrieve_metadata.R @@ -1,4 +1,14 @@ -retrieve_metadata <- function(tmp_dir, chunks) { +# For each chunking dimension, retrieve the metadata for each chunk from its +# .Rds file and put the pieces together, generating the complete coordinates, +# dimensions and attributes of the resulting s2dv_cube. +# +# tmp_dir: string, The temporary directory where the metadata files are stored. +# chunks: named list of chunking dimensions with the amount of chunks for each. +# array_dims: vector containing the dimensions of the final array. + +retrieve_metadata <- function(tmp_dir, chunks, array_dims) { + ## TODO: add array_dims parameter + ## For each chunking dimension # Build metadata file pattern: metadata_file_pattern <- "skill_metadata" for (chunk in sort(names(chunks))) { @@ -25,25 +35,26 @@ retrieve_metadata <- function(tmp_dir, chunks) { dim(metadata$coords$var) <- c(var = length(metadata$coords$var)) attr(metadata$coords$var, "values") <- TRUE attr(metadata$coords$var, "indices") <- FALSE - ## TODO: $dims + # $dims + metadata$dims["var"] <- array_dims["var"] } # Piece together time info if ("time" %in% names(chunks)) { dates_dims <- dim(metadata$attrs$Dates) - time_dim_idx <- which(names(dates_dims) == "time") for (i in 2:chunks[["time"]]) { metadata_chunk <- readRDS(paste0(tmp_dir, gsub("time_1", paste0("time_", i), metadata_files[1]))) metadata$attrs$Dates <- c(metadata$attrs$Dates, metadata_chunk$attrs$Dates) } - time_dim_length <- length(metadata$attrs$Dates)/prod(dates_dims[-time_dim_idx]) - dim(metadata$attrs$Dates) <- c(dates_dims[-time_dim_idx], - time = time_dim_length) + time_dim_length <- array_dims[["time"]] + dim(metadata$attrs$Dates) <- c(dates_dims[names(dates_dims) != "time"], + time = time_dim_length) metadata$coords$time <- seq(1, time_dim_length) dim(metadata$coords$time) <- c(time = time_dim_length) attr(metadata$coords$time, "indices") <- TRUE - ## TODO: Add to 'dims' + #$dims + metadata$dims["time"] <- array_dims["time"] } # Piece together lon/lat info if ("latitude" %in% names(chunks)) { @@ -59,7 +70,10 @@ retrieve_metadata <- function(tmp_dir, chunks) { # $coords metadata$coords$latitude <- metadata$attrs$Variable$metadata$latitude dim(metadata$coords$latitude) <- c(latitude = length(metadata$coords$latitude)) + # $dims + metadata$dims["latitude"] <- array_dims["latitude"] } + if ("longitude" %in% names(chunks)) { # $attrs ## TODO: Preserve attributes @@ -68,11 +82,13 @@ retrieve_metadata <- function(tmp_dir, chunks) { gsub("longitude_1", paste0("longitude_", i), metadata_files[1]))) metadata$attrs$Variable$metadata$longitude <- c(metadata$attrs$Variable$metadata$longitude, - metadata_chunk$attrs$Variable$metadata$longitude) + metadata_chunk$attrs$Variable$metadata$longitude) } # $coords metadata$coords$longitude <- metadata$attrs$Variable$metadata$longitude dim(metadata$coords$longitude) <- c(longitude = length(metadata$coords$longitude)) + # $dims + metadata$dims["longitude"] <- array_dims["longitude"] } return(metadata) } -- GitLab From 5fdaeb3c11d4d6eb4cfa998851739dcb5cab598b Mon Sep 17 
00:00:00 2001 From: vagudets Date: Tue, 27 Feb 2024 15:30:59 +0100 Subject: [PATCH 62/96] Saving and metadata bug --- build_compute_workflow.R | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index df05dc0a..fd5388a2 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -72,6 +72,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, convert_to_s2dv_cube <- function(new_cube, original_cube) { attr(new_cube, "Variables") <- attr(original_cube, "Variables") attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors") + attr(new_cube, "Files") <- attr(original_cube, "Files") class(new_cube) <- "startR_array" new_cube <- as.s2dv_cube(new_cube) @@ -102,18 +103,19 @@ run_compute_workflow <- function(recipe, data) { obs = obs_target_dims) inputs <- list(hcst = data$hcst, obs = data$obs) # Define input attributes from Start() - input_attributes <- list(hcst = c("Variables", "Dimensions", "FileSelectors", "Files"), - obs = c("Variables", "Dimensions", "FileSelectors", "Files")) + STARTR_CUBE_ATTRS <- c("Variables", "Dimensions", "FileSelectors", "Files") + input_attributes <- list(hcst = STARTR_CUBE_ATTRS, + obs = STARTR_CUBE_ATTRS) # Add forecast if not empty if (!is.null(data$fcst)) { target_dims <- c(target_dims, list(exp_target_dims)) inputs <- c(inputs, list(fcst = data$fcst)) - input_attributes <- c(input_attributes, list(fcst = c("Variables", "FileSelectors"))) + input_attributes <- c(input_attributes, list(fcst = STARTR_CUBE_ATTRS)) } # Create output dimensions - ## TODO: Improve this conditions + ## TODO: Improve this condition if ("indices" %in% modules) { spatial_output_dims <- c("region") } else { @@ -229,20 +231,21 @@ run_compute_workflow <- function(recipe, data) { if (!is.null(res$skill)) { tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/") metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds")) + ## TODO: Fails if skill has 'region' instead of 'latlon'. Should handle + ## dimensions dynamically. Can Subset() be used? 
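A hedged sketch of the chunk-metadata round trip that retrieve_metadata() above relies on: each Compute() chunk saves its slice of the s2dv_cube metadata to an .Rds file tagged with its chunk indices, and the main process reads the pieces back and concatenates the Dates. The file names and list structure below are illustrative, not the exact pattern used by the repo.

tmp_dir <- tempdir()
for (i in 1:2) {  # pretend two chunks along 'time', each holding 3 lead times
  chunk_meta <- list(attrs = list(Dates = seq(as.POSIXct("2000-11-01", tz = "UTC"),
                                              by = "month", length.out = 3)))
  saveRDS(chunk_meta, file.path(tmp_dir, paste0("skill_metadata_time_", i, ".Rds")))
}
metadata <- readRDS(file.path(tmp_dir, "skill_metadata_time_1.Rds"))
metadata_chunk <- readRDS(file.path(tmp_dir, "skill_metadata_time_2.Rds"))
metadata$attrs$Dates <- c(metadata$attrs$Dates, metadata_chunk$attrs$Dates)
length(metadata$attrs$Dates)  # 6 lead times after reassembly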
res$skill <- lapply(seq(dim(res$skill)[1]), - function(x) {res$skill[x, , , , , drop = F]}) - res$skill <- lapply(res$skill, function(x) { - Subset(x, + Subset(res$skill, along = 'metric', - indices = 1, + indices = x, drop = 'selected') }) names(res$skill) <- metric_list # Put chunked metadata back together source("tools/retrieve_metadata.R") res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, - chunks = recipe$Run$startR_workflow$chunk_along) + chunks = recipe$Run$startR_workflow$chunk_along, + array_dims = dim(res$skill[[1]])) } # --------------------------------------------------------------------------- @@ -256,7 +259,7 @@ run_compute_workflow <- function(recipe, data) { # Saving(recipe = recipe, data = res$data, skill_metrics = res$skill) if (recipe$Analysis$Workflow$Skill$save == 'all') { save_metrics(recipe = recipe, skill = res$skill, - agg = "global") + agg = "global", module = "Skill") } # --------------------------------------------------------------------------- # Step 7: Return outputs @@ -265,6 +268,5 @@ run_compute_workflow <- function(recipe, data) { "##### DATA RETURNED AS A NAMED LIST #####") info(recipe$Run$logger, retrieve = TRUE, paste0("Steps performed: ", recipe$Run$startR_workflow$modules)) - return(res) } -- GitLab From 2851a2b1e3153ebe39e0a687aaa25bce882592ce Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 27 Feb 2024 15:31:39 +0100 Subject: [PATCH 63/96] Testing --- example_scripts/test_compute.R | 1 + .../atomic_recipes/recipe_test_compute.yml | 50 ++++++++++--------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R index c82a1719..8179ba73 100644 --- a/example_scripts/test_compute.R +++ b/example_scripts/test_compute.R @@ -4,6 +4,7 @@ source("modules/Preprocessing/Preprocessing.R") source("modules/Calibration/Calibration.R") source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") +source("modules/Indices/Indices.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") source("build_compute_workflow.R") diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml index eaca75d7..f082a1ed 100644 --- a/recipes/atomic_recipes/recipe_test_compute.yml +++ b/recipes/atomic_recipes/recipe_test_compute.yml @@ -4,12 +4,12 @@ Description: Analysis: Horizon: Seasonal Variables: - name: tas + name: psl freq: monthly_mean - units: {tas: C} + # units: {tas: C} Datasets: System: - name: Meteo-France-System7 + name: ECMWF-SEAS5 Multimodel: False Reference: name: ERA5 @@ -17,36 +17,38 @@ Analysis: sdate: '1101' fcst_year: # '2020' hcst_start: '2000' - hcst_end: '2016' + hcst_end: '2010' ftime_min: 1 ftime_max: 3 Region: - latmin: -10 - latmax: 10 - lonmin: 0 - lonmax: 20 + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 Regrid: method: bilinear - type: to_system #to_reference #'r360x181' + type: "r180x90" #to_reference #'r360x181' Workflow: Anomalies: - compute: no # yes/no, default yes + compute: yes # yes/no, default yes cross_validation: yes # yes/no, default yes save: 'all' # 'all'/'none'/'exp_only'/'fcst_only' Calibration: method: raw save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' - Downscaling: - # Assumption 1: leave-one-out cross-validation is always applied - # Assumption 2: for analogs, we select the best analog (minimum distance) - type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. - int_method: conservative # regridding method accepted by CDO. 
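As a side note on the startR_workflow block of this test recipe (see the chunk_along entry further down in the diff): the YAML mapping arrives in R as a named list and is handed to Compute() as its 'chunks' argument. A minimal sketch, with the workflow object assumed to exist:

recipe_chunks <- list(time = 2)  # parsed from 'chunk_along: {time: 2}'
# res <- startR::Compute(wf, chunks = recipe_chunks)  # wf as built in run_compute_workflow()
names(recipe_chunks)             # dimensions to split: "time"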
- bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. - lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn' - log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' - target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO - nanalogs: # If type analgs. Number of analogs to be searched - save: 'all' # 'all'/'none'/'exp_only' + Indices: + NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes} + # Downscaling: + # # Assumption 1: leave-one-out cross-validation is always applied + # # Assumption 2: for analogs, we select the best analog (minimum distance) + # type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + # int_method: conservative # regridding method accepted by CDO. + # bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + # lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn' + # log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + # target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + # nanalogs: # If type analgs. Number of analogs to be searched + # save: 'all' # 'all'/'none'/'exp_only' Skill: metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS save: 'all' # 'all'/'none' @@ -68,8 +70,10 @@ Run: output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ startR_workflow: - modules: downscaling skill # Modules to run inside Compute(), in order - chunk_along: {time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + modules: anomalies skill # Modules to run inside Compute(), in order + return: hcst, fcst, obs # Choose only if they will fit into memory + save: hcst, fcst, obs # Only if returned + chunk_along: {time: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} run_on: local # options: local, as_machine, nord3 chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O -- GitLab From 88c948af816ce6355b27c8e51b364b8a6c78c779 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 29 Feb 2024 15:52:52 +0100 Subject: [PATCH 64/96] Remove TODOs; formatting --- build_compute_workflow.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index fd5388a2..ba1bd00b 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -46,7 +46,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, } else if (module == "anomalies") { data <- Anomalies(recipe, data, retrieve = F) } else if (module == "downscaling") { - ## TODO: Test, adapt if necessary data <- Downscaling(recipe, data, retrieve = F) } else if (module == "skill") { skill_metrics <- Skill(recipe, data, retrieve = F, nchunks = nchunks) @@ -54,7 +53,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, ## TODO: Adapt probabilities output probabilities <- Probabilities(recipe, data, retrieve = F, nchunks = nchunks) } else if (module == "indices") { - ## TODO: Won't work with plots... OR WILL IT? 
data <- Indices(recipe, data, retrieve = F) } } @@ -121,7 +119,9 @@ run_compute_workflow <- function(recipe, data) { } else { spatial_output_dims <- c("latitude", "longitude") } - exp_output_dims <- c(exp_target_dims[!exp_target_dims %in% c('var_dir', 'latitude', 'longitude')], + exp_output_dims <- c(exp_target_dims[!exp_target_dims %in% c('var_dir', + 'latitude', + 'longitude')], spatial_output_dims) # Default outputs: hindcast and observations output_dims <- list(hcst = exp_output_dims, @@ -140,6 +140,9 @@ run_compute_workflow <- function(recipe, data) { list(skill = skill_output_dims)) } ## TODO: Add Indices (Niño1+2, Niño3, Niño3.4, Niño4, NAO) + ## Instead of having the index dimension, we could add a second + ## loop and then define outputs dynamically based on the recipe; + ## one s2dv_cube per index. At least in the case of ENSO. # if ("indices" %in% modules) { # idx_dims <- c('index', 'var', 'syear', 'time', 'region') # } @@ -147,7 +150,7 @@ run_compute_workflow <- function(recipe, data) { # --------------------------------------------------------------------------- # Step 3: Generate the Step and call Compute() # Libraries to be loaded when running Compute() - libs_load <- c("log4r", "docopt", "ClimProjDiags", "multiApply", "yaml", + LIBS_LOAD <- c("log4r", "docopt", "ClimProjDiags", "multiApply", "yaml", "s2dv", "abind", "easyNCDF", "CSTools", "lubridate", "PCICt", "RColorBrewer", "configr", "sf", "ggplot2", "rnaturalearth", "cowplot", "stringr", "pryr") @@ -156,7 +159,7 @@ run_compute_workflow <- function(recipe, data) { target_dims = target_dims, output_dims = output_dims, use_attributes = input_attributes, - use_libraries = libs_load) + use_libraries = LIBS_LOAD) wf <- AddStep(inputs = inputs, step = step, @@ -252,12 +255,11 @@ run_compute_workflow <- function(recipe, data) { # Step 5: Remove temporary files unlink(paste0(recipe$Run$output_dir, "/outputs/tmp/"), recursive = TRUE) - # --------------------------------------------------------------------------- # Step 6: Save data ## PROBLEM: Defining the output dir # Saving(recipe = recipe, data = res$data, skill_metrics = res$skill) - if (recipe$Analysis$Workflow$Skill$save == 'all') { + if (!is.null(res$skill) && recipe$Analysis$Workflow$Skill$save == 'all') { save_metrics(recipe = recipe, skill = res$skill, agg = "global", module = "Skill") } -- GitLab From 78dd5e9b8b2926883233340736087f81cb7d9771 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 27 Mar 2024 10:18:12 +0100 Subject: [PATCH 65/96] Add retrieve parameter to Statistics module --- modules/Statistics/Statistics.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 085bcdc5..fc21c364 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -1,4 +1,4 @@ -Statistics <- function(recipe, data, agg = 'global') { +Statistics <- function(recipe, data, agg = 'global', retrieve = TRUE) { # data$hcst: s2dv_cube containing the hindcast # data$obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml -- GitLab From acb20b727a0e48b4125900243025cc776a057770 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 8 Apr 2024 11:31:52 +0200 Subject: [PATCH 66/96] Fix some unit tests and logger call --- tests/testthat/test-seasonal_daily.R | 2 +- tests/testthat/test-seasonal_downscaling.R | 2 +- tests/testthat/test-seasonal_monthly.R | 2 +- tools/prepare_outputs.R | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 6cfa4384..1d69bffc 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -146,7 +146,7 @@ is.list(skill_metrics), TRUE ) expect_equal( -names(skill_metrics), +names(skill_metrics)[!names(skill_metrics) == "metadata"], c("enscorr_specs") ) expect_equal( diff --git a/tests/testthat/test-seasonal_downscaling.R b/tests/testthat/test-seasonal_downscaling.R index 8a52657a..78b043b6 100644 --- a/tests/testthat/test-seasonal_downscaling.R +++ b/tests/testthat/test-seasonal_downscaling.R @@ -213,7 +213,7 @@ is.list(skill_metrics), TRUE ) expect_equal( -names(skill_metrics), +names(skill_metrics)[!names(skill_metrics) == "metadata"], c("bss10", "bss10_significance", "crpss", "crpss_significance","rpss", "rpss_significance", "mean_bias") ) expect_equal( diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 18bde4c0..b1b775ea 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -183,7 +183,7 @@ is.list(skill_metrics), TRUE ) expect_equal( -names(skill_metrics), +names(skill_metrics)[!names(skill_metrics) == "metadata"], c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", "enscorr_significance", "corr_individual_members", "corr_individual_members_significance", "enscorr_specs") diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 05dcf12d..bddfd74a 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -99,7 +99,7 @@ prepare_outputs <- function(recipe_file, along with this program. If not, see <https://www.gnu.org/licenses/>." for (i in disclosure_message) { - info(recipe$Run$logger, i) + info(recipe$Run$logger, retrieve = TRUE, i) } # Set up default filesystem if (is.null(recipe$Run$filesystem)) { -- GitLab From 5c63ac06545db611352109c72e14bb4fca48a691 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 8 Apr 2024 14:29:14 +0200 Subject: [PATCH 67/96] Use ClimProjDiags::ArrayToList() to rearrange skill metric array into list --- build_compute_workflow.R | 14 ++++++-------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 390b39ed..ec13e121 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -234,16 +234,10 @@ run_compute_workflow <- function(recipe, data) { if (!is.null(res$skill)) { tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/") metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds")) - ## TODO: Fails if skill has 'region' instead of 'latlon'. Should handle - ## dimensions dynamically. Can Subset() be used?
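What the ArrayToList() replacement below does, demonstrated on toy data: it splits the 'metric' dimension of the assembled skill array into a named list with one array per metric, which is the shape the Saving functions expect. The metric names here are illustrative.

library(ClimProjDiags)
skill <- array(rnorm(24), dim = c(metric = 2, time = 3, region = 4))
metric_list <- c("rpss", "rpss_significance")
skill_list <- ArrayToList(skill, dim = 'metric', level = 'list',
                          names = metric_list)
names(skill_list)  # "rpss" "rpss_significance"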
- res$skill <- lapply(seq(dim(res$skill)[1]), - function(x) { - Subset(res$skill, - along = 'metric', - indices = x, - drop = 'selected') - }) - names(res$skill) <- metric_list + res$skill <- ClimProjDiags::ArrayToList(res$skill, + dim = 'metric', + level = 'list', + names = metric_list) # Put chunked metadata back together source("tools/retrieve_metadata.R") res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, -- GitLab From 1eb97dcdf4fc7ed8ec096133ac5a104cca65335e Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 9 Apr 2024 14:49:44 +0200 Subject: [PATCH 68/96] Remove browser() calls and fix bug in save_metrics --- modules/Saving/R/save_metrics.R | 3 +-- modules/Skill/Skill.R | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index c643eec5..ac895fd4 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -14,7 +14,6 @@ save_metrics <- function(recipe, global_attributes <- .get_global_attributes(recipe, archive) cube_info <- metrics[["metadata"]] metrics[["metadata"]] <- NULL - browser() if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { @@ -155,7 +154,7 @@ save_metrics <- function(recipe, country <- get_countries(grid) ArrayToNc(append(country, time, subset_metric), outfile) } else if (tolower(agg) == "region") { - region <- array(1:dim(skill[[1]])['region'], c(dim(skill[[1]])['region'])) + region <- array(1:dim(metrics[[1]])['region'], c(dim(metrics[[1]])['region'])) # TODO: check metadata when more than 1 region is store in the data array metadata <- list(region = list(long_name = cube_info$attrs$Variable$metadata$region$name)) attr(region, 'variables') <- metadata diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 22b75238..e5dbcb06 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -371,7 +371,6 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, # "/outputs/Skill/") # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim if (recipe$Analysis$Workflow$Skill$save == 'all') { - browser() corr_metric_names <- grep("^corr_individual_members", names(skill_metrics)) if (length(corr_metric_names) == 0) { -- GitLab From 24f7da5d1fe4641728c28eeecc3b395c88d04ee9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 24 Apr 2024 16:38:07 +0200 Subject: [PATCH 69/96] Load decadal with retrieve = FALSE (wip) --- modules/Loading/R/load_decadal.R | 209 +++++++++++----------- recipes/atomic_recipes/recipe_decadal.yml | 6 +- 2 files changed, 110 insertions(+), 105 deletions(-) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 8f594d86..3e6750c2 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -11,7 +11,7 @@ source("modules/Loading/R/compare_exp_obs_grids.R") # recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" # recipe_file <- "recipes/atomic_recipes/recipe_decadal_daily.yml" -load_decadal <- function(recipe, retrieve = TRUE) { +load_decadal <- function(recipe, retrieve = retrieve) { ## archive <- read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] @@ -120,7 +120,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { return_vars = list(latitude = NULL, longitude = NULL, time = c('syear', 'chunk')), silent = !DEBUG, - retrieve = T) + retrieve = retrieve) if (length(variable) > 1) { Start_default_arg_list <- c(Start_default_arg_list, @@ -160,16 +160,21 @@ 
load_decadal <- function(recipe, retrieve = TRUE) { } tmp_time_attr <- attr(hcst, 'Variables')$common$time - + + ## TODO: for retrieve = FALSE, do this in pre-processing # change syear to c(sday, sweek, syear) # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) + if (retrieve) { + dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) + } if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = retrieve, "hcst has problem in matching data and time attr dimension.") stop() } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) + # Get hcst dates + dates <- attr(hcst, 'Variables')$common$time #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { @@ -177,9 +182,9 @@ load_decadal <- function(recipe, retrieve = TRUE) { } # Change class from startR_array to s2dv_cube - suppressWarnings( + if (retrieve) { hcst <- as.s2dv_cube(hcst) - ) + } #------------------------------------------- # Step 2: Load the fcst @@ -240,7 +245,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = retrieve, "fcst has problem in matching data and time attr dimension.") stop() } @@ -252,15 +257,14 @@ load_decadal <- function(recipe, retrieve = TRUE) { } # Change class from startR_array to s2dv_cube - suppressWarnings( + if (retrieve) { fcst <- as.s2dv_cube(fcst) - ) - - # Only syear could be different - if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { - error(recipe$Run$logger, - "hcst and fcst do not share the same dimension structure.") - stop() + # Only syear could be different + if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and fcst do not share the same dimension structure.") + stop() + } } } else { @@ -281,7 +285,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { # Get from startR_cube # dates <- attr(hcst, 'Variables')$common$time # Get from s2dv_cube - dates <- hcst$attrs$Dates + # dates <- hcst$attrs$Dates dates_file <- sapply(dates, format, '%Y%m') dim(dates_file) <- dim(dates) @@ -319,7 +323,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), silent = !DEBUG, - retrieve = TRUE) + retrieve = retrieve) } else if (store.freq == "monthly_mean") { #//////////////// @@ -347,7 +351,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { time = 'file_date'), metadata_dims = 'var', silent = !DEBUG, - retrieve = TRUE) + retrieve = retrieve) } @@ -355,100 +359,101 @@ load_decadal <- function(recipe, retrieve = TRUE) { # sday sweek syear time # 1 1 2 14 - # Remove var_dir dimension - obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") - - # Only ensemble dim could be different - if (!identical(dim(obs), dim(hcst$data)[-9])) { - error(recipe$Run$logger, - "obs and hcst dimensions do not match.") - stop() - } - # Add ensemble dim to obs - dim(obs) <- c(dim(obs), ensemble = 1) - - # Change class from startR_array to s2dv_cube - suppressWarnings( + obs_dates <- attr(obs, 
'Variables')$common$time + + if (retrieve) { + # Remove var_dir dimension + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") + # Only ensemble dim could be different + if (!identical(dim(obs), dim(hcst$data)[-9])) { + error(recipe$Run$logger, retrieve = retrieve, + "obs and hcst dimensions do not match.") + stop() + } + # Add ensemble dim to obs + dim(obs) <- c(dim(obs), ensemble = 1) + # Transform to s2dv_cube obs <- as.s2dv_cube(obs) - ) + } #------------------------------------------- # Step 4. Verify the consistance between data #------------------------------------------- - # dimension - if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { - error(recipe$Run$logger, - "hcst and obs don't share the same dimension names.") - stop() - } else { - ens_ind <- which(names(dim(obs$data)) == 'ensemble') - match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data))) - if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) { - error(recipe$Run$logger, - "hcst and obs don't share the same dimension length.") - stop() - } - } - - # time attribute - if (!identical(format(hcst$attrs$Dates, '%Y%m'), - format(obs$attrs$Dates, '%Y%m'))) { - error(recipe$Run$logger, - "hcst and obs don't share the same time.") - stop() - } - - # lat and lon attributes - if (!(recipe$Analysis$Regrid$type == 'none')) { - if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { - lat_error_msg <- paste("Latitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lat_error_msg) - hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) - info(recipe$Run$logger, hcst_lat_msg) - obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], - "; Last obs lat: ", obs$lat[length(obs$lat)]) - info(recipe$Run$logger, obs_lat_msg) - stop("hcst and obs don't share the same latitudes.") - } - - if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { - lon_error_msg <- paste("Longitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lon_error_msg) - hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) - info(recipe$Run$logger, hcst_lon_msg) - obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], - "; Last obs lon: ", obs$lon[length(obs$lon)]) - info(recipe$Run$logger, obs_lon_msg) - stop("hcst and obs don't share the same longitudes.") - } - } - - # Check fcst - if (!is.null(fcst)) { + if (retrieve) { # dimension - if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { - error(recipe$Run$logger, - "hcst and fcst don't share the same dimension names.") + if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and obs don't share the same dimension names.") stop() } else { - ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) - match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) - if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) { - error(recipe$Run$logger, - "hcst and fcst don't share the same dimension length.") + ens_ind <- which(names(dim(obs$data)) == 'ensemble') + match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data))) + if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and 
obs don't share the same dimension length.") stop() } } + # time attribute + if (!identical(format(dates, '%Y%m'), format(obs_dates, '%Y%m'))) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and obs don't share the same time.") + stop() + } + # lat and lon attributes if (!(recipe$Analysis$Regrid$type == 'none')) { - compare_exp_obs_grids(hcst, obs) + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, retrieve = retrieve, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, retrieve = retrieve, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, retrieve = retrieve, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, retrieve = retrieve, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, retrieve = retrieve, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, retrieve = retrieve, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + } + + # Check fcst + if (!is.null(fcst)) { + # dimension + if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and fcst don't share the same dimension names.") + stop() + } else { + ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) + match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) + if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and fcst don't share the same dimension length.") + stop() + } + } + + # lat and lon attributes + if (!(recipe$Analysis$Regrid$type == 'none')) { + compare_exp_obs_grids(hcst, obs) + } } } @@ -460,7 +465,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { # for (var_idx in 1:length(variable)) { # var_name <- variable[var_idx] # if (dictionary$vars[[var_name]]$accum) { - # info(recipe$Run$logger, + # info(recipe$Run$logger, retrieve = retrieve, # paste0("Accumulated variable ", var_name, # ": setting negative values to zero.")) # # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, @@ -477,9 +482,9 @@ load_decadal <- function(recipe, retrieve = TRUE) { # Step 6. 
Print summary #------------------------------------------- - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") - .log_memory_usage(recipe$Run$logger, when = "After loading") + .log_memory_usage(recipe$Run$logger, retrieve = retrieve, when = "After loading") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml index 26312b34..5ac1fa52 100644 --- a/recipes/atomic_recipes/recipe_decadal.yml +++ b/recipes/atomic_recipes/recipe_decadal.yml @@ -37,7 +37,7 @@ Analysis: method: bias save: 'all' Skill: - metric: RPSS Corr + metric: RPSS save: 'all' Probabilities: percentiles: [[1/3, 2/3]] @@ -52,6 +52,6 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/aho/git/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From 90f5eaf0f323e9121a891f3a590fa5446e409d9d Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 26 Apr 2024 11:48:48 +0200 Subject: [PATCH 70/96] load decadal data with retrieve = FALSE --- modules/Loading/R/load_decadal.R | 40 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 3e6750c2..65d69b93 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -16,9 +16,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { archive <- read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] # Print Start() info or not - DEBUG <- FALSE - - ## TODO: this should come from the main script + DEBUG <- TRUE # Create output folder and log: #------------------------- @@ -132,7 +130,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { if (!multi_path) { Start_hcst_arg_list <- Start_default_arg_list hcst <- do.call(Start, Start_hcst_arg_list) - + hcst_dims <- attr(hcst, "Dimensions") } else { Start_hcst_arg_list <- Start_default_arg_list Start_hcst_arg_list[['syear']] <- NULL @@ -141,7 +139,6 @@ load_decadal <- function(recipe, retrieve = retrieve) { Start_hcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] hcst <- do.call(Start, Start_hcst_arg_list) - # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) @@ -166,12 +163,14 @@ load_decadal <- function(recipe, retrieve = retrieve) { # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] if (retrieve) { dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) - } - if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { - error(recipe$Run$logger, retrieve = retrieve, - "hcst has problem in matching data and time attr dimension.") - stop() - } + } else { + + ## TODO: Why tf does this raise an error? 
+ # if (!identical(dim(tmp_time_attr), hcst_dims[c('syear', 'time')])) { + # error(recipe$Run$logger, retrieve = retrieve, + # "hcst has problem in matching data and time attr dimension.") + # stop() + # } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) # Get hcst dates dates <- attr(hcst, 'Variables')$common$time @@ -224,8 +223,11 @@ load_decadal <- function(recipe, retrieve = retrieve) { # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) - fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + if (retrieve) { + dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', + 'latitude', 'longitude', 'ensemble')) + } # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only @@ -243,11 +245,13 @@ load_decadal <- function(recipe, retrieve = retrieve) { # change syear to c(sday, sweek, syear) # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) - if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { - error(recipe$Run$logger, retrieve = retrieve, - "fcst has problem in matching data and time attr dimension.") - stop() + if (retrieve) { + dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { + error(recipe$Run$logger, retrieve = retrieve, + "fcst has problem in matching data and time attr dimension.") + stop() + } } dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) -- GitLab From 3af9f8a7d78498824e36db257193c0d77fdb0eaf Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 26 Apr 2024 14:32:56 +0200 Subject: [PATCH 71/96] Fix dimensions and issues (WIP) --- modules/Loading/R/load_decadal.R | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 65d69b93..ddc778ef 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -164,6 +164,9 @@ load_decadal <- function(recipe, retrieve = retrieve) { if (retrieve) { dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) } else { + attr(hcst, "Dimensions") <- c(hcst_dims[1:2], sday = 1, sweek = 1, + hcst_dims[3:7]) + } ## TODO: Why tf does this raise an error? 
# if (!identical(dim(tmp_time_attr), hcst_dims[c('syear', 'time')])) { @@ -217,7 +220,8 @@ load_decadal <- function(recipe, retrieve = retrieve) { Start_fcst_arg_list[['syear']] <- NULL Start_fcst_arg_list[['chunk_depends']] <- NULL remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') - Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] + Start_fcst_arg_list[['return_vars']][['time']] <- + Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] fcst <- do.call(Start, Start_fcst_arg_list) # Reshape and reorder dimensions @@ -252,7 +256,12 @@ load_decadal <- function(recipe, retrieve = retrieve) { "fcst has problem in matching data and time attr dimension.") stop() } + } else { + fcst_dims <- attr(fcst, "Dimensions") + attr(fcst, "Dimensions") <- c(fcst_dims[1:2], sday = 1, sweek = 1, + fcst_dims[3:7]) } + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 @@ -269,8 +278,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { "hcst and fcst do not share the same dimension structure.") stop() } - } - + } } else { fcst <- NULL } -- GitLab From c600f469eb6553599e291291add28cf93c818a86 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 3 May 2024 15:40:26 +0200 Subject: [PATCH 72/96] Create recipe for testing --- .../recipe_test_compute_decadal.yml | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 recipes/atomic_recipes/recipe_test_compute_decadal.yml diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml new file mode 100644 index 00000000..e8154363 --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml @@ -0,0 +1,66 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: HadGEM3-GC31-MM #EC-Earth3-i4 #CanESM5 + member: r1i1p1f2,r2i1p1f2,r3i1p1f2 #'all' + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst_year: [2020,2021] + hcst_start: 1990 + hcst_end: 1993 +# season: 'Annual' + ftime_min: 2 + ftime_max: 24 + Region: + latmin: 10 #-90 + latmax: 20 #90 + lonmin: 0 + lonmax: 15 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Anomalies: + compute: no + cross_validation: + save: + Calibration: + method: bias + save: 'all' + Skill: + metric: RPSS + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + startR_workflow: + modules: anomalies skill # Modules to run inside Compute(), in order + return: hcst, fcst, obs # Choose only if they will fit into memory + save: hcst, fcst, obs # Only if returned + chunk_along: {time: 6} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + run_on: local # options: local, as_machine, nord3 + chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' + expid: a6cb # Only if run_on is not 'local'. 
leave empty to create a new one? O + hpc_user: bsc32762 # your hpc username. Only if run_on is not 'local' + slurm_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] # Only if run_on is not 'local' -- GitLab From 5c7e78a211e917adf7cdcb5fb9a2ce04151c35b8 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 6 May 2024 15:55:36 +0200 Subject: [PATCH 73/96] Fix issue with sday and sweek dimensions in decadal case --- example_scripts/test_compute.R | 2 +- modules/Loading/R/load_decadal.R | 13 +++---------- modules/Preprocessing/Preprocessing.R | 16 ++++++++++++---- .../recipe_test_compute_decadal.yml | 2 +- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R index 8179ba73..5622c5ec 100644 --- a/example_scripts/test_compute.R +++ b/example_scripts/test_compute.R @@ -9,7 +9,7 @@ source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") source("build_compute_workflow.R") -recipe_file <- "recipes/atomic_recipes/recipe_test_compute.yml" +recipe_file <- "recipes/atomic_recipes/recipe_test_compute_decadal.yml" recipe <- prepare_outputs(recipe_file) # Load datasets diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index ddc778ef..10651836 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -119,7 +119,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { time = c('syear', 'chunk')), silent = !DEBUG, retrieve = retrieve) - + if (length(variable) > 1) { Start_default_arg_list <- c(Start_default_arg_list, list(table = table, grid = grid, version = version, @@ -163,12 +163,9 @@ load_decadal <- function(recipe, retrieve = retrieve) { # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] if (retrieve) { dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) - } else { - attr(hcst, "Dimensions") <- c(hcst_dims[1:2], sday = 1, sweek = 1, - hcst_dims[3:7]) } - ## TODO: Why tf does this raise an error? + ## TODO: Why does this raise an error? 
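## A possible explanation for the TODO above (an unverified hypothesis, not
## part of the patch): identical() distinguishes integer from double storage,
## and dim() returns integers while the "Dimensions" attribute may hold
## doubles, so dimension vectors that match in names and values can still
## fail the test. A looser sketch of the disabled check that follows:
# if (!all(dim(tmp_time_attr) == hcst_dims[c('syear', 'time')])) {
#   error(recipe$Run$logger, retrieve = retrieve,
#         "hcst has problem in matching data and time attr dimension.")
#   stop()
# }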
# if (!identical(dim(tmp_time_attr), hcst_dims[c('syear', 'time')])) { # error(recipe$Run$logger, retrieve = retrieve, # "hcst has problem in matching data and time attr dimension.") @@ -256,11 +253,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { "fcst has problem in matching data and time attr dimension.") stop() } - } else { - fcst_dims <- attr(fcst, "Dimensions") - attr(fcst, "Dimensions") <- c(fcst_dims[1:2], sday = 1, sweek = 1, - fcst_dims[3:7]) - } + } dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) diff --git a/modules/Preprocessing/Preprocessing.R b/modules/Preprocessing/Preprocessing.R index 6f118670..d6a287e8 100644 --- a/modules/Preprocessing/Preprocessing.R +++ b/modules/Preprocessing/Preprocessing.R @@ -1,10 +1,18 @@ preprocess_datasets <- function(recipe, data) { # Remove 'var_dir' dimension for (element in names(data)) { - data[[element]] <- Subset(x = data[[element]], - along = c('var_dir'), - indices = list(1), - drop = 'selected') + if ("var_dir" %in% names(dim(data[[element]]))) { + data[[element]] <- Subset(x = data[[element]], + along = c('var_dir'), + indices = list(1), + drop = 'selected') + } + if (!("sday" %in% names(dim(data[[element]])))) { + dim(data[[element]]) <- c(dim(data[[element]]), sday = 1) + } + if (!("sweek" %in% names(dim(data[[element]])))) { + dim(data[[element]]) <- c(dim(data[[element]]), sweek = 1) + } } # Add 'ensemble' dimension to obs dim(data$obs) <- c(dim(data$obs), ensemble = 1) diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml index e8154363..985d5c0b 100644 --- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml +++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml @@ -58,7 +58,7 @@ Run: modules: anomalies skill # Modules to run inside Compute(), in order return: hcst, fcst, obs # Choose only if they will fit into memory save: hcst, fcst, obs # Only if returned - chunk_along: {time: 6} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} run_on: local # options: local, as_machine, nord3 chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O -- GitLab From c5440d0e1ca71567dc543bcd690f52a1470ab98e Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 10 May 2024 16:11:52 +0200 Subject: [PATCH 74/96] TEST: Add sweek and sday dimensions for decadal output --- build_compute_workflow.R | 6 ++++-- recipes/atomic_recipes/recipe_test_compute_decadal.yml | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index ec13e121..a4403cb5 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -122,7 +122,8 @@ run_compute_workflow <- function(recipe, data) { exp_output_dims <- c(exp_target_dims[!exp_target_dims %in% c('var_dir', 'latitude', 'longitude')], - spatial_output_dims) + spatial_output_dims, + c("sday", "sweek")) # Default outputs: hindcast and observations output_dims <- list(hcst = exp_output_dims, obs = exp_output_dims) @@ -169,7 +170,7 @@ run_compute_workflow <- function(recipe, data) { ## TODO: Create function to generate call to Compute()? 
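## One possible shape for that helper, sketched from the calls below; the
## function name and the local/cluster split are illustrative, not part of
## the codebase:
# build_compute_call <- function(wf, recipe) {
#   if (recipe$Run$startR_workflow$run_on == 'local') {
#     Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along)
#   } else {
#     # assemble the cluster arguments here from the recipe fields
#     # (run_on, chunk_wallclock, expid, hpc_user, slurm_directives)
#   }
# }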
# Compute locally run_on <- recipe$Run$startR_workflow$run_on - + r_module_ver <- list(local = "R/4.1.2-foss-2015a-bare", as_machine = "R/4.1.2-foss-2015a-bare", nord3 = "R/4.1.2-foss-2019b") @@ -181,6 +182,7 @@ run_compute_workflow <- function(recipe, data) { # Compute locally, in serial res <- Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along) + print("made it here") } else if (run_on %in% c('nord3', 'as_machine')) { #NOTE: autosubmit_suite_dir can be anywhere ## TODO: Change code_dir to autosubmit dir when necessary diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml index 985d5c0b..8ea48e9a 100644 --- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml +++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml @@ -21,10 +21,10 @@ Analysis: ftime_min: 2 ftime_max: 24 Region: - latmin: 10 #-90 + latmin: -10 #-90 latmax: 20 #90 lonmin: 0 - lonmax: 15 #359.9 + lonmax: 30 #359.9 Regrid: method: bilinear type: to_system #to_reference @@ -55,10 +55,10 @@ Run: output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ startR_workflow: - modules: anomalies skill # Modules to run inside Compute(), in order + modules: skill # Modules to run inside Compute(), in order return: hcst, fcst, obs # Choose only if they will fit into memory save: hcst, fcst, obs # Only if returned - chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} run_on: local # options: local, as_machine, nord3 chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? 
O -- GitLab From 2cb80e23bdcd646244ef856548daa09b0fa01e7f Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 13 May 2024 14:54:45 +0200 Subject: [PATCH 75/96] Remove absolute paths to temporary files and replace with local copy --- modules/Indices/Indices.R | 8 +- modules/Indices/R/tmp/EOF.R | 292 ++++ modules/Indices/R/tmp/NAO.R | 434 ++++++ modules/Indices/R/tmp/ProjectField.R | 272 ++++ modules/Indices/R/tmp/Utils.R | 1884 ++++++++++++++++++++++++++ 5 files changed, 2886 insertions(+), 4 deletions(-) create mode 100644 modules/Indices/R/tmp/EOF.R create mode 100644 modules/Indices/R/tmp/NAO.R create mode 100644 modules/Indices/R/tmp/ProjectField.R create mode 100644 modules/Indices/R/tmp/Utils.R diff --git a/modules/Indices/Indices.R b/modules/Indices/Indices.R index b767e983..a6ab15de 100644 --- a/modules/Indices/Indices.R +++ b/modules/Indices/Indices.R @@ -3,10 +3,10 @@ source("modules/Indices/R/compute_nino.R") source("modules/Indices/R/drop_indices_dims.R") source("modules/Saving/Saving.R") ## TODO: Remove later -source("/home/shared/Earth/vagudets/git/s2dv/R/NAO.R") -source("/home/shared/Earth/vagudets/git/s2dv/R/Utils.R") -source("/home/shared/Earth/vagudets/git/s2dv/R/EOF.R") -source("/home/shared/Earth/vagudets/git/s2dv/R/ProjectField.R") +source("modules/Indices/R/tmp/NAO.R") +source("modules/Indices/R/tmp/Utils.R") +source("modules/Indices/R/tmp/EOF.R") +source("modules/Indices/R/tmp/ProjectField.R") Indices <- function(recipe, data, retrieve = TRUE) { # Define parameters diff --git a/modules/Indices/R/tmp/EOF.R b/modules/Indices/R/tmp/EOF.R new file mode 100644 index 00000000..38c3fae0 --- /dev/null +++ b/modules/Indices/R/tmp/EOF.R @@ -0,0 +1,292 @@ +#'Area-weighted empirical orthogonal function analysis using SVD +#' +#'Perform an area-weighted EOF analysis using single value decomposition (SVD) +#'based on a covariance matrix or a correlation matrix if parameter 'corr' is +#'set to TRUE. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs +#' could exist but it should be consistent along time_dim. That is, if one grid +#' point has NAs, all the time steps at this point should be NAs. +#'@param lat A vector of the latitudes of 'ano'. +#'@param lon A vector of the longitudes of 'ano'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param neofs A positive integer of the modes to be kept. The default value is +#' 15. If time length or the product of the length of space_dim is smaller than +#' neofs, neofs will be changed to the minimum of the three values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{EOFs}{ +#' An array of EOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). +#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed +#' field. 
+#'} +#'\item{PCs}{ +#' An array of principal components with the units of the original field to +#' the power of 2, with dimensions (time_dim, number of modes, rest of the +#' dimensions of 'ano' except 'space_dim'). +#' 'PCs' contains already the percentage of explained variance so, +#' to reconstruct the original field it's only needed to multiply 'EOFs' +#' by 'PCs'. +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode (number of modes). The dimensions are (number of +#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{mask}{ +#' An array of the mask with dimensions (space_dim, rest of the dimensions of +#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that +#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to +#' replace NAs with 0s for EOF calculation and mask the result with NAs again +#' after the calculation. +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by cosine of 'lat' and used to compute the fraction of variance explained by +#' each EOFs. +#'} +#'\item{tot_var}{ +#' A number or a numeric array of the total variance explained by all the modes. +#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. +#'} +#' +#'@seealso ProjectField, NAO, PlotBoxWhisker +#'@examples +#'# This example computes the EOFs along forecast horizons and plots the one +#'# that explains the greatest amount of variability. The example data has low +#'# resolution so the result may not be explanatory, but it displays how to +#'# use this function. +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) +#'ano <- tmp[1, , ,] +#'names(dim(ano)) <- names(dim(tmp))[-2] +#'eof <- EOF(ano, sampleData$lat, sampleData$lon) +#'\dontrun{ +#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) +#'} +#' +#'@import multiApply +#'@importFrom stats sd +#'@export +EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), + neofs = 15, corr = FALSE, ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + } + if (any(lat > 90 | lat < -90)) { + 
stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + ## lon + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + } + if (any(lon > 360 | lon < -360)) { + .warning("Some 'lon' is out of the range [-360, 360].") + } + ## neofs + if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { + stop("Parameter 'neofs' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate EOF + +# # Replace mask of NAs with 0s for EOF analysis. +# ano[!is.finite(ano)] <- 0 + + # Area weighting. Weights for EOF; needed to compute the + # fraction of variance explained by each EOFs + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anomaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. + wght <- sqrt(wght) + + # neofs is bounded + if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) { + neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs) + .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ", + "the length of time_dim, the production of the length of space_dim, ", + "and neofs.")) + } + + res <- Apply(ano, + target_dims = c(time_dim, space_dim), + output_dims = list(EOFs = c('mode', space_dim), + PCs = c(time_dim, 'mode'), + var = 'mode', + tot_var = NULL, + mask = space_dim), + fun = .EOF, + corr = corr, neofs = neofs, + wght = wght, + ncores = ncores) + + return(c(res, wght = list(wght))) + +} + +.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) { + # ano: [time, lat, lon] + + # Dimensions + nt <- dim(ano)[1] + ny <- dim(ano)[2] + nx <- dim(ano)[3] + + # Check if all the time steps at one grid point are NA-consistent. + # The grid point should have all NAs or no NA along time dim. + if (anyNA(ano)) { + ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon] + na_ind <- which(is.na(ano_latlon), arr.ind = T) + if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) { + stop("Detect certain grid points have NAs but not consistent across time ", + "dimension. If the grid point is NA, it should have NA at all time step.") + } + } + + # Build the mask + mask <- ano[1, , ] + mask[!is.finite(mask)] <- NA + mask[is.finite(mask)] <- 1 + dim(mask) <- c(ny, nx) + + # Replace mask of NAs with 0s for EOF analysis. + ano[!is.finite(ano)] <- 0 + + ano <- ano * InsertDim(wght, 1, nt) + + # The use of the correlation matrix is done under the option corr. + if (corr) { + stdv <- apply(ano, c(2, 3), sd, na.rm = T) + ano <- ano / InsertDim(stdv, 1, nt) + } + + # Time/space matrix for SVD + dim(ano) <- c(nt, ny * nx) + dim.dat <- dim(ano) + + # 'transpose' means the array needs to be transposed before + # calling La.svd for computational efficiency because the + # spatial dimension is larger than the time dimension. This + # goes with transposing the outputs of LA.svd also. 
+  if (dim.dat[2] > dim.dat[1]) {
+    transpose <- TRUE
+  } else {
+    transpose <- FALSE
+  }
+  if (transpose) {
+    pca <- La.svd(t(ano))
+  } else {
+    pca <- La.svd(ano)
+  }
+
+  # La.svd conventions: decomposition X = U D t(V). La.svd$u
+  # returns U, La.svd$d returns diagonal values of D, La.svd$v
+  # returns t(V) !! The usual convention is PC=U and EOF=V.
+  # If La.svd is called for ano (transpose=FALSE case): EOFs:
+  # $v PCs: $u. If La.svd is called for t(ano) (transposed=TRUE
+  # case): EOFs: t($u) PCs: t($v)
+
+  if (transpose) {
+    pca.EOFs <- t(pca$u)
+    pca.PCs <- t(pca$v)
+  } else {
+    pca.EOFs <- pca$v
+    pca.PCs <- pca$u
+  }
+
+  # The number of modes kept is limited to neofs
+  PC <- pca.PCs[, 1:neofs]
+  EOF <- pca.EOFs[1:neofs, ]
+  dim(EOF) <- c(neofs, ny, nx)
+
+  # To avoid a crash when neofs = 1.
+  if (neofs == 1) {
+    PC <- InsertDim(PC, 2, 1, name = 'new')
+  }
+
+  # Computation of the % of variance associated with each mode
+  W <- pca$d[1:neofs]
+  tot.var <- sum(pca$d^2)
+  var.eof <- 100 * pca$d[1:neofs]^2 / tot.var
+
+  for (e in 1:neofs) {
+    # Set all masked grid points to NA in the EOFs
+    # Divide patterns by area weights so that EOF * PC gives unweighted (original) data
+    EOF[e, , ] <- EOF[e, , ] * mask / wght
+    # PC is multiplied by the explained variance,
+    # so that the reconstruction is only EOF * PC
+    PC[, e] <- PC[, e] * W[e]
+  }
+
+  if (neofs == 1) {
+    var.eof <- as.array(var.eof)
+  }
+
+  return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask)))
+}
diff --git a/modules/Indices/R/tmp/NAO.R b/modules/Indices/R/tmp/NAO.R
new file mode 100644
index 00000000..75731055
--- /dev/null
+++ b/modules/Indices/R/tmp/NAO.R
@@ -0,0 +1,434 @@
+#'Compute the North Atlantic Oscillation (NAO) Index
+#'
+#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF
+#'of the sea level pressure (SLP) anomalies over the north Atlantic region
+#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and
+#'observed anomalies onto the observed EOF pattern or the forecast
+#'anomalies onto the EOF pattern of the other years of the forecast.
+#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month
+#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns
+#'cross-validated PCs of the NAO index for forecast (exp) and observations
+#'(obs) based on the leading EOF pattern.
+#'
+#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E)
+#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least.
+#' If only NAO of observational data needs to be computed, this parameter can
+#' be left as NULL. The default value is NULL.
+#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E)
+#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least.
+#' If only NAO of experimental data needs to be computed, this parameter can
+#' be left as NULL. The default value is NULL.
+#'@param lat A vector of the latitudes of 'exp' and 'obs'.
+#'@param lon A vector of the longitudes of 'exp' and 'obs'.
+#'@param time_dim A character string indicating the name of the time dimension
+#' of 'exp' and 'obs'. The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member
+#' dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length
+#' must be 1.
The default value is 'member'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension of 'exp' and 'obs'. The default value is 'ftime'. +#'@param ftime_avg A numeric vector of the forecast time steps to average +#' across the target period. If average is not needed, set NULL. The default +#' value is 2:4, i.e., from 2nd to 4th forecast time steps. +#'@param obsproj A logical value indicating whether to compute the NAO index by +#' projecting the forecast anomalies onto the leading EOF of observational +#' reference (TRUE) or compute the NAO by first computing the leading +#' EOF of the forecast anomalies (in cross-validation mode, i.e. leaving the +#' year you are evaluating out), and then projecting forecast anomalies onto +#' this EOF (FALSE). The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list which contains: +#'\item{exp}{ +#' A numeric array of forecast NAO index in verification format with the same +#' dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#' } +#'\item{obs}{ +#' A numeric array of observed NAO index in verification format with the same +#' dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#'} +#' +#'@references +#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of +#' multi-model seasonal forecasts of the wintertime North Atlantic +#' Oscillation. Climate Dynamics, 21, 501-514. +#' DOI: 10.1007/s00382-003-0350-4 +#' +#'@examples +#'# Make up synthetic data +#'set.seed(1) +#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'set.seed(2) +#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'lat <- seq(20, 80, length.out = 6) +#'lon <- seq(-80, 40, length.out = 9) +#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) +#' +#'# plot the NAO index +#' \dontrun{ +#'nao$exp <- Reorder(nao$exp, c(2, 1)) +#'nao$obs <- Reorder(nao$obs, c(2, 1)) +#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", +#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. 
X") +#' } +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', + memb_dim = 'member', space_dim = c('lat', 'lon'), + ftime_dim = 'ftime', ftime_avg = 2:4, + obsproj = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(obs) & is.null(exp)) { + stop("Parameter 'exp' and 'obs' cannot both be NULL.") + } + if (!is.null(exp)) { + if (!is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (is.null(dim(exp))) { + stop("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { + stop("Parameter 'exp' must have dimension names.") + } + } + if (!is.null(obs)) { + if (!is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (is.null(dim(obs))) { + stop("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'obs' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } else { + add_member_back <- FALSE + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (!all(space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!all(space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp) && !is.null(ftime_avg)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs) && !is.null(ftime_avg)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## exp and obs (2) + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + 
stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'memb_dim'.") + } + } + ## ftime_avg + if (!is.null(ftime_avg)) { + if (!is.vector(ftime_avg) | !is.numeric(ftime_avg)) { + stop("Parameter 'ftime_avg' must be an integer vector.") + } + if (!is.null(exp)) { + if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } else { + if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + } + ## sdate >= 2 + if (!is.null(exp)) { + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } else { + if (dim(obs)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } + ## lat and lon + if (!is.null(exp)) { + if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } else { + if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } + stop_needed <- FALSE + if (max(lat) > 80 | min(lat) < 20) { + stop_needed <- TRUE + } + #NOTE: different from s2dverification + # lon is not used in the calculation actually. EOF only uses lat to do the + # weight. So we just need to ensure the data is in this region, regardless + # the order. + if (any(lon < 0)) { #[-180, 180] + if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { + stop_needed <- TRUE + } + } else { #[0, 360] + if (any(lon >= 50 & lon <= 270)) { + stop_needed <- TRUE + } else { + lon_E <- lon[which(lon < 50)] + lon_W <- lon[-which(lon < 50)] + if (max(lon_E) < 30 | min(lon_W) > 290) { + stop_needed <- TRUE + } + } + } + if (stop_needed) { + stop("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + } + ## obsproj + if (!is.logical(obsproj) | length(obsproj) > 1) { + stop("Parameter 'obsproj' must be either TRUE or FALSE.") + } + if (obsproj) { + if (is.null(obs)) { + stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") + } + if (is.null(exp)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' provided.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores == 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Average ftime + if (!is.null(ftime_avg)) { + if (!is.null(exp)) { + exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) + exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) + ## Cross-validated PCs. Fabian. This should be extended to + ## nmod and nlt by simple loops. 
Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + } + + # wght + wght <- array(sqrt(cos(lat * pi / 180)), dim = c(length(lat), length(lon))) + + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, lat, wght, obsproj = TRUE, add_member_back = FALSE) { + # exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + + if (!is.null(obs)) NAOO.ver <- array(NA, dim = ntime) + if (!is.null(exp)) NAOF.ver <- array(NA, dim = c(ntime, nmemb_exp)) + + for (tt in 1:ntime) { #sdate + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[(1:ntime)[-tt], , , drop = FALSE] + obs_EOF <- .EOF(obs_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + + ## Correct polarity of pattern. + # dim(obs_EOF$EOFs): [mode, lat, lon] + if (0 < mean(obs_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + obs_EOF$EOFs <- obs_EOF$EOFs * (-1) +# obs_EOF$PCs <- obs_EOF$PCs * (-1) # not used + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = obs_EOF$EOFs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. + NAOO.ver[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, (1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + exp_EOF <- .EOF(exp_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + + ## Correct polarity of pattern. + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < exp_EOF$EOFs[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(exp_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + exp_EOF$EOFs <- exp_EOF$EOFs * (-1) +# exp_EOF$PCs <- exp_EOF$PCs * sign # not used + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). 
Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], + eof_mode = exp_EOF$EOFs[1, , ], + wght = wght) # [sdate, memb] + NAOF.ver[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], + eof_mode = obs_EOF$EOFs[1, , ], + wght = wght) # [sdate] + NAOF.ver[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + # add_member_back + if (add_member_back) { + suppressWarnings({ + NAOO.ver <- InsertDim(NAOO.ver, 2, 1, name = names(dim(exp))[1]) + }) + } + + #NOTE: EOFs_obs is not returned because it's only the result of the last sdate + # (It is returned in s2dverification.) + if (!is.null(exp) & !is.null(obs)) { + return(list(exp = NAOF.ver, obs = NAOO.ver)) #, EOFs_obs = obs_EOF)) + } else if (!is.null(exp)) { + return(list(exp = NAOF.ver)) + } else if (!is.null(obs)) { + return(list(obs = NAOO.ver)) + } +} diff --git a/modules/Indices/R/tmp/ProjectField.R b/modules/Indices/R/tmp/ProjectField.R new file mode 100644 index 00000000..4fdf1892 --- /dev/null +++ b/modules/Indices/R/tmp/ProjectField.R @@ -0,0 +1,272 @@ +#'Project anomalies onto modes of variability +#' +#'Project anomalies onto modes of variability to get the temporal evolution of +#'the EOF mode selected. It returns principal components (PCs) by area-weighted +#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and +#'returns NA if the whole spatial pattern is NA. +#' +#'@param ano A numerical array of anomalies with named dimensions. The +#' dimensions must have at least 'time_dim' and 'space_dim'. It can be +#' generated by Ano(). +#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated +#' by EOF() or REOF(). +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param mode An integer of the variability mode number in the EOF to be +#' projected on. The default value is NULL, which means all the modes of 'eof' +#' is calculated. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numerical array of the principal components in the verification +#' format. The dimensions are the same as 'ano' except 'space_dim'. 
+#' +#'@seealso EOF, NAO, PlotBoxWhisker +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) +#' +#'\dontrun{ +#' # Plot the forecast and the observation of the first mode for the last year +#' # of forecast +#' sdate_dim_length <- dim(mode1_obs)['sdate'] +#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), +#' lwd = 2) +#' for (i in 1:dim(mode1_exp)['member']) { +#' par(new = TRUE) +#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], +#' ylim = c(-15000, 15000)) +#' } +#'} +#' +#'@import multiApply +#'@export +ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), + mode = NULL, ncores = NULL) { + + # Check inputs + ## ano (1) + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## eof (1) + if (is.null(eof)) { + stop("Parameter 'eof' cannot be NULL.") + } + if (!is.list(eof)) { + stop("Parameter 'eof' must be a list generated by EOF() or REOF().") + } + if ('EOFs' %in% names(eof)) { + EOFs <- "EOFs" + } else if ('REOFs' %in% names(eof)) { + EOFs <- "REOFs" + } else if ('patterns' %in% names(eof)) { + EOFs <- "patterns" + } else { + stop("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().") + } + if (!'wght' %in% names(eof)) { + stop("Parameter 'eof' must be a list that contains 'wght'. 
", + "It can be generated by EOF() or REOF().") + } + if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") + } + if (!is.numeric(eof$wght) || !is.array(eof$wght)) { + stop("The component 'wght' of parameter 'eof' must be a numeric array.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## ano (2) + if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { + stop("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.") + } + ## eof (2) + if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | + !'mode' %in% names(dim(eof[[EOFs]]))) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.") + } + if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { + stop("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.") + } + ## mode + if (!is.null(mode)) { + if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { + stop("Parameter 'mode' must be NULL or a positive integer.") + } + if (mode > dim(eof[[EOFs]])['mode']) { + stop("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + +#------------------------------------------------------- + + # Keep the chosen mode + if (!is.null(mode)) { + eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') + } else { + eof_mode <- eof[[EOFs]] + } + + if ('mode' %in% names(dim(eof_mode))) { + dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] + } else { + dimnames_without_mode <- names(dim(eof_mode)) + } + + if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + + } else { + + if (!all(dimnames_without_mode %in% names(dim(ano)))) { + stop("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. Check if 'ano' and 'eof' are compatible.") + } + + common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] + if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != + dim(eof_mode)[dimnames_without_mode])) { + stop("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", + "with different length. Check if 'ano' and 'eof' are compatible.") + } + + # Enlarge eof/ano is needed. 
The margin_dims of Apply() must be consistent + # between ano and eof. + additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] + additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] + if (length(additional_dims) != 0) { + for (i in seq_along(additional_dims)) { + eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), + lendim = additional_dims[i], name = names(additional_dims)[i]) + } + } + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + } + + return(res) +} + + +.ProjectField <- function(ano, eof_mode, wght) { + # ano: [sdate, lat, lon] + # eof_mode: [lat, lon] or [mode, lat, lon] + # wght: [lat, lon] + + ntime <- dim(ano)[1] + + if (length(dim(eof_mode)) == 2) { # mode != NULL + # Initialization of pc.ver. + pc.ver <- array(NA, dim = ntime) #[sdate] + + # Weight + e.1 <- eof_mode * wght + ano <- ano * InsertDim(wght, 1, ntime) + #ano <- aaply(ano, 1, '*', wght) # much heavier + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + #na <- apply(ano, 1, mean, na.rm = TRUE) # much heavier + tmp <- ano * InsertDim(e.1, 1, ntime) # [sdate, lat, lon] + rm(ano) + #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE) # much heavier + pc.ver <- rowSums(tmp, na.rm = TRUE) + pc.ver[which(is.na(na))] <- NA + + } else { # mode = NULL + # Weight + e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1]) + dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3])) # [mode, lat*lon] + ano <- ano * InsertDim(wght, 1, ntime) + dim(ano) <- c(ntime, prod(dim(ano)[2:3])) # [sdate, lat*lon] + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1)) + + # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate] + # Result: [mode, sdate] + pc.ver <- e.1 %*% t(ano) + pc.ver[which(is.na(na))] <- NA + +# # Change back dimensions to feet original input +# dim(projection) <- c(moredims, mode = unname(neofs)) +# return(projection) + } + + return(pc.ver) +} + diff --git a/modules/Indices/R/tmp/Utils.R b/modules/Indices/R/tmp/Utils.R new file mode 100644 index 00000000..c6e233c0 --- /dev/null +++ b/modules/Indices/R/tmp/Utils.R @@ -0,0 +1,1884 @@ +#'@importFrom abind abind +#'@import plyr ncdf4 +#'@importFrom grDevices png jpeg pdf svg bmp tiff +#'@importFrom easyVerification convert2prob + +## Function to tell if a regexpr() match is a complete match to a specified name +.IsFullMatch <- function(x, name) { + x > 0 && attributes(x)$match.length == nchar(name) +} + +.ConfigReplaceVariablesInString <- function(string, replace_values, + allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. 
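# For example (illustrative keys and values):
#   .ConfigReplaceVariablesInString("$EXP_PATH$/monthly/$VAR$",
#                                   list(EXP_PATH = "/esarchive/exp", VAR = "tas"))
# returns "/esarchive/exp/monthly/tas". Replacement values are resolved
# recursively, so they may themselves contain $VARIABLE$ references.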
+  if (length(strsplit(string, "\\$")[[1]]) > 1) {
+    parts <- strsplit(string, "\\$")[[1]]
+    output <- ""
+    i <- 0
+    for (part in parts) {
+      if (i %% 2 == 0) {
+        output <- paste0(output, part)
+      } else {
+        if (part %in% names(replace_values)) {
+          output <- paste0(output,
+                           .ConfigReplaceVariablesInString(replace_values[[part]],
+                                                           replace_values,
+                                                           allow_undefined_key_vars))
+        } else if (allow_undefined_key_vars) {
+          output <- paste0(output, "$", part, "$")
+        } else {
+          stop('Error: The variable $', part,
+               '$ was not defined in the configuration file.')
+        }
+      }
+      i <- i + 1
+    }
+    output
+  } else {
+    string
+  }
+}
+
+.KnownLonNames <- function() {
+  known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon')
+  return(known_lon_names)
+}
+
+.KnownLatNames <- function() {
+  known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat')
+  return(known_lat_names)
+}
+
+.t2nlatlon <- function(t) {
+  ## As seen in cdo's griddes.c: ntr2nlat()
+  nlats <- (t * 3 + 1) / 2
+  if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) {
+    nlats <- ceiling(nlats)
+  } else {
+    nlats <- round(nlats)
+  }
+  if (nlats %% 2 > 0) {
+    nlats <- nlats + 1
+  }
+  ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF
+  nlons <- 2 * nlats
+  keep_going <- TRUE
+  while (keep_going) {
+    n <- nlons
+    if (n %% 8 == 0) n <- trunc(n / 8)
+    while (n %% 6 == 0) n <- trunc(n / 6)
+    while (n %% 5 == 0) n <- trunc(n / 5)
+    while (n %% 4 == 0) n <- trunc(n / 4)
+    while (n %% 3 == 0) n <- trunc(n / 3)
+    if (n %% 2 == 0) n <- trunc(n / 2)
+    if (n <= 8) {
+      keep_going <- FALSE
+    } else {
+      nlons <- nlons + 2
+      if (nlons > 9999) {
+        stop("Error: pick another gaussian grid truncation. ",
+             "It doesn't fulfill the standards to apply FFT.")
+      }
+    }
+  }
+  c(nlats, nlons)
+}
+
+.nlat2t <- function(nlats) {
+  trunc((nlats * 2 - 1) / 3)
+}
+
+.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) {
+  # The purpose, working modes, inputs and outputs of this function are
+  # explained in ?LoadDataFile
+  #suppressPackageStartupMessages({library(ncdf4)})
+  #suppressPackageStartupMessages({library(bigmemory)})
+  #suppressPackageStartupMessages({library(plyr)})
+  # Auxiliary function to convert array indices to linear indices
+  arrayIndex2VectorIndex <- function(indices, dims) {
+    if (length(indices) > length(dims)) {
+      stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.")
+    }
+    position <- 1
+    dims <- rev(dims)
+    indices <- rev(indices)
+    for (i in seq_along(indices)) {
+      position <- position + (indices[i] - 1) * prod(dims[-(1:i)])
+    }
+    position
+  }
+
+  found_file <- NULL
+  dims <- NULL
+  grid_name <- units <- var_long_name <- NULL
+  is_2d_var <- array_across_gw <- NULL
+  data_across_gw <- NULL
+
+  filename <- work_piece[['filename']]
+  namevar <- work_piece[['namevar']]
+  output <- work_piece[['output']]
+  # The names of all data files in the directory of the repository that match
+  # the pattern are obtained.
+  if (any(grep("^http", filename))) {
+    is_url <- TRUE
+    files <- filename
+    ## TODO: Check that the user is not using shell globbing expressions.
+  } else {
+    is_url <- FALSE
+    files <- Sys.glob(filename)
+  }
+
+  # If we don't find any, we leave the flag 'found_file' with a NULL value.
+  if (length(files) > 0) {
+    # The first file that matches the pattern is chosen and read.
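# For example (hypothetical pattern): filename = "/data/tas_*.nc" can glob to
# several paths in 'files'; the head() call below keeps only the first match,
# which is also recorded in 'found_file' for the caller.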
+    filename <- head(files, 1)
+    filein <- filename
+    found_file <- filename
+    mask <- work_piece[['mask']]
+
+    if (!silent && explore_dims) {
+      .message(paste("Exploring dimensions...", filename))
+      ##} else {
+      ##  cat(paste("* Reading & processing data...", filename, '\n'))
+      ##}
+    }
+
+    # We will fill in 'expected_dims' with the names of the expected dimensions of
+    # the data array we'll retrieve from the file.
+    expected_dims <- NULL
+    remap_needed <- FALSE
+    # But first we open the file and work out whether the requested variable is 2d
+    fnc <- nc_open(filein)
+    if (!(namevar %in% names(fnc$var))) {
+      stop("Error: The variable ", namevar, " is not defined in the file ", filename)
+    }
+    var_long_name <- fnc$var[[namevar]]$longname
+    units <- fnc$var[[namevar]]$units
+    file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name'))
+    # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of
+    # 'longitude'/'latitude'.
+    if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) &&
+        (work_piece[['dimnames']][['lon']] == 'longitude') &&
+        ('lon' %in% file_dimnames)) {
+      work_piece[['dimnames']][['lon']] <- 'lon'
+    }
+    if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) &&
+        (work_piece[['dimnames']][['lat']] == 'latitude') &&
+        ('lat' %in% file_dimnames)) {
+      work_piece[['dimnames']][['lat']] <- 'lat'
+    }
+    if (is.null(work_piece[['is_2d_var']])) {
+      is_2d_var <- all(c(work_piece[['dimnames']][['lon']],
+                         work_piece[['dimnames']][['lat']]) %in%
+                       unlist(lapply(fnc$var[[namevar]][['dim']],
+                                     '[[', 'name')))
+    } else {
+      is_2d_var <- work_piece[['is_2d_var']]
+    }
+    if ((is_2d_var || work_piece[['is_file_per_dataset']])) {
+      if (Sys.which("cdo")[[1]] == "") {
+        stop("Error: CDO libraries not available")
+      }
+
+      cdo_version <-
+        strsplit(suppressWarnings(
+          system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5]
+
+      cdo_version <-
+        as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]])
+
+    }
+    # If the variable to load is 2-d, we need to determine whether:
+    # - interpolation is needed
+    # - subsetting is requested
+    if (is_2d_var) {
+      ## We read the longitudes and latitudes from the file.
+      lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']])
+      lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']])
+      first_lon_in_original_file <- lon[1]
+      # If a common grid is requested or we are exploring the file dimensions
+      # we need to read the grid type and size of the file to finally work out the
+      # CDO grid name.
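+      # The parsing below assumes 'cdo griddes' output of this shape
+      # (illustrative):
+      #   # gridID 1
+      #   gridtype  = lonlat
+      #   xsize     = 360
+      #   ysize     = 180
+      # in which case the grid of this file would be named 'r360x180'.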
+ if (!is.null(work_piece[['grid']]) || explore_dims) { + # Here we read the grid type and its number of longitudes and latitudes + file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) + grids_positions <- grep('# gridID', file_info) + if (length(grids_positions) < 1) { + stop("The grid should be defined in the files.") + } + grids_first_lines <- grids_positions + 2 + grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) + grids_info <- as.list(seq_along(grids_positions)) + grids_info <- lapply(grids_info, + function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) + grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) + grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) + grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) + grids_matches <- unlist(lapply(grids_info, function (x) { + nlons <- if (any(grep('xsize', x))) { + as.numeric(x[grep('xsize', x) + 1]) + } else { + NA + } + nlats <- if (any(grep('ysize', x))) { + as.numeric(x[grep('ysize', x) + 1]) + } else { + NA + } + result <- FALSE + if (!anyNA(c(nlons, nlats))) { + if ((nlons == length(lon)) && + (nlats == length(lat))) { + result <- TRUE + } + } + result + })) + grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] + if (length(grids_matches) == 0) { + stop("Error: Only 'gaussian' and 'lonlat' grids supported. See e.g: cdo sinfo ", filename) + } + if (sum(grids_matches) > 1) { + if ((all(grids_types[which(grids_matches)] == 'gaussian') || + all(grids_types[which(grids_matches)] == 'lonlat')) && + all(unlist(lapply(grids_info[which(grids_matches)], identical, + grids_info[which(grids_matches)][[1]])))) { + grid_type <- grids_types[which(grids_matches)][1] + } else { + stop("Error: Load() can't disambiguate: ", + "More than one lonlat/gaussian grids with the same size as ", + "the requested variable defined in ", filename) + } + } else if (sum(grids_matches) == 1) { + grid_type <- grids_types[which(grids_matches)] + } else { + stop("Unexpected error.") + } + grid_lons <- length(lon) + grid_lats <- length(lat) + # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() + if (grid_type == 'lonlat') { + grid_name <- paste0('r', grid_lons, 'x', grid_lats) + } else { + grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') + } + if (is.null(work_piece[['grid']])) { + .warning(paste0("Detect the grid type to be '", grid_name, "'. ", + "If it is not expected, assign parameter 'grid' to avoid wrong result.")) + } + } + # If a common grid is requested, we will also calculate its size which we will use + # later on. 
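+      # Size sketch for the two supported name forms (hypothetical grids):
+      # 't106grid' gives .t2nlatlon(106) = c(160, 320), i.e. 160 latitudes and
+      # 320 longitudes; 'r144x73' gives 144 longitudes and 73 latitudes, read
+      # directly from the name.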
+ if (!is.null(work_piece[['grid']])) { + # Now we calculate the common grid type and its lons and lats + if (any(grep('^t\\d{1,+}grid$', work_piece[['grid']]))) { + common_grid_type <- 'gaussian' + common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + nlonlat <- .t2nlatlon(common_grid_res) + common_grid_lats <- nlonlat[1] + common_grid_lons <- nlonlat[2] + } else if (any(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']]))) { + common_grid_type <- 'lonlat' + common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) + } else { + stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") + } + } else { + ## If no 'grid' is specified, there is no common grid. + ## But these variables are filled in for consistency in the code. + common_grid_lons <- length(lon) + common_grid_lats <- length(lat) + } + first_common_grid_lon <- 0 + last_common_grid_lon <- 360 - 360 / common_grid_lons + ## This is not true for gaussian grids or for some regular grids, but + ## is a safe estimation + first_common_grid_lat <- -90 + last_common_grid_lat <- 90 + # And finally determine whether interpolation is needed or not + remove_shift <- FALSE + if (!is.null(work_piece[['grid']])) { + if ((grid_lons != common_grid_lons) || + (grid_lats != common_grid_lats) || + (grid_type != common_grid_type) || + (lon[1] != first_common_grid_lon)) { + if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { + remove_shift <- TRUE + } + remap_needed <- TRUE + common_grid_name <- work_piece[['grid']] + } + } else if ((lon[1] != first_common_grid_lon) && explore_dims && + !work_piece[['single_dataset']]) { + remap_needed <- TRUE + common_grid_name <- grid_name + remove_shift <- TRUE + } + if (remap_needed && (work_piece[['remap']] == 'con') && + (cdo_version >= as.numeric_version('1.7.0'))) { + work_piece[['remap']] <- 'ycon' + } + if (remove_shift && !explore_dims) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + .warning(paste0("The dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' doesn't start at longitude 0 and will be re-interpolated in order ", + "to align its longitudes with the standard CDO grids definable with ", + "the names 'tgrid' or 'rx', which are by definition ", + "starting at the longitude 0.\n")) + if (!is.null(mask)) { + .warning(paste0("A mask was provided for the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "'. This dataset has been re-interpolated to align its longitudes to ", + "start at 0. You must re-interpolate the corresponding mask to align ", + "its longitudes to start at 0 as well, if you haven't done so yet. ", + "Running cdo remapcon,", common_grid_name, + " original_mask_file.nc new_mask_file.nc will fix it.\n")) + } + } + if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + if (!explore_dims) { + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is originally on ", + "a grid coarser than the common grid and it has been ", + "extrapolated. Check the results carefully. 
It is ", + "recommended to specify as common grid the coarsest grid ", + "among all requested datasets via the parameter 'grid'.\n")) + } + } + # Now calculate if the user requests for a lonlat subset or for the + # entire field + lonmin <- work_piece[['lon_limits']][1] + lonmax <- work_piece[['lon_limits']][2] + latmin <- work_piece[['lat_limits']][1] + latmax <- work_piece[['lat_limits']][2] + lon_subsetting_requested <- FALSE + lonlat_subsetting_requested <- FALSE + if (lonmin <= lonmax) { + if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { + lon_subsetting_requested <- TRUE + } + } else { + if ((lonmin - lonmax) > 360 / common_grid_lons) { + lon_subsetting_requested <- TRUE + } else { + gap_width <- floor(lonmin / (360 / common_grid_lons)) - + floor(lonmax / (360 / common_grid_lons)) + if (gap_width > 0) { + if (!(gap_width == 1 && (lonmin %% (360 / common_grid_lons) == 0) && + (lonmax %% (360 / common_grid_lons) == 0))) { + lon_subsetting_requested <- TRUE + } + } + } + } + if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) + || (lon_subsetting_requested)) { + lonlat_subsetting_requested <- TRUE + } + # Now that we know if subsetting was requested, we can say if final data + # will go across greenwich + if (lonmax < lonmin) { + data_across_gw <- TRUE + } else { + data_across_gw <- !lon_subsetting_requested + } + + # When remap is needed but no subsetting, the file is copied locally + # so that cdo works faster, and then interpolated. + # Otherwise the file is kept as is and the subset will have to be + # interpolated still. + if (!lonlat_subsetting_requested && remap_needed) { + nc_close(fnc) + filecopy <- tempfile(pattern = "load", fileext = ".nc") + file.copy(filein, filecopy) + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the + # underlying hdf5 library. + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " -selname,", namevar, " ", filecopy, " ", filein, + " 2>/dev/null")) + file.remove(filecopy) + work_piece[['dimnames']][['lon']] <- 'lon' + work_piece[['dimnames']][['lat']] <- 'lat' + fnc <- nc_open(filein) + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + } + + # Read and check also the mask + if (!is.null(mask)) { + ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + if (is.list(mask)) { + if (!file.exists(mask[['path']])) { + stop("Error: Couldn't find the mask file", mask[['path']]) + } + mask_file <- mask[['path']] + ###file.copy(work_piece[['mask']][['path']], mask_file) + fnc_mask <- nc_open(mask_file) + vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') + if ('nc_var_name' %in% names(mask)) { + if (!(mask[['nc_var_name']] %in% + vars_in_mask)) { + stop("Error: couldn't find variable", mask[['nc_var_name']], + "in the mask file", mask[['path']]) + } + } else { + if (length(vars_in_mask) != 1) { + stop("Error: one and only one non-coordinate variable should be ", + "defined in the mask file", + mask[['path']], + "if the component 'nc_var_name' is not specified. 
", + "Currently found: ", + toString(vars_in_mask), ".") + } else { + mask[['nc_var_name']] <- vars_in_mask + } + } + if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { + stop("Error: the variable '", + mask[['nc_var_name']], + "' must be defined only over the dimensions '", + work_piece[['dimnames']][['lon']], "' and '", + work_piece[['dimnames']][['lat']], + "' in the mask file ", + mask[['path']]) + } + mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) + nc_close(fnc_mask) + ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) + ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) + ###} else { + ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) + ### nc_close(fnc_mask) + ### fnc_mask <- nc_open(mask_file) + ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') + ### mask_lon <- lon + ### mask_lat <- lat + ###} + ###} + ### Now ready to check that the mask is right + ##if (!(lonlat_subsetting_requested && remap_needed)) { + ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { + ### stop(paste("Error: the mask of the dataset with index ", + ### tail(work_piece[['indices']], 1), " in '", + ### work_piece[['dataset_type']], "' is wrong. ", + ### "It must be on the common grid if the selected output type is 'lonlat', ", + ### "'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on ", + ### "the grid of the corresponding dataset if the selected output type is ", + ### "'areave' and no 'grid' has been specified. For more information ", + ### "check ?Load and see help on parameters 'grid', 'maskmod' and ", + ### "'maskobs'.", sep = "")) + ### } + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be ", + ### "identical to the ones in the corresponding data files if output = 'areave' ", + ### " or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in ", + ### "the mask file must start by 0 and the latitudes must be ordered from ", + ### "highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} + } + } + + lon_indices <- seq_along(lon) + if (!(lonlat_subsetting_requested && remap_needed)) { + lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 + } + if (lonmax >= lonmin) { + lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] + } else if (!remap_needed) { + lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] + } + lat_indices <- which(lat >= latmin & lat <= latmax) + ## In most of the cases the latitudes are ordered from -90 to 90. + ## We will reorder them to be in the order from 90 to -90, so mostly + ## always the latitudes are reordered. + ## TODO: This could be avoided in future. 
+ if (lat[1] < lat[length(lat)]) { + lat_indices <- lat_indices[rev(seq_along(lat_indices))] + } + if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { + if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { + stop("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is wrong. It must be on the ", + "common grid if the selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must be on the grid of ", + "the corresponding dataset if the selected output type is 'areave' and ", + "no 'grid' has been specified. For more information check ?Load and see ", + "help on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask <- mask[lon_indices, lat_indices] + } + ## If the user requests subsetting, we must extend the lon and lat limits if possible + ## so that the interpolation after is done properly + maximum_extra_points <- work_piece[['remapcells']] + if (lonlat_subsetting_requested && remap_needed) { + if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || + (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { + ## if the requested number of points goes beyond the left or right + ## sides of the map, we need to take the entire map so that the + ## interpolation works properly + lon_indices <- seq_along(lon) + } else { + extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) + if (extra_points > 0) { + lon_indices <- + c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + } + extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) + if (extra_points > 0) { + lon_indices <- c(lon_indices, + (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + } + } + min_lat_ind <- min(lat_indices) + max_lat_ind <- max(lat_indices) + extra_points <- min(maximum_extra_points, min_lat_ind - 1) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) + } else { + lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) + } + } + extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) + } else { + lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) + } + } + } + lon <- lon[lon_indices] + lat <- lat[lat_indices] + expected_dims <- c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) + } else { + lon <- 0 + lat <- 0 + } + # We keep on filling the expected dimensions + var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + nmemb <- nltime <- NULL + ## Sometimes CDO renames 'members' dimension to 'lev' + old_members_dimname <- NULL + if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { + old_members_dimname <- work_piece[['dimnames']][['member']] + work_piece[['dimnames']][['member']] <- 'lev' + } + if (work_piece[['dimnames']][['member']] %in% var_dimnames) { + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], + var_dimnames)]]$len + expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) + } else { + nmemb <- 1 + } + if (length(expected_dims) > 0) { + dim_matches <- match(expected_dims, var_dimnames) + if (anyNA(dim_matches)) { + if (!is.null(old_members_dimname)) { + 
expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop("Error: the expected dimension(s)", + toString(expected_dims[which(is.na(dim_matches))]), + "were not found in", filename) + } + time_dimname <- var_dimnames[-dim_matches] + } else { + time_dimname <- var_dimnames + } + if (length(time_dimname) > 0) { + if (length(time_dimname) == 1) { + nltime <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len + expected_dims <- c(expected_dims, time_dimname) + dim_matches <- match(expected_dims, var_dimnames) + } else { + if (!is.null(old_members_dimname)) { + expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop("Error: the variable ", namevar, + " is defined over more dimensions than the expected (", + toString(c(expected_dims, 'time')), + "). It could also be that the members, longitude or latitude ", + "dimensions are named incorrectly. In that case, either rename ", + "the dimensions in the file or adjust Load() to recognize the actual ", + "name with the parameter 'dimnames'. See file ", filename) + } + } else { + nltime <- 1 + } + + # Now we must retrieve the data from the file, but only the asked indices. + # So we build up the indices to retrieve. + # Longitudes or latitudes have been retrieved already. + if (explore_dims) { + # If we're exploring the file we only want one time step from one member, + # to regrid it and work out the number of longitudes and latitudes. + # We don't need more. + members <- 1 + ltimes_list <- list(1) + } else { + # The data is arranged in the array 'tmp' with the dimensions in a + # common order: + # 1) Longitudes + # 2) Latitudes + # 3) Members (even if is not a file per member experiment) + # 4) Lead-times + if (work_piece[['is_file_per_dataset']]) { + time_indices <- 1:nltime + mons <- strsplit(system(paste('cdo showmon ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + years <- strsplit(system(paste('cdo showyear ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + mons <- as.numeric(mons[[1]][which(mons[[1]] != "")]) + years <- as.numeric(years[[1]][which(years[[1]] != "")]) + time_indices <- ts(time_indices, start = c(years[1], mons[1]), + end = c(years[length(years)], mons[length(mons)]), + frequency = 12) + ltimes_list <- list() + for (sdate in work_piece[['startdates']]) { + selected_time_indices <- window(time_indices, start = c(as.numeric( + substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))), + end = c(3000, 12), frequency = 12, extend = TRUE) + selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]] + ltimes_list <- c(ltimes_list, list(selected_time_indices)) + } + } else { + ltimes <- work_piece[['leadtimes']] + #if (work_piece[['dataset_type']] == 'exp') { + ltimes_list <- list(ltimes[which(ltimes <= nltime)]) + #} + } + ## TODO: Put, when reading matrices, this kind of warnings + # if (nmember < nmemb) { + # cat("Warning: + members <- 1:work_piece[['nmember']] + members <- members[which(members <= nmemb)] + } + + # Now, for each list of leadtimes to load (usually only one list with all leadtimes), + # we'll join the indices and retrieve data + found_disordered_dims <- FALSE + for (ltimes in ltimes_list) { + if (is_2d_var) { + start <- c(min(lon_indices), min(lat_indices)) + end <- c(max(lon_indices), max(lat_indices)) + if (lonlat_subsetting_requested && remap_needed) { + subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + dim_longitudes <- 
ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ncdf_dims <- list(dim_longitudes, dim_latitudes) + } else { + subset_indices <- list(lon_indices - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + ncdf_dims <- list() + } + final_dims <- c(length(subset_indices[[1]]), length(subset_indices[[2]]), 1, 1) + } else { + start <- end <- NULL + subset_indices <- list() + ncdf_dims <- list() + final_dims <- c(1, 1, 1, 1) + } + + if (work_piece[['dimnames']][['member']] %in% expected_dims) { + start <- c(start, head(members, 1)) + end <- c(end, tail(members, 1)) + subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) + dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) + ncdf_dims <- c(ncdf_dims, list(dim_members)) + final_dims[3] <- length(members) + } + if (time_dimname %in% expected_dims) { + if (!all(is.na(ltimes))) { + start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) + end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) + subset_indices <- c(subset_indices, + list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + } else { + start <- c(start, NA) + end <- c(end, NA) + subset_indices <- c(subset_indices, list(ltimes)) + } + dim_time <- ncdim_def(time_dimname, "", seq_along(ltimes), unlim = TRUE) + ncdf_dims <- c(ncdf_dims, list(dim_time)) + final_dims[4] <- length(ltimes) + } + count <- end - start + 1 + start <- start[dim_matches] + count <- count[dim_matches] + subset_indices <- subset_indices[dim_matches] + # Now that we have the indices to retrieve, we retrieve the data + if (prod(final_dims) > 0) { + tmp <- take(ncvar_get(fnc, namevar, start, count, + collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + # The data is regridded if it corresponds to an atmospheric variable. When + # the chosen output type is 'areave' the data is not regridded to not + # waste computing time unless the user specified a common grid. 
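+        # For reference (illustrative values): with start = c(1, 1, 1, 1) and
+        # count = c(96, 72, 1, 7), the ncvar_get() call above reads a 96x72
+        # lon-lat slab for the first member and the first 7 lead times, in the
+        # dimension order of the file.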
+ if (is_2d_var) { + ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### start[dim_matches[1:2]], count[dim_matches[1:2]], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + if (lonlat_subsetting_requested && remap_needed) { + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") + ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], + fnc$var[[namevar]]$missval, + prec = if (fnc$var[[namevar]]$prec == 'int') { + 'integer' + } else { + fnc$var[[namevar]]$prec + }) + scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) + add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) + if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { + tmp <- (tmp - add_offset) / scale_factor + } + #nc_close(fnc) + fnc2 <- nc_create(filein2, list(ncdf_var)) + ncvar_put(fnc2, ncdf_var, tmp) + if (add_offset != 0) { + ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) + } + if (scale_factor != 1) { + ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) + } + nc_close(fnc2) + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { + "0,360," + } else { + paste0(lonmin, ",", lonmax, ",") + }, latmin, ",", latmax, + " -remap", work_piece[['remap']], ",", common_grid_name, + " ", filein2, " ", filein, " 2>/dev/null")) + file.remove(filein2) + fnc2 <- nc_open(filein) + sub_lon <- ncvar_get(fnc2, 'lon') + sub_lat <- ncvar_get(fnc2, 'lat') + ## We read the longitudes and latitudes from the file. + ## In principle cdo should put in order the longitudes + ## and slice them properly unless data is across greenwich + sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 + sub_lon_indices <- seq_along(sub_lon) + if (lonmax < lonmin) { + sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] + } + sub_lat_indices <- seq_along(sub_lat) + ## In principle cdo should put in order the latitudes + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + sub_lat_indices <- rev(seq_along(sub_lat)) + } + final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) + subset_indices[[dim_matches[1]]] <- sub_lon_indices + subset_indices[[dim_matches[2]]] <- sub_lat_indices + + tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + + if (!is.null(mask)) { + ## We create a very simple 2d netcdf file that is then interpolated to the common + ## grid to know what are the lons and lats of our slice of data + mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], + "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], + "degrees_north", c(-90, 90)) + ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) + nc_close(fnc_mask) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null")) + fnc_mask <- nc_open(mask_file_remap) + mask_lons <- ncvar_get(fnc_mask, 'lon') + mask_lats <- ncvar_get(fnc_mask, 'lat') + nc_close(fnc_mask) 
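+            # mask_lons/mask_lats now describe the common grid; the dummy 2x2
+            # 'LSM' field written above was only a vehicle for CDO to generate
+            # those coordinates.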
+ file.remove(mask_file, mask_file_remap) + if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) { + stop("Error: the mask of the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' is wrong. It must be on the common grid if the ", + "selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must ", + "be on the grid of the corresponding dataset if the ", + "selected output type is 'areave' and no 'grid' has been ", + "specified. For more information check ?Load and see help ", + "on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360 + if (lonmax >= lonmin) { + mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax)) + } else { + mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax)) + } + mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax)) + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + mask_lat_indices <- mask_lat_indices[rev(seq_along(mask_lat_indices))] + } + mask <- mask[mask_lon_indices, mask_lat_indices] + } + sub_lon <- sub_lon[sub_lon_indices] + sub_lat <- sub_lat[sub_lat_indices] + ### nc_close(fnc_mask) + ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) { + ### "0,360," + ### } else { + ### paste0(lonmin, ",", lonmax, ",") + ### }, latmin, ",", latmax, + ### " -remap", work_piece[['remap']], ",", common_grid_name, + ###This is wrong: same files + ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = "")) + ### fnc_mask <- nc_open(mask_file) + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + } + } + if (is.unsorted(dim_matches)) { + if (!found_disordered_dims && + rev(work_piece[['indices']])[2] == 1 && + rev(work_piece[['indices']])[3] == 1) { + found_disordered_dims <- TRUE + .warning(paste0("The dimensions for the variable ", namevar, + " in the files of the experiment with index ", + tail(work_piece[['indices']], 1), + " are not in the optimal order for loading with Load(). ", + "The optimal order would be '", + toString(expected_dims), + "'. One of the files of the dataset is stored in ", filename)) + } + tmp <- aperm(tmp, dim_matches) + } + dim(tmp) <- final_dims + # If we are exploring the file we don't need to process and arrange + # the retrieved data. We only need to keep the dimension sizes. + if (is_2d_var && lonlat_subsetting_requested && remap_needed) { + final_lons <- sub_lon + final_lats <- sub_lat + } else { + final_lons <- lon + final_lats <- lat + } + if (explore_dims) { + if (work_piece[['is_file_per_member']]) { + ## TODO: When the exp_full_path contains asterisks and is file_per_member + ## members from different datasets may be accounted. + ## Also if one file member is missing the accounting will be wrong. + ## Should parse the file name and extract number of members. + if (is_url) { + nmemb <- NULL + } else { + nmemb <- length(files) + } + } + dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats) + } else { + # If we are not exploring, then we have to process the retrieved data + if (is_2d_var) { + tmp <- apply(tmp, c(3, 4), function(x) { + # Disable of large values. 
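+            # (Illustrative: with work_piece[['var_limits']] = c(220, 320) for a
+            # temperature in K, values outside that range are set to NA below,
+            # before masking and averaging.)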
+ if (!is.na(work_piece[['var_limits']][2])) { + x[which(x > work_piece[['var_limits']][2])] <- NA + } + if (!is.na(work_piece[['var_limits']][1])) { + x[which(x < work_piece[['var_limits']][1])] <- NA + } + if (!is.null(mask)) { + x[which(mask < 0.5)] <- NA + } + + if (output == 'areave' || output == 'lon') { + weights <- InsertDim(cos(final_lats * pi / 180), 1, + length(final_lons), name = 'lon') + weights[which(is.na(x))] <- NA + if (output == 'areave') { + weights <- weights / mean(weights, na.rm = TRUE) + mean(x * weights, na.rm = TRUE) + } else { + weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, + length(final_lats), name = 'lat') + MeanDims(x * weights, 2, na.rm = TRUE) + } + } else if (output == 'lat') { + MeanDims(x, 1, na.rm = TRUE) + } else if (output == 'lonlat') { + signif(x, 5) + } + }) + if (output == 'areave') { + dim(tmp) <- c(1, 1, final_dims[3:4]) + } else if (output == 'lon') { + dim(tmp) <- c(final_dims[1], 1, final_dims[3:4]) + } else if (output == 'lat') { + dim(tmp) <- c(1, final_dims[c(2, 3, 4)]) + } else if (output == 'lonlat') { + dim(tmp) <- final_dims + } + } + var_data <- attach.big.matrix(work_piece[['out_pointer']]) + if (work_piece[['dims']][['member']] > 1 && nmemb > 1 && + work_piece[['dims']][['ftime']] > 1 && + nltime < work_piece[['dims']][['ftime']]) { + work_piece[['indices']][2] <- work_piece[['indices']][2] - 1 + for (jmemb in members) { + work_piece[['indices']][2] <- work_piece[['indices']][2] + 1 + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1) + var_data[out_indices] <- as.vector(tmp[, , jmemb, ]) + } + work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1 + } else { + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp) - 1) + a <- aperm(tmp, c(1, 2, 4, 3)) + as.vector(a) + var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3))) + } + work_piece[['indices']][3] <- work_piece[['indices']][3] + 1 + } + } + } + nc_close(fnc) + if (is_2d_var) { + if (remap_needed) { + array_across_gw <- FALSE + file.remove(filein) + ###if (!is.null(mask) && lonlat_subsetting_requested) { + ### file.remove(mask_file) + ###} + } else { + if (first_lon_in_original_file < 0) { + array_across_gw <- data_across_gw + } else { + array_across_gw <- FALSE + } + } + } + } + if (explore_dims) { + list(dims = dims, is_2d_var = is_2d_var, grid = grid_name, + units = units, var_long_name = var_long_name, + data_across_gw = data_across_gw, array_across_gw = array_across_gw) + } else { + ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) { + ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) + ###} + if (!silent && !is.null(work_piece[['progress_amount']])) { + message(work_piece[['progress_amount']], appendLF = FALSE) + } + found_file + } +} + +.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates, + nmember = NULL, nmemberobs = NULL, + nleadtime = NULL, leadtimemin = 1, + leadtimemax = NULL, storefreq = 'monthly', + sampleperiod = 1, lonmin = 0, lonmax = 360, + latmin = -90, latmax = 90, output = 'areave', + method = 'conservative', grid = NULL, + maskmod = vector("list", 15), + maskobs = vector("list", 15), + configfile = NULL, suffixexp = NULL, + suffixobs = NULL, varmin = NULL, varmax = NULL, + silent = FALSE, nprocs = NULL) { + ## This 
function loads and selects sample data stored in sampleMap and + ## sampleTimeSeries and is used in the examples instead of Load() so as + ## to avoid nco and cdo system calls and computation time in the stage + ## of running examples in the CHECK process on CRAN. + selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', + '20001101', '20051101')) + start_dates_position <- 3 + lead_times_position <- 4 + + if (output == 'lonlat') { + sampleData <- s2dv::sampleMap + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] + } else if (output == 'areave') { + sampleData <- s2dv::sampleTimeSeries + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] + } + + dims_out <- dim(sampleData$mod) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$mod) <- dims_out + + dims_out <- dim(sampleData$obs) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$obs) <- dims_out + + invisible(list(mod = dataOut$mod, obs = dataOut$obs, + lat = dataOut$lat, lon = dataOut$lon)) +} + +.ConfigGetDatasetInfo <- function(matching_entries, table_name) { + # This function obtains the information of a dataset and variable pair, + # applying all the entries that match in the configuration file. + if (table_name == 'experiments') { + id <- 'EXP' + } else { + id <- 'OBS' + } + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), + paste0('$DEFAULT_', id, '_FILE_PATH$'), + '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', + '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + info <- NULL + + for (entry in matching_entries) { + if (is.null(info)) { + info <- entry[-1:-2] + info[which(info == '*')] <- defaults[which(info == '*')] + } else { + info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] + } + } + + info <- as.list(info) + names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') + info +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. 
The path with expressions and tags, e.g.:
+  #   /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc
+  # and a complete known path to one of the matching files, e.g.:
+  #   /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc
+  # and it returns the path pattern but without shell globbing expressions:
+  #   /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc
+  #
+  # To do that, it also needs as input the list of replace values (the
+  # association of each tag to its value).
+  #
+  # All the tags not present in the parameter tags_to_keep will be replaced.
+  #
+  # Not all cases can be resolved with the implemented algorithm. In an
+  # unsolvable case a warning is given and one possible guess is returned.
+  #
+  # In some cases it is interesting to replace only the expressions in the
+  # path to the file, but not the ones in the file name itself. To keep the
+  # expressions in the file name, the parameter permissive can be set to
+  # TRUE. To replace all the expressions it can be set to FALSE.
+  clean <- function(x) {
+    if (nchar(x) > 0) {
+      x <- gsub('\\\\', '', x)
+      x <- gsub('\\^', '', x)
+      x <- gsub('\\$', '', x)
+      x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y)))
+      do.call(paste0, as.list(x))
+    } else {
+      x
+    }
+  }
+
+  strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "")
+
+  if (permissive) {
+    actual_path_chunks <- strsplit(actual_path, '/')[[1]]
+    actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/')
+    file_name <- tail(actual_path_chunks, 1)
+    if (length(actual_path_chunks) > 1) {
+      file_name <- paste0('/', file_name)
+    }
+    path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]]
+    path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)],
+                             collapse = '/')
+    path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values)
+    file_name_with_globs <- tail(path_with_globs_chunks, 1)
+    if (length(path_with_globs_chunks) > 1) {
+      file_name_with_globs <- paste0('/', file_name_with_globs)
+    }
+    right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1)
+    right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values)
+    path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags))
+    match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE),
+                     paste0(actual_path, file_name))
+    if (match != 1) {
+      stop("Incorrect parameters to replace glob expressions. 
", + "The path with expressions does not match the actual path.") + } + if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') + file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + } + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- NULL + tags_to_replace_starts <- NULL + tags_to_replace_ends <- NULL + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in seq_along(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, + matches[i] - 1), replace_values) + left_known <- + strReverse(head(strsplit(strReverse(left), + strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, + matches[i] + lengths[i], + nchar(path_with_globs_rx)), + replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- + c(left_match + match_len - 1 - nchar(clean(right_known)) - + nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- + c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path, head(tags_to_replace_ends, 1) + 1, + nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - + (head(tags_to_replace_ends, 1) - + head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset ", + "or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path, file_name_with_globs) + } else { + actual_path + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + tag <- paste0('\\$', tag, '\\$') + path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') + parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] + parts <- as.list(grep(tag, parts, value = TRUE)) + longest_couples <- NULL + pos_longest_couples <- NULL + found_value <- NULL + for (i in seq_along(parts)) { + parts[[i]] <- strsplit(parts[[i]], tag)[[1]] + if (length(parts[[i]]) == 1) { + parts[[i]] <- c(parts[[i]], '') + } + len_parts <- sapply(parts[[i]], nchar) + len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] + pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) + longest_couples <- c(longest_couples, max(len_couples)) + } + chosen_part <- which.max(longest_couples) + parts[[chosen_part]] <- + parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + actual_path <- + substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + found_value <- substr(actual_path, 0, match_right - 1) + } + } + } else { + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + actual_path <- substr(actual_path, 0, match_right - 1) + } + } + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + found_value <- + substr(actual_path, match_left + attr(match_left, 'match.length'), + nchar(actual_path)) + } + } + } + found_value 
+}
+
+.FilterUserGraphicArgs <- function(excludedArgs, ...) {
+  # This function filters the extra graphical parameters passed by the user to
+  # a plot function, excluding the ones that the plot function uses by default.
+  # Each plot function has a different set of arguments that are not allowed to
+  # be modified.
+  args <- list(...)
+  userArgs <- list()
+  for (name in names(args)) {
+    if ((name != "") & !is.element(name, excludedArgs)) {
+      # If the argument has a name and it is not in the list of excluded
+      # arguments, then it is added to the list that will be used
+      userArgs[[name]] <- args[[name]]
+    } else {
+      .warning(paste0("the argument '", name, "' cannot be ",
+                      "modified and the new value will be ignored"))
+    }
+  }
+  userArgs
+}
+
+.SelectDevice <- function(fileout, width, height, units, res) {
+  # This function is used in the plot functions to check the extension of the
+  # files where the graphics will be stored and select the right R device to
+  # save them.
+  # If the vector of filenames ('fileout') has files with different
+  # extensions, then it will only accept the first one, changing all the rest
+  # of the filenames to use that extension.
+
+  # We extract the extension of the filenames: '.png', '.pdf', ...
+  ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout))
+
+  if (length(ext) != 0) {
+    # If there is an extension specified, select the correct device
+    ## units of width and height set to accept inches
+    if (ext[1] == ".png") {
+      saveToFile <- function(fileout) {
+        png(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".jpeg") {
+      saveToFile <- function(fileout) {
+        jpeg(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] %in% c(".eps", ".ps")) {
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".pdf") {
+      saveToFile <- function(fileout) {
+        pdf(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".svg") {
+      saveToFile <- function(fileout) {
+        svg(filename = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".bmp") {
+      saveToFile <- function(fileout) {
+        bmp(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".tiff") {
+      saveToFile <- function(fileout) {
+        tiff(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else {
+      .warning("file extension not supported; '.eps' will be used by default.")
+      ## In case there is only one filename
+      fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1])
+      ext[1] <- ".eps"
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    }
+    # Change filenames when necessary
+    if (any(ext != ext[1])) {
+      .warning(paste0("some extensions of the filenames provided in 'fileout' ",
+                      "are not ", ext[1],
+                      ". The extensions are being converted to ", ext[1], "."))
+      fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout)
+    }
+  } else {
+    # Default filenames when there is no specification
+    .warning("no extensions specified in the filenames; defaulting to '.eps'")
+    fileout <- paste0(fileout, ".eps")
+    saveToFile <- postscript
+  }
+
+  # return the correct function with the graphical device, and the correct
+  # filenames
+  list(fun = saveToFile, files = fileout)
+}
+
+.message <- function(...)
{ + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + message(tmp, appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + warning(tmp, call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.IsColor <- function(x) { + res <- try(col2rgb(x), silent = TRUE) + return(!is(res, "try-error")) +} + +# This function switches to a specified figure at position (row, col) in a layout. +# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is +# activated, i.e., all drawn elements will be superimposed. 
Additionally, after +# using this function, the automatical pointing to the next figure in the layout +# will be spoiled: once the last figure in the layout is drawn, the pointer won't +# move to the first figure in the layout. +# Only figures with numbers other than 0 (when creating the layout) will be +# accessible. +# Inputs: either row and col, or n and mat +.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) { + if (!is.null(n) && !is.null(mat)) { + if (!is.numeric(n) || length(n) != 1) { + stop("Parameter 'n' must be a single numeric value.") + } + n <- round(n) + if (!is.array(mat)) { + stop("Parameter 'mat' must be an array.") + } + target <- which(mat == n, arr.ind = TRUE)[1, ] + row <- target[1] + col <- target[2] + } else if (!is.null(row) && !is.null(col)) { + if (!is.numeric(row) || length(row) != 1) { + stop("Parameter 'row' must be a single numeric value.") + } + row <- round(row) + if (!is.numeric(col) || length(col) != 1) { + stop("Parameter 'col' must be a single numeric value.") + } + col <- round(col) + } else { + stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.") + } + next_attempt <- c(row, col) + par(mfg = next_attempt) + i <- 1 + layout_size <- par('mfrow') + layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2], + byrow = TRUE) + while (any((par('mfg')[1:2] != c(row, col)))) { + next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ] + par(mfg = next_attempt) + i <- i + 1 + if (i > prod(layout_size)) { + stop("Figure not accessible.") + } + } + plot(0, type = 'n', axes = FALSE, ann = FALSE) + par(mfg = next_attempt) +} + +# Function to permute arrays of non-atomic elements (e.g. POSIXct) +.aperm2 <- function(x, new_order) { + old_dims <- dim(x) + attr_bk <- attributes(x) + if ('dim' %in% names(attr_bk)) { + attr_bk[['dim']] <- NULL + } + if (is.numeric(x)) { + x <- aperm(x, new_order) + } else { + y <- array(seq_along(x), dim = dim(x)) + y <- aperm(y, new_order) + x <- x[as.vector(y)] + } + dim(x) <- old_dims[new_order] + attributes(x) <- c(attributes(x), attr_bk) + x +} + +# This function is a helper for the function .MergeArrays. +# It expects as inputs two named numeric vectors, and it extends them +# with dimensions of length 1 until an ordered common dimension +# format is reached. +# The first output is dims1 extended with 1s. +# The second output is dims2 extended with 1s. +# The third output is a merged dimension vector. If dimensions with +# the same name are found in the two inputs, and they have a different +# length, the maximum is taken. +.MergeArrayDims <- function(dims1, dims2) { + new_dims1 <- NULL + new_dims2 <- NULL + while (length(dims1) > 0) { + if (names(dims1)[1] %in% names(dims2)) { + pos <- which(names(dims2) == names(dims1)[1]) + dims_to_add <- rep(1, pos - 1) + if (length(dims_to_add) > 0) { + names(dims_to_add) <- names(dims2[1:(pos - 1)]) + } + new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) + new_dims2 <- c(new_dims2, dims2[1:pos]) + dims1 <- dims1[-1] + dims2 <- dims2[-(1:pos)] + } else { + new_dims1 <- c(new_dims1, dims1[1]) + new_dims2 <- c(new_dims2, 1) + names(new_dims2)[length(new_dims2)] <- names(dims1)[1] + dims1 <- dims1[-1] + } + } + if (length(dims2) > 0) { + dims_to_add <- rep(1, length(dims2)) + names(dims_to_add) <- names(dims2) + new_dims1 <- c(new_dims1, dims_to_add) + new_dims2 <- c(new_dims2, dims2) + } + list(new_dims1, new_dims2, pmax(new_dims1, new_dims2)) +} + +# This function takes two named arrays and merges them, filling with +# NA where needed. 
+# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in seq_along(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# only can be used in Trend(). Needs generalization or be replaced by other function. +.reorder <- function(output, time_dim, dim_names) { + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { + names(dim(output)) <- time_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { #regular case + names(dim(output))[1] <- time_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' + return(output) +} + +# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { + + if (type == 'dcpp') { + + fyear_dim <- 'fyear' + data <- Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data)) == fmonth_dim)] <- fyear_dim + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + + anom <- data + + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) + + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- + seq(from = indices_for_clim[length(indices_for_clim)], + 
by = -1, length.out = n_fyears) + } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i, first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i, ] <- data[i, ] - clim + } + } + + } else if (type %in% c('obs', 'hist')) { + + data <- multiApply::Apply(data = data, target_dims = month_dim, + fun = mean, na.rm = na.rm)$output1 + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { + ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, + na.rm = na.rm)$output1 + } else { + ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, + indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } + + anom <- data - clim + + } else { + stop('type must be dcpp, hist or obs') + } + + return(anom) +} + +#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. +# The function is temporarily here because PlotLayout() needs to draw the colorbars of +# PlotMostLikelyQuantileMap(). +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, + label_scale = 1, extra_margin = rep(0, 4), + ...) { + # bar_limits + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(seq_along(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in seq_along(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } 
else { + bar_titles <- paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { + s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, +# bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + #TODO: col_inf and col_sup + return(list(brks = brks, cols = cols)) + } + +} + -- GitLab From 6d0432ca662901511236aae8c68fad5c044732d5 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 13 May 2024 16:46:32 +0200 Subject: [PATCH 76/96] Fix output_dims in Compute() and add threads_load and threads_compute --- build_compute_workflow.R | 19 ++++++++++--------- .../recipe_test_compute_decadal.yml | 8 ++++---- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index a4403cb5..c416bb65 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -7,7 +7,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Anomalies/Anomalies.R") source("modules/Downscaling/Downscaling.R") source("modules/Skill/Skill.R") - source("modules/Indices/Indices.R") + # source("modules/Indices/Indices.R") # Define appender with custom log layout so that it knows not to append # within Compute(). @@ -107,7 +107,7 @@ run_compute_workflow <- function(recipe, data) { # Add forecast if not empty if (!is.null(data$fcst)) { target_dims <- c(target_dims, - list(exp_target_dims)) + list(fcst = exp_target_dims)) inputs <- c(inputs, list(fcst = data$fcst)) input_attributes <- c(input_attributes, list(fcst = STARTR_CUBE_ATTRS)) } @@ -119,11 +119,10 @@ run_compute_workflow <- function(recipe, data) { } else { spatial_output_dims <- c("latitude", "longitude") } - exp_output_dims <- c(exp_target_dims[!exp_target_dims %in% c('var_dir', - 'latitude', - 'longitude')], - spatial_output_dims, - c("sday", "sweek")) + ## TODO: There may be a bug here when the user requests only lat or lon as chunking dim + exp_output_dims <- unique(c(exp_target_dims, spatial_output_dims, 'sday', 'sweek')) + exp_output_dims <- c(exp_output_dims[!exp_output_dims %in% + c('var_dir', names(recipe$Run$startR_workflow$chunk_along))]) # Default outputs: hindcast and observations output_dims <- list(hcst = exp_output_dims, obs = exp_output_dims) @@ -133,6 +132,7 @@ run_compute_workflow <- function(recipe, data) { list(fcst = exp_output_dims)) } # Add skill metrics if skill module is called + ## TODO: Fix potential bug if ("skill" %in% modules) { skill_dims <- c('metric', 'var', 'time', spatial_output_dims) skill_output_dims <- @@ -181,8 +181,9 @@ run_compute_workflow <- function(recipe, data) { if (run_on == 'local') { # Compute locally, in serial res <- Compute(wf$hcst, - chunks = recipe$Run$startR_workflow$chunk_along) - print("made it here") + chunks = recipe$Run$startR_workflow$chunk_along, + threads_compute = recipe$Analysis$ncores, + threads_load = recipe$Analysis$ncores/2) } else if (run_on %in% c('nord3', 'as_machine')) { #NOTE: autosubmit_suite_dir can be anywhere ## TODO: Change code_dir to autosubmit dir when necessary diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml index 8ea48e9a..555eaec7 100644 --- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml +++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml @@ -18,7 
+18,7 @@ Analysis: hcst_start: 1990 hcst_end: 1993 # season: 'Annual' - ftime_min: 2 + ftime_min: 1 ftime_max: 24 Region: latmin: -10 #-90 @@ -46,7 +46,7 @@ Analysis: index: FALSE Visualization: plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles - ncores: # Optional, int: number of cores, defaults to 1 + ncores: 10 # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: @@ -55,10 +55,10 @@ Run: output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ startR_workflow: - modules: skill # Modules to run inside Compute(), in order + modules: 'anomalies skill' # Modules to run inside Compute(), in order return: hcst, fcst, obs # Choose only if they will fit into memory save: hcst, fcst, obs # Only if returned - chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} run_on: local # options: local, as_machine, nord3 chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O -- GitLab From f269df8d69e37078bd9d8ecec1a8840343773a14 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 10 May 2024 13:22:13 +0200 Subject: [PATCH 77/96] Fix typo: 360-day to 360_day --- conf/archive_decadal.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index c7c6f3a4..0697fc3f 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -100,7 +100,7 @@ esarchive: daily_mean: grid: {"tasmin":"gn", "tasmax":"gn", "pr":"gn"} version: {"tasmin":"v20200417", "tasmax":"v20200417", "pr":"v20200417"} - calendar: "360-day" + calendar: "360_day" member: r1i1p1f2,r2i1p1f2,r3i1p1f2,r4i1p1f2,r5i1p1f2,r6i1p1f2,r7i1p1f2,r8i1p1f2,r9i1p1f2,r10i1p1f2 initial_month: 11 sdate_add: 0 -- GitLab From 616e8e3b4d1e16655f385fd8fa9348347302502c Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 14 May 2024 10:40:45 +0200 Subject: [PATCH 78/96] Add retrieve = TRUE to logger in viz functions --- modules/Visualization/R/plot_ensemble_mean.R | 2 +- modules/Visualization/R/plot_most_likely_terciles_map.R | 2 +- modules/Visualization/Visualization.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 3cbfbd8e..a88e5184 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -223,6 +223,6 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### FORECAST ENSEMBLE MEAN PLOTS SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index c9ce70f2..33bbb103 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -233,6 +233,6 @@ plot_most_likely_terciles <- function(recipe, } } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### MOST LIKELY TERCILE PLOTS SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/Visualization.R 
b/modules/Visualization/Visualization.R index 14b4194e..6db108a8 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -244,7 +244,7 @@ Visualization <- function(recipe, system(system_command) } unlink(plot_files) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste0("##### PLOT FILES CONVERTED TO ", toupper(extension), " #####")) } } -- GitLab From 386618a8e8475a2189b73d17a82b4f260bfee058 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 14 May 2024 10:41:19 +0200 Subject: [PATCH 79/96] Add check to remove singleton chunking dimensions from chunk list; comment code --- tools/check_recipe.R | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index efb9877e..47b5f08b 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -866,7 +866,7 @@ check_recipe <- function(recipe) { paste(STARTR_MODULES, collapse = ", "), ".")) error_status <- T } - # Check chunking dims + # Check if chunking dims are among the allowed dimensions if (!(any(names(recipe$Run$startR_workflow$chunk_along) %in% CHUNK_DIMS))) { error(recipe$Run$logger, retrieve = TRUE, paste0("The dimensions in 'Run:startR_workflow:chunk_along' can ", @@ -874,6 +874,7 @@ check_recipe <- function(recipe) { error_status <- T } # Check compatibility between module selection and chunking dimensions + # spatial dimensions if (any(c("latitude", "longitude") %in% names(recipe$Run$startR_workflow$chunk_along)) && any(modules %in% MODULES_USING_LATLON)) { error(recipe$Run$logger, retrieve = TRUE, @@ -882,6 +883,7 @@ check_recipe <- function(recipe) { paste(MODULES_USING_LATLON, collapse = ", "), ".")) error_status <- T } + # 'time' if ("time" %in% names(recipe$Run$startR_workflow$chunk_along) && any(modules %in% MODULES_USING_TIME)) { error(recipe$Run$logger, retrieve = TRUE, @@ -890,6 +892,7 @@ check_recipe <- function(recipe) { paste(MODULES_USING_TIME, collapse = ", "), ".")) error_status <- T } + # 'var' if ("var" %in% names(recipe$Run$startR_workflow$chunk_along) && any(modules %in% MODULES_USING_VAR)) { error(recipe$Run$logger, retrieve = TRUE, @@ -898,6 +901,14 @@ check_recipe <- function(recipe) { paste(MODULES_USING_VAR, collapse = ", "), ".")) error_status <- T } + # Remove chunks of length 1 + if (any(recipe$Run$startR_workflow$chunk_along) == 1) { + recipe$Run$startR_workflow$chunk_along <- + recipe$Run$startR_workflow$chunk_along[-which(recipe$Run$startR_workflow$chunk_along == 1)] + warn(recipe$Run$logger, retrieve = TRUE, + paste0("Chunks of length 1 defined in startR_workflow:chunk_along", + " have been removed from the chunk list.")) + } } } -- GitLab From 0a865d7a50088c805b29a1fb793edf2ca7c74a98 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 14 May 2024 10:54:29 +0200 Subject: [PATCH 80/96] Build paths with file.path() --- build_compute_workflow.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index c416bb65..77b16bb9 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -235,7 +235,7 @@ run_compute_workflow <- function(recipe, data) { } ## TODO: Replicate for probabilities and other modules if (!is.null(res$skill)) { - tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/") + tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Skill") metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds")) res$skill <- ClimProjDiags::ArrayToList(res$skill, dim = 'metric', @@ -250,7 
+250,7 @@ run_compute_workflow <- function(recipe, data) {
   # ---------------------------------------------------------------------------
   # Step 5: Remove temporary files
-  unlink(paste0(recipe$Run$output_dir, "/outputs/tmp/"), recursive = TRUE)
+  unlink(file.path(recipe$Run$output_dir, "outputs", "tmp"), recursive = TRUE)
   # ---------------------------------------------------------------------------
   # Step 6: Save data
-- GitLab
From ce2d5ab98505e58e5107212548790605f02eea76 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Tue, 14 May 2024 12:51:00 +0200
Subject: [PATCH 81/96] bugfix: fix any() condition for chunk_along in recipe checker
---
 tools/check_recipe.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index 47b5f08b..a22f995a 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -902,7 +902,7 @@ check_recipe <- function(recipe) {
     error_status <- T
   }
   # Remove chunks of length 1
-  if (any(recipe$Run$startR_workflow$chunk_along) == 1) {
+  if (any(recipe$Run$startR_workflow$chunk_along == 1) {
     recipe$Run$startR_workflow$chunk_along <-
       recipe$Run$startR_workflow$chunk_along[-which(recipe$Run$startR_workflow$chunk_along == 1)]
     warn(recipe$Run$logger, retrieve = TRUE,
-- GitLab
From f6b6ae1307d2ce4bc5e790a3f421a7e8b8f463ae Mon Sep 17 00:00:00 2001
From: vagudets
Date: Tue, 14 May 2024 17:13:53 +0200
Subject: [PATCH 82/96] Fix paths and checks for metadata retrieval
---
 build_compute_workflow.R  |  4 ++--
 tools/check_recipe.R      |  2 +-
 tools/retrieve_metadata.R | 30 +++++++++++++++++-------------
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 77b16bb9..19106957 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -236,7 +236,7 @@ run_compute_workflow <- function(recipe, data) {
   ## TODO: Replicate for probabilities and other modules
   if (!is.null(res$skill)) {
     tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Skill")
-    metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds"))
+    metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds"))
     res$skill <- ClimProjDiags::ArrayToList(res$skill,
                                             dim = 'metric',
                                             level = 'list',
@@ -247,7 +247,7 @@ run_compute_workflow <- function(recipe, data) {
                                  chunks = recipe$Run$startR_workflow$chunk_along,
                                  array_dims = dim(res$skill[[1]]))
   }
-
+
   # ---------------------------------------------------------------------------
   # Step 5: Remove temporary files
   unlink(file.path(recipe$Run$output_dir, "outputs", "tmp"), recursive = TRUE)
diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index a22f995a..19d5082b 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -902,7 +902,7 @@ check_recipe <- function(recipe) {
     error_status <- T
   }
   # Remove chunks of length 1
-  if (any(recipe$Run$startR_workflow$chunk_along == 1) {
+  if (any(recipe$Run$startR_workflow$chunk_along == 1)) {
     recipe$Run$startR_workflow$chunk_along <-
       recipe$Run$startR_workflow$chunk_along[-which(recipe$Run$startR_workflow$chunk_along == 1)]
     warn(recipe$Run$logger, retrieve = TRUE,
diff --git a/tools/retrieve_metadata.R b/tools/retrieve_metadata.R
index 3550b19a..66e381e6 100644
--- a/tools/retrieve_metadata.R
+++ b/tools/retrieve_metadata.R
@@ -16,14 +16,15 @@ retrieve_metadata <- function(tmp_dir, chunks, array_dims) {
   }
   metadata_file_pattern <- paste0(metadata_file_pattern, ".Rds")
   metadata_files <- list.files(path = tmp_dir, pattern = glob2rx(metadata_file_pattern))
-  metadata <- readRDS(paste0(tmp_dir,
metadata_files[1])) + metadata <- readRDS(file.path(tmp_dir, metadata_files[1])) # Piece together variable info if ("var" %in% names(chunks)) { # $attrs for (i in 2:chunks[["var"]]) { - metadata_chunk <- readRDS(paste0(tmp_dir, - gsub("var_1", paste0("var_", i), - metadata_files[1]))) + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("var_1", + paste0("var_", i), + metadata_files[1])))) var_name <- metadata_chunk$attrs$Variable$varName metadata$attrs$Variable$varName <- c(metadata$attrs$Variable$varName, var_name) @@ -42,9 +43,10 @@ retrieve_metadata <- function(tmp_dir, chunks, array_dims) { if ("time" %in% names(chunks)) { dates_dims <- dim(metadata$attrs$Dates) for (i in 2:chunks[["time"]]) { - metadata_chunk <- readRDS(paste0(tmp_dir, - gsub("time_1", paste0("time_", i), - metadata_files[1]))) + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("time_1", + paste0("time_", i), + metadata_files[1])))) metadata$attrs$Dates <- c(metadata$attrs$Dates, metadata_chunk$attrs$Dates) } time_dim_length <- array_dims[["time"]] @@ -61,9 +63,10 @@ retrieve_metadata <- function(tmp_dir, chunks, array_dims) { # $attrs ## TODO: Preserve attributes for (i in 2:chunks[["latitude"]]) { - metadata_chunk <- readRDS(paste0(tmp_dir, - gsub("latitude_1", paste0("latitude_", i), - metadata_files[1]))) + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("latitude_1", + paste0("latitude_", i), + metadata_files[1])))) metadata$attrs$Variable$metadata$latitude <- c(metadata$attrs$Variable$metadata$latitude, metadata_chunk$attrs$Variable$metadata$latitude) } @@ -78,9 +81,10 @@ retrieve_metadata <- function(tmp_dir, chunks, array_dims) { # $attrs ## TODO: Preserve attributes for (i in 2:chunks[["longitude"]]) { - metadata_chunk <- readRDS(paste0(tmp_dir, - gsub("longitude_1", paste0("longitude_", i), - metadata_files[1]))) + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("longitude_1", + paste0("longitude_", i), + metadata_files[1])))) metadata$attrs$Variable$metadata$longitude <- c(metadata$attrs$Variable$metadata$longitude, metadata_chunk$attrs$Variable$metadata$longitude) } -- GitLab From 5bd4f5f79e8e3e5abd3cb5caa58c37f61c19a1f0 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 24 May 2024 17:03:41 +0200 Subject: [PATCH 83/96] test time aggregation module (WIP) --- build_compute_workflow.R | 7 +++++-- modules/Aggregation/Aggregation.R | 4 ++-- .../atomic_recipes/recipe_test_compute_decadal.yml | 12 ++++++++++-- tools/check_recipe.R | 4 ++-- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 19106957..efb0688b 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -7,7 +7,8 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Anomalies/Anomalies.R") source("modules/Downscaling/Downscaling.R") source("modules/Skill/Skill.R") - # source("modules/Indices/Indices.R") + source("modules/Indices/Indices.R") + source("modules/Aggregation/Aggregation.R") # Define appender with custom log layout so that it knows not to append # within Compute(). 
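
For reference, the Time_aggregation user_def entries added to the decadal test
recipe further down in this patch are plain R expressions (the YAML !expr tag);
over the 24-month forecast horizon they expand to the month indices of each
season. A quick check in an R session (values verified by evaluation):

  sort(c(seq(6, 24, 12), seq(7, 24, 12), seq(8, 24, 12)))
  # [1]  6  7  8 18 19 20    (JJA of both forecast years)
  sort(c(seq(12, 24, 12), seq(13, 24, 12), seq(2, 24, 12)))
  # [1]  2 12 13 14 24       (DJF, wrapping across the year boundary)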
@@ -41,7 +42,9 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
   data <- Units(recipe, data, retrieve = F)
   # Loop over the modules
   for (module in modules) {
-    if (module == "calibration") {
+    if (module == "aggregation") {
+      data <- Aggregation(recipe, data, retrieve = F)
+    } else if (module == "calibration") {
       data <- Calibration(recipe, data, retrieve = F)
     } else if (module == "anomalies") {
       data <- Anomalies(recipe, data, retrieve = F)
diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R
index d5c09fac..f12a570c 100644
--- a/modules/Aggregation/Aggregation.R
+++ b/modules/Aggregation/Aggregation.R
@@ -8,7 +8,7 @@
 ## ini = 2, end = 3 with monthly freq would be a 2 months agg
 source("modules/Aggregation/R/agg_ini_end.R")
-Aggregation <- function(recipe, data) {
+Aggregation <- function(recipe, data, retrieve = TRUE) {
   ## TODO: Move checks to recipe checker
   ncores <- recipe$Analysis$ncores # is it already checked? NULL or number
@@ -58,6 +58,6 @@ Aggregation <- function(recipe, data) {
     }
   }
   return(res)
-  info(recipe$Run$logger,
+  info(recipe$Run$logger, retrieve = retrieve,
        "##### TIME AGGREGATION COMPLETE #####")
 }
diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
index 555eaec7..44f77898 100644
--- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml
+++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
@@ -16,7 +16,7 @@ Analysis:
   Time:
     fcst_year: [2020,2021]
     hcst_start: 1990
-    hcst_end: 1993
+    hcst_end: 2020
 #    season: 'Annual'
     ftime_min: 1
     ftime_max: 24
@@ -33,6 +33,14 @@ Analysis:
       compute: no
       cross_validation:
       save:
+    Time_aggregation:
+      execute: yes
+      method: 'average'
+      user_def:
+        JJA: !expr sort(c(seq(6,24, 12), seq(7,24,12), seq(8, 24, 12)))
+        MAM: !expr sort(c(seq(3,24, 12), seq(4,24,12), seq(5, 24, 12)))
+        SON: !expr sort(c(seq(9,24, 12), seq(10,24,12), seq(11, 24, 12)))
+        DJF: !expr sort(c(seq(12,24, 12), seq(13,24,12), seq(2, 24, 12)))
     Calibration:
       method: bias
       save: 'all'
@@ -55,7 +63,7 @@ Run:
   output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
   startR_workflow:
-    modules: 'anomalies skill' # Modules to run inside Compute(), in order
+    modules: 'aggregation skill' # Modules to run inside Compute(), in order
     return: hcst, fcst, obs # Choose only if they will fit into memory
     save: hcst, fcst, obs # Only if returned
     chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index 9396415d..8ca1890d 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -941,10 +941,10 @@ check_recipe <- function(recipe) {
   STARTR_PARAMS <- c("modules", "chunk_along")
   STARTR_MODULES <- c("calibration", "anomalies", "downscaling",
-                      "skill", "probabilities", "indices")
+                      "skill", "probabilities", "indices", "aggregation")
   CHUNK_DIMS <- c("var", "time", "latitude", "longitude")
   MODULES_USING_LATLON <- c("downscaling", "indices")
-  MODULES_USING_TIME <- c("indicators")
+  MODULES_USING_TIME <- c("indicators", "aggregation")
   MODULES_USING_VAR <- c("indicators")
   # Check that all required fields are present
   if ("startR_workflow" %in% names(recipe$Run)) {
-- GitLab
From 352d228fb1cfca1ebb0ecc790258182a5881eb3e Mon Sep 17 00:00:00 2001
From: vagudets
Date: Fri, 24 May 2024 17:15:38 +0200
Subject: [PATCH 84/96] Adapt loading when retrieve = FALSE for multi_path =
 FALSE case
---
 modules/Loading/R/load_decadal.R
| 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 10651836..09a5969b 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -141,12 +141,18 @@ load_decadal <- function(recipe, retrieve = retrieve) { hcst <- do.call(Start, Start_hcst_arg_list) # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions - dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) - hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + if (retrieve) { + dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) + hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + hcst_dims <- dim(hcst) + } else { + hcst_dims <- c(dat = 1, syear = as.numeric(attr(hcst, "Dimensions"))[1], + attr(hcst, "Dimensions")[2:6]) + } # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only - tmp <- array(dim = c(dim(hcst)[c('syear', 'time')])) + tmp <- array(dim = c(hcst_dims[c('syear', 'time')])) tmp[1, ] <- wrong_time_attr yr_diff <- (sdates_hcst - sdates_hcst[1])[-1] #diff(sdates_hcst) for (i_syear in 1:length(yr_diff)) { @@ -228,11 +234,15 @@ load_decadal <- function(recipe, retrieve = retrieve) { dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + fcst_dims <- dim(fcst) + } else { + fcst_dims <- c(dat = 1, syear = as.numeric(attr(fcst, "Dimensions"))[1], + attr(fcst, "Dimensions")[2:6]) } # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only - tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) + tmp <- array(dim = c(fcst_dims[c('syear', 'time')])) tmp[1, ] <- wrong_time_attr yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) for (i_syear in 1:length(yr_diff)) { -- GitLab From da547a611601fd39d2e2c14b537e7432d4fcbc29 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 28 May 2024 10:46:58 +0200 Subject: [PATCH 85/96] Bugfix: change 'data' to 'x' --- modules/Aggregation/R/agg_ini_end.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Aggregation/R/agg_ini_end.R b/modules/Aggregation/R/agg_ini_end.R index 17c6940a..551e4d49 100644 --- a/modules/Aggregation/R/agg_ini_end.R +++ b/modules/Aggregation/R/agg_ini_end.R @@ -1,4 +1,4 @@ -agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { +agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm, ncores) { # to make the code work with both options: if (!is.null(ini) && is.null(indices)) { # list of vectors for the indices from ini to end pairs @@ -36,7 +36,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { stop("Unknown method") } # Check in case only 1 aggregation only is requested to add time dim back: - if (!('time' %in% names(dim(data[[1]]$data)))) { + if (!('time' %in% names(dim(x[[1]]$data)))) { dim(x[[1]]$data) <- c(dim(x[[1]]$data), time = 1) } x[[1]]$data <- Reorder(x[[1]]$data, original_dims) -- GitLab From 92e671c53ae8440ea48343b5f44f7f630be15866 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 28 May 2024 10:47:50 +0200 Subject: [PATCH 86/96] 
Adjustments to retrieve = FALSE case for decadal data --- modules/Loading/R/load_decadal.R | 1 + modules/Preprocessing/Preprocessing.R | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 09a5969b..724a3471 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -148,6 +148,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { } else { hcst_dims <- c(dat = 1, syear = as.numeric(attr(hcst, "Dimensions"))[1], attr(hcst, "Dimensions")[2:6]) + # attr(hcst, "Dimensions") <- hcst_dims } # Manipulate time attr because Start() cannot read it correctly diff --git a/modules/Preprocessing/Preprocessing.R b/modules/Preprocessing/Preprocessing.R index d6a287e8..1344dfa0 100644 --- a/modules/Preprocessing/Preprocessing.R +++ b/modules/Preprocessing/Preprocessing.R @@ -13,6 +13,11 @@ preprocess_datasets <- function(recipe, data) { if (!("sweek" %in% names(dim(data[[element]])))) { dim(data[[element]]) <- c(dim(data[[element]]), sweek = 1) } + if (!("syear" %in% names(dim(data[[element]]))) && + (dim(data[[element]])[["dat"]] > 1)) { + names(dim(data[[element]]))[which(names(dim(data[[element]])) == 'dat')] <- "syear" + dim(data[[element]]) <- c(dat = 1, dim(data[[element]])) + } } # Add 'ensemble' dimension to obs dim(data$obs) <- c(dim(data$obs), ensemble = 1) -- GitLab From 9e95c9f77b6bdce70e8dad5f7febf640d5b7ea58 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 28 May 2024 10:48:08 +0200 Subject: [PATCH 87/96] Add modified version of as.s2dv_cube() (to be removed later) --- tools/libs.R | 1 + tools/tmp/as.s2dv_cube.R | 363 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 364 insertions(+) create mode 100644 tools/tmp/as.s2dv_cube.R diff --git a/tools/libs.R b/tools/libs.R index fe646cb6..43ae0170 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -44,6 +44,7 @@ source("tools/write_autosubmit_conf.R") source("tools/get_archive.R") source("tools/Utils.R") source("tools/restructure_recipe.R") +source("tools/tmp/as.s2dv_cube.R") # source("tools/add_dims.R") # Not sure if necessary yet # Settings diff --git a/tools/tmp/as.s2dv_cube.R b/tools/tmp/as.s2dv_cube.R new file mode 100644 index 00000000..1b4c4275 --- /dev/null +++ b/tools/tmp/as.s2dv_cube.R @@ -0,0 +1,363 @@ +#'Conversion of 'startR_array' or 'list' objects to 's2dv_cube' +#' +#'This function converts data loaded using Start function from startR package or +#'Load from s2dv into an 's2dv_cube' object. +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#'@author Nicolau Manubens, \email{nicolau.manubens@bsc.es} +#' +#'@param object An object of class 'startR_array' generated from function +#' \code{Start} from startR package or a list output from function \code{Load} +#' from s2dv package. Any other object class will not be accepted. +#'@param remove_attrs_coords A logical value indicating whether to remove the +#' attributes of the coordinates (TRUE) or not (FALSE). The default value is +#' FALSE. +#'@param remove_null Optional. A logical value indicating whether to remove the +#' elements that are NULL (TRUE) or not (FALSE) of the output object. It is +#' only used when the object is an output from function \code{Load}. The +#' default value is FALSE. +#' +#'@return The function returns an 's2dv_cube' object to be easily used with +#'functions with the prefix \code{CST} from CSTools and CSIndicators packages. 
+#'The object is mainly a list with the following elements:\cr +#'\itemize{ +#' \item{'data', array with named dimensions;} +#' \item{'dims', named vector of the data dimensions;} +#' \item{'coords', list of named vectors with the coordinates corresponding to +#' the dimensions of the data parameter;} +#' \item{'attrs', named list with elements: +#' \itemize{ +#' \item{'Dates', array with named temporal dimensions of class 'POSIXct' +#' from time values in the data;} +#' \item{'Variable', has the following components: +#' \itemize{ +#' \item{'varName', character vector of the short variable name. It is +#' usually specified in the parameter 'var' from the functions +#' Start and Load;} +#' \item{'metadata', named list of elements with variable metadata. +#' They can be from coordinates variables (e.g. longitude) or +#' main variables (e.g. 'var');} +#' } +#' } +#' \item{'Datasets', character strings indicating the names of the +#' datasets;} +#' \item{'source_files', a vector of character strings with complete paths +#' to all the found files involved in loading the data;} +#' \item{'when', a time stamp of the date issued by the Start() or Load() +#' call to obtain the data;} +#' \item{'load_parameters', it contains the components used in the +#' arguments to load the data from Start() or Load() functions.} +#' } +#' } +#'} +#' +#'@seealso \code{\link{s2dv_cube}}, \code{\link{CST_Start}}, +#'\code{\link[startR]{Start}} and \code{\link{CST_Load}} +#'@examples +#'\dontrun{ +#'# Example 1: convert an object from startR::Start function to 's2dv_cube' +#'library(startR) +#'repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +#'data <- Start(dat = repos, +#' var = 'tas', +#' sdate = c('20170101', '20180101'), +#' ensemble = indices(1:5), +#' time = 'all', +#' latitude = indices(1:5), +#' longitude = indices(1:5), +#' return_vars = list(latitude = 'dat', longitude = 'dat', time = 'sdate'), +#' retrieve = TRUE) +#'data <- as.s2dv_cube(data) +#'# Example 2: convert an object from s2dv::Load function to 's2dv_cube' +#'startDates <- c('20001101', '20011101', '20021101', +#' '20031101', '20041101', '20051101') +#'data <- Load(var = 'tas', exp = 'system5c3s', +#' nmember = 2, sdates = startDates, +#' leadtimemax = 3, latmin = 10, latmax = 30, +#' lonmin = -10, lonmax = 10, output = 'lonlat') +#'data <- as.s2dv_cube(data) +#'} +#'@export +as.s2dv_cube <- function(object, remove_attrs_coords = FALSE, + remove_null = FALSE) { + + if (is.list(object) & length(object) == 11) { + if (is.null(object) || (is.null(object$mod) && is.null(object$obs))) { + stop("The s2dv::Load call did not return any data.") + } + obs <- object + obs$mod <- NULL + object$obs <- NULL + names(object)[[1]] <- 'data' # exp + names(obs)[[1]] <- 'data' # obs + # obs + if (!is.null(obs$data)) { + obs_exist <- TRUE + obs$Datasets$exp <- NULL + obs$Datasets <- obs$Datasets$obs + } else { + obs_exist <- FALSE + } + # object + if (!is.null(object$data)) { + exp_exist <- TRUE + object$Datasets$obs <- NULL + object$Datasets <- object$Datasets$exp + } else { + exp_exist <- FALSE + } + result <- list() + # obs and exp + if (obs_exist & exp_exist) { + obs_exp = list(exp = object, obs = obs) + } else if (obs_exist & !exp_exist) { + obs_exp = list(obs = obs) + } else { + obs_exp = list(exp = object) + } + i <- 0 + for (obj_i in obs_exp) { + i <- i + 1 + # attrs + obj_i$attrs <- within(obj_i, rm(list = c('data'))) + obj_i <- within(obj_i, rm(list = names(obj_i$attrs))) + dates <- obj_i$attrs$Dates$start + attr(dates, 
'end') <- obj_i$attrs$Dates$end + if (!is.null(dates)) { + dim(dates) <- dim(obj_i$data)[c('ftime', 'sdate')] + obj_i$attrs$Dates <- dates + } + # Variable + varname <- obj_i$attrs$Variable$varName + varmetadata <- NULL + varmetadata[[varname]] <- attributes(obj_i$attrs$Variable)[-1] + obj_i$attrs$Variable <- list(varName = varname, metadata = varmetadata) + # dims + obj_i$dims <- dim(obj_i$data) + # coords + obj_i$coords <- sapply(names(dim(obj_i$data)), function(x) NULL) + # sdate + obj_i$coords$sdate <- obj_i$attrs$load_parameters$sdates + if (!remove_attrs_coords) attr(obj_i$coords$sdate, 'indices') <- FALSE + # lon + if (!is.null(obj_i$attrs$lon)) { + if (remove_attrs_coords) { + obj_i$coords$lon <- as.vector(obj_i$attrs$lon) + } else { + obj_i$coords$lon <- obj_i$attrs$lon + dim(obj_i$coords$lon) <- NULL + attr(obj_i$coords$lon, 'indices') <- FALSE + } + obj_i$attrs$Variable$metadata$lon <- obj_i$attrs$lon + obj_i$attrs <- within(obj_i$attrs, rm(list = 'lon')) + } + # lat + if (!is.null(obj_i$attrs$lat)) { + if (remove_attrs_coords) { + obj_i$coords$lat <- as.vector(obj_i$attrs$lat) + } else { + obj_i$coords$lat <- obj_i$attrs$lat + dim(obj_i$coords$lat) <- NULL + attr(obj_i$coords$lat, 'indices') <- FALSE + } + obj_i$attrs$Variable$metadata$lat <- obj_i$attrs$lat + obj_i$attrs <- within(obj_i$attrs, rm(list = 'lat')) + } + # member + obj_i$coords$member <- 1:obj_i$dims['member'] + if (!remove_attrs_coords) attr(obj_i$coords$member, 'indices') <- TRUE + # dataset + if (!is.null(names(obj_i$attrs$Datasets))) { + obj_i$coords$dataset <- names(obj_i$attrs$Datasets) + if (!remove_attrs_coords) attr(obj_i$coords$dataset, 'indices') <- FALSE + obj_i$attrs$Datasets <- names(obj_i$attrs$Datasets) + } else { + obj_i$coords$dataset <- 1:obj_i$dims['dataset'] + if (!remove_attrs_coords) attr(obj_i$coords$dataset, 'indices') <- TRUE + } + # ftime + obj_i$coords$ftime <- 1:obj_i$dims['ftime'] + if (!remove_attrs_coords) attr(obj_i$coords$ftime, 'indices') <- TRUE + # remove NULL values + if (isTRUE(remove_null)) { + obj_i$attrs$load_parameters <- .rmNullObs(obj_i$attrs$load_parameters) + } + obj_i <- obj_i[c('data', 'dims', 'coords', 'attrs')] + class(obj_i) <- 's2dv_cube' + if (names(obs_exp)[[i]] == 'exp') { + result$exp <- obj_i + } else { + result$obs <- obj_i + } + } + if (is.list(result)) { + if (is.null(result$exp)) { + result <- result$obs + } else if (is.null(result$obs)) { + result <- result$exp + } else { + warning("The output is a list of two 's2dv_cube' objects", + " corresponding to 'exp' and 'obs'.") + } + } + + } else if (inherits(object, 'startR_array')) { + # From Start: + result <- list() + result$data <- as.vector(object) + ## dims + dims <- dim(object) + dim(result$data) <- dims + result$dims <- dims + ## coords + result$coords <- sapply(names(dims), function(x) NULL) + # Find coordinates + FileSelector <- attributes(object)$FileSelectors + VariablesCommon <- names(attributes(object)$Variables$common) + dat <- names(FileSelector)[1] + VariablesDat <- names(attributes(object)$Variables[[dat]]) + varName <- NULL + for (i_coord in names(dims)) { + if (i_coord %in% names(FileSelector[[dat]])) { # coords in FileSelector + coord_in_fileselector <- FileSelector[[dat]][which(i_coord == names(FileSelector[[dat]]))] + if (length(coord_in_fileselector) == 1) { + if (length(coord_in_fileselector[[i_coord]][[1]]) == dims[i_coord]) { + # TO DO: add var_dim parameter + if (i_coord %in% c('var', 'vars')) { + varName <- as.vector(coord_in_fileselector[[i_coord]][[1]]) + } + if 
(remove_attrs_coords) { + result$coords[[i_coord]] <- as.vector(coord_in_fileselector[[i_coord]][[1]]) + } else { + result$coords[[i_coord]] <- coord_in_fileselector[[i_coord]][[1]] + attr(result$coords[[i_coord]], 'indices') <- FALSE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } + } else if (i_coord %in% VariablesCommon) { # coords in common + coord_in_common <- attributes(object)$Variables$common[[which(i_coord == VariablesCommon)]] + if (inherits(coord_in_common, "POSIXct")) { + result$attrs$Dates <- coord_in_common + } + if (length(coord_in_common) == dims[i_coord]) { + if (remove_attrs_coords) { + if (inherits(coord_in_common, "POSIXct")) { + result$coords[[i_coord]] <- 1:dims[i_coord] + attr(result$coords[[i_coord]], 'indices') <- TRUE + } else { + result$coords[[i_coord]] <- as.vector(coord_in_common) + } + } else { + result$coords[[i_coord]] <- coord_in_common + attr(result$coords[[i_coord]], 'indices') <- FALSE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } else if (!is.null(VariablesDat)) { # coords in dat + if (i_coord %in% VariablesDat) { + coord_in_dat <- attributes(object)$Variables[[dat]][[which(i_coord == VariablesDat)]] + if (inherits(coord_in_dat, "POSIXct")) { + result$attrs$Dates <- coord_in_dat + } + if (length(coord_in_dat) == dims[i_coord]) { + if (remove_attrs_coords) { + if (inherits(coord_in_dat, "POSIXct")) { + result$coords[[i_coord]] <- coord_in_dat + } else { + result$coords[[i_coord]] <- as.vector(coord_in_dat) + } + } else { + result$coords[[i_coord]] <- coord_in_dat + attr(result$coords[[i_coord]], 'indices') <- FALSE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } else { # missing other dims + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + dim(result$coords[[i_coord]]) <- NULL + } + + # attrs + ## varName + if (!is.null(varName)) { + result$attrs$Variable$varName <- varName + } + ## Variables + for (var_type in names(attributes(object)$Variables)) { + if (!is.null(attributes(object)$Variables[[var_type]])) { + for (var in names(attributes(object)$Variables[[var_type]])) { + attr_variable <- attributes(object)$Variables[[var_type]][[var]] + if (is.null(result$attrs$Dates)) { + if (inherits(attr_variable, "POSIXct")) { + result$attrs$Dates <- attr_variable + } + } + if (is.null(result$attrs$Variable$metadata[[var]])) { + result$attrs$Variable$metadata[[var]] <- attr_variable + } + } + } + } + ## Datasets + if (length(names(FileSelector)) > 1) { + # lon name + known_lon_names <- .KnownLonNames() + lon_name_dat <- names(dims)[which(names(dims) %in% known_lon_names)] + # lat name + known_lat_names <- .KnownLatNames() + lat_name_dat <- names(dims)[which(names(dims) %in% known_lat_names)] + result$attrs$Datasets <- names(FileSelector) + # TO DO: add dat_dim parameter + if (any(names(dims) %in% c('dat', 'dataset'))) { + dat_dim <- names(dims)[which(names(dims) %in% c('dat', 'dataset'))] + result$coords[[dat_dim]] <- names(FileSelector) + if (!remove_attrs_coords) attr(result$coords[[dat_dim]], 'indices') <- FALSE + } + for (i in 
2:length(names(FileSelector))) { + if (!(length(lon_name_dat) == 0L)) { + if (any(result$coords[[lon_name_dat]] != as.vector(attributes(object)$Variables[[names(FileSelector)[i]]][[lon_name_dat]]))) { + warning("'lon' values are different for different datasets. ", + "Only values from the first will be used.") + } + } + if (!(length(lat_name_dat) == 0L)) { + if (any(result$coords[[lat_name_dat]] != as.vector(attributes(object)$Variables[[names(FileSelector)[i]]][[lat_name_dat]]))) { + warning("'lat' values are different for different datasets. ", + "Only values from the first will be used.") + } + } + } + } else { + result$attrs$Datasets <- names(FileSelector) + } + ## when + result$attrs$when <- Sys.time() + ## source_files + result$attrs$source_files <- attributes(object)$Files + ## load_parameters + result$attrs$load_parameters <- attributes(object)$FileSelectors + class(result) <- 's2dv_cube' + } else if (inherits(object, 'startR_cube')) { + stop("Unsupported object class: 'startR_cube'. ", + "When using startR::Start() or CSTools::CST_Start(), set ", + "'retrieve = TRUE' to ensure the data is retrieved into ", + "memory and can be converted into a 's2dv_cube' object.") + } else { + stop("The class of parameter 'object' is not implemented", + " to be converted into 's2dv_cube' class yet.") + } + return(result) +} -- GitLab From 3b0d6d7c1098915a7f5810b619fcd672ca9c99e4 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 28 May 2024 10:48:27 +0200 Subject: [PATCH 88/96] Refactor code --- build_compute_workflow.R | 77 ++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index efb0688b..fb0e19c4 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -1,5 +1,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, - nchunks = chunk_indices) { + nchunks = chunk_indices, + expected_output_dims) { # Load modules source("modules/Preprocessing/Preprocessing.R") source("modules/Units/Units.R") @@ -9,6 +10,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Skill/Skill.R") source("modules/Indices/Indices.R") source("modules/Aggregation/Aggregation.R") + source("tools/tmp/as.s2dv_cube.R") # Define appender with custom log layout so that it knows not to append # within Compute(). 
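
The refactored input/output setup in the hunks below leans on a small R idiom:
sapply() over a character vector with a function that returns NULL cannot
simplify its result, so it yields a named list of NULL placeholders, one per
dataset, which the loop then fills in. A minimal sketch (the names mirror the
'data' list used in the hunk that follows):

  inputs <- sapply(c("hcst", "obs", "fcst"), function(x) NULL)
  str(inputs)
  # List of 3
  #  $ hcst: NULL
  #  $ obs : NULL
  #  $ fcst: NULL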
@@ -61,12 +63,17 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, } ## TODO: Define what to return depending the modules called + the recipe return_list <- list(hcst = data$hcst$data, - obs = data$obs$data, fcst = data$fcst$data, + obs = data$obs$data, skill = skill_metrics, probabilities = probabilities) # Eliminate NULL elements from the return list return_list <- return_list[!sapply(return_list, is.null)] + # Reorder dimensions as expected + for (element in names(return_list)) { + return_list[[element]] <- s2dv::Reorder(data = return_list[[element]], + order = expected_output_dims[[element]]) + } return(return_list) } @@ -90,31 +97,16 @@ run_compute_workflow <- function(recipe, data) { # --------------------------------------------------------------------------- # Step 2: Define the inputs and outputs for Compute() + + # Reorder data: hcst, obs, fcst + data <- data[c("hcst", "obs", "fcst")] + data <- data[!sapply(data, is.null)] # Retrieve original dimensions - exp_dims <- names(attr(data$hcst, "Dimensions")) - obs_dims <- names(attr(data$obs, "Dimensions")) - # Remove the chunking dimensions from the original dimensions in the arrays, - # to create the target dimensions - exp_target_dims <- - exp_dims[!exp_dims %in% names(recipe$Run$startR_workflow$chunk_along)] - obs_target_dims <- - obs_dims[!obs_dims %in% names(recipe$Run$startR_workflow$chunk_along)] - # Default inputs: hindcast and observations - target_dims <- list(hcst = exp_target_dims, - obs = obs_target_dims) - inputs <- list(hcst = data$hcst, obs = data$obs) - # Define input attributes from Start() + chunking_dims <- names(recipe$Run$startR_workflow$chunk_along) + target_dims <- sapply(names(data), function(x) NULL) + inputs <- sapply(names(data), function(x) NULL) + input_attributes <- sapply(names(data), function(x) NULL) STARTR_CUBE_ATTRS <- c("Variables", "Dimensions", "FileSelectors", "Files") - input_attributes <- list(hcst = STARTR_CUBE_ATTRS, - obs = STARTR_CUBE_ATTRS) - # Add forecast if not empty - if (!is.null(data$fcst)) { - target_dims <- c(target_dims, - list(fcst = exp_target_dims)) - inputs <- c(inputs, list(fcst = data$fcst)) - input_attributes <- c(input_attributes, list(fcst = STARTR_CUBE_ATTRS)) - } - # Create output dimensions ## TODO: Improve this condition if ("indices" %in% modules) { @@ -123,23 +115,31 @@ run_compute_workflow <- function(recipe, data) { spatial_output_dims <- c("latitude", "longitude") } ## TODO: There may be a bug here when the user requests only lat or lon as chunking dim - exp_output_dims <- unique(c(exp_target_dims, spatial_output_dims, 'sday', 'sweek')) - exp_output_dims <- c(exp_output_dims[!exp_output_dims %in% - c('var_dir', names(recipe$Run$startR_workflow$chunk_along))]) + FIXED_OUTPUT_DIMS <- c("dat", "var", "syear", "sday", "sweek", "time", "ensemble") + expected_output_dims <- c(FIXED_OUTPUT_DIMS, spatial_output_dims) + expected_output_dims <- expected_output_dims[!expected_output_dims %in% chunking_dims] # Default outputs: hindcast and observations - output_dims <- list(hcst = exp_output_dims, - obs = exp_output_dims) - # Add forecast if not empty - if (!is.null(data$fcst)) { - output_dims <- c(output_dims, - list(fcst = exp_output_dims)) + output_dims <- sapply(names(data), function(x) NULL) + # Iterate over datasets to define inputs and outputs + for (dataset in names(data)) { + # Retrieve original dimensions + dataset_dims <- names(attr(data[[dataset]], "Dimensions")) + # Remove the chunking dimensions from the original dimensions in the arrays, + # to 
create the target dimensions + target_dims[[dataset]] <- dataset_dims[!dataset_dims %in% chunking_dims] + # Define inputs + inputs[[dataset]] <- data[[dataset]] + # Define input attributes + input_attributes[[dataset]] <- STARTR_CUBE_ATTRS + # Define output dimensions + output_dims[[dataset]] <- expected_output_dims } + # Add skill metrics if skill module is called ## TODO: Fix potential bug if ("skill" %in% modules) { skill_dims <- c('metric', 'var', 'time', spatial_output_dims) - skill_output_dims <- - skill_dims[!skill_dims %in% names(recipe$Run$startR_workflow$chunk_along)] + skill_output_dims <- skill_dims[!skill_dims %in% chunking_dims] output_dims <- c(output_dims, list(skill = skill_output_dims)) } @@ -167,7 +167,8 @@ run_compute_workflow <- function(recipe, data) { wf <- AddStep(inputs = inputs, step = step, - recipe = recipe) + recipe = recipe, + expected_output_dims = output_dims) #--------------------------------------- ## TODO: Create function to generate call to Compute()? @@ -185,7 +186,7 @@ run_compute_workflow <- function(recipe, data) { # Compute locally, in serial res <- Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along, - threads_compute = recipe$Analysis$ncores, + threads_compute = 1, # recipe$Analysis$ncores, threads_load = recipe$Analysis$ncores/2) } else if (run_on %in% c('nord3', 'as_machine')) { #NOTE: autosubmit_suite_dir can be anywhere -- GitLab From cbbdee4ad5f4616c1666f1ccb3a3cfe918d5a6cf Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 29 May 2024 14:32:11 +0200 Subject: [PATCH 89/96] Add notes --- build_compute_workflow.R | 4 ++++ modules/Loading/R/load_decadal.R | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index fb0e19c4..7c2b7d47 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -19,9 +19,13 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Create data list + print(dim(attr(data$hcst, "Variables")$common$time)) data <- list(hcst = hcst, obs = obs, fcst = fcst) + print(dim(attr(data$hcst, "Variables")$common$time)) + print(dim(attr(data$fcst, "Variables")$common$time)) + data <- data[!sapply(data, is.null)] var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]]) # Remove duplicated objects diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 724a3471..94c11fb0 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -152,6 +152,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { } # Manipulate time attr because Start() cannot read it correctly + ## NOTE: Why can't Start() read it correctly?? 
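+  ## (A guess, based on the reconstruction below: 'time' is returned as a
+  ## common attribute, so only the first start date's dates are stored; the
+  ## loop rebuilds the full [syear, time] array by shifting them with yr_diff.)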
wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only tmp <- array(dim = c(hcst_dims[c('syear', 'time')])) tmp[1, ] <- wrong_time_attr @@ -165,7 +166,6 @@ load_decadal <- function(recipe, retrieve = retrieve) { tmp_time_attr <- attr(hcst, 'Variables')$common$time - ## TODO: for retrieve = FALSE, do this in pre-processing # change syear to c(sday, sweek, syear) # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] if (retrieve) { -- GitLab From 8285e66c50d605fc7ef60051e43a40d5f7d75f40 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 10 Jun 2024 11:41:50 +0200 Subject: [PATCH 90/96] Seasonal daily with retrieve = FALSE --- modules/Loading/R/load_seasonal.R | 30 ++++++++++++++++----------- modules/Preprocessing/Preprocessing.R | 3 +++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index c2ff36eb..7277766a 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -123,15 +123,19 @@ load_seasonal <- function(recipe, retrieve = TRUE) { split_multiselected_dims = split_multiselected_dims, retrieve = retrieve) + browser() + if (store.freq %in% c("daily_mean", "daily")) { # Adjusts dims for daily case, could be removed if startR allows # multidim split - names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(hcst))] <- dim(hcst) - dim(hcst) <- default_dims + if (retrieve) { + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + } # Change time attribute dimensions default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) names(dim(attr(hcst, "Variables")$common$time))[which(names( @@ -190,12 +194,14 @@ load_seasonal <- function(recipe, retrieve = TRUE) { if (store.freq %in% c("daily_mean", "daily")) { # Adjusts dims for daily case, could be removed if startR allows # multidim split - names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(fcst))] <- dim(fcst) - dim(fcst) <- default_dims + if (retrieve) { + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + } # Change time attribute dimensions ## TODO: Is this still necessary? 
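   ## (These lines only relabel 'file_date' as 'syear' in the time attribute
   ## and pad singleton sday/sweek dims; presumably needed until startR can
   ## split multi-selected dims on its own, as the surrounding TODOs suggest.)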
From 06edfd69f2f7c53c27fdff460093e0da6e3ad09c Mon Sep 17 00:00:00 2001
From: vagudets
Date: Mon, 10 Jun 2024 11:42:08 +0200
Subject: [PATCH 91/96] Add recipe for testing

---
 .../recipe_test_compute_seasonal_daily.yml | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml

diff --git a/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml b/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml
new file mode 100644
index 00000000..c4d6ee26
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml
@@ -0,0 +1,83 @@
+Description:
+  Author: V. Agudetse
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: tas
+    freq: daily_mean
+    # units: {tas: C}
+  Datasets:
+    System:
+      name: ECMWF-SEAS5
+    Multimodel: False
+    Reference:
+      name: ERA5
+  Time:
+    sdate: '0101'
+    fcst_year: # '2020'
+    hcst_start: '2000'
+    hcst_end: '2010'
+    ftime_min: 1
+    ftime_max: 2
+  Region:
+    latmin: 20
+    latmax: 80
+    lonmin: -80
+    lonmax: 40
+  Regrid:
+    method: bilinear
+    type: "r180x90" #to_reference #'r360x181'
+  Workflow:
+    Anomalies:
+      compute: yes # yes/no, default yes
+      cross_validation: yes # yes/no, default yes
+      save: 'all' # 'all'/'none'/'exp_only'/'fcst_only'
+    Calibration:
+      method: raw
+      save: 'none' # 'all'/'none'/'exp_only'/'fcst_only'
+    # Indices:
+    #   NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes}
+    # Downscaling:
+    #   # Assumption 1: leave-one-out cross-validation is always applied
+    #   # Assumption 2: for analogs, we select the best analog (minimum distance)
+    #   type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.
+    #   int_method: conservative # regridding method accepted by CDO.
+    #   bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'.
+    #   lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn'
+    #   log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+    #   target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO
+    #   nanalogs: # If type analogs. Number of analogs to be searched
+    #   save: 'all' # 'all'/'none'/'exp_only'
+    Skill:
+      metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS
+      save: 'all' # 'all'/'none'
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10]]
+      save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only'
+    Visualization:
+      plots: skill_metrics, forecast_ensemble_mean
+      multi_panel: no
+      projection: cylindrical_equidistant
+    Indicators:
+      index: no
+  ncores: 10
+  remove_NAs: yes
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
+  startR_workflow:
+    modules: anomalies skill # Modules to run inside Compute(), in order
+    return: hcst, fcst, obs # Choose only if they will fit into memory
+    save: hcst, fcst, obs # Only if returned
+    chunk_along: {time: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    run_on: local # options: local, as_machine, nord3
+    chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local'
+    expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O
+    hpc_user: bsc32762 # your hpc username. Only if run_on is not 'local'
+    slurm_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] # Only if run_on is not 'local'
+
+
--
GitLab

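In this recipe, chunk_along is the named list that run_compute_workflow() passes straight to the chunks argument of startR's Compute() (see the Compute() call in the first patch above). A small sketch of the parsed structure, with the recipe assumed to be read into a nested list, e.g. by yaml::read_yaml():

recipe <- list(Run = list(startR_workflow = list(chunk_along = list(time = 2))))
str(recipe$Run$startR_workflow$chunk_along)
# List of 1
#  $ time: num 2
# Inside run_compute_workflow(), this list becomes:
# res <- Compute(wf$hcst,
#                chunks = recipe$Run$startR_workflow$chunk_along,
#                ...)
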
From e72776615b73156fc082f4b8950a2e8e44e76e67 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Mon, 10 Jun 2024 11:42:29 +0200
Subject: [PATCH 92/96] testing

---
 recipes/atomic_recipes/recipe_test_compute_decadal.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
index 44f77898..df8d22aa 100644
--- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml
+++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
@@ -16,7 +16,7 @@ Analysis:
   Time:
     fcst_year: [2020,2021]
     hcst_start: 1990
-    hcst_end: 2020
+    hcst_end: 2018
 #    season: 'Annual'
     ftime_min: 1
     ftime_max: 24
--
GitLab


From ad3e5cbf325f9ef27468820c80a511718061516e Mon Sep 17 00:00:00 2001
From: vagudets
Date: Mon, 10 Jun 2024 11:43:58 +0200
Subject: [PATCH 93/96] WIP

---
 build_compute_workflow.R | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 7c2b7d47..23cc3a42 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -19,12 +19,9 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
   modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
                               ", | |,")[[1]])
   # Create data list
-  print(dim(attr(data$hcst, "Variables")$common$time))
   data <- list(hcst = hcst,
                obs = obs,
                fcst = fcst)
-  print(dim(attr(data$hcst, "Variables")$common$time))
-  print(dim(attr(data$fcst, "Variables")$common$time))
 
   data <- data[!sapply(data, is.null)]
   var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]])
@@ -118,7 +115,7 @@ run_compute_workflow <- function(recipe, data) {
   } else {
     spatial_output_dims <- c("latitude", "longitude")
   }
-  ## TODO: There may be a bug here when the user requests only lat or lon as chunking dim
+
   FIXED_OUTPUT_DIMS <- c("dat", "var", "syear", "sday", "sweek", "time", "ensemble")
   expected_output_dims <- c(FIXED_OUTPUT_DIMS, spatial_output_dims)
   expected_output_dims <- expected_output_dims[!expected_output_dims %in% chunking_dims]
--
GitLab

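The expected_output_dims logic touched above simply drops the user's chunking dimensions from the fixed-plus-spatial dimension set. Reproduced standalone (the chunking_dims value is illustrative):

FIXED_OUTPUT_DIMS <- c("dat", "var", "syear", "sday", "sweek", "time", "ensemble")
spatial_output_dims <- c("latitude", "longitude")
chunking_dims <- c("time")  # e.g. from recipe$Run$startR_workflow$chunk_along
expected_output_dims <- c(FIXED_OUTPUT_DIMS, spatial_output_dims)
# Chunked dimensions are handled by Compute() itself, so exclude them here
expected_output_dims <- expected_output_dims[!expected_output_dims %in% chunking_dims]
expected_output_dims
# [1] "dat" "var" "syear" "sday" "sweek" "ensemble" "latitude" "longitude"
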
From a69cdd9744d88ab8b543f88c7801287c7761fbe8 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Mon, 10 Jun 2024 15:30:07 +0200
Subject: [PATCH 94/96] Workaround for daily seasonal metadata

---
 modules/Preprocessing/Preprocessing.R | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/modules/Preprocessing/Preprocessing.R b/modules/Preprocessing/Preprocessing.R
index aedd85bb..96196344 100644
--- a/modules/Preprocessing/Preprocessing.R
+++ b/modules/Preprocessing/Preprocessing.R
@@ -15,11 +15,14 @@ preprocess_datasets <- function(recipe, data) {
     }
     if (!("syear" %in% names(dim(data[[element]]))) &&
         (dim(data[[element]])[["dat"]] > 1)) {
-      names(dim(data[[element]]))[which(names(dim(data[[element]])) == 'dat')] <- "syear"
+      names(dim(data[[element]]))[which(names(dim(data[[element]])) == "dat")] <- "syear"
       dim(data[[element]]) <- c(dat = 1, dim(data[[element]]))
     } else if (!("syear" %in% names(dim(data[[element]]))) &&
                ("file_date" %in% names(dim(data[[element]])))) {
-      names(dim(data[[element]]))[which(names(dim(data[[element]])) == 'file_date')] <- "syear"
+      names(dim(data[[element]]))[which(names(dim(data[[element]])) == "file_date")] <- "syear"
+      time_dim_names <- names(dim(attr(data[[element]], "Variables")$common$time))
+      time_dim_names[which(time_dim_names == "file_date")] <- "syear"
+      names(dim(attr(data[[element]], "Variables")$common$time)) <- time_dim_names
     }
   }
   # Add 'ensemble' dimension to obs
--
GitLab


From 84d5e9ee0fffcec898661433a3d20994786a3311 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Thu, 13 Jun 2024 09:47:23 +0200
Subject: [PATCH 95/96] Testing

---
 .../recipe_test_compute_decadal.yml        |  4 +-
 .../recipe_test_compute_decadal_daily.yml  | 74 +++++++++++++++++++
 .../recipe_test_compute_seasonal_daily.yml |  2 +-
 3 files changed, 77 insertions(+), 3 deletions(-)
 create mode 100644 recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml

diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
index df8d22aa..ef500b43 100644
--- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml
+++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
@@ -16,7 +16,7 @@ Analysis:
   Time:
     fcst_year: [2020,2021]
     hcst_start: 1990
-    hcst_end: 2018
+    hcst_end: 2000
 #    season: 'Annual'
     ftime_min: 1
     ftime_max: 24
@@ -60,7 +60,7 @@ Analysis:
 Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
   startR_workflow:
     modules: 'aggregation skill' # Modules to run inside Compute(), in order
diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml b/recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml
new file mode 100644
index 00000000..dd78e5d2
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml
@@ -0,0 +1,74 @@
+Description:
+  Author: An-Chi Ho
+  '': split version
+Analysis:
+  Horizon: Decadal
+  Variables:
+    name: tas
+    freq: daily_mean
+  Datasets:
+    System:
+      name: EC-Earth3-i4 #CanESM5
+      member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all'
+    Multimodel: no
+    Reference:
+      name: ERA5 #JRA-55
+  Time:
+    fcst_year: [2020,2021]
+    hcst_start: 1990
+    hcst_end: 1994
+#    season: 'Annual'
+    ftime_min: 1
+    ftime_max: 12
+  Region:
+    latmin: -10 #-90
+    latmax: 20 #90
+    lonmin: 0
+    lonmax: 30 #359.9
+  Regrid:
+    method: bilinear
+    type: to_system #to_reference
+  Workflow:
+    Anomalies:
+      compute: no
+      cross_validation:
+      save:
+    Time_aggregation:
+      execute: yes
+      method: 'average'
+      user_def:
+        JJA: !expr sort(c(seq(6,24, 12), seq(7,24,12), seq(8, 24, 12)))
+        MMA: !expr sort(c(seq(3,24, 12), seq(4,24,12), seq(5, 24, 12)))
+        SON: !expr sort(c(seq(9,24, 12), seq(10,24,12), seq(11, 24, 12)))
+        DJF: !expr sort(c(seq(12,24, 12), seq(13,24,12), seq(2, 24, 12)))
+    Calibration:
+      method: bias
+      save: 'all'
+    Skill:
+      metric: RPSS
+      save: 'all'
+    Probabilities:
+      percentiles: [[1/3, 2/3]]
+      save: 'all'
+    Indicators:
+      index: FALSE
+    Visualization:
+      plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles
+  ncores: 10 # Optional, int: number of cores, defaults to 1
+  remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
+  startR_workflow:
+    modules: 'aggregation skill' # Modules to run inside Compute(), in order
+    return: hcst, fcst, obs # Choose only if they will fit into memory
+    save: hcst, fcst, obs # Only if returned
+    chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    run_on: local # options: local, as_machine, nord3
+    chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local'
+    expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O
+    hpc_user: bsc32762 # your hpc username. Only if run_on is not 'local'
+    slurm_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] # Only if run_on is not 'local'
diff --git a/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml b/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml
index c4d6ee26..2a34c36e 100644
--- a/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml
+++ b/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml
@@ -73,7 +73,7 @@ Run:
     modules: anomalies skill # Modules to run inside Compute(), in order
     return: hcst, fcst, obs # Choose only if they will fit into memory
     save: hcst, fcst, obs # Only if returned
-    chunk_along: {time: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    chunk_along: {time: 6} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
     run_on: local # options: local, as_machine, nord3
     chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local'
     expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O
--
GitLab

From f0c01d29c8310bcccf8e51767b49dbaadf14c2df Mon Sep 17 00:00:00 2001
From: vagudets
Date: Thu, 13 Jun 2024 09:47:44 +0200
Subject: [PATCH 96/96] Add metadata saving and retrieval for aggregation
 module

---
 build_compute_workflow.R          | 36 +++++++++++++++++++++----------
 example_scripts/test_compute.R    |  3 ++-
 modules/Aggregation/Aggregation.R | 19 +++++++++++++++-
 tools/retrieve_metadata.R         |  5 ++---
 4 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 23cc3a42..bbddc1d2 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -46,7 +46,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
   # Loop over the modules
   for (module in modules) {
     if (module == "aggregation") {
-      data <- Aggregation(recipe, data, retrieve = F)
+      data <- Aggregation(recipe, data, retrieve = F, nchunks = nchunks)
     } else if (module == "calibration") {
       data <- Calibration(recipe, data, retrieve = F)
    } else if (module == "anomalies") {
@@ -88,7 +88,7 @@ convert_to_s2dv_cube <- function(new_cube, original_cube) {
   return(new_cube)
 }
 
-run_compute_workflow <- function(recipe, data) {
+run_compute_workflow <- function(recipe, data, last_module = NULL) {
 
   # ---------------------------------------------------------------------------
   # Step 1: Retrieve the modules that will be called inside the workflow
@@ -187,7 +187,7 @@ run_compute_workflow <- function(recipe, data) {
     # Compute locally, in serial
     res <- Compute(wf$hcst,
                    chunks = recipe$Run$startR_workflow$chunk_along,
-                   threads_compute = 1, # recipe$Analysis$ncores,
+                   threads_compute = 1, # recipe$Analysis$ncores, # 1
                    threads_load = recipe$Analysis$ncores/2)
   } else if (run_on %in% c('nord3', 'as_machine')) {
     #NOTE: autosubmit_suite_dir can be anywhere
@@ -230,14 +230,28 @@ run_compute_workflow <- function(recipe, data) {
 
   # ---------------------------------------------------------------------------
   # Step 4: Convert results to s2dv_cube/list format
+  source("tools/retrieve_metadata.R")
   for (cube in c("hcst", "obs", "fcst")) {
-    if (!is.null(res[[cube]])) {
-      res$data[[cube]] <- convert_to_s2dv_cube(res[[cube]], data[[cube]])
-    } else {
-      res$data[[cube]] <- NULL
-    }
-    res[[cube]] <- NULL
+    if (!is.null(res[[cube]])) {
+      if (!is.null(last_module)) {
+        tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp",
+                             str_to_title(last_module))
+        res$data[[cube]] <- retrieve_metadata(tmp_dir = tmp_dir,
+                                              chunks = recipe$Run$startR_workflow$chunk_along,
+                                              array_dims = dim(res[[cube]]),
+                                              metadata_file_pattern = paste0(last_module,
+                                                                             "_metadata_",
+                                                                             cube))
+        res$data[[cube]]$data <- res[[cube]]
+      } else {
+        res$data[[cube]] <- convert_to_s2dv_cube(res[[cube]], data[[cube]])
+      }
+    } else {
+      res$data[[cube]] <- NULL
+    }
+    res[[cube]] <- NULL
   }
+  ## TODO: Replicate for probabilities and other modules
   if (!is.null(res$skill)) {
     tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Skill")
@@ -247,10 +261,10 @@ run_compute_workflow <- function(recipe, data) {
                        level = 'list',
                        names = metric_list)
     # Put chunked metadata back together
-    source("tools/retrieve_metadata.R")
     res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir,
                                             chunks = recipe$Run$startR_workflow$chunk_along,
-                                            array_dims = dim(res$skill[[1]]))
+                                            array_dims = dim(res$skill[[1]]),
+                                            metadata_file_pattern = "skill_metadata")
   }
 
   # ---------------------------------------------------------------------------
diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R
index 5622c5ec..9726ee71 100644
--- a/example_scripts/test_compute.R
+++ b/example_scripts/test_compute.R
@@ -9,7 +9,8 @@ source("modules/Saving/Saving.R")
 source("modules/Visualization/Visualization.R")
 source("build_compute_workflow.R")
 
-recipe_file <- "recipes/atomic_recipes/recipe_test_compute_decadal.yml"
+# recipe_file <- "recipes/atomic_recipes/recipe_test_compute_decadal.yml"
+recipe_file <- "recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml"
 recipe <- prepare_outputs(recipe_file)
 
 # Load datasets
diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R
index f12a570c..baf42984 100644
--- a/modules/Aggregation/Aggregation.R
+++ b/modules/Aggregation/Aggregation.R
@@ -8,7 +8,7 @@
 ## ini = 2, end = 3 with monthly freq would be a 2 months agg
 source("modules/Aggregation/R/agg_ini_end.R")
 
-Aggregation <- function(recipe, data, retrieve = TRUE) {
+Aggregation <- function(recipe, data, retrieve = TRUE, nchunks = nchunks) {
   ## TODO: Move checks to recipe checker
   ncores <- recipe$Analysis$ncores # is it already checked? NULL or number
@@ -57,6 +57,23 @@ Aggregation <- function(recipe, data, retrieve = TRUE) {
       }
     }
   }
+  if (!retrieve) {
+    tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Aggregation/")
+    if (!dir.exists(tmp_dir)) {
+      dir.create(tmp_dir, recursive = TRUE)
+    }
+    # Save s2dv_cube metadata
+    for (chunk in names(nchunks)) {
+      for (element in names(data)) {
+        metadata_filename <- paste0("aggregation_metadata", "_", element, "_",
+                                    chunk, "_", nchunks[[chunk]], ".Rds")
+        cube_metadata <- list(dims = res[[element]]$dims,
+                              coords = res[[element]]$coords,
+                              attrs = res[[element]]$attrs)
+        saveRDS(cube_metadata, paste0(tmp_dir, metadata_filename))
+      }
+    }
+  }
   return(res)
   info(recipe$Run$logger, retrieve = retrieve,
        "##### TIME AGGREGATION COMPLETE #####")
diff --git a/tools/retrieve_metadata.R b/tools/retrieve_metadata.R
index 66e381e6..f1784425 100644
--- a/tools/retrieve_metadata.R
+++ b/tools/retrieve_metadata.R
@@ -6,11 +6,10 @@
 # chunks: named list of chunking dimensions with the amount of chunks for each.
 # array_dims: vector containing the dimensions of the final array.
 
-retrieve_metadata <- function(tmp_dir, chunks, array_dims) {
-  ## TODO: add array_dims parameter
+retrieve_metadata <- function(tmp_dir, chunks, array_dims,
+                              metadata_file_pattern) {
   ## For each chunking dimension
   # Build metadata file pattern:
-  metadata_file_pattern <- "skill_metadata"
   for (chunk in sort(names(chunks))) {
     metadata_file_pattern <- paste0(metadata_file_pattern, "_", chunk, "_*")
   }
--
GitLab
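
Taken together, the Aggregation.R and retrieve_metadata.R changes in this last patch implement a per-chunk metadata round trip: each chunk saves its s2dv_cube metadata to an .Rds file tagged with the chunking dimension and chunk index, and retrieve_metadata() rebuilds the same name pattern to collect the files afterwards. A rough sketch of the naming convention; the paths, dims, and the .Rds globbing detail are assumptions, and the real function also merges coordinate metadata across chunks:

tmp_dir <- tempdir()
chunks <- list(time = 2)  # recipe$Run$startR_workflow$chunk_along
# Inside one chunk of the Compute() call (here: chunk 1 of 2 along 'time'),
# mirroring the filename built in Aggregation():
metadata_filename <- paste0("aggregation_metadata", "_", "hcst", "_",
                            "time", "_", 1, ".Rds")
saveRDS(list(dims = c(time = 1)), file.path(tmp_dir, metadata_filename))
# Back on the main process, rebuild the glob pattern the same way
# retrieve_metadata() does, appending "_<dim>_*" per sorted chunking dim:
metadata_file_pattern <- "aggregation_metadata_hcst"
for (chunk in sort(names(chunks))) {
  metadata_file_pattern <- paste0(metadata_file_pattern, "_", chunk, "_*")
}
Sys.glob(file.path(tmp_dir, paste0(metadata_file_pattern, ".Rds")))
# [1] ".../aggregation_metadata_hcst_time_1.Rds"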