diff --git a/.gitignore b/.gitignore index 263c4e640a4ffe3bd13bf6a14f80c37553954d4d..331f47eb2582f002a26c7ededf5cc961d836ccd3 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ ecsbatch.log* modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml conf/vitigeoss-vars-dict.yml +STARTR_CHUNKING_*/ +# TODO: ignore bigmemory object like a68e_1_2 and a68e_1_2.desc diff --git a/build_compute_workflow.R b/build_compute_workflow.R new file mode 100644 index 0000000000000000000000000000000000000000..bbddc1d226297213354771f7cffe07909ec70013 --- /dev/null +++ b/build_compute_workflow.R @@ -0,0 +1,290 @@ +compute_workflow <- function(recipe, hcst, obs, fcst = NULL, + nchunks = chunk_indices, + expected_output_dims) { + # Load modules + source("modules/Preprocessing/Preprocessing.R") + source("modules/Units/Units.R") + source("modules/Calibration/Calibration.R") + source("modules/Anomalies/Anomalies.R") + source("modules/Downscaling/Downscaling.R") + source("modules/Skill/Skill.R") + source("modules/Indices/Indices.R") + source("modules/Aggregation/Aggregation.R") + source("tools/tmp/as.s2dv_cube.R") + + # Define appender with custom log layout so that it knows not to append + # within Compute(). + recipe$Run$logger <- log4r::logger(threshold = recipe$Run$Loglevel, + appenders = list(console_appender(layout = .custom_log_layout()))) + modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, + ", | |,")[[1]]) + # Create data list + data <- list(hcst = hcst, + obs = obs, + fcst = fcst) + + data <- data[!sapply(data, is.null)] + var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]]) + # Remove duplicated objects + rm(hcst, obs, fcst) + # Pre-process datasets + data <- preprocess_datasets(recipe, data) + # Transform data to s2dv_cube + data <- lapply(data, + function(x) { + class(x) <- "startR_array" + as.s2dv_cube(x) + }) + for (cube_idx in seq(1:length(data))) { + data[[cube_idx]]$attrs$Variable$varName <- var_name + } + # Define other outputs + skill_metrics <- NULL + probabilities <- NULL + # Change units + data <- Units(recipe, data, retrieve = F) + # Loop over the modules + for (module in modules) { + if (module == "aggregation") { + data <- Aggregation(recipe, data, retrieve = F, nchunks = nchunks) + } else if (module == "calibration") { + data <- Calibration(recipe, data, retrieve = F) + } else if (module == "anomalies") { + data <- Anomalies(recipe, data, retrieve = F) + } else if (module == "downscaling") { + data <- Downscaling(recipe, data, retrieve = F) + } else if (module == "skill") { + skill_metrics <- Skill(recipe, data, retrieve = F, nchunks = nchunks) + } else if (module == "probabilities") { + ## TODO: Adapt probabilities output + probabilities <- Probabilities(recipe, data, retrieve = F, nchunks = nchunks) + } else if (module == "indices") { + data <- Indices(recipe, data, retrieve = F) + } + } + ## TODO: Define what to return depending the modules called + the recipe + return_list <- list(hcst = data$hcst$data, + fcst = data$fcst$data, + obs = data$obs$data, + skill = skill_metrics, + probabilities = probabilities) + # Eliminate NULL elements from the return list + return_list <- return_list[!sapply(return_list, is.null)] + # Reorder dimensions as expected + for (element in names(return_list)) { + return_list[[element]] <- s2dv::Reorder(data = return_list[[element]], + order = expected_output_dims[[element]]) + } + return(return_list) +} + +convert_to_s2dv_cube <- function(new_cube, 
original_cube) { + attr(new_cube, "Variables") <- attr(original_cube, "Variables") + attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors") + attr(new_cube, "Files") <- attr(original_cube, "Files") + + class(new_cube) <- "startR_array" + new_cube <- as.s2dv_cube(new_cube) + return(new_cube) +} + +run_compute_workflow <- function(recipe, data, last_module = NULL) { + + # --------------------------------------------------------------------------- + # Step 1: Retrieve the modules that will be called inside the workflow + + modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, + ", | |,")[[1]]) + + # --------------------------------------------------------------------------- + # Step 2: Define the inputs and outputs for Compute() + + # Reorder data: hcst, obs, fcst + data <- data[c("hcst", "obs", "fcst")] + data <- data[!sapply(data, is.null)] + # Retrieve original dimensions + chunking_dims <- names(recipe$Run$startR_workflow$chunk_along) + target_dims <- sapply(names(data), function(x) NULL) + inputs <- sapply(names(data), function(x) NULL) + input_attributes <- sapply(names(data), function(x) NULL) + STARTR_CUBE_ATTRS <- c("Variables", "Dimensions", "FileSelectors", "Files") + # Create output dimensions + ## TODO: Improve this condition + if ("indices" %in% modules) { + spatial_output_dims <- c("region") + } else { + spatial_output_dims <- c("latitude", "longitude") + } + + FIXED_OUTPUT_DIMS <- c("dat", "var", "syear", "sday", "sweek", "time", "ensemble") + expected_output_dims <- c(FIXED_OUTPUT_DIMS, spatial_output_dims) + expected_output_dims <- expected_output_dims[!expected_output_dims %in% chunking_dims] + # Default outputs: hindcast and observations + output_dims <- sapply(names(data), function(x) NULL) + # Iterate over datasets to define inputs and outputs + for (dataset in names(data)) { + # Retrieve original dimensions + dataset_dims <- names(attr(data[[dataset]], "Dimensions")) + # Remove the chunking dimensions from the original dimensions in the arrays, + # to create the target dimensions + target_dims[[dataset]] <- dataset_dims[!dataset_dims %in% chunking_dims] + # Define inputs + inputs[[dataset]] <- data[[dataset]] + # Define input attributes + input_attributes[[dataset]] <- STARTR_CUBE_ATTRS + # Define output dimensions + output_dims[[dataset]] <- expected_output_dims + } + + # Add skill metrics if skill module is called + ## TODO: Fix potential bug + if ("skill" %in% modules) { + skill_dims <- c('metric', 'var', 'time', spatial_output_dims) + skill_output_dims <- skill_dims[!skill_dims %in% chunking_dims] + output_dims <- c(output_dims, + list(skill = skill_output_dims)) + } + ## TODO: Add Indices (Niño1+2, Niño3, Niño3.4, Niño4, NAO) + ## Instead of having the index dimension, we could add a second + ## loop and then define outputs dynamically based on the recipe; + ## one s2dv_cube per index. At least in the case of ENSO. 
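+  ## Untested sketch of that idea, assuming one entry per index under
+  ## recipe$Analysis$Workflow$Indices (e.g. Nino3.4, NAO), each producing
+  ## its own output with the same non-chunked dimensions:
+  # for (index in names(recipe$Analysis$Workflow$Indices)) {
+  #   index_dims <- c('var', 'syear', 'time', 'region')
+  #   output_dims[[tolower(index)]] <- index_dims[!index_dims %in% chunking_dims]
+  # }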
+ # if ("indices" %in% modules) { + # idx_dims <- c('index', 'var', 'syear', 'time', 'region') + # } + + # --------------------------------------------------------------------------- + # Step 3: Generate the Step and call Compute() + # Libraries to be loaded when running Compute() + LIBS_LOAD <- c("log4r", "docopt", "ClimProjDiags", "multiApply", "yaml", + "s2dv", "abind", "easyNCDF", "CSTools", "lubridate", "PCICt", + "RColorBrewer", "configr", "sf", "ggplot2", "rnaturalearth", + "cowplot", "stringr", "pryr") + + step <- Step(fun = compute_workflow, + target_dims = target_dims, + output_dims = output_dims, + use_attributes = input_attributes, + use_libraries = LIBS_LOAD) + + wf <- AddStep(inputs = inputs, + step = step, + recipe = recipe, + expected_output_dims = output_dims) + + #--------------------------------------- + ## TODO: Create function to generate call to Compute()? + # Compute locally + run_on <- recipe$Run$startR_workflow$run_on + + r_module_ver <- list(local = "R/4.1.2-foss-2015a-bare", + as_machine = "R/4.1.2-foss-2015a-bare", + nord3 = "R/4.1.2-foss-2019b") + cdo_module_ver <- list(local = "CDO/1.9.8-foss-2015a", + as_machine = "CDO/1.9.8-foss-2015a", + nord3 = "CDO/1.9.8-foss-2019b") + + if (run_on == 'local') { + # Compute locally, in serial + res <- Compute(wf$hcst, + chunks = recipe$Run$startR_workflow$chunk_along, + threads_compute = 1, # recipe$Analysis$ncores, # 1 + threads_load = recipe$Analysis$ncores/2) + } else if (run_on %in% c('nord3', 'as_machine')) { + #NOTE: autosubmit_suite_dir can be anywhere + ## TODO: Change code_dir to autosubmit dir when necessary + autosubmit_suite_dir <- recipe$Run$code_dir + if (!is.null(recipe$Run$startR_workflow$slurm_directives)) { + extra_queue_params <- as.list(recipe$Run$startR_workflow$slurm_directives) + } else { + extra_queue_params <- NULL + } + ## TODO: Review the core/thread setting + # Compute in cluster/AS machine, in parallel, with autosubmit + res <- Compute(wf$hcst, + chunks = recipe$Run$startR_workflow$chunk_along, + threads_compute = recipe$Analysis$ncores, + threads_load = 2, + cluster = list( + queue_host = run_on, # name in platforms.yml + r_module = r_module_ver[[run_on]], + CDO_module = cdo_module_ver[[run_on]], + init_commands = list('module load GDAL PROJ GEOS'), + autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3', + cores_per_job = recipe$Analysis$ncores, + job_wallclock = recipe$Run$startR_workflow$chunk_wallclock, + max_jobs = prod(unlist(recipe$Run$startR_workflow$chunk_along)), + polling_period = 10, + extra_queue_params = extra_queue_params, + expid = recipe$Run$startR_workflow$expid, + hpc_user = recipe$Run$startR_workflow$hpc_user, + run_dir = recipe$Run$code_dir + ), + workflow_manager = 'autosubmit', # 'ecFlow' + autosubmit_suite_dir = autosubmit_suite_dir, + autosubmit_server = NULL, #or 'bscesautosubmit02', + wait = TRUE + ) + } + info(recipe$Run$logger, retrieve = TRUE, + "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####") + + # --------------------------------------------------------------------------- + # Step 4: Convert results to s2dv_cube/list format + source("tools/retrieve_metadata.R") + for (cube in c("hcst", "obs", "fcst")) { + if (!is.null(res[[cube]])) { + if (!is.null(last_module)) { + tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", + str_to_title(last_module)) + res$data[[cube]] <- retrieve_metadata(tmp_dir = tmp_dir, + chunks = recipe$Run$startR_workflow$chunk_along, + array_dims = dim(res[[cube]]), + metadata_file_pattern = paste0(last_module, + 
"_metadata_", + cube)) + res$data[[cube]]$data <- res[[cube]] + } else { + res$data[[cube]] <- convert_to_s2dv_cube(res[[cube]], data[[cube]]) + } + } else { + res$data[[cube]] <- NULL + } + res[[cube]] <- NULL + } + + ## TODO: Replicate for probabilities and other modules + if (!is.null(res$skill)) { + tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Skill") + metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds")) + res$skill <- ClimProjDiags::ArrayToList(res$skill, + dim = 'metric', + level = 'list', + names = metric_list) + # Put chunked metadata back together + res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, + chunks = recipe$Run$startR_workflow$chunk_along, + array_dims = dim(res$skill[[1]]), + metadata_file_pattern = "skill_metadata") + } + + # --------------------------------------------------------------------------- + # Step 5: Remove temporary files + unlink(file.path(recipe$Run$output_dir, "outputs", "tmp"), recursive = TRUE) + + # --------------------------------------------------------------------------- + # Step 6: Save data + ## PROBLEM: Defining the output dir + # Saving(recipe = recipe, data = res$data, skill_metrics = res$skill) + if (!is.null(res$skill) && recipe$Analysis$Workflow$Skill$save == 'all') { + save_metrics(recipe = recipe, metrics = res$skill, + agg = "global", module = "Skill") + } + # --------------------------------------------------------------------------- + # Step 7: Return outputs + res <- res[!sapply(res, is.null)] + info(recipe$Run$logger, retrieve = TRUE, + "##### DATA RETURNED AS A NAMED LIST #####") + info(recipe$Run$logger, retrieve = TRUE, + paste0("Steps performed: ", recipe$Run$startR_workflow$modules)) + return(res) +} diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R new file mode 100644 index 0000000000000000000000000000000000000000..9726ee710770ea8d8761c429e358decb33c8f415 --- /dev/null +++ b/example_scripts/test_compute.R @@ -0,0 +1,24 @@ +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Preprocessing/Preprocessing.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Indices/Indices.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("build_compute_workflow.R") + +# recipe_file <- "recipes/atomic_recipes/recipe_test_compute_decadal.yml" +recipe_file <- "recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml" +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- Loading(recipe, retrieve = F) + +# Run workflow with compute +result <- run_compute_workflow(recipe, data) + +# Plot data +Visualization(recipe, data = result$data, skill_metrics = result$skill, + significance = T) diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index d5c09faca9f7a44d8a650fee2bc286f88fabb69f..baf429841e98e7e494c672176abf4b9e163f2d31 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -8,7 +8,7 @@ ## ini = 2, end = 3 with monthly freq would be a 2 months agg source("modules/Aggregation/R/agg_ini_end.R") -Aggregation <- function(recipe, data) { +Aggregation <- function(recipe, data, retrieve = TRUE, nchunks = nchunks) { ## TODO: Move checks to recipe checker ncores <- recipe$Analysis$ncores # is it already checked? 
NULL or number @@ -57,7 +57,24 @@ Aggregation <- function(recipe, data) { } } } + if (!retrieve) { + tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Aggregation/") + if (!dir.exists(tmp_dir)) { + dir.create(tmp_dir, recursive = TRUE) + } + # Save s2dv_cube metadata + for (chunk in names(nchunks)) { + for (element in names(data)) { + metadata_filename <- paste0("aggregation_metadata", "_", element, "_", + chunk, "_", nchunks[[chunk]], ".Rds") + cube_metadata <- list(dims = res[[element]]$dims, + coords = res[[element]]$coords, + attrs = res[[element]]$attrs) + saveRDS(cube_metadata, paste0(tmp_dir, metadata_filename)) + } + } + } return(res) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### TIME AGGREGATION COMPLETE #####") } diff --git a/modules/Aggregation/R/agg_ini_end.R b/modules/Aggregation/R/agg_ini_end.R index 17c6940a7a77ccf65f42c1fc110856d171f9bee1..551e4d49ad80a46b356e12777228092c261c0342 100644 --- a/modules/Aggregation/R/agg_ini_end.R +++ b/modules/Aggregation/R/agg_ini_end.R @@ -1,4 +1,4 @@ -agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { +agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm, ncores) { # to make the code work with both options: if (!is.null(ini) && is.null(indices)) { # list of vectors for the indices from ini to end pairs @@ -36,7 +36,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { stop("Unknown method") } # Check in case only 1 aggregation only is requested to add time dim back: - if (!('time' %in% names(dim(data[[1]]$data)))) { + if (!('time' %in% names(dim(x[[1]]$data)))) { dim(x[[1]]$data) <- c(dim(x[[1]]$data), time = 1) } x[[1]]$data <- Reorder(x[[1]]$data, original_dims) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 939e427e0723d1a88a0155658039ce05625e5aed..e4b76087978fbaf4bdc2bbcee18a405bfb69647e 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -1,10 +1,12 @@ # Compute the hcst, obs and fcst anomalies with or without cross-validation # and return them, along with the hcst and obs climatologies. -Anomalies <- function(recipe, data) { +Anomalies <- function(recipe, data, retrieve = TRUE) { + ## TODO: Is this necessary? + # Check if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = retrieve, paste("The anomaly module has been called, but the element", "'Workflow:Anomalies:compute' is missing from the recipe.")) stop() @@ -103,17 +105,19 @@ Anomalies <- function(recipe, data) { paste(data$fcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, paste("The anomalies have been computed,", cross_msg, "cross-validation. 
The original full fields are returned as", "$hcst.full_val and $obs.full_val.")) - info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") - - # Save outputs - if (recipe$Analysis$Workflow$Anomalies$save != 'none') { - - info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### ANOMALIES COMPUTED SUCCESSFULLY #####") + if (retrieve) { + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING ANOMALIES #####") + } + # Save outputs recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/Anomalies/") # Save forecast @@ -131,20 +135,20 @@ Anomalies <- function(recipe, data) { save_observations(recipe = recipe, data_cube = data$obs) } } - } else { - warn(recipe$Run$logger, paste("The Anomalies module has been called, but", - "recipe parameter Workflow:anomalies:compute is set to FALSE.", - "The full fields will be returned.")) + warn(recipe$Run$logger, retrieve = retrieve, + paste("The Anomalies module has been called, but recipe parameter", + "Workflow:anomalies:compute is set to FALSE.", + "The full fields will be returned.")) hcst_fullvalue <- NULL obs_fullvalue <- NULL - info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### ANOMALIES NOT COMPUTED #####") } ## TODO: Return fcst full value? - .log_memory_usage(recipe$Run$logger, "After computing anomalies") - + .log_memory_usage(recipe$Run$logger, retrieve = retrieve, + "After computing anomalies") return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) - } diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index f1362a22c7608ca95322351d5ba1ea5422792670..ddedf01606d5c3a22f43243af643c270dcbf8ed8 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -1,4 +1,4 @@ -Calibration <- function(recipe, data) { +Calibration <- function(recipe, data, retrieve = TRUE) { # Function that calibrates the hindcast using the method stated in the # recipe. If the forecast is not null, it calibrates it as well. # @@ -9,7 +9,7 @@ Calibration <- function(recipe, data) { method <- tolower(recipe$Analysis$Workflow$Calibration$method) if (method == "raw") { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = retrieve, paste("The Calibration module has been called, but the calibration", "method in the recipe is 'raw'. 
The hcst and fcst will not be", "calibrated.")) @@ -40,24 +40,13 @@ Calibration <- function(recipe, data) { CALIB_MSG <- "##### CALIBRATION COMPLETE #####" # Replicate observation array for the multi-model case - ## TODO: Implement for obs.full_val - if (mm) { - obs.mm <- data$obs$data - for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { - obs.mm <- abind(obs.mm, data$obs$data, - along=which(names(dim(data$obs$data)) == 'dat')) - } - names(dim(obs.mm)) <- names(data$obs$dims) - data$obs$data <- obs.mm - remove(obs.mm) - } - + if (recipe$Analysis$Variables$freq == "monthly_mean") { CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") ## TODO: implement other calibration methods if (!(method %in% CST_CALIB_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = retrieve, paste("Calibration method in the recipe is not available for", "monthly data.")) stop() @@ -111,7 +100,7 @@ Calibration <- function(recipe, data) { } else if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = retrieve, paste("Calibration method in the recipe is not available for", "daily data. Only quantile mapping 'qmap is implemented.")) stop() @@ -128,6 +117,7 @@ Calibration <- function(recipe, data) { na.rm = na.rm, wet.day = F) # Restore dimension order + browser() hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) # In the case where anomalies have been computed, calibrate full values if (!is.null(data$hcst.full_val)) { @@ -162,25 +152,28 @@ Calibration <- function(recipe, data) { } } } - info(recipe$Run$logger, CALIB_MSG) - .log_memory_usage(recipe$Run$logger, "After calibration") + info(recipe$Run$logger, retrieve = retrieve, CALIB_MSG) + .log_memory_usage(recipe$Run$logger, when = "After calibration") # Saving - if (recipe$Analysis$Workflow$Calibration$save != 'none') { - info(recipe$Run$logger, "##### START SAVING CALIBRATED DATA #####") - - ## TODO: What do we do with the full values? - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Calibration/") - if ((recipe$Analysis$Workflow$Calibration$save %in% - c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { - save_forecast(recipe = recipe, data_cube = fcst_calibrated, type = 'fcst') - } - if (recipe$Analysis$Workflow$Calibration$save %in% - c('all', 'exp_only')) { - save_forecast(recipe = recipe, data_cube = hcst_calibrated, type = 'hcst') - } - if (recipe$Analysis$Workflow$Calibration$save == 'all') { - save_observations(recipe = recipe, data_cube = data$obs) + if (retrieve) { + if (recipe$Analysis$Workflow$Calibration$save != 'none') { + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING CALIBRATED DATA #####") + + ## TODO: What do we do with the full values? 
+ recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Calibration/") + if ((recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { + save_forecast(recipe = recipe, data_cube = fcst_calibrated, type = 'fcst') + } + if (recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = hcst_calibrated, type = 'hcst') + } + if (recipe$Analysis$Workflow$Calibration$save == 'all') { + save_observations(recipe = recipe, data_cube = data$obs) + } } } diff --git a/modules/Calibration/calibrate_with_compute.R b/modules/Calibration/calibrate_with_compute.R new file mode 100644 index 0000000000000000000000000000000000000000..c44d31a37ec34bb9eadd93ec0d05e571986f8ae4 --- /dev/null +++ b/modules/Calibration/calibrate_with_compute.R @@ -0,0 +1,176 @@ + +calibrate_with_compute <- function(recipe, hcst, obs, fcst = NULL) { + # Function that calibrates the hindcast using the method stated in the + # recipe. If the forecast is not null, it calibrates it as well. + # + # data: list of s2dv_cube objects containing the hcst, obs and fcst. + # Optionally, it may also have hcst.full_val and obs.full_val. + # recipe: object obtained when passing the .yml recipe file to read_yaml() + + method <- tolower(recipe$Analysis$Workflow$Calibration$method) + + if (method == "raw") { +# warn(recipe$Run$logger, +# paste("The Calibration module has been called, but the calibration", +# "method in the recipe is 'raw'. The hcst and fcst will not be", +# "calibrated.")) + fcst_calibrated <- fcst + hcst_calibrated <- hcst + # if (!is.null(data$hcst.full_val)) { + # hcst_full_calibrated <- data$hcst.full_val + # } else { + # hcst_full_calibrated <- NULL + # } + CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####" + + } else { + ## TODO: Calibrate full fields when present + # Calibration function params + mm <- recipe$Analysis$Datasets$Multimodel + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } + + CALIB_MSG <- "##### CALIBRATION COMPLETE #####" + # Replicate observation array for the multi-model case + ## TODO: Implement for obs.full_val + if (mm) { + obs.mm <- obs + for(dat in 1:(dim(hcst)['dat'][[1]]-1)) { + obs.mm <- abind(obs.mm, obs, + along=which(names(dim(obs)) == 'dat')) + } + names(dim(obs.mm)) <- names(dim(obs$data)) + obs$data <- obs.mm + remove(obs.mm) + } + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + + CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") + ## TODO: implement other calibration methods + if (!(method %in% CST_CALIB_METHODS)) { + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "monthly data.")) + stop() + } else { + # Calibrate the hindcast + hcst_calibrated <- CST_Calibration(hcst, obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + # # In the case where anomalies have been computed, calibrate full values + # if (!is.null(data$hcst.full_val)) { + # hcst_full_calibrated <- CST_Calibration(data$hcst.full_val, + # data$obs.full_val, + # cal.method = method, + # eval.method = "leave-one-out", + # multi.model = mm, + # na.fill = TRUE, + # na.rm = na.rm, + # apply_to = NULL, + # 
memb_dim = "ensemble", + # sdate_dim = "syear", + # ncores = ncores) + # } else { + # hcst_full_calibrated <- NULL + # } + + # Calibrate the forecast + if (!is.null(fcst)) { + fcst_calibrated <- CST_Calibration(hcst, obs, fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } else { + fcst_calibrated <- NULL + } + } + } else if (recipe$Analysis$Variables$freq == "daily_mean") { + # Daily data calibration using Quantile Mapping + if (!(method %in% c("qmap"))) { + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "daily data. Only quantile mapping 'qmap is implemented.")) + stop() + } + # Calibrate the hindcast + dim_order <- names(dim(hcst$data)) + hcst_calibrated <- CST_QuantileMapping(hcst, obs, + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) + # Restore dimension order + hcst_calibrated <- Reorder(hcst_calibrated, dim_order) + # In the case where anomalies have been computed, calibrate full values + # if (!is.null(data$hcst.full_val)) { + # hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val, + # data$obs.full_val, + # exp_cor = NULL, + # sdate_dim = "syear", + # memb_dim = "ensemble", + # method = "QUANT", + # ncores = ncores, + # na.rm = na.rm, + # wet.day = F) + # } else { + # hcst_full_calibrated <- NULL + # } + + if (!is.null(fcst)) { + # Calibrate the forecast + fcst_calibrated <- CST_QuantileMapping(hcst, obs, + exp_cor = fcst, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) + # Restore dimension order + fcst_calibrated <- Reorder(fcst_calibrated, dim_order) + } else { + fcst_calibrated <- NULL + } + } + } + # info(recipe$Run$logger, CALIB_MSG) + ## TODO: Sort out returns + return_list <- list(hcst = hcst_calibrated, + obs = obs)#, + # fcst = fcst_calibrated) + # if (!is.null(hcst_full_calibrated)) { + # return_list <- append(return_list, + # list(hcst.full_val = hcst_full_calibrated, + # obs.full_val = data$obs.full_val)) + # } + return(return_list) +} diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index f334b1de77e423d1504031ac98506e9b10fe3d21..4f216e1705a4345abbe5d584de681c2952ab098f 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -6,13 +6,12 @@ source('modules/Downscaling/tmp/Analogs.R') source('modules/Downscaling/tmp/LogisticReg.R') source('modules/Downscaling/tmp/Utils.R') -Downscaling <- function(recipe, data) { +Downscaling <- function(recipe, data, retrieve = TRUE) { # Function that downscale the hindcast using the method stated in the # recipe. For the moment, forecast must be null. # # data: list of s2dv_cube objects containing the hcst, obs and fcst. 
# recipe: object obtained when passing the .yml recipe file to read_yaml() - type <- tolower(recipe$Analysis$Workflow$Downscaling$type) if (type == "none") { @@ -334,21 +333,24 @@ Downscaling <- function(recipe, data) { } # Saving - if (recipe$Analysis$Workflow$Downscaling$save != 'none') { - info(recipe$Run$logger, "##### START SAVING DOWNSCALED DATA #####") - } - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Downscaling/") - if ((recipe$Analysis$Workflow$Downscaling$save %in% - c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { - save_forecast(recipe = recipe, data_cube = fcst_downscal$exp, type = 'fcst') - } - if ((recipe$Analysis$Workflow$Downscaling$save %in% - c('all', 'exp_only')) && (!is.null(hcst_downscal$exp))) { - save_forecast(recipe = recipe, data_cube = hcst_downscal$exp, type = 'hcst') - } - if (recipe$Analysis$Workflow$Downscaling$save == 'all') { - save_observations(recipe = recipe, data_cube = hcst_downscal$obs) + if (retrieve) { + if (recipe$Analysis$Workflow$Downscaling$save != 'none') { + info(recipe$Run$logger, retrieve = TRUE, + "##### START SAVING DOWNSCALED DATA #####") + } + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Downscaling/") + if ((recipe$Analysis$Workflow$Downscaling$save %in% + c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { + save_forecast(recipe = recipe, data_cube = fcst_downscal$exp, type = 'fcst') + } + if ((recipe$Analysis$Workflow$Downscaling$save %in% + c('all', 'exp_only')) && (!is.null(hcst_downscal$exp))) { + save_forecast(recipe = recipe, data_cube = hcst_downscal$exp, type = 'hcst') + } + if (recipe$Analysis$Workflow$Downscaling$save == 'all') { + save_observations(recipe = recipe, data_cube = hcst_downscal$obs) + } } return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = fcst_downscal$exp)) diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index 6237840472b57c4d9e5ba552302674844b6e32a7..d9fe912edfc41d5f4ad3474dcdcdc406e533ea0e 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ b/modules/Downscaling/tmp/Intbc.R @@ -369,8 +369,7 @@ Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo exp_cor = exp_cor_interpolated$data, na.rm = TRUE, memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, ...) 
- } - else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { + } else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { # Dynamical bias correction is not yet prepared to handle hindcast-forecast data # It will return only the hindcast downscaled if (!is.null(exp_cor)) { diff --git a/modules/Indices/Indices.R b/modules/Indices/Indices.R index fb9a7277587f5fb63f4f8151a090a6245f571a5b..a6ab15dea020b4c991d49a9435e4bf7968b483ca 100644 --- a/modules/Indices/Indices.R +++ b/modules/Indices/Indices.R @@ -2,7 +2,13 @@ source("modules/Indices/R/compute_nao.R") source("modules/Indices/R/compute_nino.R") source("modules/Indices/R/drop_indices_dims.R") source("modules/Saving/Saving.R") -Indices <- function(recipe, data) { +## TODO: Remove later +source("modules/Indices/R/tmp/NAO.R") +source("modules/Indices/R/tmp/Utils.R") +source("modules/Indices/R/tmp/EOF.R") +source("modules/Indices/R/tmp/ProjectField.R") + +Indices <- function(recipe, data, retrieve = TRUE) { # Define parameters nao <- NULL if ("nao" %in% tolower(names(recipe$Analysis$Workflow$Indices))) { @@ -29,7 +35,7 @@ Indices <- function(recipe, data) { nao <- compute_nao(data = data, recipe = recipe, obsproj = obsproj, plot_ts = plot_ts, plot_sp = plot_sp, - alpha = alpha) + alpha = alpha, retrieve = retrieve) } ninos <- list() num_ninos <- sum(tolower(substr(names(recipe$Analysis$Workflow$Indices), @@ -101,11 +107,11 @@ Indices <- function(recipe, data) { } if (is.null(nao)) { return(ninos) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### EL NINO INDICES COMPUTED SUCCESSFULLY #####") } else { return(nao) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### NAO INDEX COMPUTED SUCCESSFULLY #####") } } diff --git a/modules/Indices/R/compute_nao.R b/modules/Indices/R/compute_nao.R index b109fdae1757c8ce5b8afbf421749c0035eb3e13..e2b9d76f031de1c3520a3fb0b372fbd6ade70ec5 100644 --- a/modules/Indices/R/compute_nao.R +++ b/modules/Indices/R/compute_nao.R @@ -1,6 +1,6 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, - alpha, logo = NULL) { + alpha, logo = NULL, retrieve = TRUE) { ## TODO: if fcst object in data compute the nao too if (!is.null(data$fcst)) { warning("NAO computed only for hindcast data.") @@ -49,10 +49,11 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, Variable = c(data$hcst$Variable[1], level = 'surface'), data$hcst$Va, Datasets = data$hcst$Datasets, time_dims = c('syear', 'time'), - Dates = data$hcst$Dates) + Dates = data$hcst$attrs$Dates) obs <- s2dv_cube(data = obs1$data, lat = obs1$lat, lon = obs1$lon, Variable = c(data$obs$Variable[1], level = 'surface'), - Datasets = data$obs$Datasets, time_dims = c('syear', 'time')) + Datasets = data$obs$Datasets, time_dims = c('syear', 'time'), + Dates = data$obs$attrs$Dates) # TODO check and create data object for the next steps data <- list(hcst = hcst, obs = obs) lons <- data$hcst$coords$longitude @@ -83,15 +84,8 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, nao$exp <- InsertDim(nao$exp, posdim = 1, lendim = 1, name = 'region') nao$obs <- InsertDim(nao$obs, posdim = 1, lendim = 1, name = 'region') hcst_dates <- data$hcst$attrs$Dates - hcst_dates <- drop(data$hcst$attrs$Dates) - - if (!("time" %in% names(dim(hcst_dates)))) { - if (is.null(dim(hcst_dates))) { - hcst_dates <- array(hcst_dates, c(syear = length(hcst_dates))) - } - hcst_dates <- InsertDim(hcst_dates, pos = 1, len = 1, name = 'time') - hcst_dates <- as.POSIXct(hcst_dates, origin = '1970-01-01', 
tz = 'UTC') - } + dim(hcst_dates) <- dim(hcst_dates)[intersect(names(dim(hcst_dates)), + names(dim(nao$exp)))] nao <- list(hcst = s2dv_cube( data = nao$exp, varName = "nao", @@ -119,6 +113,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, Dates = data$obs$attrs$Dates, Dataset = recipe$Analysis$Datasets$Reference$name)) if (recipe$Analysis$Workflow$Indices$NAO$save == 'all') { + ## TODO: Fix output directory file_dest <- paste0(recipe$Run$output_dir, "/outputs/Indices/") if (tolower(recipe$Analysis$Horizon) == "seasonal") { # Use startdates param from SaveExp to correctly name the files: @@ -134,25 +129,27 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, recipe$Analysis$Time$hcst_start : recipe$Analysis$Time$hcst_end) } # need to recover original dimensions after saving to make Skill module work - nao_original_dims_hcst <- nao$hcst$data - nao$hcst$data <- .drop_indices_dims(nao$hcst$data) - CST_SaveExp(data = nao$hcst, - destination = file_dest, - startdates = as.vector(file_dates), - dat_dim = NULL, sdate_dim = 'syear', - ftime_dim = 'time', var_dim = NULL, - memb_dim = 'ensemble') - nao_original_dims_obs <- nao$obs$data - nao$obs$data <- .drop_indices_dims(nao$obs$data) - CST_SaveExp(data = nao$obs, #res, - destination = file_dest, - startdates = as.vector(file_dates), - dat_dim = NULL, sdate_dim = 'syear', - ftime_dim = 'time', var_dim = NULL, - memb_dim = NULL) - nao$hcst$data <- nao_original_dims_hcst - nao$obs$data <- nao_original_dims_obs - nao_original_dims_hcst <- nao_original_dims_obs <- NULL + if (retrieve) { + nao_original_dims_hcst <- nao$hcst$data + nao$hcst$data <- .drop_indices_dims(nao$hcst$data) + CST_SaveExp(data = nao$hcst, + destination = file_dest, + startdates = as.vector(file_dates), + dat_dim = NULL, sdate_dim = 'syear', + ftime_dim = 'time', var_dim = NULL, + memb_dim = 'ensemble') + nao_original_dims_obs <- nao$obs$data + nao$obs$data <- .drop_indices_dims(nao$obs$data) + CST_SaveExp(data = nao$obs, #res, + destination = file_dest, + startdates = as.vector(file_dates), + dat_dim = NULL, sdate_dim = 'syear', + ftime_dim = 'time', var_dim = NULL, + memb_dim = NULL) + nao$hcst$data <- nao_original_dims_hcst + nao$obs$data <- nao_original_dims_obs + nao_original_dims_hcst <- nao_original_dims_obs <- NULL + } gc() } # Read variable long_name to plot it @@ -164,13 +161,21 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), showWarnings = F, recursive = T) source("modules/Indices/R/plot_deterministic_forecast.R") - for (tstep in 1:dim(nao$hcst$data)['time']) { - mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + - (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) - mes <- ifelse(mes > 12, mes - 12, mes) - fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) - obs <- Subset(nao$obs$data, along = 'time', ind = tstep) - exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) + time_step_dates <- Subset(data$hcst$attrs$Dates, + along = "syear", + indices = 1, + drop = T) + for (tstep in 1:length(time_step_dates)) { + mes <- lubridate::month(time_step_dates[tstep]) + fmonth <- mes - as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + 1 + fmonth <- sprintf("%02d", ifelse(fmonth < 0, fmonth + 12, fmonth)) + if ('time' %in% names(dim(nao$obs$data))) { + obs <- Subset(nao$obs$data, along = 'time', ind = tstep) + exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) + } else { + obs <- nao$obs$data + exp <- nao$hcst$data + } if 
(gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { system <- recipe$Analysis$Datasets$System$name } else { @@ -184,7 +189,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", system, "_", recipe$Analysis$Datasets$Reference$name, "_s", recipe$Analysis$Time$sdate, "_ftime", - sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + fmonth, ".pdf") caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -200,8 +205,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, recipe$Analysis$Time$hcst_end) plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", system, "_", recipe$Analysis$Datasets$Reference$name, - "_ftime", - sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + "_ftime", fmonth, ".pdf") caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -260,12 +264,26 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, time_dim = 'syear', method = 'pearson', alpha = alpha, test.type = 'two-sided', pval = FALSE)}, ncores = recipe$Analysis$ncores) - - for (tstep in 1:dim(nao$obs$data)['time']) { - fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + + + time_step_dates <- Subset(data$hcst$attrs$Dates, + along = "syear", + indices = 1, + drop = T) + for (tstep in 1:length(time_step_dates)) { + mes <- lubridate::month(time_step_dates[tstep]) + fmonth <- mes - as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + 1 + fmonth <- sprintf("%02d", ifelse(fmonth < 0, fmonth + 12, fmonth)) + # for (tstep in 1:dim(nao$obs$data)['time']) { + # fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) ## Observations - map <- drop(Subset(correl_obs$r, along = 'time', ind = tstep)) - sig <- drop(Subset(correl_obs$sign, along = 'time', ind = tstep)) + if ("time" %in% names(dim(data$hcst$data))) { + map <- drop(Subset(correl_obs$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_obs$sign, along = 'time', ind = tstep)) + } else { + map <- drop(correl_obs$r) + sig <- drop(correl_obs$sign) + } if (tolower(recipe$Analysis$Horizon) == "seasonal") { mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) @@ -325,8 +343,13 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, fileout = plotfile, width = 8, height = 6, brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) ## Ensemble-mean - map <- drop(Subset(correl_hcst$r, along = 'time', ind = tstep)) - sig <- drop(Subset(correl_hcst$sign, along = 'time', ind = tstep)) + if ("time" %in% names(dim(data$hcst$data))) { + map <- drop(Subset(correl_hcst$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst$sign, along = 'time', ind = tstep)) + } else { + map <- drop(correl_hcst$r) + sig <- drop(correl_hcst$sign) + } if (tolower(recipe$Analysis$Horizon) == "seasonal") { toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", "NAO Index -", var_name, "\n", @@ -382,8 +405,13 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) # Full hcst corr - map <- drop(Subset(correl_hcst_full$r, along = 'time', ind = tstep)) - sig <- drop(Subset(correl_hcst_full$sign, along = 'time', ind = tstep)) + if ("time" %in% names(dim(correl_hcst_full$r))) { + map <- 
drop(Subset(correl_hcst_full$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst_full$sign, along = 'time', ind = tstep)) + } else { + map <- drop(correl_hcst_full$r) + sig <- drop(correl_hcst_full$sign) + } if (tolower(recipe$Analysis$Horizon) == "seasonal") { toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", "NAO Index -",var_name, "\n", diff --git a/modules/Indices/R/tmp/EOF.R b/modules/Indices/R/tmp/EOF.R new file mode 100644 index 0000000000000000000000000000000000000000..38c3fae0970f3e487441d64bea40311fbf02e37c --- /dev/null +++ b/modules/Indices/R/tmp/EOF.R @@ -0,0 +1,292 @@ +#'Area-weighted empirical orthogonal function analysis using SVD +#' +#'Perform an area-weighted EOF analysis using single value decomposition (SVD) +#'based on a covariance matrix or a correlation matrix if parameter 'corr' is +#'set to TRUE. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs +#' could exist but it should be consistent along time_dim. That is, if one grid +#' point has NAs, all the time steps at this point should be NAs. +#'@param lat A vector of the latitudes of 'ano'. +#'@param lon A vector of the longitudes of 'ano'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param neofs A positive integer of the modes to be kept. The default value is +#' 15. If time length or the product of the length of space_dim is smaller than +#' neofs, neofs will be changed to the minimum of the three values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{EOFs}{ +#' An array of EOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). +#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed +#' field. +#'} +#'\item{PCs}{ +#' An array of principal components with the units of the original field to +#' the power of 2, with dimensions (time_dim, number of modes, rest of the +#' dimensions of 'ano' except 'space_dim'). +#' 'PCs' contains already the percentage of explained variance so, +#' to reconstruct the original field it's only needed to multiply 'EOFs' +#' by 'PCs'. +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode (number of modes). The dimensions are (number of +#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{mask}{ +#' An array of the mask with dimensions (space_dim, rest of the dimensions of +#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that +#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to +#' replace NAs with 0s for EOF calculation and mask the result with NAs again +#' after the calculation. +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. 
It is calculated +#' by cosine of 'lat' and used to compute the fraction of variance explained by +#' each EOFs. +#'} +#'\item{tot_var}{ +#' A number or a numeric array of the total variance explained by all the modes. +#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. +#'} +#' +#'@seealso ProjectField, NAO, PlotBoxWhisker +#'@examples +#'# This example computes the EOFs along forecast horizons and plots the one +#'# that explains the greatest amount of variability. The example data has low +#'# resolution so the result may not be explanatory, but it displays how to +#'# use this function. +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) +#'ano <- tmp[1, , ,] +#'names(dim(ano)) <- names(dim(tmp))[-2] +#'eof <- EOF(ano, sampleData$lat, sampleData$lon) +#'\dontrun{ +#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) +#'} +#' +#'@import multiApply +#'@importFrom stats sd +#'@export +EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), + neofs = 15, corr = FALSE, ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + } + if (any(lat > 90 | lat < -90)) { + stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + ## lon + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + } + if (any(lon > 360 | lon < -360)) { + .warning("Some 'lon' is out of the range [-360, 360].") + } + ## neofs + if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { + stop("Parameter 'neofs' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate EOF + +# # Replace mask of NAs with 0s for EOF analysis. +# ano[!is.finite(ano)] <- 0 + + # Area weighting. 
Weights for EOF; needed to compute the + # fraction of variance explained by each EOFs + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anomaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. + wght <- sqrt(wght) + + # neofs is bounded + if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) { + neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs) + .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ", + "the length of time_dim, the production of the length of space_dim, ", + "and neofs.")) + } + + res <- Apply(ano, + target_dims = c(time_dim, space_dim), + output_dims = list(EOFs = c('mode', space_dim), + PCs = c(time_dim, 'mode'), + var = 'mode', + tot_var = NULL, + mask = space_dim), + fun = .EOF, + corr = corr, neofs = neofs, + wght = wght, + ncores = ncores) + + return(c(res, wght = list(wght))) + +} + +.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) { + # ano: [time, lat, lon] + + # Dimensions + nt <- dim(ano)[1] + ny <- dim(ano)[2] + nx <- dim(ano)[3] + + # Check if all the time steps at one grid point are NA-consistent. + # The grid point should have all NAs or no NA along time dim. + if (anyNA(ano)) { + ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon] + na_ind <- which(is.na(ano_latlon), arr.ind = T) + if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) { + stop("Detect certain grid points have NAs but not consistent across time ", + "dimension. If the grid point is NA, it should have NA at all time step.") + } + } + + # Build the mask + mask <- ano[1, , ] + mask[!is.finite(mask)] <- NA + mask[is.finite(mask)] <- 1 + dim(mask) <- c(ny, nx) + + # Replace mask of NAs with 0s for EOF analysis. + ano[!is.finite(ano)] <- 0 + + ano <- ano * InsertDim(wght, 1, nt) + + # The use of the correlation matrix is done under the option corr. + if (corr) { + stdv <- apply(ano, c(2, 3), sd, na.rm = T) + ano <- ano / InsertDim(stdv, 1, nt) + } + + # Time/space matrix for SVD + dim(ano) <- c(nt, ny * nx) + dim.dat <- dim(ano) + + # 'transpose' means the array needs to be transposed before + # calling La.svd for computational efficiency because the + # spatial dimension is larger than the time dimension. This + # goes with transposing the outputs of LA.svd also. + if (dim.dat[2] > dim.dat[1]) { + transpose <- TRUE + } else { + transpose <- FALSE + } + if (transpose) { + pca <- La.svd(t(ano)) + } else { + pca <- La.svd(ano) + } + + # La.svd conventions: decomposition X = U D t(V) La.svd$u + # returns U La.svd$d returns diagonal values of D La.svd$v + # returns t(V) !! The usual convention is PC=U and EOF=V. + # If La.svd is called for ano (transpose=FALSE case): EOFs: + # $v PCs: $u If La.svd is called for t(ano) (transposed=TRUE + # case): EOFs: t($u) PCs: t($v) + + if (transpose) { + pca.EOFs <- t(pca$u) + pca.PCs <- t(pca$v) + } else { + pca.EOFs <- pca$v + pca.PCs <- pca$u + } + + # The numbers of transposition is limited to neofs + PC <- pca.PCs[, 1:neofs] + EOF <- pca.EOFs[1:neofs, ] + dim(EOF) <- c(neofs, ny, nx) + + # To sort out crash when neofs=1. 
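+  # (with a single mode, PC collapses to a vector; re-insert the mode
+  # dimension so that the PC[, e] indexing below keeps working)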
+ if (neofs == 1) { + PC <- InsertDim(PC, 2, 1, name = 'new') + } + + # Computation of the % of variance associated with each mode + W <- pca$d[1:neofs] + tot.var <- sum(pca$d^2) + var.eof <- 100 * pca$d[1:neofs]^2 / tot.var + + for (e in 1:neofs) { + # Set all masked grid points to NA in the EOFs + # Divide patterns by area weights so that EOF * PC gives unweigthed (original) data + EOF[e, , ] <- EOF[e, , ] * mask / wght + # PC is multiplied by the explained variance, + # so that the reconstruction is only EOF * PC + PC[, e] <- PC[, e] * W[e] + } + + if (neofs == 1) { + var.eof <- as.array(var.eof) + } + + return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask))) +} diff --git a/modules/Indices/R/tmp/NAO.R b/modules/Indices/R/tmp/NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..75731055c83d9330ca1b5b67056d3a1025856690 --- /dev/null +++ b/modules/Indices/R/tmp/NAO.R @@ -0,0 +1,434 @@ +#'Compute the North Atlantic Oscillation (NAO) Index +#' +#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF +#'of the sea level pressure (SLP) anomalies over the north Atlantic region +#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and +#'observed anomalies onto the observed EOF pattern or the forecast +#'anomalies onto the EOF pattern of the other years of the forecast. +#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month +#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns +#'cross-validated PCs of the NAO index for forecast (exp) and observations +#'(obs) based on the leading EOF pattern. +#' +#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of observational data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of experimental data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param lat A vector of the latitudes of 'exp' and 'obs'. +#'@param lon A vector of the longitudes of 'exp' and 'obs'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'exp' and 'obs'. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member +#' dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length +#' must be 1. The default value is 'member'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension of 'exp' and 'obs'. The default value is 'ftime'. +#'@param ftime_avg A numeric vector of the forecast time steps to average +#' across the target period. If average is not needed, set NULL. The default +#' value is 2:4, i.e., from 2nd to 4th forecast time steps. 
+#'@param obsproj A logical value indicating whether to compute the NAO index by +#' projecting the forecast anomalies onto the leading EOF of observational +#' reference (TRUE) or compute the NAO by first computing the leading +#' EOF of the forecast anomalies (in cross-validation mode, i.e. leaving the +#' year you are evaluating out), and then projecting forecast anomalies onto +#' this EOF (FALSE). The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list which contains: +#'\item{exp}{ +#' A numeric array of forecast NAO index in verification format with the same +#' dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#' } +#'\item{obs}{ +#' A numeric array of observed NAO index in verification format with the same +#' dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#'} +#' +#'@references +#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of +#' multi-model seasonal forecasts of the wintertime North Atlantic +#' Oscillation. Climate Dynamics, 21, 501-514. +#' DOI: 10.1007/s00382-003-0350-4 +#' +#'@examples +#'# Make up synthetic data +#'set.seed(1) +#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'set.seed(2) +#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'lat <- seq(20, 80, length.out = 6) +#'lon <- seq(-80, 40, length.out = 9) +#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) +#' +#'# plot the NAO index +#' \dontrun{ +#'nao$exp <- Reorder(nao$exp, c(2, 1)) +#'nao$obs <- Reorder(nao$obs, c(2, 1)) +#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", +#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. 
X") +#' } +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', + memb_dim = 'member', space_dim = c('lat', 'lon'), + ftime_dim = 'ftime', ftime_avg = 2:4, + obsproj = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(obs) & is.null(exp)) { + stop("Parameter 'exp' and 'obs' cannot both be NULL.") + } + if (!is.null(exp)) { + if (!is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (is.null(dim(exp))) { + stop("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { + stop("Parameter 'exp' must have dimension names.") + } + } + if (!is.null(obs)) { + if (!is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (is.null(dim(obs))) { + stop("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'obs' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } else { + add_member_back <- FALSE + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (!all(space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!all(space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp) && !is.null(ftime_avg)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs) && !is.null(ftime_avg)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## exp and obs (2) + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + 
stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'memb_dim'.") + } + } + ## ftime_avg + if (!is.null(ftime_avg)) { + if (!is.vector(ftime_avg) | !is.numeric(ftime_avg)) { + stop("Parameter 'ftime_avg' must be an integer vector.") + } + if (!is.null(exp)) { + if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } else { + if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + } + ## sdate >= 2 + if (!is.null(exp)) { + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } else { + if (dim(obs)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } + ## lat and lon + if (!is.null(exp)) { + if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } else { + if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } + stop_needed <- FALSE + if (max(lat) > 80 | min(lat) < 20) { + stop_needed <- TRUE + } + #NOTE: different from s2dverification + # lon is not used in the calculation actually. EOF only uses lat to do the + # weight. So we just need to ensure the data is in this region, regardless + # the order. + if (any(lon < 0)) { #[-180, 180] + if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { + stop_needed <- TRUE + } + } else { #[0, 360] + if (any(lon >= 50 & lon <= 270)) { + stop_needed <- TRUE + } else { + lon_E <- lon[which(lon < 50)] + lon_W <- lon[-which(lon < 50)] + if (max(lon_E) < 30 | min(lon_W) > 290) { + stop_needed <- TRUE + } + } + } + if (stop_needed) { + stop("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + } + ## obsproj + if (!is.logical(obsproj) | length(obsproj) > 1) { + stop("Parameter 'obsproj' must be either TRUE or FALSE.") + } + if (obsproj) { + if (is.null(obs)) { + stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") + } + if (is.null(exp)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' provided.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores == 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Average ftime + if (!is.null(ftime_avg)) { + if (!is.null(exp)) { + exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) + exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) + ## Cross-validated PCs. Fabian. This should be extended to + ## nmod and nlt by simple loops. 
Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + } + + # wght + wght <- array(sqrt(cos(lat * pi / 180)), dim = c(length(lat), length(lon))) + + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, lat, wght, obsproj = TRUE, add_member_back = FALSE) { + # exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + + if (!is.null(obs)) NAOO.ver <- array(NA, dim = ntime) + if (!is.null(exp)) NAOF.ver <- array(NA, dim = c(ntime, nmemb_exp)) + + for (tt in 1:ntime) { #sdate + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[(1:ntime)[-tt], , , drop = FALSE] + obs_EOF <- .EOF(obs_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + + ## Correct polarity of pattern. + # dim(obs_EOF$EOFs): [mode, lat, lon] + if (0 < mean(obs_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + obs_EOF$EOFs <- obs_EOF$EOFs * (-1) +# obs_EOF$PCs <- obs_EOF$PCs * (-1) # not used + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = obs_EOF$EOFs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. + NAOO.ver[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, (1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + exp_EOF <- .EOF(exp_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + + ## Correct polarity of pattern. + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < exp_EOF$EOFs[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(exp_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + exp_EOF$EOFs <- exp_EOF$EOFs * (-1) +# exp_EOF$PCs <- exp_EOF$PCs * sign # not used + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). 
Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], + eof_mode = exp_EOF$EOFs[1, , ], + wght = wght) # [sdate, memb] + NAOF.ver[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], + eof_mode = obs_EOF$EOFs[1, , ], + wght = wght) # [sdate] + NAOF.ver[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + # add_member_back + if (add_member_back) { + suppressWarnings({ + NAOO.ver <- InsertDim(NAOO.ver, 2, 1, name = names(dim(exp))[1]) + }) + } + + #NOTE: EOFs_obs is not returned because it's only the result of the last sdate + # (It is returned in s2dverification.) + if (!is.null(exp) & !is.null(obs)) { + return(list(exp = NAOF.ver, obs = NAOO.ver)) #, EOFs_obs = obs_EOF)) + } else if (!is.null(exp)) { + return(list(exp = NAOF.ver)) + } else if (!is.null(obs)) { + return(list(obs = NAOO.ver)) + } +} diff --git a/modules/Indices/R/tmp/ProjectField.R b/modules/Indices/R/tmp/ProjectField.R new file mode 100644 index 0000000000000000000000000000000000000000..4fdf1892f2a8cd38a2f6492906700aa0cafb1cf2 --- /dev/null +++ b/modules/Indices/R/tmp/ProjectField.R @@ -0,0 +1,272 @@ +#'Project anomalies onto modes of variability +#' +#'Project anomalies onto modes of variability to get the temporal evolution of +#'the EOF mode selected. It returns principal components (PCs) by area-weighted +#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and +#'returns NA if the whole spatial pattern is NA. +#' +#'@param ano A numerical array of anomalies with named dimensions. The +#' dimensions must have at least 'time_dim' and 'space_dim'. It can be +#' generated by Ano(). +#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated +#' by EOF() or REOF(). +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param mode An integer of the variability mode number in the EOF to be +#' projected on. The default value is NULL, which means all the modes of 'eof' +#' is calculated. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numerical array of the principal components in the verification +#' format. The dimensions are the same as 'ano' except 'space_dim'. 
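+#'  In effect, the value for each time step is the area-weighted projection of
+#'  the anomaly field onto the selected (R)EOF pattern: both fields are
+#'  multiplied by the weights in 'eof$wght' (sqrt(cos(latitude)) when generated
+#'  by EOF()) before the spatial sum, i.e. roughly
+#'  sum(eof$wght^2 * ano[t, , ] * pattern, na.rm = TRUE).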
+#' +#'@seealso EOF, NAO, PlotBoxWhisker +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) +#' +#'\dontrun{ +#' # Plot the forecast and the observation of the first mode for the last year +#' # of forecast +#' sdate_dim_length <- dim(mode1_obs)['sdate'] +#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), +#' lwd = 2) +#' for (i in 1:dim(mode1_exp)['member']) { +#' par(new = TRUE) +#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], +#' ylim = c(-15000, 15000)) +#' } +#'} +#' +#'@import multiApply +#'@export +ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), + mode = NULL, ncores = NULL) { + + # Check inputs + ## ano (1) + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## eof (1) + if (is.null(eof)) { + stop("Parameter 'eof' cannot be NULL.") + } + if (!is.list(eof)) { + stop("Parameter 'eof' must be a list generated by EOF() or REOF().") + } + if ('EOFs' %in% names(eof)) { + EOFs <- "EOFs" + } else if ('REOFs' %in% names(eof)) { + EOFs <- "REOFs" + } else if ('patterns' %in% names(eof)) { + EOFs <- "patterns" + } else { + stop("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().") + } + if (!'wght' %in% names(eof)) { + stop("Parameter 'eof' must be a list that contains 'wght'. 
", + "It can be generated by EOF() or REOF().") + } + if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") + } + if (!is.numeric(eof$wght) || !is.array(eof$wght)) { + stop("The component 'wght' of parameter 'eof' must be a numeric array.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## ano (2) + if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { + stop("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.") + } + ## eof (2) + if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | + !'mode' %in% names(dim(eof[[EOFs]]))) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.") + } + if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { + stop("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.") + } + ## mode + if (!is.null(mode)) { + if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { + stop("Parameter 'mode' must be NULL or a positive integer.") + } + if (mode > dim(eof[[EOFs]])['mode']) { + stop("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + +#------------------------------------------------------- + + # Keep the chosen mode + if (!is.null(mode)) { + eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') + } else { + eof_mode <- eof[[EOFs]] + } + + if ('mode' %in% names(dim(eof_mode))) { + dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] + } else { + dimnames_without_mode <- names(dim(eof_mode)) + } + + if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + + } else { + + if (!all(dimnames_without_mode %in% names(dim(ano)))) { + stop("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. Check if 'ano' and 'eof' are compatible.") + } + + common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] + if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != + dim(eof_mode)[dimnames_without_mode])) { + stop("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", + "with different length. Check if 'ano' and 'eof' are compatible.") + } + + # Enlarge eof/ano is needed. 
The margin_dims of Apply() must be consistent + # between ano and eof. + additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] + additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] + if (length(additional_dims) != 0) { + for (i in seq_along(additional_dims)) { + eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), + lendim = additional_dims[i], name = names(additional_dims)[i]) + } + } + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + } + + return(res) +} + + +.ProjectField <- function(ano, eof_mode, wght) { + # ano: [sdate, lat, lon] + # eof_mode: [lat, lon] or [mode, lat, lon] + # wght: [lat, lon] + + ntime <- dim(ano)[1] + + if (length(dim(eof_mode)) == 2) { # mode != NULL + # Initialization of pc.ver. + pc.ver <- array(NA, dim = ntime) #[sdate] + + # Weight + e.1 <- eof_mode * wght + ano <- ano * InsertDim(wght, 1, ntime) + #ano <- aaply(ano, 1, '*', wght) # much heavier + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + #na <- apply(ano, 1, mean, na.rm = TRUE) # much heavier + tmp <- ano * InsertDim(e.1, 1, ntime) # [sdate, lat, lon] + rm(ano) + #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE) # much heavier + pc.ver <- rowSums(tmp, na.rm = TRUE) + pc.ver[which(is.na(na))] <- NA + + } else { # mode = NULL + # Weight + e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1]) + dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3])) # [mode, lat*lon] + ano <- ano * InsertDim(wght, 1, ntime) + dim(ano) <- c(ntime, prod(dim(ano)[2:3])) # [sdate, lat*lon] + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1)) + + # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate] + # Result: [mode, sdate] + pc.ver <- e.1 %*% t(ano) + pc.ver[which(is.na(na))] <- NA + +# # Change back dimensions to feet original input +# dim(projection) <- c(moredims, mode = unname(neofs)) +# return(projection) + } + + return(pc.ver) +} + diff --git a/modules/Indices/R/tmp/Utils.R b/modules/Indices/R/tmp/Utils.R new file mode 100644 index 0000000000000000000000000000000000000000..c6e233c00ca1c532c7bcd7224b62411e5585e209 --- /dev/null +++ b/modules/Indices/R/tmp/Utils.R @@ -0,0 +1,1884 @@ +#'@importFrom abind abind +#'@import plyr ncdf4 +#'@importFrom grDevices png jpeg pdf svg bmp tiff +#'@importFrom easyVerification convert2prob + +## Function to tell if a regexpr() match is a complete match to a specified name +.IsFullMatch <- function(x, name) { + x > 0 && attributes(x)$match.length == nchar(name) +} + +.ConfigReplaceVariablesInString <- function(string, replace_values, + allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. 
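+  # Illustrative example (not from the original source): with
+  #   replace_values <- list(VAR_NAME = "tas", EXP_NAME = "ecearth")
+  # the string "/esarchive/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_$START_DATE$.nc"
+  # becomes "/esarchive/ecearth/tas/tas_$START_DATE$.nc" when
+  # allow_undefined_key_vars = TRUE, and raises an error otherwise because
+  # $START_DATE$ is not defined in 'replace_values'. Replacement is applied
+  # recursively, so values may themselves contain $TAG$ references.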
+ if (length(strsplit(string, "\\$")[[1]]) > 1) { + parts <- strsplit(string, "\\$")[[1]] + output <- "" + i <- 0 + for (part in parts) { + if (i %% 2 == 0) { + output <- paste0(output, part) + } else { + if (part %in% names(replace_values)) { + output <- paste0(output, + .ConfigReplaceVariablesInString(replace_values[[part]], + replace_values, + allow_undefined_key_vars)) + } else if (allow_undefined_key_vars) { + output <- paste0(output, "$", part, "$") + } else { + stop('Error: The variable $', part, + '$ was not defined in the configuration file.', sep = '') + } + } + i <- i + 1 + } + output + } else { + string + } +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') + return(known_lon_names) +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') + return(known_lat_names) +} + +.t2nlatlon <- function(t) { + ## As seen in cdo's griddes.c: ntr2nlat() + nlats <- (t * 3 + 1) / 2 + if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) { + nlats <- ceiling(nlats) + } else { + nlats <- round(nlats) + } + if (nlats %% 2 > 0) { + nlats <- nlats + 1 + } + ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF + nlons <- 2 * nlats + keep_going <- TRUE + while (keep_going) { + n <- nlons + if (n %% 8 == 0) n <- trunc(n / 8) + while (n %% 6 == 0) n <- trunc(n / 6) + while (n %% 5 == 0) n <- trunc(n / 5) + while (n %% 4 == 0) n <- trunc(n / 4) + while (n %% 3 == 0) n <- trunc(n / 3) + if (n %% 2 == 0) n <- trunc(n / 2) + if (n <= 8) { + keep_going <- FALSE + } else { + nlons <- nlons + 2 + if (nlons > 9999) { + stop("Error: pick another gaussian grid truncation. ", + "It doesn't fulfill the standards to apply FFT.") + } + } + } + c(nlats, nlons) +} + +.nlat2t <- function(nlats) { + trunc((nlats * 2 - 1) / 3) +} + +.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) { + # The purpose, working modes, inputs and outputs of this function are + # explained in ?LoadDataFile + #suppressPackageStartupMessages({library(ncdf4)}) + #suppressPackageStartupMessages({library(bigmemory)}) + #suppressPackageStartupMessages({library(plyr)}) + # Auxiliar function to convert array indices to lineal indices + arrayIndex2VectorIndex <- function(indices, dims) { + if (length(indices) > length(dims)) { + stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.") + } + position <- 1 + dims <- rev(dims) + indices <- rev(indices) + for (i in seq_along(indices)) { + position <- position + (indices[i] - 1) * prod(dims[-(1:i)]) + } + position + } + + found_file <- NULL + dims <- NULL + grid_name <- units <- var_long_name <- NULL + is_2d_var <- array_across_gw <- NULL + data_across_gw <- NULL + + filename <- work_piece[['filename']] + namevar <- work_piece[['namevar']] + output <- work_piece[['output']] + # The names of all data files in the directory of the repository that match + # the pattern are obtained. + if (any(grep("^http", filename))) { + is_url <- TRUE + files <- filename + ## TODO: Check that the user is not using shell globbing exps. + } else { + is_url <- FALSE + files <- Sys.glob(filename) + } + + # If we don't find any, we leave the flag 'found_file' with a NULL value. + if (length(files) > 0) { + # The first file that matches the pattern is chosen and read. 
+ filename <- head(files, 1) + filein <- filename + found_file <- filename + mask <- work_piece[['mask']] + + if (!silent && explore_dims) { + .message(paste("Exploring dimensions...", filename)) + ##} else { + ## cat(paste("* Reading & processing data...", filename, '\n')) + ##} + } + + # We will fill in 'expected_dims' with the names of the expected dimensions of + # the data array we'll retrieve from the file. + expected_dims <- NULL + remap_needed <- FALSE + # But first we open the file and work out whether the requested variable is 2d + fnc <- nc_open(filein) + if (!(namevar %in% names(fnc$var))) { + stop("Error: The variable", namevar, "is not defined in the file", filename) + } + var_long_name <- fnc$var[[namevar]]$longname + units <- fnc$var[[namevar]]$units + file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of + # 'longitude'/'latitude'. + if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) && + (work_piece[['dimnames']][['lon']] == 'longitude') && + ('lon' %in% file_dimnames)) { + work_piece[['dimnames']][['lon']] <- 'lon' + } + if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) && + (work_piece[['dimnames']][['lat']] == 'latitude') && + ('lat' %in% file_dimnames)) { + work_piece[['dimnames']][['lat']] <- 'lat' + } + if (is.null(work_piece[['is_2d_var']])) { + is_2d_var <- all(c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) %in% + unlist(lapply(fnc$var[[namevar]][['dim']], + '[[', 'name'))) + } else { + is_2d_var <- work_piece[['is_2d_var']] + } + if ((is_2d_var || work_piece[['is_file_per_dataset']])) { + if (Sys.which("cdo")[[1]] == "") { + stop("Error: CDO libraries not available") + } + + cdo_version <- + strsplit(suppressWarnings( + system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] + + cdo_version <- + as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) + + } + # If the variable to load is 2-d, we need to determine whether: + # - interpolation is needed + # - subsetting is requested + if (is_2d_var) { + ## We read the longitudes and latitudes from the file. + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + first_lon_in_original_file <- lon[1] + # If a common grid is requested or we are exploring the file dimensions + # we need to read the grid type and size of the file to finally work out the + # CDO grid name. 
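+      # Note (illustrative): the CDO grid name built below is 'r<nlon>x<nlat>'
+      # for regular lonlat grids (e.g. a 360x180 grid gives 'r360x180') and
+      # 't<truncation>grid' for gaussian grids, where the truncation is derived
+      # from the number of latitudes with .nlat2t() (e.g. 256 latitudes give
+      # 't170grid').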
+ if (!is.null(work_piece[['grid']]) || explore_dims) { + # Here we read the grid type and its number of longitudes and latitudes + file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) + grids_positions <- grep('# gridID', file_info) + if (length(grids_positions) < 1) { + stop("The grid should be defined in the files.") + } + grids_first_lines <- grids_positions + 2 + grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) + grids_info <- as.list(seq_along(grids_positions)) + grids_info <- lapply(grids_info, + function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) + grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) + grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) + grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) + grids_matches <- unlist(lapply(grids_info, function (x) { + nlons <- if (any(grep('xsize', x))) { + as.numeric(x[grep('xsize', x) + 1]) + } else { + NA + } + nlats <- if (any(grep('ysize', x))) { + as.numeric(x[grep('ysize', x) + 1]) + } else { + NA + } + result <- FALSE + if (!anyNA(c(nlons, nlats))) { + if ((nlons == length(lon)) && + (nlats == length(lat))) { + result <- TRUE + } + } + result + })) + grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] + if (length(grids_matches) == 0) { + stop("Error: Only 'gaussian' and 'lonlat' grids supported. See e.g: cdo sinfo ", filename) + } + if (sum(grids_matches) > 1) { + if ((all(grids_types[which(grids_matches)] == 'gaussian') || + all(grids_types[which(grids_matches)] == 'lonlat')) && + all(unlist(lapply(grids_info[which(grids_matches)], identical, + grids_info[which(grids_matches)][[1]])))) { + grid_type <- grids_types[which(grids_matches)][1] + } else { + stop("Error: Load() can't disambiguate: ", + "More than one lonlat/gaussian grids with the same size as ", + "the requested variable defined in ", filename) + } + } else if (sum(grids_matches) == 1) { + grid_type <- grids_types[which(grids_matches)] + } else { + stop("Unexpected error.") + } + grid_lons <- length(lon) + grid_lats <- length(lat) + # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() + if (grid_type == 'lonlat') { + grid_name <- paste0('r', grid_lons, 'x', grid_lats) + } else { + grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') + } + if (is.null(work_piece[['grid']])) { + .warning(paste0("Detect the grid type to be '", grid_name, "'. ", + "If it is not expected, assign parameter 'grid' to avoid wrong result.")) + } + } + # If a common grid is requested, we will also calculate its size which we will use + # later on. 
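+      # Note (illustrative): the requested 'grid' string is parsed below using
+      # the same notation, e.g. 'r360x180' is a regular lonlat grid with 360
+      # longitudes and 180 latitudes, while 't106grid' is a gaussian grid whose
+      # size is recovered with .t2nlatlon(106) (160 latitudes x 320 longitudes).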
+ if (!is.null(work_piece[['grid']])) { + # Now we calculate the common grid type and its lons and lats + if (any(grep('^t\\d{1,+}grid$', work_piece[['grid']]))) { + common_grid_type <- 'gaussian' + common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + nlonlat <- .t2nlatlon(common_grid_res) + common_grid_lats <- nlonlat[1] + common_grid_lons <- nlonlat[2] + } else if (any(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']]))) { + common_grid_type <- 'lonlat' + common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) + } else { + stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") + } + } else { + ## If no 'grid' is specified, there is no common grid. + ## But these variables are filled in for consistency in the code. + common_grid_lons <- length(lon) + common_grid_lats <- length(lat) + } + first_common_grid_lon <- 0 + last_common_grid_lon <- 360 - 360 / common_grid_lons + ## This is not true for gaussian grids or for some regular grids, but + ## is a safe estimation + first_common_grid_lat <- -90 + last_common_grid_lat <- 90 + # And finally determine whether interpolation is needed or not + remove_shift <- FALSE + if (!is.null(work_piece[['grid']])) { + if ((grid_lons != common_grid_lons) || + (grid_lats != common_grid_lats) || + (grid_type != common_grid_type) || + (lon[1] != first_common_grid_lon)) { + if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { + remove_shift <- TRUE + } + remap_needed <- TRUE + common_grid_name <- work_piece[['grid']] + } + } else if ((lon[1] != first_common_grid_lon) && explore_dims && + !work_piece[['single_dataset']]) { + remap_needed <- TRUE + common_grid_name <- grid_name + remove_shift <- TRUE + } + if (remap_needed && (work_piece[['remap']] == 'con') && + (cdo_version >= as.numeric_version('1.7.0'))) { + work_piece[['remap']] <- 'ycon' + } + if (remove_shift && !explore_dims) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + .warning(paste0("The dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' doesn't start at longitude 0 and will be re-interpolated in order ", + "to align its longitudes with the standard CDO grids definable with ", + "the names 'tgrid' or 'rx', which are by definition ", + "starting at the longitude 0.\n")) + if (!is.null(mask)) { + .warning(paste0("A mask was provided for the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "'. This dataset has been re-interpolated to align its longitudes to ", + "start at 0. You must re-interpolate the corresponding mask to align ", + "its longitudes to start at 0 as well, if you haven't done so yet. ", + "Running cdo remapcon,", common_grid_name, + " original_mask_file.nc new_mask_file.nc will fix it.\n")) + } + } + if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + if (!explore_dims) { + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is originally on ", + "a grid coarser than the common grid and it has been ", + "extrapolated. Check the results carefully. 
It is ", + "recommended to specify as common grid the coarsest grid ", + "among all requested datasets via the parameter 'grid'.\n")) + } + } + # Now calculate if the user requests for a lonlat subset or for the + # entire field + lonmin <- work_piece[['lon_limits']][1] + lonmax <- work_piece[['lon_limits']][2] + latmin <- work_piece[['lat_limits']][1] + latmax <- work_piece[['lat_limits']][2] + lon_subsetting_requested <- FALSE + lonlat_subsetting_requested <- FALSE + if (lonmin <= lonmax) { + if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { + lon_subsetting_requested <- TRUE + } + } else { + if ((lonmin - lonmax) > 360 / common_grid_lons) { + lon_subsetting_requested <- TRUE + } else { + gap_width <- floor(lonmin / (360 / common_grid_lons)) - + floor(lonmax / (360 / common_grid_lons)) + if (gap_width > 0) { + if (!(gap_width == 1 && (lonmin %% (360 / common_grid_lons) == 0) && + (lonmax %% (360 / common_grid_lons) == 0))) { + lon_subsetting_requested <- TRUE + } + } + } + } + if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) + || (lon_subsetting_requested)) { + lonlat_subsetting_requested <- TRUE + } + # Now that we know if subsetting was requested, we can say if final data + # will go across greenwich + if (lonmax < lonmin) { + data_across_gw <- TRUE + } else { + data_across_gw <- !lon_subsetting_requested + } + + # When remap is needed but no subsetting, the file is copied locally + # so that cdo works faster, and then interpolated. + # Otherwise the file is kept as is and the subset will have to be + # interpolated still. + if (!lonlat_subsetting_requested && remap_needed) { + nc_close(fnc) + filecopy <- tempfile(pattern = "load", fileext = ".nc") + file.copy(filein, filecopy) + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the + # underlying hdf5 library. + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " -selname,", namevar, " ", filecopy, " ", filein, + " 2>/dev/null")) + file.remove(filecopy) + work_piece[['dimnames']][['lon']] <- 'lon' + work_piece[['dimnames']][['lat']] <- 'lat' + fnc <- nc_open(filein) + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + } + + # Read and check also the mask + if (!is.null(mask)) { + ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + if (is.list(mask)) { + if (!file.exists(mask[['path']])) { + stop("Error: Couldn't find the mask file", mask[['path']]) + } + mask_file <- mask[['path']] + ###file.copy(work_piece[['mask']][['path']], mask_file) + fnc_mask <- nc_open(mask_file) + vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') + if ('nc_var_name' %in% names(mask)) { + if (!(mask[['nc_var_name']] %in% + vars_in_mask)) { + stop("Error: couldn't find variable", mask[['nc_var_name']], + "in the mask file", mask[['path']]) + } + } else { + if (length(vars_in_mask) != 1) { + stop("Error: one and only one non-coordinate variable should be ", + "defined in the mask file", + mask[['path']], + "if the component 'nc_var_name' is not specified. 
", + "Currently found: ", + toString(vars_in_mask), ".") + } else { + mask[['nc_var_name']] <- vars_in_mask + } + } + if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { + stop("Error: the variable '", + mask[['nc_var_name']], + "' must be defined only over the dimensions '", + work_piece[['dimnames']][['lon']], "' and '", + work_piece[['dimnames']][['lat']], + "' in the mask file ", + mask[['path']]) + } + mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) + nc_close(fnc_mask) + ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) + ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) + ###} else { + ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) + ### nc_close(fnc_mask) + ### fnc_mask <- nc_open(mask_file) + ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') + ### mask_lon <- lon + ### mask_lat <- lat + ###} + ###} + ### Now ready to check that the mask is right + ##if (!(lonlat_subsetting_requested && remap_needed)) { + ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { + ### stop(paste("Error: the mask of the dataset with index ", + ### tail(work_piece[['indices']], 1), " in '", + ### work_piece[['dataset_type']], "' is wrong. ", + ### "It must be on the common grid if the selected output type is 'lonlat', ", + ### "'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on ", + ### "the grid of the corresponding dataset if the selected output type is ", + ### "'areave' and no 'grid' has been specified. For more information ", + ### "check ?Load and see help on parameters 'grid', 'maskmod' and ", + ### "'maskobs'.", sep = "")) + ### } + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be ", + ### "identical to the ones in the corresponding data files if output = 'areave' ", + ### " or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in ", + ### "the mask file must start by 0 and the latitudes must be ordered from ", + ### "highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} + } + } + + lon_indices <- seq_along(lon) + if (!(lonlat_subsetting_requested && remap_needed)) { + lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 + } + if (lonmax >= lonmin) { + lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] + } else if (!remap_needed) { + lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] + } + lat_indices <- which(lat >= latmin & lat <= latmax) + ## In most of the cases the latitudes are ordered from -90 to 90. + ## We will reorder them to be in the order from 90 to -90, so mostly + ## always the latitudes are reordered. + ## TODO: This could be avoided in future. 
+ if (lat[1] < lat[length(lat)]) { + lat_indices <- lat_indices[rev(seq_along(lat_indices))] + } + if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { + if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { + stop("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is wrong. It must be on the ", + "common grid if the selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must be on the grid of ", + "the corresponding dataset if the selected output type is 'areave' and ", + "no 'grid' has been specified. For more information check ?Load and see ", + "help on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask <- mask[lon_indices, lat_indices] + } + ## If the user requests subsetting, we must extend the lon and lat limits if possible + ## so that the interpolation after is done properly + maximum_extra_points <- work_piece[['remapcells']] + if (lonlat_subsetting_requested && remap_needed) { + if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || + (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { + ## if the requested number of points goes beyond the left or right + ## sides of the map, we need to take the entire map so that the + ## interpolation works properly + lon_indices <- seq_along(lon) + } else { + extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) + if (extra_points > 0) { + lon_indices <- + c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + } + extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) + if (extra_points > 0) { + lon_indices <- c(lon_indices, + (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + } + } + min_lat_ind <- min(lat_indices) + max_lat_ind <- max(lat_indices) + extra_points <- min(maximum_extra_points, min_lat_ind - 1) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) + } else { + lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) + } + } + extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) + } else { + lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) + } + } + } + lon <- lon[lon_indices] + lat <- lat[lat_indices] + expected_dims <- c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) + } else { + lon <- 0 + lat <- 0 + } + # We keep on filling the expected dimensions + var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + nmemb <- nltime <- NULL + ## Sometimes CDO renames 'members' dimension to 'lev' + old_members_dimname <- NULL + if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { + old_members_dimname <- work_piece[['dimnames']][['member']] + work_piece[['dimnames']][['member']] <- 'lev' + } + if (work_piece[['dimnames']][['member']] %in% var_dimnames) { + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], + var_dimnames)]]$len + expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) + } else { + nmemb <- 1 + } + if (length(expected_dims) > 0) { + dim_matches <- match(expected_dims, var_dimnames) + if (anyNA(dim_matches)) { + if (!is.null(old_members_dimname)) { + 
expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop("Error: the expected dimension(s)", + toString(expected_dims[which(is.na(dim_matches))]), + "were not found in", filename) + } + time_dimname <- var_dimnames[-dim_matches] + } else { + time_dimname <- var_dimnames + } + if (length(time_dimname) > 0) { + if (length(time_dimname) == 1) { + nltime <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len + expected_dims <- c(expected_dims, time_dimname) + dim_matches <- match(expected_dims, var_dimnames) + } else { + if (!is.null(old_members_dimname)) { + expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop("Error: the variable ", namevar, + " is defined over more dimensions than the expected (", + toString(c(expected_dims, 'time')), + "). It could also be that the members, longitude or latitude ", + "dimensions are named incorrectly. In that case, either rename ", + "the dimensions in the file or adjust Load() to recognize the actual ", + "name with the parameter 'dimnames'. See file ", filename) + } + } else { + nltime <- 1 + } + + # Now we must retrieve the data from the file, but only the asked indices. + # So we build up the indices to retrieve. + # Longitudes or latitudes have been retrieved already. + if (explore_dims) { + # If we're exploring the file we only want one time step from one member, + # to regrid it and work out the number of longitudes and latitudes. + # We don't need more. + members <- 1 + ltimes_list <- list(1) + } else { + # The data is arranged in the array 'tmp' with the dimensions in a + # common order: + # 1) Longitudes + # 2) Latitudes + # 3) Members (even if is not a file per member experiment) + # 4) Lead-times + if (work_piece[['is_file_per_dataset']]) { + time_indices <- 1:nltime + mons <- strsplit(system(paste('cdo showmon ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + years <- strsplit(system(paste('cdo showyear ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + mons <- as.numeric(mons[[1]][which(mons[[1]] != "")]) + years <- as.numeric(years[[1]][which(years[[1]] != "")]) + time_indices <- ts(time_indices, start = c(years[1], mons[1]), + end = c(years[length(years)], mons[length(mons)]), + frequency = 12) + ltimes_list <- list() + for (sdate in work_piece[['startdates']]) { + selected_time_indices <- window(time_indices, start = c(as.numeric( + substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))), + end = c(3000, 12), frequency = 12, extend = TRUE) + selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]] + ltimes_list <- c(ltimes_list, list(selected_time_indices)) + } + } else { + ltimes <- work_piece[['leadtimes']] + #if (work_piece[['dataset_type']] == 'exp') { + ltimes_list <- list(ltimes[which(ltimes <= nltime)]) + #} + } + ## TODO: Put, when reading matrices, this kind of warnings + # if (nmember < nmemb) { + # cat("Warning: + members <- 1:work_piece[['nmember']] + members <- members[which(members <= nmemb)] + } + + # Now, for each list of leadtimes to load (usually only one list with all leadtimes), + # we'll join the indices and retrieve data + found_disordered_dims <- FALSE + for (ltimes in ltimes_list) { + if (is_2d_var) { + start <- c(min(lon_indices), min(lat_indices)) + end <- c(max(lon_indices), max(lat_indices)) + if (lonlat_subsetting_requested && remap_needed) { + subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + dim_longitudes <- 
ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ncdf_dims <- list(dim_longitudes, dim_latitudes) + } else { + subset_indices <- list(lon_indices - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + ncdf_dims <- list() + } + final_dims <- c(length(subset_indices[[1]]), length(subset_indices[[2]]), 1, 1) + } else { + start <- end <- NULL + subset_indices <- list() + ncdf_dims <- list() + final_dims <- c(1, 1, 1, 1) + } + + if (work_piece[['dimnames']][['member']] %in% expected_dims) { + start <- c(start, head(members, 1)) + end <- c(end, tail(members, 1)) + subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) + dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) + ncdf_dims <- c(ncdf_dims, list(dim_members)) + final_dims[3] <- length(members) + } + if (time_dimname %in% expected_dims) { + if (!all(is.na(ltimes))) { + start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) + end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) + subset_indices <- c(subset_indices, + list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + } else { + start <- c(start, NA) + end <- c(end, NA) + subset_indices <- c(subset_indices, list(ltimes)) + } + dim_time <- ncdim_def(time_dimname, "", seq_along(ltimes), unlim = TRUE) + ncdf_dims <- c(ncdf_dims, list(dim_time)) + final_dims[4] <- length(ltimes) + } + count <- end - start + 1 + start <- start[dim_matches] + count <- count[dim_matches] + subset_indices <- subset_indices[dim_matches] + # Now that we have the indices to retrieve, we retrieve the data + if (prod(final_dims) > 0) { + tmp <- take(ncvar_get(fnc, namevar, start, count, + collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + # The data is regridded if it corresponds to an atmospheric variable. When + # the chosen output type is 'areave' the data is not regridded to not + # waste computing time unless the user specified a common grid. 
+ if (is_2d_var) { + ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### start[dim_matches[1:2]], count[dim_matches[1:2]], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + if (lonlat_subsetting_requested && remap_needed) { + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") + ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], + fnc$var[[namevar]]$missval, + prec = if (fnc$var[[namevar]]$prec == 'int') { + 'integer' + } else { + fnc$var[[namevar]]$prec + }) + scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) + add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) + if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { + tmp <- (tmp - add_offset) / scale_factor + } + #nc_close(fnc) + fnc2 <- nc_create(filein2, list(ncdf_var)) + ncvar_put(fnc2, ncdf_var, tmp) + if (add_offset != 0) { + ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) + } + if (scale_factor != 1) { + ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) + } + nc_close(fnc2) + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { + "0,360," + } else { + paste0(lonmin, ",", lonmax, ",") + }, latmin, ",", latmax, + " -remap", work_piece[['remap']], ",", common_grid_name, + " ", filein2, " ", filein, " 2>/dev/null")) + file.remove(filein2) + fnc2 <- nc_open(filein) + sub_lon <- ncvar_get(fnc2, 'lon') + sub_lat <- ncvar_get(fnc2, 'lat') + ## We read the longitudes and latitudes from the file. + ## In principle cdo should put in order the longitudes + ## and slice them properly unless data is across greenwich + sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 + sub_lon_indices <- seq_along(sub_lon) + if (lonmax < lonmin) { + sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] + } + sub_lat_indices <- seq_along(sub_lat) + ## In principle cdo should put in order the latitudes + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + sub_lat_indices <- rev(seq_along(sub_lat)) + } + final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) + subset_indices[[dim_matches[1]]] <- sub_lon_indices + subset_indices[[dim_matches[2]]] <- sub_lat_indices + + tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + + if (!is.null(mask)) { + ## We create a very simple 2d netcdf file that is then interpolated to the common + ## grid to know what are the lons and lats of our slice of data + mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], + "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], + "degrees_north", c(-90, 90)) + ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) + nc_close(fnc_mask) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null")) + fnc_mask <- nc_open(mask_file_remap) + mask_lons <- ncvar_get(fnc_mask, 'lon') + mask_lats <- ncvar_get(fnc_mask, 'lat') + nc_close(fnc_mask) 
+ file.remove(mask_file, mask_file_remap) + if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) { + stop("Error: the mask of the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' is wrong. It must be on the common grid if the ", + "selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must ", + "be on the grid of the corresponding dataset if the ", + "selected output type is 'areave' and no 'grid' has been ", + "specified. For more information check ?Load and see help ", + "on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360 + if (lonmax >= lonmin) { + mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax)) + } else { + mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax)) + } + mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax)) + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + mask_lat_indices <- mask_lat_indices[rev(seq_along(mask_lat_indices))] + } + mask <- mask[mask_lon_indices, mask_lat_indices] + } + sub_lon <- sub_lon[sub_lon_indices] + sub_lat <- sub_lat[sub_lat_indices] + ### nc_close(fnc_mask) + ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) { + ### "0,360," + ### } else { + ### paste0(lonmin, ",", lonmax, ",") + ### }, latmin, ",", latmax, + ### " -remap", work_piece[['remap']], ",", common_grid_name, + ###This is wrong: same files + ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = "")) + ### fnc_mask <- nc_open(mask_file) + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + } + } + if (is.unsorted(dim_matches)) { + if (!found_disordered_dims && + rev(work_piece[['indices']])[2] == 1 && + rev(work_piece[['indices']])[3] == 1) { + found_disordered_dims <- TRUE + .warning(paste0("The dimensions for the variable ", namevar, + " in the files of the experiment with index ", + tail(work_piece[['indices']], 1), + " are not in the optimal order for loading with Load(). ", + "The optimal order would be '", + toString(expected_dims), + "'. One of the files of the dataset is stored in ", filename)) + } + tmp <- aperm(tmp, dim_matches) + } + dim(tmp) <- final_dims + # If we are exploring the file we don't need to process and arrange + # the retrieved data. We only need to keep the dimension sizes. + if (is_2d_var && lonlat_subsetting_requested && remap_needed) { + final_lons <- sub_lon + final_lats <- sub_lat + } else { + final_lons <- lon + final_lats <- lat + } + if (explore_dims) { + if (work_piece[['is_file_per_member']]) { + ## TODO: When the exp_full_path contains asterisks and is file_per_member + ## members from different datasets may be accounted. + ## Also if one file member is missing the accounting will be wrong. + ## Should parse the file name and extract number of members. + if (is_url) { + nmemb <- NULL + } else { + nmemb <- length(files) + } + } + dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats) + } else { + # If we are not exploring, then we have to process the retrieved data + if (is_2d_var) { + tmp <- apply(tmp, c(3, 4), function(x) { + # Disable of large values. 
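+          # Illustrative example: with work_piece[['var_limits']] = c(0, 400),
+          # any value outside [0, 400] is set to NA here before the optional
+          # land-sea mask is applied and the requested output is computed.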
+ if (!is.na(work_piece[['var_limits']][2])) { + x[which(x > work_piece[['var_limits']][2])] <- NA + } + if (!is.na(work_piece[['var_limits']][1])) { + x[which(x < work_piece[['var_limits']][1])] <- NA + } + if (!is.null(mask)) { + x[which(mask < 0.5)] <- NA + } + + if (output == 'areave' || output == 'lon') { + weights <- InsertDim(cos(final_lats * pi / 180), 1, + length(final_lons), name = 'lon') + weights[which(is.na(x))] <- NA + if (output == 'areave') { + weights <- weights / mean(weights, na.rm = TRUE) + mean(x * weights, na.rm = TRUE) + } else { + weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, + length(final_lats), name = 'lat') + MeanDims(x * weights, 2, na.rm = TRUE) + } + } else if (output == 'lat') { + MeanDims(x, 1, na.rm = TRUE) + } else if (output == 'lonlat') { + signif(x, 5) + } + }) + if (output == 'areave') { + dim(tmp) <- c(1, 1, final_dims[3:4]) + } else if (output == 'lon') { + dim(tmp) <- c(final_dims[1], 1, final_dims[3:4]) + } else if (output == 'lat') { + dim(tmp) <- c(1, final_dims[c(2, 3, 4)]) + } else if (output == 'lonlat') { + dim(tmp) <- final_dims + } + } + var_data <- attach.big.matrix(work_piece[['out_pointer']]) + if (work_piece[['dims']][['member']] > 1 && nmemb > 1 && + work_piece[['dims']][['ftime']] > 1 && + nltime < work_piece[['dims']][['ftime']]) { + work_piece[['indices']][2] <- work_piece[['indices']][2] - 1 + for (jmemb in members) { + work_piece[['indices']][2] <- work_piece[['indices']][2] + 1 + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1) + var_data[out_indices] <- as.vector(tmp[, , jmemb, ]) + } + work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1 + } else { + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp) - 1) + a <- aperm(tmp, c(1, 2, 4, 3)) + as.vector(a) + var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3))) + } + work_piece[['indices']][3] <- work_piece[['indices']][3] + 1 + } + } + } + nc_close(fnc) + if (is_2d_var) { + if (remap_needed) { + array_across_gw <- FALSE + file.remove(filein) + ###if (!is.null(mask) && lonlat_subsetting_requested) { + ### file.remove(mask_file) + ###} + } else { + if (first_lon_in_original_file < 0) { + array_across_gw <- data_across_gw + } else { + array_across_gw <- FALSE + } + } + } + } + if (explore_dims) { + list(dims = dims, is_2d_var = is_2d_var, grid = grid_name, + units = units, var_long_name = var_long_name, + data_across_gw = data_across_gw, array_across_gw = array_across_gw) + } else { + ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) { + ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) + ###} + if (!silent && !is.null(work_piece[['progress_amount']])) { + message(work_piece[['progress_amount']], appendLF = FALSE) + } + found_file + } +} + +.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates, + nmember = NULL, nmemberobs = NULL, + nleadtime = NULL, leadtimemin = 1, + leadtimemax = NULL, storefreq = 'monthly', + sampleperiod = 1, lonmin = 0, lonmax = 360, + latmin = -90, latmax = 90, output = 'areave', + method = 'conservative', grid = NULL, + maskmod = vector("list", 15), + maskobs = vector("list", 15), + configfile = NULL, suffixexp = NULL, + suffixobs = NULL, varmin = NULL, varmax = NULL, + silent = FALSE, nprocs = NULL) { + ## This 
function loads and selects sample data stored in sampleMap and + ## sampleTimeSeries and is used in the examples instead of Load() so as + ## to avoid nco and cdo system calls and computation time in the stage + ## of running examples in the CHECK process on CRAN. + selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', + '20001101', '20051101')) + start_dates_position <- 3 + lead_times_position <- 4 + + if (output == 'lonlat') { + sampleData <- s2dv::sampleMap + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] + } else if (output == 'areave') { + sampleData <- s2dv::sampleTimeSeries + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] + } + + dims_out <- dim(sampleData$mod) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$mod) <- dims_out + + dims_out <- dim(sampleData$obs) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$obs) <- dims_out + + invisible(list(mod = dataOut$mod, obs = dataOut$obs, + lat = dataOut$lat, lon = dataOut$lon)) +} + +.ConfigGetDatasetInfo <- function(matching_entries, table_name) { + # This function obtains the information of a dataset and variable pair, + # applying all the entries that match in the configuration file. + if (table_name == 'experiments') { + id <- 'EXP' + } else { + id <- 'OBS' + } + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), + paste0('$DEFAULT_', id, '_FILE_PATH$'), + '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', + '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + info <- NULL + + for (entry in matching_entries) { + if (is.null(info)) { + info <- entry[-1:-2] + info[which(info == '*')] <- defaults[which(info == '*')] + } else { + info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] + } + } + + info <- as.list(info) + names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') + info +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. 
The path with expressions and tags, e.g.: + # /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc + # and a complete known path to one of the matching files, e.g.: + # /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc + # and it returns the path pattern but without shell globbing expressions: + # /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc + # + # To do that, it needs also as inputs the list of replace values (the + # association of each tag to their value). + # + # All the tags not present in the parameter tags_to_keep will be repalced. + # + # Not all cases can be resolved with the implemented algorithm. In an + # unsolvable case a warning is given and one possible guess is returned. + # + # In some cases it is interesting to replace only the expressions in the + # path to the file, but not the ones in the file name itself. To keep the + # expressions in the file name, the parameter permissive can be set to + # TRUE. To replace all the expressions it can be set to FALSE. + clean <- function(x) { + if (nchar(x) > 0) { + x <- gsub('\\\\', '', x) + x <- gsub('\\^', '', x) + x <- gsub('\\$', '', x) + x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y))) + do.call(paste0, as.list(x)) + } else { + x + } + } + + strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "") + + if (permissive) { + actual_path_chunks <- strsplit(actual_path, '/')[[1]] + actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/') + file_name <- tail(actual_path_chunks, 1) + if (length(actual_path_chunks) > 1) { + file_name <- paste0('/', file_name) + } + path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]] + path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)], + collapse = '/') + path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values) + file_name_with_globs <- tail(path_with_globs_chunks, 1) + if (length(path_with_globs_chunks) > 1) { + file_name_with_globs <- paste0('/', file_name_with_globs) + } + right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1) + right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values) + path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags)) + match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE), + paste0(actual_path, file_name)) + if (match != 1) { + stop("Incorrect parameters to replace glob expressions. 
", + "The path with expressions does not match the actual path.") + } + if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') + file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + } + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- NULL + tags_to_replace_starts <- NULL + tags_to_replace_ends <- NULL + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in seq_along(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, + matches[i] - 1), replace_values) + left_known <- + strReverse(head(strsplit(strReverse(left), + strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, + matches[i] + lengths[i], + nchar(path_with_globs_rx)), + replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- + c(left_match + match_len - 1 - nchar(clean(right_known)) - + nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- + c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path, head(tags_to_replace_ends, 1) + 1, + nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - + (head(tags_to_replace_ends, 1) - + head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset ", + "or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path, file_name_with_globs) + } else { + actual_path + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + tag <- paste0('\\$', tag, '\\$') + path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') + parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] + parts <- as.list(grep(tag, parts, value = TRUE)) + longest_couples <- NULL + pos_longest_couples <- NULL + found_value <- NULL + for (i in seq_along(parts)) { + parts[[i]] <- strsplit(parts[[i]], tag)[[1]] + if (length(parts[[i]]) == 1) { + parts[[i]] <- c(parts[[i]], '') + } + len_parts <- sapply(parts[[i]], nchar) + len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] + pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) + longest_couples <- c(longest_couples, max(len_couples)) + } + chosen_part <- which.max(longest_couples) + parts[[chosen_part]] <- + parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + actual_path <- + substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + found_value <- substr(actual_path, 0, match_right - 1) + } + } + } else { + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + actual_path <- substr(actual_path, 0, match_right - 1) + } + } + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + found_value <- + substr(actual_path, match_left + attr(match_left, 'match.length'), + nchar(actual_path)) + } + } + } + found_value 
+} + +.FilterUserGraphicArgs <- function(excludedArgs, ...) { + # This function filter the extra graphical parameters passed by the user in + # a plot function, excluding the ones that the plot function uses by default. + # Each plot function has a different set of arguments that are not allowed to + # be modified. + args <- list(...) + userArgs <- list() + for (name in names(args)) { + if ((name != "") & !is.element(name, excludedArgs)) { + # If the argument has a name and it is not in the list of excluded + # arguments, then it is added to the list that will be used + userArgs[[name]] <- args[[name]] + } else { + .warning(paste0("the argument '", name, "' can not be + modified and the new value will be ignored")) + } + } + userArgs +} + +.SelectDevice <- function(fileout, width, height, units, res) { + # This function is used in the plot functions to check the extension of the + # files where the graphics will be stored and select the right R device to + # save them. + # If the vector of filenames ('fileout') has files with different + # extensions, then it will only accept the first one, changing all the rest + # of the filenames to use that extension. + + # We extract the extension of the filenames: '.png', '.pdf', ... + ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout)) + + if (length(ext) != 0) { + # If there is an extension specified, select the correct device + ## units of width and height set to accept inches + if (ext[1] == ".png") { + saveToFile <- function(fileout) { + png(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] == ".jpeg") { + saveToFile <- function(fileout) { + jpeg(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] %in% c(".eps", ".ps")) { + saveToFile <- function(fileout) { + postscript(file = fileout, width = width, height = height) + } + } else if (ext[1] == ".pdf") { + saveToFile <- function(fileout) { + pdf(file = fileout, width = width, height = height) + } + } else if (ext[1] == ".svg") { + saveToFile <- function(fileout) { + svg(filename = fileout, width = width, height = height) + } + } else if (ext[1] == ".bmp") { + saveToFile <- function(fileout) { + bmp(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] == ".tiff") { + saveToFile <- function(fileout) { + tiff(filename = fileout, width = width, height = height, res = res, units = units) + } + } else { + .warning("file extension not supported, it will be used '.eps' by default.") + ## In case there is only one filename + fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1]) + ext[1] <- ".eps" + saveToFile <- function(fileout) { + postscript(file = fileout, width = width, height = height) + } + } + # Change filenames when necessary + if (any(ext != ext[1])) { + .warning(paste0("some extensions of the filenames provided in 'fileout' ", + "are not ", ext[1], + ". The extensions are being converted to ", ext[1], ".")) + fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout) + } + } else { + # Default filenames when there is no specification + .warning("there are no extensions specified in the filenames, default to '.eps'") + fileout <- paste0(fileout, ".eps") + saveToFile <- postscript + } + + # return the correct function with the graphical device, and the correct + # filenames + list(fun = saveToFile, files = fileout) +} + +.message <- function(...) 
{ + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + message(tmp, appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + warning(tmp, call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.IsColor <- function(x) { + res <- try(col2rgb(x), silent = TRUE) + return(!is(res, "try-error")) +} + +# This function switches to a specified figure at position (row, col) in a layout. +# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is +# activated, i.e., all drawn elements will be superimposed. 
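# (Editor's illustrative sketch, not part of the original code:) a minimal usage
# example, assuming a layout has already been created with graphics::layout();
# the cell numbers below are hypothetical.
#   layout(matrix(1:4, nrow = 2, byrow = TRUE))
#   plot(1:10)                      # cell 1 is drawn as usual
#   .SwitchToFigure(row = 2, col = 1)
#   plot(1:10)                      # drawn in the lower-left cell, superimposed on
#                                   # anything already plotted there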
Additionally, after +# using this function, the automatical pointing to the next figure in the layout +# will be spoiled: once the last figure in the layout is drawn, the pointer won't +# move to the first figure in the layout. +# Only figures with numbers other than 0 (when creating the layout) will be +# accessible. +# Inputs: either row and col, or n and mat +.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) { + if (!is.null(n) && !is.null(mat)) { + if (!is.numeric(n) || length(n) != 1) { + stop("Parameter 'n' must be a single numeric value.") + } + n <- round(n) + if (!is.array(mat)) { + stop("Parameter 'mat' must be an array.") + } + target <- which(mat == n, arr.ind = TRUE)[1, ] + row <- target[1] + col <- target[2] + } else if (!is.null(row) && !is.null(col)) { + if (!is.numeric(row) || length(row) != 1) { + stop("Parameter 'row' must be a single numeric value.") + } + row <- round(row) + if (!is.numeric(col) || length(col) != 1) { + stop("Parameter 'col' must be a single numeric value.") + } + col <- round(col) + } else { + stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.") + } + next_attempt <- c(row, col) + par(mfg = next_attempt) + i <- 1 + layout_size <- par('mfrow') + layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2], + byrow = TRUE) + while (any((par('mfg')[1:2] != c(row, col)))) { + next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ] + par(mfg = next_attempt) + i <- i + 1 + if (i > prod(layout_size)) { + stop("Figure not accessible.") + } + } + plot(0, type = 'n', axes = FALSE, ann = FALSE) + par(mfg = next_attempt) +} + +# Function to permute arrays of non-atomic elements (e.g. POSIXct) +.aperm2 <- function(x, new_order) { + old_dims <- dim(x) + attr_bk <- attributes(x) + if ('dim' %in% names(attr_bk)) { + attr_bk[['dim']] <- NULL + } + if (is.numeric(x)) { + x <- aperm(x, new_order) + } else { + y <- array(seq_along(x), dim = dim(x)) + y <- aperm(y, new_order) + x <- x[as.vector(y)] + } + dim(x) <- old_dims[new_order] + attributes(x) <- c(attributes(x), attr_bk) + x +} + +# This function is a helper for the function .MergeArrays. +# It expects as inputs two named numeric vectors, and it extends them +# with dimensions of length 1 until an ordered common dimension +# format is reached. +# The first output is dims1 extended with 1s. +# The second output is dims2 extended with 1s. +# The third output is a merged dimension vector. If dimensions with +# the same name are found in the two inputs, and they have a different +# length, the maximum is taken. +.MergeArrayDims <- function(dims1, dims2) { + new_dims1 <- NULL + new_dims2 <- NULL + while (length(dims1) > 0) { + if (names(dims1)[1] %in% names(dims2)) { + pos <- which(names(dims2) == names(dims1)[1]) + dims_to_add <- rep(1, pos - 1) + if (length(dims_to_add) > 0) { + names(dims_to_add) <- names(dims2[1:(pos - 1)]) + } + new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) + new_dims2 <- c(new_dims2, dims2[1:pos]) + dims1 <- dims1[-1] + dims2 <- dims2[-(1:pos)] + } else { + new_dims1 <- c(new_dims1, dims1[1]) + new_dims2 <- c(new_dims2, 1) + names(new_dims2)[length(new_dims2)] <- names(dims1)[1] + dims1 <- dims1[-1] + } + } + if (length(dims2) > 0) { + dims_to_add <- rep(1, length(dims2)) + names(dims_to_add) <- names(dims2) + new_dims1 <- c(new_dims1, dims_to_add) + new_dims2 <- c(new_dims2, dims2) + } + list(new_dims1, new_dims2, pmax(new_dims1, new_dims2)) +} + +# This function takes two named arrays and merges them, filling with +# NA where needed. 
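# (Editor's illustrative sketch, not part of the original code:) a small worked
# example of the .MergeArrayDims() helper defined above, using made-up dimension
# vectors, to show how both inputs are padded with 1s before merging:
#   .MergeArrayDims(c(b = 1, c = 3), c(a = 2, b = 3))
#   # [[1]]  a b c -> 1 1 3   (dims1 padded)
#   # [[2]]  a b c -> 2 3 1   (dims2 padded)
#   # [[3]]  a b c -> 2 3 3   (element-wise maximum)
# And the example for .MergeArrays() itself: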
+# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in seq_along(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# only can be used in Trend(). Needs generalization or be replaced by other function. +.reorder <- function(output, time_dim, dim_names) { + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { + names(dim(output)) <- time_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { #regular case + names(dim(output))[1] <- time_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' + return(output) +} + +# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { + + if (type == 'dcpp') { + + fyear_dim <- 'fyear' + data <- Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data)) == fmonth_dim)] <- fyear_dim + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + + anom <- data + + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) + + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- + seq(from = indices_for_clim[length(indices_for_clim)], + 
by = -1, length.out = n_fyears) + } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i, first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i, ] <- data[i, ] - clim + } + } + + } else if (type %in% c('obs', 'hist')) { + + data <- multiApply::Apply(data = data, target_dims = month_dim, + fun = mean, na.rm = na.rm)$output1 + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { + ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, + na.rm = na.rm)$output1 + } else { + ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, + indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } + + anom <- data - clim + + } else { + stop('type must be dcpp, hist or obs') + } + + return(anom) +} + +#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. +# The function is temporarily here because PlotLayout() needs to draw the colorbars of +# PlotMostLikelyQuantileMap(). +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, + label_scale = 1, extra_margin = rep(0, 4), + ...) { + # bar_limits + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(seq_along(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in seq_along(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } 
else { + bar_titles <- paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { + s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, +# bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + #TODO: col_inf and col_sup + return(list(brks = brks, cols = cols)) + } + +} + diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 6e219bf13b09a6d2888b8edb4095b1f78b875ee0..9b220ecfa1f0004f68a2277f188204465432cdba 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -1,6 +1,6 @@ source("tools/libs.R") -Loading <- function(recipe) { +Loading <- function(recipe, retrieve = TRUE) { # Source correct function depending on filesystem and time horizon # Case: CERISE (Mars) if (tolower(recipe$Run$filesystem) == "mars") { @@ -18,21 +18,23 @@ Loading <- function(recipe) { data <- load_tas_tos(recipe) } else { source("modules/Loading/R/load_seasonal.R") - data <- load_seasonal(recipe) + data <- load_seasonal(recipe, retrieve = retrieve) } } else if (time_horizon == "decadal") { source("modules/Loading/R/load_decadal.R") - data <- load_decadal(recipe) + data <- load_decadal(recipe, retrieve = retrieve) } else { stop("Incorrect time horizon.") } } # Display data summary - if (recipe$Run$logger$threshold <= 2) { - data_summary(data$hcst, recipe) - data_summary(data$obs, recipe) - if (!is.null(data$fcst)) { - data_summary(data$fcst, recipe) + if (retrieve) { + if (recipe$Run$logger$threshold <= 2) { + data_summary(data$hcst, recipe) + data_summary(data$obs, recipe) + if (!is.null(data$fcst)) { + data_summary(data$fcst, recipe) + } } } return(data) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R new file mode 100644 index 0000000000000000000000000000000000000000..74c97e299f7e7e232fbf2fb3e261bec9673a394e --- /dev/null +++ b/modules/Loading/Loading_decadal.R @@ -0,0 +1,554 @@ +# Loading module: +# 1. archive.yml +# 2. recipe.yml +# 3. 
Load_decadal.R (V) +#setwd('/esarchive/scratch/aho/git/auto-s2s/') + +## TODO: remove paths to personal scratchs +source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") +# Load required libraries/funs +source("modules/Loading/helper_loading_decadal.R") +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Loading/R/get_timeidx.R") +source("tools/libs.R") + +#==================================================================== + +# recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" +# recipe_file <- "recipes/atomic_recipes/recipe_decadal_daily.yml" + +load_datasets <- function(recipe) { + + ## + archive <- read_yaml(paste0("conf/archive_decadal.yml"))$esarchive + + # Print Start() info or not + DEBUG <- FALSE + + ## TODO: this should come from the main script + # Create output folder and log: + + #------------------------- + # Read from recipe: + #------------------------- + exp.name <- recipe$Analysis$Datasets$System$name #'HadGEM3' + ref.name <- recipe$Analysis$Datasets$Reference$name #'era5' + member <- strsplit(recipe$Analysis$Datasets$System$member, ',')[[1]] #c("r1i1p1f2", "r2i1p1f2") +# variable <- recipe$Analysis$Variables$name #'tas' + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + store.freq <- recipe$Analysis$Variables$freq #monthly_mean + lats.min <- as.numeric(recipe$Analysis$Region$latmin) #0 + lats.max <- as.numeric(recipe$Analysis$Region$latmax) #10 + lons.min <- as.numeric(recipe$Analysis$Region$lonmin) #0 + lons.max <- as.numeric(recipe$Analysis$Region$lonmax) #10 + + # change to: sdates <- dates2load(recipe, logger) + sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015 + sdates_fcst <- recipe$Analysis$Time$fcst + + if (store.freq == "monthly_mean") { + time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + + } else if (store.freq == "daily_mean") { + time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), + ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), + initial_month = archive$System[[exp.name]]$initial_month, + sdates = sdates_hcst, + calendar = archive$System[[exp.name]]$calendar) + } + +#NOTE: May be used in the future +# season <- recipe$Analysis$Time$season + + #------------------------- + # Read from archive: + #------------------------- + if (store.freq == "monthly_mean") { + table <- archive$System[[exp.name]][[store.freq]]$table[variable] #list(tas = 'Amon') + } else { + table <- 'day' + } + grid <- archive$System[[exp.name]][[store.freq]]$grid[variable] #list(tas = 'gr') + version <- archive$System[[exp.name]][[store.freq]]$version[variable] #list(tas = 'v20210910') + if (identical(member, 'all')) { + member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]] + } + + #------------------------- + # derived from above: + #------------------------- + # Check lat and lon and decide CircularSort + circularsort <- check_latlon(latmin = lats.min, latmax = lats.max, lonmin = lons.min, lonmax = lons.max) + + # generate transform params for system and ref + regrid_params <- get_regrid_params(recipe, archive) + + # Only if the time length in each chunk may differ that we need largest_dims_length to be TRUE. Otherwise, set FALSE to increase efficiency. 
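# (Editor's note, illustrative only:) largest_dims_length = TRUE makes startR::Start()
# inspect every matched file and keep the largest length found for the inner 'time'
# dimension, instead of assuming the length from the first file, so it is only worth
# enabling for systems whose chunks have uneven time lengths. Since the condition here
# is a single logical, a plain comparison would be an equivalent, simpler sketch of the
# ifelse() call below:
#   need_largest_dims_length <- exp.name == 'EC-Earth3-i2'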
+ need_largest_dims_length <- ifelse(exp.name == 'EC-Earth3-i2', TRUE, FALSE) + + + #------------------------------------------- + # Step 1: Load the hcst + #------------------------------------------- + #monthly and daily + tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, + version = version, sdates = sdates_hcst) + path_list <- tmp$path_list + multi_path <- tmp$multi_path + + #TODO: to make this case work; enhance Start() if it's possible + if (multi_path & length(variable) > 1) { + stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") + } + + Start_default_arg_list <- list( + dat = path_list, + var = variable, + syear = paste0(sdates_hcst), + chunk = 'all', + chunk_depends = 'syear', + time = indices(time_ind), + time_across = 'chunk', + merge_across_dims = TRUE, + largest_dims_length = need_largest_dims_length, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + ensemble = member, + transform = regrid_params$fcst.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), +# path_glob_permissive = 2, # for version + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = c('syear', 'chunk')), + silent = !DEBUG, + retrieve = T) + + if (length(variable) > 1) { + Start_default_arg_list <- c(Start_default_arg_list, + list(table = table, grid = grid, version = version, + table_depends = 'var', grid_depends = 'var', version_depends = 'var', + metadata_dims = 'var')) + } + + if (!multi_path) { + Start_hcst_arg_list <- Start_default_arg_list + hcst <- do.call(Start, Start_hcst_arg_list) + + } else { + Start_hcst_arg_list <- Start_default_arg_list + Start_hcst_arg_list[['syear']] <- NULL + Start_hcst_arg_list[['chunk_depends']] <- NULL + remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear') + Start_hcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] + + hcst <- do.call(Start, Start_hcst_arg_list) + + # Reshape and reorder dimensions + ## dat should be 1, syear should be length of dat; reorder dimensions + dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) + hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + + # Manipulate time attr because Start() cannot read it correctly + wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only + tmp <- array(dim = c(dim(hcst)[c('syear', 'time')])) + tmp[1, ] <- wrong_time_attr + yr_diff <- (sdates_hcst - sdates_hcst[1])[-1] #diff(sdates_hcst) + for (i_syear in 1:length(yr_diff)) { + tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) + } + attr(hcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') + + } + + tmp_time_attr <- attr(hcst, 'Variables')$common$time + + # change syear to c(sday, sweek, syear) + # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] + dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { + 
error(recipe$Run$logger, + "hcst has problem in matching data and time attr dimension.") + stop() + } + dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) + + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 + if (multi_path) { + attributes(hcst)$Variables$common[[variable]] <- attributes(hcst)$Variables$dat1[[variable]] + } + + # Change class from startR_array to s2dv_cube + suppressWarnings( + hcst <- as.s2dv_cube(hcst) + ) + +#------------------------------------------- +# Step 2: Load the fcst +#------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst)) { + + tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, + version = version, sdates = sdates_fcst) + path_list <- tmp$path_list + multi_path <- tmp$multi_path + + #TODO: to make this case work; enhance Start() if it's possible + if (multi_path & length(variable) > 1) { + stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") + } + + # monthly & daily + if (!multi_path) { + #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst + Start_fcst_arg_list <- Start_default_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) + fcst <- do.call(Start, Start_fcst_arg_list) + + + } else { # multi_path + + #TODO: time attribute is not correct. Improve Start(). + Start_fcst_arg_list <- Start_default_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- NULL + Start_fcst_arg_list[['chunk_depends']] <- NULL + remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') + Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] + fcst <- do.call(Start, Start_fcst_arg_list) + + # Reshape and reorder dimensions + ## dat should be 1, syear should be length of dat; reorder dimensions + ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] + dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + + # Manipulate time attr because Start() cannot read it correctly + wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only + tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) + tmp[1, ] <- wrong_time_attr + yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) + for (i_syear in 1:length(yr_diff)) { + tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) + } + attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') + + } + + tmp_time_attr <- attr(fcst, 'Variables')$common$time + + # change syear to c(sday, sweek, syear) + # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] + dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { + error(recipe$Run$logger, + "fcst has problem in matching data and time attr dimension.") + stop() + } + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) + + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 + if (multi_path) { + attributes(fcst)$Variables$common[[variable]] <- 
attributes(fcst)$Variables$dat1[[variable]] + } + + # Change class from startR_array to s2dv_cube + suppressWarnings( + fcst <- as.s2dv_cube(fcst) + ) + + # Only syear could be different + if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { + error(recipe$Run$logger, + "hcst and fcst do not share the same dimension structure.") + stop() + } + + } else { + fcst <- NULL + } + +#------------------------------------------- +# Step 3. Load the reference +#------------------------------------------- + obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, + store.freq, "$var$$var_dir$", "$var$_$file_date$.nc") + var_dir_obs <- archive$Reference[[ref.name]][[store.freq]][variable] # list(tas = "_f1h-r1440x721cds", tos = "_f1h-r1440x721cds") + +# obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, +# paste0(variable, archive$Reference[[ref.name]][[store.freq]][[variable]])) +# obs.files <- paste0('$var$_$file_date$.nc') + + # Get from startR_cube +# dates <- attr(hcst, 'Variables')$common$time + # Get from s2dv_cube + dates <- hcst$attrs$Dates + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + + if (store.freq == "daily_mean") { +#//////////////// +# Method 1: use hcst time attr as obs time selector +#//////////////// + + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = unique(format(dates, '%Y%m')), + time = dates, # [sday, sweek, syear, time] + time_across = 'file_date', + merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$obs.gridtype, #nc file + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + silent = !DEBUG, + retrieve = TRUE) + + } else if (store.freq == "monthly_mean") { +#//////////////// +# Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector +#//////////////// + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = dates_file, #dates_arr, # [sday, sweek, syear, time] + split_multiselected_dims = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$obs.gridtype, #nc file + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + metadata_dims = 'var', + silent = !DEBUG, + retrieve = TRUE) + } + + +#dim(attr(obs, 'Variables')$common$time) +# sday sweek syear time +# 1 1 2 14 + + # Remove var_dir dimension + obs 
<- Subset(obs, along = "var_dir", indices = 1, drop = "selected") + + # Only ensemble dim could be different + if (!identical(dim(obs), dim(hcst$data)[-9])) { + error(recipe$Run$logger, + "obs and hcst dimensions do not match.") + stop() + } + # Add ensemble dim to obs + dim(obs) <- c(dim(obs), ensemble = 1) + + # Change class from startR_array to s2dv_cube + suppressWarnings( + obs <- as.s2dv_cube(obs) + ) + +#------------------------------------------- +# Step 4. Verify the consistance between data +#------------------------------------------- + # dimension + if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { + error(recipe$Run$logger, + "hcst and obs don't share the same dimension names.") + stop() + } else { + ens_ind <- which(names(dim(obs$data)) == 'ensemble') + match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data))) + if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) { + error(recipe$Run$logger, + "hcst and obs don't share the same dimension length.") + stop() + } + } + + # time attribute + if (!identical(format(hcst$attrs$Dates, '%Y%m'), + format(obs$attrs$Dates, '%Y%m'))) { + error(recipe$Run$logger, + "hcst and obs don't share the same time.") + stop() + } + + # lat and lon attributes + if (!(recipe$Analysis$Regrid$type == 'none')) { + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + } + + # Check fcst + if (!is.null(fcst)) { + # dimension + if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { + error(recipe$Run$logger, + "hcst and fcst don't share the same dimension names.") + stop() + } else { + ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) + match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) + if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) { + error(recipe$Run$logger, + "hcst and fcst don't share the same dimension length.") + stop() + } + } + + # lat and lon attributes + if (!(recipe$Analysis$Regrid$type == 'none')) { + if (!identical(as.vector(hcst$lat), as.vector(fcst$lat))) { + lat_error_msg <- paste("Latitude mismatch between hcst and fcst.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, 
hcst_lat_msg) + fcst_lat_msg <- paste0("First fcst lat: ", fcst$lat[1], + "; Last fcst lat: ", fcst$lat[length(fcst$lat)]) + info(recipe$Run$logger, fcst_lat_msg) + stop("hcst and fcst don't share the same latitudes.") + } + + if (!identical(as.vector(hcst$lon), as.vector(fcst$lon))) { + lon_error_msg <- paste("Longitude mismatch between hcst and fcst.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + fcst_lon_msg <- paste0("First fcst lon: ", fcst$lon[1], + "; Last fcst lon: ", fcst$lon[length(fcst$lon)]) + info(recipe$Run$logger, fcst_lon_msg) + stop("hcst and fcst don't share the same longitudes.") + } + } + + } + + +#------------------------------------------- +# Step 5. Tune data +#------------------------------------------- + # Remove negative values in accumulative variables + dictionary <- read_yaml("conf/variable-dictionary.yml") + for (var_idx in 1:length(variable)) { + var_name <- variable[var_idx] + if (dictionary$vars[[var_name]]$accum) { + info(recipe$Run$logger, + paste0("Accumulated variable ", var_name, + ": setting negative values to zero.")) + # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, + # along = "var", + # indices = var_idx, F), 0) + obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 + hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 + } + } + + # Convert prlr from m/s to mm/day + ## TODO: Make a unit conversion function + if (variable[[var_idx]] == "prlr") { + # Verify that the units are m/s and the same in obs and hcst + if (((obs$attrs$Variable$metadata[[var_name]]$units == "m s-1") || + (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) && + ((hcst$attrs$Variable$metadata[[var_name]]$units == "m s-1") || + (hcst$attrs$Variable$metadata[[var_name]]$units == "m s**-1"))) { + info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") + obs$data[, var_idx, , , , , , , ] <- + obs$data[, var_idx, , , , , , , ]*86400*1000 + obs$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + hcst$data[, var_idx, , , , , , , ] <- + hcst$data[, var_idx, , , , , , , ]*86400*1000 + hcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + if (!is.null(fcst)) { + fcst$data[, var_idx, , , , , , , ] <- + fcst$data[, var_idx, , , , , , , ]*86400*1000 + fcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + } + } + } + } + +#------------------------------------------- +# Step 6. 
Print summary +#------------------------------------------- + + # Print a summary of the loaded data for the user, for each object + if (recipe$Run$logger$threshold <= 2) { + data_summary(hcst, recipe) + data_summary(obs, recipe) + if (!is.null(fcst)) { + data_summary(fcst, recipe) + } + } + + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index f084ce62fb1e4798e5dc3948fe0edd3b2c3bfdd6..2458231ede50f9f8fd8596874132bcc74ead179c 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -15,27 +15,27 @@ library(lubridate) -dates2load <- function(recipe, logger) { +dates2load <- function(recipe) { temp_freq <- recipe$Analysis$Variables$freq - recipe <- recipe$Analysis$Time + Time <- recipe$Analysis$Time # hcst dates - file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), - recipe$sdate) + file_dates <- paste0(strtoi(Time$hcst_start):strtoi(Time$hcst_end), + Time$sdate) if (temp_freq == "monthly_mean") { file_dates <- .add_dims(file_dates) } # fcst dates (if fcst_year empty it creates an empty object) - if (! is.null(recipe$fcst_year)) { - file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) + if (!is.null(Time$fcst_year)) { + file_dates.fcst <- paste0(Time$fcst_year, Time$sdate) if (temp_freq == "monthly_mean") { file_dates.fcst <- .add_dims(file_dates.fcst) } } else { file_dates.fcst <- NULL - info(logger, - paste("fcst_year empty in the recipe, creating empty fcst object...")) + info(recipe$Run$logger, retrieve = TRUE, + "fcst_year empty in the recipe, creating empty fcst object...") } return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index d3b4f4399c3bf65a47239bdeb9423b8c29257d5b..94c11fb0c1033dbd2d1387960ce594a1bd9dc3d3 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -11,14 +11,12 @@ source("modules/Loading/R/compare_exp_obs_grids.R") # recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" # recipe_file <- "recipes/atomic_recipes/recipe_decadal_daily.yml" -load_decadal <- function(recipe) { +load_decadal <- function(recipe, retrieve = retrieve) { ## archive <- read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] # Print Start() info or not - DEBUG <- FALSE - - ## TODO: this should come from the main script + DEBUG <- TRUE # Create output folder and log: #------------------------- @@ -120,8 +118,8 @@ load_decadal <- function(recipe) { return_vars = list(latitude = NULL, longitude = NULL, time = c('syear', 'chunk')), silent = !DEBUG, - retrieve = T) - + retrieve = retrieve) + if (length(variable) > 1) { Start_default_arg_list <- c(Start_default_arg_list, list(table = table, grid = grid, version = version, @@ -132,7 +130,7 @@ load_decadal <- function(recipe) { if (!multi_path) { Start_hcst_arg_list <- Start_default_arg_list hcst <- do.call(Start, Start_hcst_arg_list) - + hcst_dims <- attr(hcst, "Dimensions") } else { Start_hcst_arg_list <- Start_default_arg_list Start_hcst_arg_list[['syear']] <- NULL @@ -141,15 +139,22 @@ load_decadal <- function(recipe) { Start_hcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] hcst <- do.call(Start, Start_hcst_arg_list) - # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder 
dimensions - dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) - hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + if (retrieve) { + dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) + hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + hcst_dims <- dim(hcst) + } else { + hcst_dims <- c(dat = 1, syear = as.numeric(attr(hcst, "Dimensions"))[1], + attr(hcst, "Dimensions")[2:6]) + # attr(hcst, "Dimensions") <- hcst_dims + } # Manipulate time attr because Start() cannot read it correctly + ## NOTE: Why can't Start() read it correctly?? wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only - tmp <- array(dim = c(dim(hcst)[c('syear', 'time')])) + tmp <- array(dim = c(hcst_dims[c('syear', 'time')])) tmp[1, ] <- wrong_time_attr yr_diff <- (sdates_hcst - sdates_hcst[1])[-1] #diff(sdates_hcst) for (i_syear in 1:length(yr_diff)) { @@ -160,16 +165,22 @@ load_decadal <- function(recipe) { } tmp_time_attr <- attr(hcst, 'Variables')$common$time - + # change syear to c(sday, sweek, syear) # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) - if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { - error(recipe$Run$logger, - "hcst has problem in matching data and time attr dimension.") - stop() + if (retrieve) { + dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) } + + ## TODO: Why does this raise an error? + # if (!identical(dim(tmp_time_attr), hcst_dims[c('syear', 'time')])) { + # error(recipe$Run$logger, retrieve = retrieve, + # "hcst has problem in matching data and time attr dimension.") + # stop() + # } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) + # Get hcst dates + dates <- attr(hcst, 'Variables')$common$time #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { @@ -177,9 +188,9 @@ load_decadal <- function(recipe) { } # Change class from startR_array to s2dv_cube - suppressWarnings( + if (retrieve) { hcst <- as.s2dv_cube(hcst) - ) + } #------------------------------------------- # Step 2: Load the fcst @@ -213,18 +224,26 @@ load_decadal <- function(recipe) { Start_fcst_arg_list[['syear']] <- NULL Start_fcst_arg_list[['chunk_depends']] <- NULL remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') - Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] + Start_fcst_arg_list[['return_vars']][['time']] <- + Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] fcst <- do.call(Start, Start_fcst_arg_list) # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) - fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + if (retrieve) { + dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', + 'latitude', 'longitude', 'ensemble')) + fcst_dims <- dim(fcst) + } else { + fcst_dims <- c(dat = 1, syear = as.numeric(attr(fcst, "Dimensions"))[1], + attr(fcst, "Dimensions")[2:6]) + } # Manipulate time 
attr because Start() cannot read it correctly wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only - tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) + tmp <- array(dim = c(fcst_dims[c('syear', 'time')])) tmp[1, ] <- wrong_time_attr yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) for (i_syear in 1:length(yr_diff)) { @@ -238,12 +257,15 @@ load_decadal <- function(recipe) { # change syear to c(sday, sweek, syear) # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) - if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { - error(recipe$Run$logger, - "fcst has problem in matching data and time attr dimension.") - stop() - } + if (retrieve) { + dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { + error(recipe$Run$logger, retrieve = retrieve, + "fcst has problem in matching data and time attr dimension.") + stop() + } + } + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 @@ -252,17 +274,15 @@ load_decadal <- function(recipe) { } # Change class from startR_array to s2dv_cube - suppressWarnings( + if (retrieve) { fcst <- as.s2dv_cube(fcst) - ) - - # Only syear could be different - if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { - error(recipe$Run$logger, - "hcst and fcst do not share the same dimension structure.") - stop() - } - + # Only syear could be different + if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and fcst do not share the same dimension structure.") + stop() + } + } } else { fcst <- NULL } @@ -281,7 +301,7 @@ load_decadal <- function(recipe) { # Get from startR_cube # dates <- attr(hcst, 'Variables')$common$time # Get from s2dv_cube - dates <- hcst$attrs$Dates + # dates <- hcst$attrs$Dates dates_file <- sapply(dates, format, '%Y%m') dim(dates_file) <- dim(dates) @@ -319,7 +339,7 @@ load_decadal <- function(recipe) { return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), silent = !DEBUG, - retrieve = TRUE) + retrieve = retrieve) } else if (store.freq == "monthly_mean") { #//////////////// @@ -347,7 +367,7 @@ load_decadal <- function(recipe) { time = 'file_date'), metadata_dims = 'var', silent = !DEBUG, - retrieve = TRUE) + retrieve = retrieve) } @@ -355,100 +375,101 @@ load_decadal <- function(recipe) { # sday sweek syear time # 1 1 2 14 - # Remove var_dir dimension - obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") - - # Only ensemble dim could be different - if (!identical(dim(obs), dim(hcst$data)[-9])) { - error(recipe$Run$logger, - "obs and hcst dimensions do not match.") - stop() - } - # Add ensemble dim to obs - dim(obs) <- c(dim(obs), ensemble = 1) - - # Change class from startR_array to s2dv_cube - suppressWarnings( + obs_dates <- attr(obs, 'Variables')$common$time + + if (retrieve) { + # Remove var_dir dimension + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") + # Only ensemble dim could be different + if (!identical(dim(obs), dim(hcst$data)[-9])) { + error(recipe$Run$logger, retrieve = retrieve, + "obs and hcst dimensions do not match.") + stop() + } + # Add ensemble dim to obs + dim(obs) <- c(dim(obs), ensemble = 1) + # Transform to s2dv_cube obs <- 
as.s2dv_cube(obs) - ) + } #------------------------------------------- # Step 4. Verify the consistance between data #------------------------------------------- - # dimension - if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { - error(recipe$Run$logger, - "hcst and obs don't share the same dimension names.") - stop() - } else { - ens_ind <- which(names(dim(obs$data)) == 'ensemble') - match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data))) - if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) { - error(recipe$Run$logger, - "hcst and obs don't share the same dimension length.") - stop() - } - } - - # time attribute - if (!identical(format(hcst$attrs$Dates, '%Y%m'), - format(obs$attrs$Dates, '%Y%m'))) { - error(recipe$Run$logger, - "hcst and obs don't share the same time.") - stop() - } - - # lat and lon attributes - if (!(recipe$Analysis$Regrid$type == 'none')) { - if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { - lat_error_msg <- paste("Latitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lat_error_msg) - hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) - info(recipe$Run$logger, hcst_lat_msg) - obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], - "; Last obs lat: ", obs$lat[length(obs$lat)]) - info(recipe$Run$logger, obs_lat_msg) - stop("hcst and obs don't share the same latitudes.") - } - - if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { - lon_error_msg <- paste("Longitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lon_error_msg) - hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) - info(recipe$Run$logger, hcst_lon_msg) - obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], - "; Last obs lon: ", obs$lon[length(obs$lon)]) - info(recipe$Run$logger, obs_lon_msg) - stop("hcst and obs don't share the same longitudes.") - } - } - - # Check fcst - if (!is.null(fcst)) { + if (retrieve) { # dimension - if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { - error(recipe$Run$logger, - "hcst and fcst don't share the same dimension names.") + if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and obs don't share the same dimension names.") stop() } else { - ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) - match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) - if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) { - error(recipe$Run$logger, - "hcst and fcst don't share the same dimension length.") + ens_ind <- which(names(dim(obs$data)) == 'ensemble') + match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data))) + if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and obs don't share the same dimension length.") stop() } } + # time attribute + if (!identical(format(dates, '%Y%m'), format(obs_dates, '%Y%m'))) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and obs don't share the same time.") + stop() + } + # lat and lon attributes if (!(recipe$Analysis$Regrid$type == 'none')) { - compare_exp_obs_grids(hcst, obs) + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { + 
lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, retrieve = retrieve, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, retrieve = retrieve, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, retrieve = retrieve, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, retrieve = retrieve, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, retrieve = retrieve, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, retrieve = retrieve, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + } + + # Check fcst + if (!is.null(fcst)) { + # dimension + if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and fcst don't share the same dimension names.") + stop() + } else { + ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) + match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) + if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and fcst don't share the same dimension length.") + stop() + } + } + + # lat and lon attributes + if (!(recipe$Analysis$Regrid$type == 'none')) { + compare_exp_obs_grids(hcst, obs) + } } } @@ -460,7 +481,7 @@ load_decadal <- function(recipe) { # for (var_idx in 1:length(variable)) { # var_name <- variable[var_idx] # if (dictionary$vars[[var_name]]$accum) { - # info(recipe$Run$logger, + # info(recipe$Run$logger, retrieve = retrieve, # paste0("Accumulated variable ", var_name, # ": setting negative values to zero.")) # # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, @@ -477,9 +498,9 @@ load_decadal <- function(recipe) { # Step 6. 
Print summary #------------------------------------------- - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") - .log_memory_usage(recipe$Run$logger, when = "After loading") + .log_memory_usage(recipe$Run$logger, retrieve = retrieve, when = "After loading") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index 42b74b162fd18a066600bf383db7a4805105462e..7277766a446fd17bb9f4d1f73e528e0d5795843a 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -5,8 +5,7 @@ source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") source("modules/Loading/R/compare_exp_obs_grids.R") -load_seasonal <- function(recipe) { - +load_seasonal <- function(recipe, retrieve = TRUE) { # ------------------------------------------- # Set params ----------------------------------------- @@ -23,8 +22,7 @@ load_seasonal <- function(recipe) { store.freq <- recipe$Analysis$Variables$freq # get sdates array - ## LOGGER: Change dates2load to extract logger from recipe? - sdates <- dates2load(recipe, recipe$Run$logger) + sdates <- dates2load(recipe) idxs <- NULL idxs$hcst <- get_timeidx(sdates$hcst, @@ -92,7 +90,7 @@ load_seasonal <- function(recipe) { #------------------------------------------------------------------- circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) - if (recipe$Analysis$Variables$freq == "monthly_mean"){ + if (recipe$Analysis$Variables$freq == "monthly_mean") { split_multiselected_dims = TRUE } else { split_multiselected_dims = FALSE @@ -123,22 +121,21 @@ load_seasonal <- function(recipe) { longitude = 'dat', time = 'file_date'), split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) - - # Remove var_dir dimension - if ("var_dir" %in% names(dim(hcst))) { - hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") - } - + retrieve = retrieve) + + browser() + if (store.freq %in% c("daily_mean", "daily")) { # Adjusts dims for daily case, could be removed if startR allows # multidim split - names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(hcst))] <- dim(hcst) - dim(hcst) <- default_dims + if (retrieve) { + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + } # Change time attribute dimensions default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) names(dim(attr(hcst, "Variables")$common$time))[which(names( @@ -147,16 +144,21 @@ load_seasonal <- function(recipe) { dim(attr(hcst, "Variables")$common$time) dim(attr(hcst, "Variables")$common$time) <- default_time_dims } - - # Convert hcst to s2dv_cube object - ## TODO: Give correct dimensions to $Dates - ## (sday, sweek, syear instead of file_date) - hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month if (recipe$Analysis$Variables$freq == "monthly_mean") { - hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + attr(hcst, "Variables")$common$time[] <- + attr(hcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag) + } + 
dates <- attr(hcst, "Variables")$common$time + # Convert hcst to s2dv_cube object + if (retrieve) { + if ("var_dir" %in% names(dim(hcst))) { + hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") + } + hcst <- as.s2dv_cube(hcst) + dim(hcst$attrs$Dates) <- dim(dates) } - # Load forecast #------------------------------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst_year)) { @@ -187,38 +189,38 @@ load_seasonal <- function(recipe) { longitude = 'dat', time = 'file_date'), split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) - - if ("var_dir" %in% names(dim(fcst))) { - fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") - } + retrieve = retrieve) if (store.freq %in% c("daily_mean", "daily")) { # Adjusts dims for daily case, could be removed if startR allows # multidim split - names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(fcst))] <- dim(fcst) - dim(fcst) <- default_dims + if (retrieve) { + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + } # Change time attribute dimensions + ## TODO: Is this still necessary? default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) names(dim(attr(fcst, "Variables")$common$time))[which(names( dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- dim(attr(fcst, "Variables")$common$time) dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } else if (recipe$Analysis$Variables$freq == "monthly_mean") { + attr(fcst, "Variables")$common$time[] <- + attr(fcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag) } - # Convert fcst to s2dv_cube - fcst <- as.s2dv_cube(fcst) - # Adjust dates for models where the time stamp goes into the next month - if (recipe$Analysis$Variables$freq == "monthly_mean") { - fcst$attrs$Dates[] <- - fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + if (retrieve) { + if ("var_dir" %in% names(dim(fcst))) { + fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") + } + fcst <- as.s2dv_cube(fcst) } - } else { fcst <- NULL } @@ -228,12 +230,9 @@ load_seasonal <- function(recipe) { # Obtain dates and date dimensions from the loaded hcst data to make sure # the corresponding observations are loaded correctly. 
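Note on the pattern used across this changeset: with the default retrieve = TRUE, Start() returns the data array itself, so dimensions are read with dim(); with retrieve = FALSE it returns only a startR header, so the same information has to be read from its attributes (as load_decadal.R does above). A minimal sketch of the pattern (hcst_dims is used this way in load_decadal.R; here it is only illustrative):

if (retrieve) {
  hcst_dims <- dim(hcst)                       # hcst is the retrieved data array
} else {
  hcst_dims <- attr(hcst, "Dimensions")        # hcst is only a startR header
}
dates <- attr(hcst, "Variables")$common$time   # time metadata, available in both cases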
- dates <- hcst$attrs$Dates - dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] - + # Separate Start() call for monthly vs daily data if (store.freq == "monthly_mean") { - dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") dim(dates_file) <- dim(dates) @@ -257,7 +256,7 @@ load_seasonal <- function(recipe) { longitude = 'dat', time = 'file_date'), split_multiselected_dims = TRUE, - retrieve = TRUE) + retrieve = retrieve) } else if (store.freq %in% c("daily_mean", "daily")) { @@ -295,48 +294,49 @@ load_seasonal <- function(recipe) { longitude = 'dat', time = 'file_date'), split_multiselected_dims = TRUE, - retrieve = TRUE) + retrieve = retrieve) } - - - # Remove var_dir dimension - if ("var_dir" %in% names(dim(obs))) { + + ## TODO: This part belongs to the pre-processing module + # Adjust obs dimensions and compare exp and obs grids, only for retrieve = TRUE + if (retrieve) { + # Adds ensemble dim to obs (for consistency with hcst/fcst) obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") - } - # Adds ensemble dim to obs (for consistency with hcst/fcst) - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(obs))] <- dim(obs) - dim(obs) <- default_dims - - # Convert obs to s2dv_cube - obs <- as.s2dv_cube(obs) - - # Check for consistency between hcst and obs grid - if (!(recipe$Analysis$Regrid$type == 'none')) { - compare_exp_obs_grids(hcst, obs) - } + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!(recipe$Analysis$Regrid$type == 'none')) { + compare_exp_obs_grids(hcst, obs) + } - # Remove negative values in accumulative variables - dictionary <- read_yaml("conf/variable-dictionary.yml") - for (var_idx in 1:length(variable)) { - var_name <- variable[var_idx] - if (dictionary$vars[[var_name]]$accum) { - info(recipe$Run$logger, - paste0("Accumulated variable ", var_name, - ": setting negative values to zero.")) - # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, - # along = "var", - # indices = var_idx, F), 0) - obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 - hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 - if (!is.null(fcst)) { - fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 + # Remove negative values in accumulative variables + dictionary <- read_yaml("conf/variable-dictionary.yml") + for (var_idx in 1:length(variable)) { + var_name <- variable[var_idx] + if (dictionary$vars[[var_name]]$accum) { + info(recipe$Run$logger, + paste0("Accumulated variable ", var_name, + ": setting negative values to zero.")) + # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, + # along = "var", + # indices = var_idx, F), 0) + obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 + hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 + } } } + .log_memory_usage(recipe$Run$logger, when = "After loading") } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") 
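For reference, an illustrative use of the retrieve switch now exposed by the loaders (load_seasonal() and load_tas_tos() in this diff default to retrieve = TRUE; load_decadal() appears to be adapted analogously):

data <- load_seasonal(recipe)                             # default: data retrieved into s2dv_cubes, as before
data_declared <- load_seasonal(recipe, retrieve = FALSE)  # declarative mode: startR headers only,
                                                          # intended to feed the Compute()-based workflow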
############################################################################ @@ -386,7 +386,6 @@ load_seasonal <- function(recipe) { ############################################################################ ############################################################################ - .log_memory_usage(recipe$Run$logger, when = "After loading") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/R/load_tas_tos.R b/modules/Loading/R/load_tas_tos.R index ea231b56d8a00f3833dc7bf9eaa271bc9a97a097..e3972ba8a8748f0bb7e77d419c06e59d6b409015 100644 --- a/modules/Loading/R/load_tas_tos.R +++ b/modules/Loading/R/load_tas_tos.R @@ -6,7 +6,7 @@ source("modules/Loading/R/check_latlon.R") source('modules/Loading/R/mask_tas_tos.R') source("modules/Loading/R/compare_exp_obs_grids.R") -load_tas_tos <- function(recipe) { +load_tas_tos <- function(recipe, retrieve = TRUE) { # ------------------------------------------- # Set params ----------------------------------------- @@ -33,8 +33,7 @@ load_tas_tos <- function(recipe) { # get sdates array - ## LOGGER: Change dates2load to extract logger from recipe? - sdates <- dates2load(recipe, recipe$Run$logger) + sdates <- dates2load(recipe) idxs <- NULL idxs$hcst <- get_timeidx(sdates$hcst, @@ -480,7 +479,7 @@ load_tas_tos <- function(recipe) { compare_exp_obs_grids(exp = hcst, obs = obs) } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") return(list(hcst = hcst, fcst = fcst, obs = obs)) diff --git a/modules/Preprocessing/Preprocessing.R b/modules/Preprocessing/Preprocessing.R new file mode 100644 index 0000000000000000000000000000000000000000..961963440b652a3bff257cd33dfab5f53fef61d4 --- /dev/null +++ b/modules/Preprocessing/Preprocessing.R @@ -0,0 +1,31 @@ +preprocess_datasets <- function(recipe, data) { + # Remove 'var_dir' dimension + for (element in names(data)) { + if ("var_dir" %in% names(dim(data[[element]]))) { + data[[element]] <- Subset(x = data[[element]], + along = c('var_dir'), + indices = list(1), + drop = 'selected') + } + if (!("sday" %in% names(dim(data[[element]])))) { + dim(data[[element]]) <- c(dim(data[[element]]), sday = 1) + } + if (!("sweek" %in% names(dim(data[[element]])))) { + dim(data[[element]]) <- c(dim(data[[element]]), sweek = 1) + } + if (!("syear" %in% names(dim(data[[element]]))) && + (dim(data[[element]])[["dat"]] > 1)) { + names(dim(data[[element]]))[which(names(dim(data[[element]])) == "dat")] <- "syear" + dim(data[[element]]) <- c(dat = 1, dim(data[[element]])) + } else if (!("syear" %in% names(dim(data[[element]]))) && + ("file_date" %in% names(dim(data[[element]])))) { + names(dim(data[[element]]))[which(names(dim(data[[element]])) == "file_date")] <- "syear" + time_dim_names <- names(dim(attr(data[[element]], "Variables")$common$time)) + time_dim_names[which(time_dim_names == "file_date")] <- "syear" + names(dim(attr(data[[element]], "Variables")$common$time)) <- time_dim_names + } + } + # Add 'ensemble' dimension to obs + dim(data$obs) <- c(dim(data$obs), ensemble = 1) + return(data) +} diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R index 47310bf0e5b80d3aa67c064a654192bfe603f0ec..6414ad1f929a43546b75e8f7d5d8d9eca4188cc9 100644 --- a/modules/Saving/R/save_corr.R +++ b/modules/Saving/R/save_corr.R @@ -118,6 +118,6 @@ save_corr <- function(recipe, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE 
#####") } diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R index 6e3bc906d5490bf32ad9631370c37e205c4dc4fd..7c14203ba8066a8458bd4e2a6d3448bf948f2c3b 100644 --- a/modules/Saving/R/save_forecast.R +++ b/modules/Saving/R/save_forecast.R @@ -137,6 +137,6 @@ save_forecast <- function(recipe, } } } - info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) + info(recipe$Run$logger, retrieve = TRUE, + paste("#####", toupper(type), "SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index db7ceecacd0f6e8af750fae45147e6dcbf07f506..f2092cd24c06372b4f79f9f1edcd07b27e5961f4 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -1,7 +1,6 @@ save_metrics <- function(recipe, metrics, dictionary = NULL, - data_cube, agg = "global", outdir = NULL, module = "skill") { @@ -13,6 +12,9 @@ save_metrics <- function(recipe, dictionary <- read_yaml("conf/variable-dictionary.yml") global_attributes <- .get_global_attributes(recipe, archive) + cube_info <- metrics[["metadata"]] + metrics[["metadata"]] <- NULL + time_bounds <- NULL ## TODO: Sort out the logic once default behavior is decided if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && @@ -65,16 +67,18 @@ save_metrics <- function(recipe, } } - times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) + times <- .get_times(recipe, cube_info, fcst.sdate, calendar, init_date) # Loop over variable dimension - for (var in 1:data_cube$dims[['var']]) { + for (var in 1:cube_info$dims[['var']]) { # Subset skill arrays subset_metric <- lapply(metrics, function(x) { ClimProjDiags::Subset(x, along = 'var', indices = var, drop = 'selected')}) # Generate name of output file - variable <- data_cube$attrs$Variable$varName[[var]] + variable <- cube_info$attrs$Variable$varName[[var]] + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/", + module, "/") outdir <- get_dir(recipe = recipe, variable = variable) if (!dir.exists(outdir)) { dir.create(outdir, recursive = T) @@ -83,13 +87,13 @@ save_metrics <- function(recipe, for (i in 1:length(subset_metric)) { if (any('syear' %in% names(dim(subset_metric[[i]])))) { sdate_dim_save = 'syear' - dates <- data_cube$attrs$Dates - time_bounds <- data_cube$attrs$time_bounds + dates <- cube_info$attrs$Dates + time_bounds <- cube_info$attrs$time_bounds } else { sdate_dim_save = NULL - dates <- Subset(data_cube$attrs$Dates, along = 'syear', indices = 1) - if (!is.null(data_cube$attrs$time_bounds)) { - time_bounds <- lapply(data_cube$attrs$time_bounds, + dates <- Subset(cube_info$attrs$Dates, along = 'syear', indices = 1) + if (!is.null(cube_info$attrs$time_bounds)) { + time_bounds <- lapply(cube_info$attrs$time_bounds, FUN = function (x) { Subset(x, along = 'syear', indices = 1) }) @@ -101,11 +105,11 @@ save_metrics <- function(recipe, fcst.sdate, agg, names(subset_metric)[[i]]) SaveExp(data = subset_metric[[i]], destination = outdir, Dates = dates, - coords = c(data_cube$coords['longitude'], - data_cube$coords['latitude']), + coords = c(cube_info$coords['longitude'], + cube_info$coords['latitude']), time_bounds = time_bounds, varname = names(subset_metric)[[i]], - metadata = data_cube$attrs$Variable$metadata, Datasets = NULL, + metadata = cube_info$attrs$Variable$metadata, Datasets = NULL, startdates = NULL, dat_dim = NULL, sdate_dim = sdate_dim_save, ftime_dim = 'time', var_dim = NULL, memb_dim = NULL, drop_dims = NULL, single_file = TRUE, @@ -151,13 
+155,13 @@ save_metrics <- function(recipe, region <- list(region = array(1:dim(metrics[[1]])['region'], c(dim(metrics[[1]])['region']))) ## TODO: check metadata when more than 1 region is store in the data array - attr(region, 'variables') <- data_cube$attrs$Variable$metadata['region'] + attr(region, 'variables') <- cube_info$attrs$Variable$metadata['region'] vars <- c(region, times) vars <- c(vars, subset_metric) ArrayToNc(vars, outfile) } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latitude <- cube_info$coords$lat[1:length(cube_info$coords$lat)] + longitude <- cube_info$coords$lon[1:length(cube_info$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- c(latlon, times, subset_metric) @@ -165,7 +169,7 @@ save_metrics <- function(recipe, } } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste("#####", toupper(module), "METRICS SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R index 794697190903fdd584867e6a5695a522f5a3bdc1..be9613b5002b7f7009b9ac339b50add8001bc79d 100644 --- a/modules/Saving/R/save_observations.R +++ b/modules/Saving/R/save_observations.R @@ -135,5 +135,6 @@ save_observations <- function(recipe, } } } - info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, retrieve = TRUE, + "##### OBS SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index d5dfae16f16346242d8bf1b744d536164fb3e30d..5863ef43a13e69a3c5202d8d10033ff288ac0c37 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -122,5 +122,6 @@ save_percentiles <- function(recipe, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, retrieve = TRUE, + "##### PERCENTILES SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index a9ddc977e231896de23dc7a49dd7dac4386c45f2..b9729aea4f4b74902248ed074a33a9c708b44293 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -132,7 +132,7 @@ save_probabilities <- function(recipe, } } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste("#####", toupper(type), "PROBABILITIES SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index e9e4a5b408cacf3638953c021dd557c69f54e81a..47fc510635e13956990312e8f718527743c8ab16 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -18,11 +18,9 @@ source("modules/Saving/R/drop_dims.R") Saving <- function(recipe, data, skill_metrics = NULL, probabilities = NULL, - agg = 'global', - archive = NULL) { + agg = 'global') { # Wrapper for the saving functions. 
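  # Illustrative call with the updated signature (the 'archive' argument has
  # been dropped from this wrapper; the objects below are placeholders):
  #   Saving(recipe = recipe, data = data,
  #          skill_metrics = skill_metrics,
  #          probabilities = probabilities,
  #          agg = 'global')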
# recipe: The auto-s2s recipe - # archive: The auto-s2s archive # data: output of load_datasets() # data: output of calibrate_datasets() # skill_metrics: output of compute_skill_metrics() @@ -31,12 +29,13 @@ Saving <- function(recipe, data, # Sanity checks if (is.null(recipe)) { - error(recipe$Run$logger, "The 'recipe' parameter is mandatory.") + error(recipe$Run$logger, retrieve = TRUE, + "The 'recipe' parameter is mandatory.") stop() } - + ## TODO: Modify this if (is.null(data)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The 'data' parameter is mandatory. It should be a list", "of at least two s2dv_cubes containing the hcst and obs.")) stop() @@ -56,6 +55,7 @@ Saving <- function(recipe, data, } # Iterate over variables to subset s2dv_cubes and save outputs + ## leave as-is save_forecast(recipe = recipe, data_cube = data$hcst, outdir = outdir[var], @@ -71,13 +71,15 @@ Saving <- function(recipe, data, outdir = outdir[var]) # Export skill metrics + ## TODO: change if (!is.null(skill_metrics)) { save_metrics(recipe = recipe, metrics = skill_metrics, - data_cube = data$hcst, agg = agg, + agg = agg, outdir = outdir[var]) } if (!is.null(corr_metrics)) { + ## TODO: change (low priority) save_corr(recipe = recipe, skill = corr_metrics, data_cube = data$hcst, diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index c03841b828ea70746f6a62a2db3afa703e745c3c..cd941901667198ccec3daf4627e68a6e0ca75dc4 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -20,7 +20,8 @@ source("modules/Skill/R/tmp/GetProbs.R") source("modules/Skill/R/tmp/RPS.R") source("modules/Skill/R/tmp/CRPS.R") -Skill <- function(recipe, data, agg = 'global') { +Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, + nchunks = nchunks) { # data$hcst: s2dv_cube containing the hindcast # obs: s2dv_cube containing the observations @@ -56,9 +57,10 @@ Skill <- function(recipe, data, agg = 'global') { } else { na.rm = recipe$Analysis$remove_NAs } + ## TODO: Move this to the recipe checker if (is.null(recipe$Analysis$Workflow$Skill$cross_validation)) { - warn(recipe$Run$logger, - "cross_validation parameter not defined, setting it to FALSE.") + # warn(recipe$Run$logger, + # "cross_validation parameter not defined, setting it to FALSE.") cross.val <- FALSE } else { cross.val <- recipe$Analysis$Workflow$Skill$cross_validation @@ -120,12 +122,12 @@ Skill <- function(recipe, data, agg = 'global') { skill_metrics[[ metric ]] <- skill ## temp # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { + # skill <- RPSS(data$hcst$data, data$obs$data, skill <- RPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - cross.val = cross.val, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss @@ -329,11 +331,12 @@ Skill <- function(recipe, data, agg = 'global') { # Compute SpecsVerification version of the metrics ## Retain _specs in metric name for clarity metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name - if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', - 'rpss'))) { - warn(recipe$Run$logger, - "Some of the requested SpecsVerification metrics are not available.") - } + ## TODO: Move this to the recipe checker + # if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', + # 'rpss'))) { + # warn(recipe$Run$logger, + # 
"Some of the requested SpecsVerification metrics are not available.") + # } capture.output( skill <- Compute_verif_metrics(data$hcst$data, data$obs$data, skill_metrics = metric_name, @@ -349,40 +352,74 @@ Skill <- function(recipe, data, agg = 'global') { skill_metrics[[ metric ]] <- skill } } - info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####") + ## TODO: Create list of returns with data and metadata + info(recipe$Run$logger, retrieve = retrieve, + "##### SKILL METRIC COMPUTATION COMPLETE #####") .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") # Save outputs - #NURIA: I think change the output_dir is a problem for future savings - if (recipe$Analysis$Workflow$Skill$save != 'none') { - info(recipe$Run$logger, "##### START SAVING SKILL METRIC #####") - } - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Skill/") - # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim - ## TODO: merge save_metrics() and save_metrics_scorecards() - if (recipe$Analysis$Workflow$Skill$save == 'all') { - corr_metric_names <- grep("^corr_individual_members", names(skill_metrics)) - if (length(corr_metric_names) == 0) { - save_metrics(recipe = recipe, metrics = skill_metrics, - data_cube = data$hcst, agg = agg, module = "skill") - } else { - # Save corr - if (length(skill_metrics[corr_metric_names]) > 0) { - save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names], - data_cube = data$hcst) - } - # Save other skill metrics - if (length(skill_metrics[-corr_metric_names]) > 0) { - save_metrics(recipe = recipe, metrics = skill_metrics[-corr_metric_names], - data_cube = data$hcst, agg = agg) + if (retrieve) { + skill_metrics$metadata <- data$hcst + skill_metrics$metadata$data <- NULL + ## TODO: Handle the output differently to avoid changing the recipe? 
+ if (recipe$Analysis$Workflow$Skill$save != 'none') { + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING SKILL METRIC #####") + } + # recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + # "/outputs/Skill/") + # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim + if (recipe$Analysis$Workflow$Skill$save == 'all') { + corr_metric_names <- grep("^corr_individual_members", + names(skill_metrics)) + if (length(corr_metric_names) == 0) { + save_metrics(recipe = recipe, metrics = skill_metrics, + module = "Skill", agg = agg) + } else { + # Save corr + if (length(skill_metrics[corr_metric_names]) > 0) { + save_corr(recipe = recipe, + skill = skill_metrics[corr_metric_names], + data_cube = skill_metrics$metadata, + agg = agg) + } + # Save other skill metrics + if (length(skill_metrics[-corr_metric_names]) > 0) { + save_metrics(recipe = recipe, + metrics = skill_metrics[-corr_metric_names], + module = "Skill", + agg = agg) + } } } + } else { + # Retrieve original dimensions + dimensions <- names(dim(skill_metrics[[1]])) + # Bind list into an array + skill_metrics <- abind(skill_metrics, along = 0) + # Retrieve dimname attributes + metric_names <- dimnames(skill_metrics)[[1]] + tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/") + # Save metric names (only done once) + if (!dir.exists(tmp_dir)) { + dir.create(tmp_dir, recursive = TRUE) + saveRDS(metric_names, paste0(tmp_dir, "metric_names.Rds")) + } + # Add 'metric' dimension to array and restore attributes + names(dim(skill_metrics)) <- c("metric", dimensions) + # Save s2dv_cube metadata + metadata_filename <- "skill_metadata" + for (chunk in names(nchunks)) { + metadata_filename <- paste0(metadata_filename, "_", chunk, "_", nchunks[[chunk]]) + } + metadata_filename <- paste0(metadata_filename, ".Rds") + data$hcst$data <- NULL + saveRDS(data$hcst, paste0(tmp_dir, metadata_filename)) } # Return results return(skill_metrics) } -Probabilities <- function(recipe, data) { +Probabilities <- function(recipe, data, retrieve = TRUE) { ## TODO: Do hcst and fcst at the same time if (is.null(recipe$Analysis$ncores)) { ncores <- 1 @@ -401,8 +438,9 @@ Probabilities <- function(recipe, data) { named_quantiles <- list() if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { - error(recipe$Run$logger, "Quantiles and probability bins have been - requested, but no thresholds are provided in the recipe.") + error(recipe$Run$logger, retrieve = retrieve, + paste("Quantiles and probability bins have been requested, but no", + "thresholds are provided in the recipe.")) stop() } else { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { @@ -477,34 +515,35 @@ Probabilities <- function(recipe, data) { percentiles = named_quantiles) } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") .log_memory_usage(recipe$Run$logger, when = "After anomaly computation") # Save outputs - if (recipe$Analysis$Workflow$Probabilities$save != 'none') { - info(recipe$Run$logger, - "##### START SAVING PERCENTILES AND PROBABILITY CATEGORIES #####") - - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Skill/") - # Save percentiles - if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 'percentiles_only')) { - save_percentiles(recipe = recipe, percentiles = results$percentiles, - data_cube = data$hcst) - } - # Save probability bins - if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 
'bins_only')) { - save_probabilities(recipe = recipe, probs = results$probs, - data_cube = data$hcst, type = "hcst") - if (!is.null(results$probs_fcst)) { - save_probabilities(recipe = recipe, probs = results$probs_fcst, - data_cube = data$fcst, type = "fcst") + if (retrieve) { + if (recipe$Analysis$Workflow$Probabilities$save != 'none') { + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING PERCENTILES AND PROBABILITY CATEGORIES #####") + + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Skill/") + # Save percentiles + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'percentiles_only')) { + save_percentiles(recipe = recipe, percentiles = results$percentiles, + data_cube = data$hcst) + } + # Save probability bins + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = results$probs, + data_cube = data$hcst, type = "hcst") + if (!is.null(results$probs_fcst)) { + save_probabilities(recipe = recipe, probs = results$probs_fcst, + data_cube = data$fcst, type = "fcst") + } } } } - # Return results return(results) } diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 085bcdc58a162133b1316bf1187ae3974f69234d..5d1c4ad9c758f1e1f107a67200c5463987c9e293 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -1,4 +1,4 @@ -Statistics <- function(recipe, data, agg = 'global') { +Statistics <- function(recipe, data, agg = 'global', retrieve = TRUE) { # data$hcst: s2dv_cube containing the hindcast # data$obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml @@ -77,21 +77,28 @@ Statistics <- function(recipe, data, agg = 'global') { } ## close on n_eff } - info(recipe$Run$logger, "##### STATISTICS COMPUTATION COMPLETE #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### STATISTICS COMPUTATION COMPLETE #####") .log_memory_usage(recipe$Run$logger, when = "After statistics computation") - - # Save outputs - if (recipe$Analysis$Workflow$Skill$save != 'none') { - info(recipe$Run$logger, "##### START SAVING STATISTICS #####") - } - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Statistics/") - - if (recipe$Analysis$Workflow$Statistics$save == 'all') { - # Save all statistics - save_metrics(recipe = recipe, metrics = statistics, - data_cube = data$hcst, agg = agg, - module = "statistics") + + if (retrieve) { + # Save output + data$hcst$data <- NULL + statistics$metadata <- data$hcst + if (recipe$Analysis$Workflow$Skill$save != 'none') { + info(recipe$Run$logger, retrieve = TRUE, + "##### START SAVING STATISTICS #####") + } + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Statistics/") + + if (recipe$Analysis$Workflow$Statistics$save == 'all') { + # Save all statistics + save_metrics(recipe = recipe, + metrics = statistics, + agg = agg, + module = "statistics") + } } # Return results return(statistics) diff --git a/modules/Units/R/transform_units_precipitation.R b/modules/Units/R/transform_units_precipitation.R index d0dd7ffd50dfb6f04db35006a06cd61b1d8f43e9..62118dd0c793f6273642a339694b7f956511c648 100644 --- a/modules/Units/R/transform_units_precipitation.R +++ b/modules/Units/R/transform_units_precipitation.R @@ -76,10 +76,11 @@ transform_units_precipitation <- function(data, original_units, new_units, cal <- tolower(data[[1]]$attrs$Variable$metadata[[var_name]]$dim[[time_pos]]$calendar) data_list[[var_index]] <- 
Apply(list(data_list[[var_index]], data[[1]]$attrs$Dates), - target_dim = list(c('syear'), c('syear')), + target_dim = list(c("time"), c("time")), extra_info = list(cal = cal, days_in_month = .days_in_month), fun = function(x, y) { date <- as.Date(y, "%Y-%m-%d") + dim(date) <- dim(y) num_days <- .days_in_month(date, cal = .cal) res <- x * num_days }, ncores = ncores)$output1 diff --git a/modules/Units/Units.R b/modules/Units/Units.R index a143c0a2037684594cd7642149fa039240e4784c..83964195ecf13feb0648a8426f62fe1e03d35997 100644 --- a/modules/Units/Units.R +++ b/modules/Units/Units.R @@ -9,7 +9,7 @@ source("modules/Units/R/transform_units_pressure.R") # $freq # $flux (if precipitation) # $units (optional) -Units <- function(recipe, data) { +Units <- function(recipe, data, retrieve = T) { # from recipe read the user defined units # from data the original units # deaccumulate option for CDS accumulated variables? @@ -59,12 +59,13 @@ Units <- function(recipe, data) { ## if "/" appears substitute by -1 in at the end of next unit. How to know? # Check if all units are equal (if so, no conversion needed; else convert) if (length(unique(c(unlist(orig_units), user_units))) == length(user_units)) { - info(recipe$Run$logger, "##### NO UNIT CONVERSION NEEDED #####") + info(recipe$Run$logger, retrieve = retrieve, "##### NO UNIT CONVERSION NEEDED #####") res <- data } else { if (recipe$Run$filesystem == 'esarchive' && (sum(!sapply(unique(orig_units), is.null)) != 1)) { - warn(recipe$Run$logger, + ## TODO: Warning + warn(recipe$Run$logger, retrieve = retrieve, paste("The units in", paste(names(orig_units), collapse = ', '), "were not all equal and will be uniformized.", "If this is not expected, please contact the ES data team.")) @@ -86,7 +87,7 @@ Units <- function(recipe, data) { } return(result) }, simplify = TRUE) # instead of lapply to get the named list directly - info(recipe$Run$logger, "##### UNIT CONVERSION COMPLETE #####") + info(recipe$Run$logger, retrieve = retrieve, "##### UNIT CONVERSION COMPLETE #####") } return(res) } diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index ff91adf4cef2d5b453f5a6fa61ca06d7310883e8..cb07c490eb3c89c7f69bb3f3166eee12c68a4fc2 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -53,8 +53,10 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o palette = "RdBu" rev = T } - # Define brks, centered around zero in the case of anomalies - if (grepl("anomaly", var_long_name)) { + # Define brks, centered on in the case of anomalies + ## + if (grepl("anomaly", + fcst$attrs$Variable$metadata[[variable]]$long)) { variable <- paste(variable, "anomaly") max_value <- max(abs(var_ens_mean), na.rm = TRUE) ugly_intervals <- seq(-max_value, max_value, max_value/20) @@ -266,6 +268,6 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### FORECAST ENSEMBLE MEAN PLOTS SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 2df06e8bdf87027e3a42da24b78821c5bdc1de82..4a4ebc15443c2ec0eab8f892abe6c5b89995e032 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -1,11 +1,9 @@ library(stringr) -plot_metrics <- function(recipe, data_cube, metrics, - outdir, significance = F, output_conf) { +plot_metrics <- 
function(recipe, metrics, + outdir, significance = F, output_conf) { # recipe: Auto-S2S recipe - # archive: Auto-S2S archive - # data_cube: s2dv_cube object with the corresponding hindcast data - # metrics: list of named metric arrays with named dimensions + # metrics: list of named skill metrics arrays # outdir: output directory # significance: T/F, whether to display the significance dots in the plots @@ -19,9 +17,12 @@ plot_metrics <- function(recipe, data_cube, metrics, if (!is.list(metrics) || is.null(names(metrics))) { stop("The element 'metrics' must be a list of named arrays.") } - - latitude <- data_cube$coords$lat - longitude <- data_cube$coords$lon + + cube_info <- metrics[["metadata"]] + metrics[["metadata"]] <- NULL + + latitude <- cube_info$coords$lat + longitude <- cube_info$coords$lon archive <- get_archive(recipe) if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ system_name <- paste0('Multimodel-', @@ -41,7 +42,7 @@ plot_metrics <- function(recipe, data_cube, metrics, month_label <- tolower(month.name[init_month]) month_abbreviation <- month.abb[init_month] # Get months - months <- lubridate::month(Subset(data_cube$attrs$Dates, + months <- lubridate::month(Subset(cube_info$attrs$Dates, "syear", indices = 1), label = T, abb = F,locale = "en_GB") if (!is.null(recipe$Analysis$Workflow$Visualization$projection)) { @@ -66,12 +67,12 @@ plot_metrics <- function(recipe, data_cube, metrics, statistics <- c("cov", "std_hcst", "std_obs", "var_hcst", "var_obs", "n_eff") # Loop over variables and assign colorbar and plot parameters to each metric - for (var in 1:data_cube$dims[['var']]) { - var_name <- data_cube$attrs$Variable$varName[[var]] ## For statistics + for (var in 1:cube_info$dims[['var']]) { + var_name <- cube_info$attrs$Variable$varName[[var]] ## For statistics var_metric <- lapply(metrics, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) for (name in c(skill_scores, scores, statistics, "mean_bias", "enssprerr")) { if (name %in% names(metrics)) { units <- NULL @@ -127,7 +128,7 @@ plot_metrics <- function(recipe, data_cube, metrics, cols <- colorbar[2:(length(colorbar) - 1)] col_inf <- colorbar[1] col_sup <- colorbar[length(colorbar)] - units <- data_cube$attrs$Variable$metadata[[var_name]]$units + units <- cube_info$attrs$Variable$metadata[[var_name]]$units } else if (name %in% "cov") { metric <- var_metric[[name]] display_name <- "Covariance" @@ -138,7 +139,7 @@ plot_metrics <- function(recipe, data_cube, metrics, cols <- colorbar[2:(length(colorbar) - 1)] col_inf <- colorbar[1] col_sup <- colorbar[length(colorbar)] - units <- paste0(data_cube$attrs$Variable$metadata[[var_name]]$units, "²") + units <- paste0(cube_info$attrs$Variable$metadata[[var_name]]$units, "²") } else if (name %in% "std_hcst") { metric <- var_metric[[name]] display_name <- "Hindcast Standard Deviation" @@ -149,7 +150,7 @@ plot_metrics <- function(recipe, data_cube, metrics, cols <- colorbar[2:(length(colorbar) - 1)] col_inf <- colorbar[1] col_sup <- colorbar[length(colorbar)] - units <- data_cube$attrs$Variable$metadata[[var_name]]$units + units <- cube_info$attrs$Variable$metadata[[var_name]]$units } else if (name %in% "std_obs") { metric <- var_metric[[name]] display_name <- "Observation Standard Deviation" @@ -160,7 +161,7 @@ plot_metrics <- function(recipe, data_cube, metrics, cols <- colorbar[2:(length(colorbar) - 1)] col_inf <- colorbar[1] col_sup <- 
colorbar[length(colorbar)] - units <- data_cube$attrs$Variable$metadata[[var_name]]$units + units <- cube_info$attrs$Variable$metadata[[var_name]]$units } else if (name %in% "n_eff") { metric <- var_metric[[name]] display_name <- "Effective Sample Size" @@ -171,7 +172,6 @@ plot_metrics <- function(recipe, data_cube, metrics, col_inf <- NULL col_sup <- NULL } - # Reorder dimensions metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, @@ -198,8 +198,8 @@ plot_metrics <- function(recipe, data_cube, metrics, outfile <- paste0(outdir[var], name) } # Get variable name and long name - var_name <- data_cube$attrs$Variable$varName[[var]] - var_long_name <- data_cube$attrs$Variable$metadata[[var_name]]$long_name + var_name <- cube_info$attrs$Variable$varName[[var]] + var_long_name <- cube_info$attrs$Variable$metadata[[var_name]]$long_name # Multi-panel or single-panel plots if (recipe$Analysis$Workflow$Visualization$multi_panel) { # Define titles @@ -340,7 +340,7 @@ plot_metrics <- function(recipe, data_cube, metrics, } fileout <- paste0(outfile, "_ft", forecast_time, ".pdf") # Plot - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste("Plotting", display_name)) do.call(fun, @@ -352,6 +352,6 @@ plot_metrics <- function(recipe, data_cube, metrics, } } } - info(recipe$Run$logger, - "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") + info(recipe$Run$logger, retrieve = TRUE, + "##### METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index 739f2d10535191d39175abe11c78016526e1bf6e..1817643913a315d0361bd3a2f247f7379e968afb 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -280,6 +280,6 @@ plot_most_likely_terciles <- function(recipe, } } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### MOST LIKELY TERCILE PLOTS SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 3750baead9284492691714ef50d356f7a5faa8ba..6db108a8c78431d0f56eb4571ca379024320c91a 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -12,7 +12,7 @@ source("modules/Visualization/R/plot_most_likely_terciles_map.R") source("modules/Visualization/R/plot_ensemble_mean.R") Visualization <- function(recipe, - data, + data = NULL, skill_metrics = NULL, statistics = NULL, probabilities = NULL, @@ -33,7 +33,7 @@ Visualization <- function(recipe, if (recipe$Analysis$Region$name %in% names(output_conf)) { output_conf <- output_conf[[recipe$Analysis$Region$name]] } else { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("The region name is not found in the 'output_conf' file. ", "The default plot settings will be used.")) output_conf <- NULL @@ -56,15 +56,21 @@ Visualization <- function(recipe, plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] ## TODO: Do not modify output dir here recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/plots/") - outdir <- get_dir(recipe = recipe, - variable = data$hcst$attrs$Variable$varName) + ## TODO: Improve this? 
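  ## (Hedged note: when Visualization() runs after the Compute()-based workflow,
  ## 'data' may be NULL, so the variable names are taken from the metadata
  ## carried in 'skill_metrics' rather than from data$hcst, as handled below.)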
+ if (!is.null(data$hcst)) { + variable <- data$hcst$attrs$Variable$varName + } else { + variable <- skill_metrics$metadata$attrs$Variable$varName + } + + outdir <- get_dir(recipe = recipe, variable = variable) for (directory in outdir) { dir.create(directory, showWarnings = FALSE, recursive = TRUE) } if ((is.null(skill_metrics)) && (is.null(statistics)) && (is.null(data$fcst))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The Visualization module has been called, but there is no ", "fcst in 'data', and 'skill_metrics' and 'statistics' are ", "NULL, so there is no data that can be plotted.")) @@ -77,10 +83,10 @@ Visualization <- function(recipe, # Plot skill metrics if ("skill_metrics" %in% plots) { if (!is.null(skill_metrics)) { - plot_metrics(recipe, data$hcst, skill_metrics, outdir, + plot_metrics(recipe, skill_metrics, outdir, significance, output_conf = output_conf) } else { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The skill metric plots have been requested, but the ", "parameter 'skill_metrics' is NULL")) } @@ -89,7 +95,7 @@ Visualization <- function(recipe, # Plot statistics if ("statistics" %in% plots) { if (!is.null(statistics)) { - plot_metrics(recipe, data$hcst, statistics, outdir, + plot_metrics(recipe, statistics, outdir, significance, output_conf = output_conf) } else { error(recipe$Run$logger, @@ -120,11 +126,11 @@ Visualization <- function(recipe, if (recipe$Analysis$Workflow$Visualization$mask_ens %in% c('both', TRUE)) { if (is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, skill_metrics ", "need to be provided to be masked.")) } else if (!('enscorr' %in% names(skill_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, enscor metric ", "need to be provided to be masked")) } else { @@ -137,11 +143,11 @@ Visualization <- function(recipe, # Plots with dotted negative correlated in ens-mean-fcst if (recipe$Analysis$Workflow$Visualization$dots %in% c('both', TRUE)) { if (is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, skill_metrics ", "need to be provided for the dots")) } else if (!('enscorr' %in% names(skill_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, enscor metric ", "needs to be provided for the dots")) } else { @@ -153,7 +159,7 @@ Visualization <- function(recipe, } } else { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The forecast ensemble mean plot has been requested, but ", "there is no fcst element in 'data'")) } @@ -184,11 +190,11 @@ Visualization <- function(recipe, if (recipe$Analysis$Workflow$Visualization$mask_terciles %in% c('both', TRUE)) { if (is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, skill_metrics ", "need to be provided to be masked.")) } else if (!('rpss' %in% names(skill_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, rpss metric ", "need to be provided to be masked")) } else { @@ -202,11 +208,11 @@ Visualization <- function(recipe, # Plots with dotted terciles if (recipe$Analysis$Workflow$Visualization$dots %in% c('both', TRUE)) { if 
(is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, skill_metrics ", "need to be provided for the dots")) } else if (!('rpss' %in% names(skill_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, rpss metric ", "needs to be provided for the dots")) } else { @@ -218,7 +224,7 @@ Visualization <- function(recipe, } } } else { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, both the fcst and the ", "probabilities must be provided.")) } @@ -238,7 +244,7 @@ Visualization <- function(recipe, system(system_command) } unlink(plot_files) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste0("##### PLOT FILES CONVERTED TO ", toupper(extension), " #####")) } } diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml index 2028fc468766a74bdc32b2ea70e60f89b637bcaa..ff97ed56156d4a83e543caf852938ea65ab64373 100644 --- a/recipes/atomic_recipes/recipe_decadal.yml +++ b/recipes/atomic_recipes/recipe_decadal.yml @@ -62,6 +62,6 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/nperez/ - code_dir: /esarchive/scratch/nperez/git4/sunset/ + output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/recipes/atomic_recipes/recipe_seasonal-tests.yml b/recipes/atomic_recipes/recipe_seasonal-tests.yml new file mode 100644 index 0000000000000000000000000000000000000000..676cea44d476f52be336535ebddf6ad3cac52556 --- /dev/null +++ b/recipes/atomic_recipes/recipe_seasonal-tests.yml @@ -0,0 +1,49 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas prlr + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5.1 + Multimodel: False + Reference: + name: CERRA + Time: + sdate: '0101' + fcst_year: + hcst_start: '2000' + hcst_end: '2005' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: 30 + latmax: 50 + lonmin: -10 + lonmax: 30 + Regrid: + method: conservative + type: to_system + Workflow: + Calibration: + method: raw + Anomalies: + compute: yes + cross_validation: yes + Skill: + metric: mean_bias EnsCorr RPSS CRPSS EnsSprErr # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 14 + remove_NAs: yes + Output_format: Scorecards +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/recipes/atomic_recipes/recipe_system7c3s-tas.yml b/recipes/atomic_recipes/recipe_system7c3s-tas.yml index 0b1fef13b55a4b6042ea027609ae491bbd57ad59..9f82ea5f02732dcc63f6060b43ef017c5f6239db 100644 --- a/recipes/atomic_recipes/recipe_system7c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-tas.yml @@ -36,7 +36,7 @@ Analysis: method: mse_min save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' Skill: - metric: BSS10 BSS90 + metric: RPSS # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS save: 'all' # 'all'/'none' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10]] @@ -55,3 +55,6 @@ Run: Terminal: yes output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + startR_workflow: + modules: calibration, anomalies, skill # Modules to run inside Compute(), in order + chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml new file mode 100644 index 0000000000000000000000000000000000000000..f082a1ed2102b4e911da9b74bcef4af63e2a1e5a --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_compute.yml @@ -0,0 +1,83 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: psl + freq: monthly_mean + # units: {tas: C} + Datasets: + System: + name: ECMWF-SEAS5 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: # '2020' + hcst_start: '2000' + hcst_end: '2010' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 + Regrid: + method: bilinear + type: "r180x90" #to_reference #'r360x181' + Workflow: + Anomalies: + compute: yes # yes/no, default yes + cross_validation: yes # yes/no, default yes + save: 'all' # 'all'/'none'/'exp_only'/'fcst_only' + Calibration: + method: raw + save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' + Indices: + NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes} + # Downscaling: + # # Assumption 1: leave-one-out cross-validation is always applied + # # Assumption 2: for analogs, we select the best analog (minimum distance) + # type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + # int_method: conservative # regridding method accepted by CDO. + # bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + # lr_method: # If type intlr. 
Options: 'basic', 'large_scale', '4nn' + # log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + # target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + # nanalogs: # If type analgs. Number of analogs to be searched + # save: 'all' # 'all'/'none'/'exp_only' + Skill: + metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + save: 'all' # 'all'/'none' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' + Visualization: + plots: skill_metrics, forecast_ensemble_mean + multi_panel: no + projection: cylindrical_equidistant + Indicators: + index: no + ncores: 10 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + startR_workflow: + modules: anomalies skill # Modules to run inside Compute(), in order + return: hcst, fcst, obs # Choose only if they will fit into memory + save: hcst, fcst, obs # Only if returned + chunk_along: {time: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + run_on: local # options: local, as_machine, nord3 + chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' + expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O + hpc_user: bsc32762 # your hpc username. Only if run_on is not 'local' + slurm_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] # Only if run_on is not 'local' + + diff --git a/recipes/atomic_recipes/recipe_test_compute_aho.yml b/recipes/atomic_recipes/recipe_test_compute_aho.yml new file mode 100644 index 0000000000000000000000000000000000000000..22a322f5e982daaaa21ee769955589af2cc167f9 --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_compute_aho.yml @@ -0,0 +1,66 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas prlr + freq: monthly_mean + Datasets: + System: + name: Meteo-France-System7 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '2000' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system #to_reference #'r360x181' + Workflow: + Anomalies: + compute: yes # yes/no, default yes + cross_validation: yes # yes/no, default yes + save: 'all' # 'all'/'none'/'exp_only'/'fcst_only' + Calibration: + method: mse_min + save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' + Skill: + metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + save: 'all' # 'all'/'none' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' + Visualization: + plots: skill_metrics, forecast_ensemble_mean + multi_panel: no + projection: cylindrical_equidistant + Indicators: + index: no + ncores: 10 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/aho/tmp/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + startR_workflow: + modules: anomalies calibration skill # Modules to run inside Compute(), in order + chunk_along: {var: 1, time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + run_on: nord3 # options: local (WS), as_machime, nord3 + chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss + expid: a68e # leave empty to create a new one? + hpc_user: bsc32734 # your hpc username + slurm_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] + diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml new file mode 100644 index 0000000000000000000000000000000000000000..ef500b43302408f3c00c68075ca295d551473d82 --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml @@ -0,0 +1,74 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: HadGEM3-GC31-MM #EC-Earth3-i4 #CanESM5 + member: r1i1p1f2,r2i1p1f2,r3i1p1f2 #'all' + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst_year: [2020,2021] + hcst_start: 1990 + hcst_end: 2000 +# season: 'Annual' + ftime_min: 1 + ftime_max: 24 + Region: + latmin: -10 #-90 + latmax: 20 #90 + lonmin: 0 + lonmax: 30 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Anomalies: + compute: no + cross_validation: + save: + Time_aggregation: + execute: yes + method: 'average' + user_def: + JJA: !expr sort(c(seq(6,24, 12), seq(7,24,12), seq(8, 24, 12))) + MMA: !expr sort(c(seq(3,24, 12), seq(4,24,12), seq(5, 24, 12))) + SON: !expr sort(c(seq(9,24, 12), seq(10,24,12), seq(11, 24, 12))) + DJF: !expr sort(c(seq(12,24, 12), seq(13,24,12), seq(2, 24, 12))) + Calibration: + method: bias + save: 'all' + Skill: + metric: RPSS + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + ncores: 10 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: 
/esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + startR_workflow: + modules: 'aggregation skill' # Modules to run inside Compute(), in order + return: hcst, fcst, obs # Choose only if they will fit into memory + save: hcst, fcst, obs # Only if returned + chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + run_on: local # options: local, as_machine, nord3 + chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' + expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O + hpc_user: bsc32762 # your hpc username. Only if run_on is not 'local' + slurm_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] # Only if run_on is not 'local' diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml b/recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml new file mode 100644 index 0000000000000000000000000000000000000000..dd78e5d2a3ab601b1fdcf0646693c11dc9f883e5 --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml @@ -0,0 +1,74 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: EC-Earth3-i4 #CanESM5 + member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst_year: [2020,2021] + hcst_start: 1990 + hcst_end: 1994 +# season: 'Annual' + ftime_min: 1 + ftime_max: 12 + Region: + latmin: -10 #-90 + latmax: 20 #90 + lonmin: 0 + lonmax: 30 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Anomalies: + compute: no + cross_validation: + save: + Time_aggregation: + execute: yes + method: 'average' + user_def: + JJA: !expr sort(c(seq(6,24, 12), seq(7,24,12), seq(8, 24, 12))) + MMA: !expr sort(c(seq(3,24, 12), seq(4,24,12), seq(5, 24, 12))) + SON: !expr sort(c(seq(9,24, 12), seq(10,24,12), seq(11, 24, 12))) + DJF: !expr sort(c(seq(12,24, 12), seq(13,24,12), seq(2, 24, 12))) + Calibration: + method: bias + save: 'all' + Skill: + metric: RPSS + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + ncores: 10 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + startR_workflow: + modules: 'aggregation skill' # Modules to run inside Compute(), in order + return: hcst, fcst, obs # Choose only if they will fit into memory + save: hcst, fcst, obs # Only if returned + chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + run_on: local # options: local, as_machine, nord3 + chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' + expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O + hpc_user: bsc32762 # your hpc username. 
Only if run_on is not 'local' + slurm_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] # Only if run_on is not 'local' diff --git a/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml b/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml new file mode 100644 index 0000000000000000000000000000000000000000..2a34c36eb1052a94104c0dd641792b76b329fae5 --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml @@ -0,0 +1,83 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: daily_mean + # units: {tas: C} + Datasets: + System: + name: ECMWF-SEAS5 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '0101' + fcst_year: # '2020' + hcst_start: '2000' + hcst_end: '2010' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 + Regrid: + method: bilinear + type: "r180x90" #to_reference #'r360x181' + Workflow: + Anomalies: + compute: yes # yes/no, default yes + cross_validation: yes # yes/no, default yes + save: 'all' # 'all'/'none'/'exp_only'/'fcst_only' + Calibration: + method: raw + save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' + # Indices: + # NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes} + # Downscaling: + # # Assumption 1: leave-one-out cross-validation is always applied + # # Assumption 2: for analogs, we select the best analog (minimum distance) + # type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + # int_method: conservative # regridding method accepted by CDO. + # bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + # lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn' + # log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + # target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + # nanalogs: # If type analgs. Number of analogs to be searched + # save: 'all' # 'all'/'none'/'exp_only' + Skill: + metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + save: 'all' # 'all'/'none' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' + Visualization: + plots: skill_metrics, forecast_ensemble_mean + multi_panel: no + projection: cylindrical_equidistant + Indicators: + index: no + ncores: 10 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + startR_workflow: + modules: anomalies skill # Modules to run inside Compute(), in order + return: hcst, fcst, obs # Choose only if they will fit into memory + save: hcst, fcst, obs # Only if returned + chunk_along: {time: 6} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + run_on: local # options: local, as_machine, nord3 + chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' + expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O + hpc_user: bsc32762 # your hpc username. 
Only if run_on is not 'local' + slurm_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] # Only if run_on is not 'local' + + diff --git a/recipes/atomic_recipes/recipe_test_compute_vagudets.yml b/recipes/atomic_recipes/recipe_test_compute_vagudets.yml new file mode 100644 index 0000000000000000000000000000000000000000..cd8cc8a8d90b3be4d0ed4540fd5eb78faa2e2be1 --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_compute_vagudets.yml @@ -0,0 +1,65 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas prlr + freq: monthly_mean + Datasets: + System: + name: Meteo-France-System7 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '2000' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system #to_reference #'r360x181' + Workflow: + Anomalies: + compute: yes # yes/no, default yes + cross_validation: yes # yes/no, default yes + save: 'all' # 'all'/'none'/'exp_only'/'fcst_only' + Calibration: + method: mse_min + save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' + Skill: + metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + save: 'all' # 'all'/'none' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' + Visualization: + plots: skill_metrics, forecast_ensemble_mean + multi_panel: no + projection: cylindrical_equidistant + Indicators: + index: no + ncores: 10 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + startR_workflow: + modules: anomalies calibration skill # Modules to run inside Compute(), in order + chunk_along: {time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + run_on: local # opt ions: local, cluster + chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss + expid: a6cb # leave empty to create a new one? 
+ hpc_user: bsc32762 # your hpc username + diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 6cfa4384ee93c907789046c1de9955bd554e012d..1d69bffca2725fa8dc5aa9125af7cd0a20a01d56 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -146,7 +146,7 @@ is.list(skill_metrics), TRUE ) expect_equal( -names(skill_metrics), +names(skill_metrics)[!names(skill_metrics) == "metadata"], c("enscorr_specs") ) expect_equal( diff --git a/tests/testthat/test-seasonal_downscaling.R b/tests/testthat/test-seasonal_downscaling.R index 8a52657a8b4dfd3665d82b27af46af6a60375b63..78b043b6ae7dc50be1f5a23752cdaab3dcaaa1f3 100644 --- a/tests/testthat/test-seasonal_downscaling.R +++ b/tests/testthat/test-seasonal_downscaling.R @@ -213,7 +213,7 @@ is.list(skill_metrics), TRUE ) expect_equal( -names(skill_metrics), +names(skill_metrics)[!names(skill_metrics) == "metadata"], c("bss10", "bss10_significance", "crpss", "crpss_significance","rpss", "rpss_significance", "mean_bias") ) expect_equal( diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 18bde4c01ec55b2fb29e618e588af74dc167c5f7..b1b775ea24e0fc4f222f7a5b3fa4aceed4431d22 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -183,7 +183,7 @@ is.list(skill_metrics), TRUE ) expect_equal( -names(skill_metrics), +names(skill_metrics)[!names(skill_metrics) == "metadata"], c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", "enscorr_significance", "corr_individual_members", "corr_individual_members_significance", "enscorr_specs") diff --git a/tests/testthat/test-seasonal_monthly_statistics.R b/tests/testthat/test-seasonal_monthly_statistics.R index cae7611186539d4de6d5985d7a97bbf95d7a2e87..57564d073dd7af8f3cc1d6b1794c9755340214f8 100644 --- a/tests/testthat/test-seasonal_monthly_statistics.R +++ b/tests/testthat/test-seasonal_monthly_statistics.R @@ -22,7 +22,7 @@ statistics <- Statistics(recipe, data) # Saving suppressWarnings({invisible(capture.output( Saving(recipe = recipe, data = data, - skill_metrics = statistics) + skill_metrics = c(statistics, metadata = data$hcst)) ))}) # Plotting diff --git a/tools/Utils.R b/tools/Utils.R index c0acf3740a10d1d9da97d8363b52f5050260c97d..a5cc84c2e1df671eeb0e622b1ec502fdc904839c 100644 --- a/tools/Utils.R +++ b/tools/Utils.R @@ -1,13 +1,13 @@ ## TODO: Write header ## TODO: Add if 'DEBUG' -.log_memory_usage <- function(logger, when) { - debug(logger, paste0(when, ":")) +.log_memory_usage <- function(logger, retrieve = TRUE, when = NULL) { + debug(logger, retrieve = retrieve, paste0(when, ":")) mem_info <- capture.output(memuse::Sys.meminfo()) for (i in mem_info) { - debug(recipe$Run$logger, i) + debug(recipe$Run$logger, retrieve = retrieve, i) } proc_mem <- capture.output(memuse::Sys.procmem()) for (i in proc_mem) { - debug(recipe$Run$logger, i) + debug(recipe$Run$logger, retrieve = retrieve, i) } } diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 245e4e3dc42501c310f57410420e419bcafc022a..8ca1890d19e79e76c941336f56b85a9a97256001 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,8 +1,8 @@ check_recipe <- function(recipe) { # recipe: yaml recipe already read it ## TODO: set up logger-less case - info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) - + info(recipe$Run$logger, retrieve = TRUE, + paste("Checking recipe:", recipe$recipe_path)) # 
--------------------------------------------------------------------- # ANALYSIS CHECKS # --------------------------------------------------------------------- @@ -21,20 +21,20 @@ check_recipe <- function(recipe) { # Check basic elements in recipe:Analysis: if (!("Analysis" %in% names(recipe))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The recipe must contain an element called 'Analysis'.") error_status <- TRUE } if (!all(PARAMS %in% names(recipe$Analysis))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Analysis' in the recipe must contain all of ", "the following: ", paste(PARAMS, collapse = ", "), ".")) error_status <- TRUE } if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Horizon' in the recipe must be one of the ", "following: ", paste(HORIZONS, collapse = ", "), ".")) error_status <- T @@ -44,7 +44,7 @@ check_recipe <- function(recipe) { ## TODO: Specify filesystem archive <- read_yaml(ARCHIVE_SEASONAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Time' in the recipe must contain all of the ", "following: ", paste(TIME_SETTINGS_SEASONAL, collapse = ", "), ".")) @@ -53,7 +53,7 @@ check_recipe <- function(recipe) { } else if (tolower(recipe$Analysis$Horizon) == "decadal") { archive <- read_yaml(ARCHIVE_DECADAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Time' in the recipe must contain all of the ", "following: ", paste(TIME_SETTINGS_DECADAL, collapse = ", "), ".")) @@ -74,7 +74,7 @@ check_recipe <- function(recipe) { (!is.null(recipe$Analysis$Variables$sic_threshold))) { if (!is.numeric(recipe$Analysis$Variables$sic_threshold) || !dplyr::between(recipe$Analysis$Variables$sic_threshold, 0, 1)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The element Analysis:Variables:sic_threshold must be a", "numeric value between 0 and 1.")) error_status <- TRUE @@ -84,14 +84,14 @@ check_recipe <- function(recipe) { if (!is.null(archive)) { if (!all(recipe$Analysis$Datasets$System$name %in% c(names(archive$System), 'Multimodel'))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The specified System name was not found in the archive.") error_status <- TRUE } # Check reference names if (!all(recipe$Analysis$Datasets$Reference$name %in% names(archive$Reference))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The specified Reference name was not found in the archive.") error_status <- TRUE } @@ -138,36 +138,36 @@ check_recipe <- function(recipe) { # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || (!is.integer(recipe$Analysis$Time$ftime_min))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'ftime_min' must be an integer larger than 0.") error_status <- TRUE } if ((!(recipe$Analysis$Time$ftime_max > 0)) || (!is.integer(recipe$Analysis$Time$ftime_max))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'ftime_max' must be an integer larger than 0.") error_status <- TRUE } if (recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { - 
error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "'ftime_max' cannot be smaller than 'ftime_min'.") error_status <- TRUE } # Check consistency of hindcast years if (!(as.numeric(recipe$Analysis$Time$hcst_start) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_start > 0))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'hcst_start' must be a valid year.") error_status <- TRUE } if (!(as.numeric(recipe$Analysis$Time$hcst_end) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_end > 0))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'hcst_end' must be a valid year.") error_status <- TRUE } if (recipe$Analysis$Time$hcst_end < recipe$Analysis$Time$hcst_start) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "'hcst_end' cannot be smaller than 'hcst_start'.") error_status <- TRUE } @@ -183,19 +183,19 @@ check_recipe <- function(recipe) { ## TODO: To be implemented in the future # if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && # tolower(recipe$Analysis$Horizon) != "subseasonal") { - # warn(recipe$Run$logger, + # warn(recipe$Run$logger, retrieve = TRUE, # paste("Only subseasonal verification allows multiple forecast days."), # "Element fcst_day in recipe set as 1.") # recipe$Analysis$Time$sdate$fcst_day <- '01' # } ## TODO: Delete, this parameter was deprecated # if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) { - # error(recipe$Run$logger, + # error(recipe$Run$logger, retrieve = TRUE, # paste("The element 'fcst_sday' in the recipe should be defined.")) # } if (is.null(recipe$Analysis$Time$fcst_year)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste("The element 'fcst_year' is not defined in the recipe.", "No forecast year will be used.")) } @@ -212,7 +212,7 @@ check_recipe <- function(recipe) { # Regrid checks: if (length(recipe$Analysis$Regrid) != 2) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'Regrid' element must specify the 'method' and 'type'.") error_status <- TRUE } @@ -227,7 +227,7 @@ check_recipe <- function(recipe) { # ... 
# calculate number of workflows to create for each variable and if (length(recipe$Analysis$Horizon) > 1) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Only one single Horizon can be specified in the recipe") error_status <- TRUE } @@ -235,7 +235,7 @@ check_recipe <- function(recipe) { ## TODO: Refine this # nvar <- length(recipe$Analysis$Variables) # if (nvar > 2) { - # error(recipe$Run$logger, + # error(recipe$Run$logger, retrieve = TRUE, # "Only two type of Variables can be listed: ECVs and Indicators.") # stop("EXECUTION FAILED") # } @@ -258,7 +258,7 @@ check_recipe <- function(recipe) { if (is.null(names(recipe$Analysis$Region))) { for (region in recipe$Analysis$Region) { if (!all(LIMITS %in% names(region))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) error_status <- T @@ -267,7 +267,7 @@ check_recipe <- function(recipe) { if (length(recipe$Analysis$Region) > 1) { for (region in recipe$Analysis$Region) { if (!("name" %in% names(region)) || (is.null(region$name))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("If more than one region has been defined, every region", "must have a unique name.")) } @@ -275,7 +275,7 @@ check_recipe <- function(recipe) { } # Atomic recipe } else if (!all(LIMITS %in% names(recipe$Analysis$Region))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) error_status <- TRUE @@ -393,19 +393,19 @@ check_recipe <- function(recipe) { recipe$Analysis$Workflow$Calibration$method == FALSE) || tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || is.null(recipe$Analysis$Workflow$Calibration$method)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, "No Calibration method was specified, raw data verification.") recipe$Analysis$Workflow$Calibration$method <- 'raw' } else { if (is.null(recipe$Analysis$Workflow$Calibration$method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'Calibration' element 'method' must be specified.") error_status <- TRUE } SAVING_OPTIONS_CALIB <- c("all", "none", "exp_only", "fcst_only") if ((is.null(recipe$Analysis$Workflow$Calibration$save)) || (!(recipe$Analysis$Workflow$Calibration$save %in% SAVING_OPTIONS_CALIB))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify which Calibration module outputs you want ", "to save with the 'save' parameter. 
The options are: ", paste(SAVING_OPTIONS_CALIB, collapse = ", "), ".")) @@ -416,18 +416,18 @@ check_recipe <- function(recipe) { if ("Anomalies" %in% names(recipe$Analysis$Workflow)) { # Computation and cross-validation checks if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'compute' must be defined under 'Anomalies'.") error_status <- TRUE } else if (!(is.logical(recipe$Analysis$Workflow$Anomalies$compute))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Parameter 'Anomalies:compute' must be a logical value", "(True/False or yes/no).")) error_status <- TRUE } else if ((recipe$Analysis$Workflow$Anomalies$compute)) { # Cross-validation check if (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("If anomaly computation is requested, parameter", "'cross_validation' must be defined under 'Anomalies', and it must be a logical value (True/False or yes/no).")) @@ -437,7 +437,7 @@ check_recipe <- function(recipe) { SAVING_OPTIONS_ANOM <- c("all", "none", "exp_only", "fcst_only") if ((is.null(recipe$Analysis$Workflow$Anomalies$save)) || (!(recipe$Analysis$Workflow$Anomalies$save %in% SAVING_OPTIONS_ANOM))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify which Anomalies module outputs you want ", "to save with the 'save' parameter. The options are: ", paste(SAVING_OPTIONS_ANOM, collapse = ", "), ".")) @@ -459,12 +459,12 @@ check_recipe <- function(recipe) { if ("type" %in% names(downscal_params)) { if (length(downscal_params$type) == 0) { downscal_params$type <- "none" - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste("Downscaling 'type' is empty in the recipe, setting it to", "'none'.")) } if (!(downscal_params$type %in% DOWNSCAL_TYPES)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The type of Downscaling request in the recipe is not ", "available. 
It must be one of the following: ", paste(DOWNSCAL_TYPES, collapse = ", "), ".")) @@ -472,14 +472,14 @@ check_recipe <- function(recipe) { } if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && (is.null(downscal_params$target_grid))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("A target grid is required for the downscaling method", "requested in the recipe.")) error_status <- T } if (downscal_params$type == "int") { if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'int' was requested, but no", "interpolation method is provided in the recipe.")) error_status <- T @@ -487,7 +487,7 @@ check_recipe <- function(recipe) { } else if (downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) { if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type", downscal_params$type, "was requested in the recipe, but no", "interpolation method is provided.")) @@ -495,24 +495,24 @@ check_recipe <- function(recipe) { } } else if (downscal_params$type == "intbc") { if (is.null(downscal_params$bc_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'intbc' was requested in the recipe, but", "no bias correction method is provided.")) error_status <- T } else if (!(downscal_params$bc_method %in% BC_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The accepted Bias Correction methods for the downscaling", " module are: ", paste(BC_METHODS, collapse = ", "), ".")) error_status <- T } } else if (downscal_params$type == "intlr") { if (length(downscal_params$lr_method) == 0) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'intlr' was requested in the recipe, but", "no linear regression method was provided.")) error_status <- T } else if (!(downscal_params$lr_method %in% LR_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The accepted linear regression methods for the", " downscaling module are: ", paste(LR_METHODS, collapse = ", "), ".")) @@ -520,24 +520,24 @@ check_recipe <- function(recipe) { } } else if (downscal_params$type == "analogs") { if (is.null(downscal_params$nanalogs)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type is 'analogs, but the number of analogs", "has not been provided in the recipe. 
The default is 3.")) } } else if (downscal_params$type == "logreg") { if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'logreg' was requested in the recipe, but", "no interpolation method was provided.")) error_status <- T } if (is.null(downscal_params$log_reg_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'logreg' was requested in the recipe,", "but no logistic regression method is provided.")) error_status <- T } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The accepted logistic regression methods for the ", "downscaling module are: ", paste(LOGREG_METHODS, collapse = ", "), ".")) @@ -552,19 +552,19 @@ check_recipe <- function(recipe) { nino_indices <- paste0("nino", c("1+2", "3", "3.4", "4")) indices <- c("nao", nino_indices) if (!("anomalies" %in% tolower(names(recipe$Analysis$Workflow)))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Indices uses Anomalies as input, but Anomalies are missing", "in the recipe.")) error_status <- TRUE } else if (!(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Indices uses Anomalies as input, but the parameter", "'Anomalies:compute' is set as no/False.")) error_status <- T } recipe_indices <- tolower(names(recipe$Analysis$Workflow$Indices)) if (!all(recipe_indices %in% indices)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Some of the indices under 'Indices' are not available.", "The available Indices are: 'NAO', 'Nino1+2', 'Nino3', ", "'Nino3.4' and 'Nino4'.")) @@ -573,7 +573,7 @@ check_recipe <- function(recipe) { # Check that variables correspond with indices requested if (("nao" %in% recipe_indices) && (!all(recipe_variables %in% c("psl", "z500")))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("It is not possible to compute the NAO with some of the ", "variables requested. To compute the NAO, please make sure", "your recipe requests only psl and/or z500.")) @@ -581,7 +581,7 @@ check_recipe <- function(recipe) { } if ((any(nino_indices %in% recipe_indices)) && (!all(recipe_variables %in% c("tos", "sst")))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("It is not possible to compute El Nino indices with some ", "of the variables requested. To compute El Nino, please ", "make sure your recipe requests only tos.")) @@ -599,14 +599,14 @@ check_recipe <- function(recipe) { "frpss_specs", "bss10_specs", "bss90_specs", "rms") if ("Skill" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Skill$metric)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'metric' must be defined under 'Skill'.") error_status <- T } else { requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] if (!all(tolower(requested_metrics) %in% AVAILABLE_METRICS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Some of the metrics requested under 'Skill' are not ", "available in SUNSET. 
Check the documentation to see the ", "full list of accepted skill metrics.")) @@ -625,7 +625,7 @@ check_recipe <- function(recipe) { SAVING_OPTIONS_SKILL <- c("all", "none") if ((is.null(recipe$Analysis$Workflow$Skill$save)) || (!(recipe$Analysis$Workflow$Skill$save %in% SAVING_OPTIONS_SKILL))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify whether you want to save the Skill metrics ", "with the 'save' parameter. The options are: ", paste(SAVING_OPTIONS_SKILL, collapse = ", "), ".")) @@ -636,11 +636,11 @@ check_recipe <- function(recipe) { # Probabilities if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'percentiles' must be defined under 'Probabilities'.") error_status <- TRUE } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Parameter 'Probabilities:percentiles' expects a list.", "See documentation in the wiki for examples.")) error_status <- TRUE @@ -649,7 +649,7 @@ check_recipe <- function(recipe) { SAVING_OPTIONS_PROBS <- c("all", "none", "bins_only", "percentiles_only") if ((is.null(recipe$Analysis$Workflow$Probabilities$save)) || (!(recipe$Analysis$Workflow$Probabilities$save %in% SAVING_OPTIONS_PROBS))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify whether you want to save the percentiles ", "and probability bins with the 'save' parameter. The ", "options are: ", @@ -664,14 +664,14 @@ check_recipe <- function(recipe) { "most_likely_terciles", "statistics") # Separate plots parameter and check if all elements are in PLOT_OPTIONS if (is.null(recipe$Analysis$Workflow$Visualization$plots)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'plots' element must be defined under 'Visualization'.") error_status <- TRUE } else { plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] if (!all(plots %in% PLOT_OPTIONS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The options available for the plots are: ", paste(PLOT_OPTIONS, collapse = ", "), ".")) error_status <- TRUE @@ -680,41 +680,41 @@ check_recipe <- function(recipe) { # Check multi_panel option ## TODO: Multi-panel if (is.null(recipe$Analysis$Workflow$Visualization$multi_panel)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:multi_panel not specified for the plots, the", " default is 'no/False'.")) } else if (!is.logical(recipe$Analysis$Workflow$Visualization$multi_panel)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Parameter 'Visualization:multi_panel' must be a logical ", "value: either 'yes/True' or 'no/False'")) error_status <- TRUE } # Check projection if (is.null(recipe$Analysis$Workflow$Visualization$projection)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:projection not specified for the plots, the ", "default projection is cylindrical equidistant.")) } ## TODO: Add significance? 
if ("most_likely_terciles" %in% plots) { if (is.null(recipe$Analysis$Workflow$Visualization$mask_terciles)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:mask_terciles not set for tercile plots,", " the default setting is: 'no/False'.")) } else if (!(recipe$Analysis$Workflow$Visualization$mask_terciles %in% c(TRUE, FALSE, "both"))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Parameter Visualization:mask_terciles must be one of: ", "yes/True, no/False, 'both'")) error_status <- TRUE } if (is.null(recipe$Analysis$Workflow$Visualization$dots)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:dots not set for tercile plots, the default", " setting is: 'no/False'.")) } else if (!(recipe$Analysis$Workflow$Visualization$dots %in% c(TRUE, FALSE, "both"))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Parameter Visualization:plots must be one of: ", "yes/True, no/False, 'both'")) } @@ -740,7 +740,7 @@ check_recipe <- function(recipe) { if ("Scorecards" %in% names(recipe$Analysis$Workflow)) { if (recipe$Analysis$Workflow$Scorecards$execute == TRUE) { if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'metric' must be defined under 'Scorecards'.") error_status <- T } else { @@ -801,27 +801,27 @@ check_recipe <- function(recipe) { stop("The recipe must contain an element named 'Run'.") } if (!all(RUN_FIELDS %in% names(recipe$Run))) { - error(recipe$Run$logger, paste("Recipe element 'Run' must contain", + error(recipe$Run$logger, retrieve = TRUE, paste("Recipe element 'Run' must contain", "all of the following fields:", paste(RUN_FIELDS, collapse=", "), ".")) error_status <- TRUE } if (!is.character(recipe$Run$output_dir)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Run element 'output_dir' in", recipe$name, "file", "should be a character string indicating the path where", "the outputs should be saved.")) error_status <- TRUE } if (!is.character(recipe$Run$code_dir)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Run element 'code_dir' in", recipe$name, "file ", "should be a character string indicating the path", "where the code is.")) error_status <- TRUE } if (!is.logical(recipe$Run$Terminal)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Run element 'Terminal' in", recipe$name, "file ", "should be a boolean value indicating whether or not to", "print the logs in the terminal.")) @@ -830,7 +830,7 @@ check_recipe <- function(recipe) { ## TODO: Review this case, since default value is allowed if (!is.character(recipe$Run$Loglevel) || !any(recipe$Run$Loglevel %in% LOG_LEVELS)) { - error(logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Run element 'Loglevel' in", recipe$name, "file", "should be a character string specifying one of the levels available:", paste0(LOG_LEVELS, collapse='/'))) @@ -854,39 +854,39 @@ check_recipe <- function(recipe) { auto_specs <- read_yaml("conf/autosubmit.yml")[[recipe$Run$filesystem]] # Check that the autosubmit configuration parameters are present if (!("auto_conf" %in% names(recipe$Run))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'auto_conf' is missing from the 'Run' section of the recipe.") error_status <- TRUE } else if (!all(AUTO_PARAMS %in% 
names(recipe$Run$auto_conf))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Run:auto_conf' must contain all of the ", "following: ", paste(AUTO_PARAMS, collapse = ", "), ".")) error_status <- TRUE } # Check that the script is not NULL and exists if (is.null(recipe$Run$auto_conf$script)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "A script must be provided to run the recipe with autosubmit.") error_status <- TRUE } else if (!file.exists(recipe$Run$auto_conf$script)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Could not find the file for the script in 'auto_conf'.") error_status <- TRUE } # Check that the experiment ID exists if (is.null(recipe$Run$auto_conf$expid)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Autosubmit EXPID is missing. You can create one by", "running the following commands on the autosubmit machine:")) - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("module load", auto_specs$module_version)) - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("autosubmit expid -H", auto_specs$platform, "-d ")) error_status <- TRUE } else if (!dir.exists(paste0(auto_specs$experiment_dir, recipe$Run$auto_conf$expid))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("No folder in ", auto_specs$experiment_dir, " for the EXPID", recipe$Run$auto_conf$expid, ". Please make sure it is correct.")) @@ -894,17 +894,17 @@ check_recipe <- function(recipe) { } if ((recipe$Run$auto_conf$email_notifications) && (is.null(recipe$Run$auto_conf$email_address))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Autosubmit notifications are enabled but email address is empty!") error_status <- TRUE } if (is.null(recipe$Run$auto_conf$hpc_user)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'Run:auto_conf:hpc_user' field can not be empty.") error_status <- TRUE } else if ((recipe$Run$filesystem == "esarchive") && (!substr(recipe$Run$auto_conf$hpc_user, 1, 6) == "bsc032")) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Please check your hpc_user ID. 
It should look like: 'bsc032xxx'") error_status <- TRUE } @@ -935,6 +935,81 @@ check_recipe <- function(recipe) { } } + # --------------------------------------------------------------------- + # STARTR WORKFLOW CHECKS + # --------------------------------------------------------------------- + + STARTR_PARAMS <- c("modules", "chunk_along") + STARTR_MODULES <- c("calibration", "anomalies", "downscaling", + "skill", "probabilities", "indices", "aggregation") + CHUNK_DIMS <- c("var", "time", "latitude", "longitude") + MODULES_USING_LATLON <- c("downscaling", "indices") + MODULES_USING_TIME <- c("indicators", "aggregation") + MODULES_USING_VAR <- c("indicators") + # Check that all required fields are present + if ("startR_workflow" %in% names(recipe$Run)) { + if (!all(STARTR_PARAMS %in% names(recipe$Run$startR_workflow))) { + error(recipe$Run$logger, retrieve = TRUE, + paste0("The element 'Run:startR_workflow' must contain all of the ", + "following: ", paste(STARTR_PARAMS, collapse = ", "), ".")) + error_status <- T + } else { + modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, + ", | |,")[[1]]) + # Check modules + if (!any(modules %in% STARTR_MODULES)) { + error(recipe$Run$logger, retrieve = TRUE, + paste0("The element 'Run:startR_workflow:modules' can only ", + "contain the following modules: ", + paste(STARTR_MODULES, collapse = ", "), ".")) + error_status <- T + } + # Check if chunking dims are among the allowed dimensions + if (!(any(names(recipe$Run$startR_workflow$chunk_along) %in% CHUNK_DIMS))) { + error(recipe$Run$logger, retrieve = TRUE, + paste0("The dimensions in 'Run:startR_workflow:chunk_along' can ", + "only be: ", paste(CHUNK_DIMS, collapse = ", "), ".")) + error_status <- T + } + # Check compatibility between module selection and chunking dimensions + # spatial dimensions + if (any(c("latitude", "longitude") %in% names(recipe$Run$startR_workflow$chunk_along)) && + any(modules %in% MODULES_USING_LATLON)) { + error(recipe$Run$logger, retrieve = TRUE, + paste0("latitude and longitude cannot be chunking dimensions if ", + "any of the following modules is in the startR workflow: ", + paste(MODULES_USING_LATLON, collapse = ", "), ".")) + error_status <- T + } + # 'time' + if ("time" %in% names(recipe$Run$startR_workflow$chunk_along) && + any(modules %in% MODULES_USING_TIME)) { + error(recipe$Run$logger, retrieve = TRUE, + paste0("time cannot be a chunking dimension if any of the ", + "following modules is in the startR workflow: ", + paste(MODULES_USING_TIME, collapse = ", "), ".")) + error_status <- T + } + # 'var' + if ("var" %in% names(recipe$Run$startR_workflow$chunk_along) && + any(modules %in% MODULES_USING_VAR)) { + error(recipe$Run$logger, retrieve = TRUE, + paste0("var cannot be a chunking dimension if any of the ", + "following modules is in the startR workflow: ", + paste(MODULES_USING_VAR, collapse = ", "), ".")) + error_status <- T + } + # Remove chunks of length 1 + if (any(recipe$Run$startR_workflow$chunk_along == 1)) { + recipe$Run$startR_workflow$chunk_along <- + recipe$Run$startR_workflow$chunk_along[-which(recipe$Run$startR_workflow$chunk_along == 1)] + warn(recipe$Run$logger, retrieve = TRUE, + paste0("Chunks of length 1 defined in startR_workflow:chunk_along", + " have been removed from the chunk list.")) + } + } + } + # --------------------------------------------------------------------- # WORKFLOW CHECKS # --------------------------------------------------------------------- @@ -942,16 +1017,16 @@ check_recipe <- function(recipe) { # e.g. 
only one calibration method ## TODO: Implement number of dependent verifications #nverifications <- check_number_of_dependent_verifications(recipe) - # info(recipe$Run$logger, paste("Start Dates:", + # info(recipe$Run$logger, retrieve = TRUE, paste("Start Dates:", # paste(fcst.sdate, collapse = " "))) # Return error if any check has failed if (error_status) { - error(recipe$Run$logger, "RECIPE CHECK FAILED.") + error(recipe$Run$logger, retrieve = TRUE, "RECIPE CHECK FAILED.") stop("The recipe contains some errors. Find the full list in the", " main.log file.") } else { - info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFULL #####") + info(recipe$Run$logger, retrieve = TRUE, "##### RECIPE CHECK SUCCESSFULL #####") } return(recipe) } diff --git a/tools/data_summary.R b/tools/data_summary.R index b76101bac4bba40b1a26ff2cdea1c1c16bae9580..4c2c8e21777f81a4c2851e4b8266390d17c1fab2 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -20,32 +20,36 @@ data_summary <- function(data_cube, recipe) { sdate_max <- format(max(as.Date(data_cube$attrs$Dates)), format = date_format) # Log the summary - info(recipe$Run$logger, "DATA SUMMARY:") - info(recipe$Run$logger, paste(object_name, "months:", months)) - info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to", + info(recipe$Run$logger, retrieve = TRUE, "DATA SUMMARY:") + info(recipe$Run$logger, retrieve = TRUE, + paste(object_name, "months:", months)) + info(recipe$Run$logger, retrieve = TRUE, + paste(object_name, "range:", sdate_min, "to", sdate_max)) - info(recipe$Run$logger, paste(object_name, "dimensions:")) + info(recipe$Run$logger, retrieve = TRUE, + paste(object_name, "dimensions:")) # Use capture.output() and for loop to display results neatly output_string <- capture.output(dim(data_cube$data)) for (i in output_string) { - info(recipe$Run$logger, i) + info(recipe$Run$logger, retrieve = TRUE, i) } # Print statistical summary of the data for every variable - info(recipe$Run$logger, paste0("Statistical summary of the data in ", - object_name, ":")) + info(recipe$Run$logger, retrieve = TRUE, + paste0("Statistical summary of the data in ", object_name, ":")) for (var_index in 1:data_cube$dims[['var']]) { variable_name <- data_cube$attrs$Variable$varName[var_index] variable_units <- data_cube$attrs$Variable$metadata[[variable_name]]$units - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste0("Variable: ", variable_name, " (units: ", variable_units, ")")) output_string <- capture.output(summary(Subset(data_cube$data, along = "var", indices = var_index))) for (i in output_string) { - info(recipe$Run$logger, i) + info(recipe$Run$logger, retrieve = TRUE, i) } } - info(recipe$Run$logger, "---------------------------------------------") + info(recipe$Run$logger, retrieve = TRUE, + "---------------------------------------------") invisible(gc()) } diff --git a/tools/libs.R b/tools/libs.R index 401467860ba602c0bd459439e973e3637adb7a4f..43ae01703b92e4c7d7338d53cee274cffcc27791 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -2,6 +2,14 @@ library(log4r) library(docopt) library(startR) +#source all the .R files +# path <- "/esarchive/scratch/vagudets/repos/startR/R/" +# ff <- lapply(list.files(path), function(x) paste0(path, x)) +# invisible(lapply(ff, source)) + # load all the libraries +# lib <- c('parallel', 'abind', 'bigmemory', 'future', 'multiApply', +# 'PCICt', 'ClimProjDiags', 'ncdf4', 'plyr') +# invisible(lapply(lib, library, character.only = TRUE)) library(ClimProjDiags) library(multiApply) 
library(yaml) @@ -36,6 +44,7 @@ source("tools/write_autosubmit_conf.R") source("tools/get_archive.R") source("tools/Utils.R") source("tools/restructure_recipe.R") +source("tools/tmp/as.s2dv_cube.R") # source("tools/add_dims.R") # Not sure if necessary yet # Settings diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 16a34d321959d2bf0c3b3c20e23aea8f6ac35829..2fdf19c700cbdba69f772d5d7d874e02d225fcd2 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -69,13 +69,13 @@ prepare_outputs <- function(recipe_file, # logger set-up if (recipe$Run$Terminal) { logger <- log4r::logger(threshold = recipe$Run$Loglevel, - appenders = list(console_appender(layout = default_log_layout()), + appenders = list(console_appender(layout = .custom_log_layout()), file_appender(logfile, append = TRUE, - layout = default_log_layout()))) + layout = .custom_log_layout()))) } else { logger <- log4r::logger(threshold = recipe$Run$Loglevel, appenders = list(file_appender(logfile, append = TRUE, - layout = default_log_layout()))) + layout = .custom_log_layout()))) } recipe$Run$logger <- logger recipe$Run$logfile <- logfile @@ -96,12 +96,12 @@ prepare_outputs <- function(recipe_file, along with this program. If not, see ." for (i in disclosure_message) { - info(recipe$Run$logger, i) + info(recipe$Run$logger, retrieve = TRUE, i) } # Set up default filesystem if (is.null(recipe$Run$filesystem)) { recipe$Run$filesystem <- "esarchive" - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, "Filesystem not specified in the recipe. Setting it to 'esarchive'.") } # Restructure the recipe to make the atomic recipe more readable @@ -110,7 +110,7 @@ prepare_outputs <- function(recipe_file, } # Run recipe checker if (disable_checks) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, "Recipe checks disabled. The recipe will not be checked for errors.") } else { recipe <- check_recipe(recipe) @@ -124,3 +124,17 @@ prepare_outputs <- function(recipe_file, paste0(recipe$name, ".yml"))) return(recipe) } + +.custom_log_layout <- function() { + # Custom modification of the default log4r log layout to remove ANSI + # formatting from log messages when outputting to files. + time_format <- "%Y-%m-%d %H:%M:%S" + function(level, retrieve, ...) { + if (retrieve) { + msg <- paste0(..., collapse = "") + # Strip msg of ANSI formatting + # msg <- ansi_strip(msg) + sprintf("%-5s [%s] %s\n", level, Sys.time(), msg) + } + } +} diff --git a/tools/retrieve_metadata.R b/tools/retrieve_metadata.R new file mode 100644 index 0000000000000000000000000000000000000000..f1784425cf2141bc11d8bf1f70289bc161b543ae --- /dev/null +++ b/tools/retrieve_metadata.R @@ -0,0 +1,97 @@ +# For each chunking dimension, retrieve the metadata for each chunk from its +# .Rds file and put the pieces together, generating the complete coordinates, +# dimensions and attributes of the resulting s2dv_cube. +# +# tmp_dir: string, The temporary directory where the metadata files are stored. +# chunks: named list of chunking dimensions with the amount of chunks for each. +# array_dims: vector containing the dimensions of the final array. 
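+# metadata_file_pattern: string, prefix of the per-chunk metadata .Rds file
+#   names; the chunking dimension names and chunk indices, plus the ".Rds"
+#   extension, are appended to it when building the file glob.
+#
+# Minimal usage sketch (hypothetical directory, pattern and dimension sizes;
+# the real values are provided by the calling startR workflow):
+# metadata <- retrieve_metadata(tmp_dir = "/tmp/sunset_metadata",
+#                               chunks = list(latitude = 2, longitude = 2),
+#                               array_dims = c(var = 1, time = 3,
+#                                              latitude = 90, longitude = 180),
+#                               metadata_file_pattern = "metadata_hcst")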
+ +retrieve_metadata <- function(tmp_dir, chunks, array_dims, + metadata_file_pattern) { + ## For each chunking dimension + # Build metadata file pattern: + for (chunk in sort(names(chunks))) { + metadata_file_pattern <- paste0(metadata_file_pattern, "_", chunk, "_*") + } + metadata_file_pattern <- paste0(metadata_file_pattern, ".Rds") + metadata_files <- list.files(path = tmp_dir, pattern = glob2rx(metadata_file_pattern)) + metadata <- readRDS(file.path(tmp_dir, metadata_files[1])) + # Piece together variable info + if ("var" %in% names(chunks)) { + # $attrs + for (i in 2:chunks[["var"]]) { + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("var_1", + paste0("var_", i), + metadata_files[1])))) + var_name <- metadata_chunk$attrs$Variable$varName + metadata$attrs$Variable$varName <- c(metadata$attrs$Variable$varName, + var_name) + metadata$attrs$Variable$metadata <- c(metadata$attrs$Variable$metadata, + metadata_chunk$attrs$Variable$metadata[var_name]) + } + # $coords + metadata$coords$var <- metadata$attrs$Variable$varName + dim(metadata$coords$var) <- c(var = length(metadata$coords$var)) + attr(metadata$coords$var, "values") <- TRUE + attr(metadata$coords$var, "indices") <- FALSE + # $dims + metadata$dims["var"] <- array_dims["var"] + } + # Piece together time info + if ("time" %in% names(chunks)) { + dates_dims <- dim(metadata$attrs$Dates) + for (i in 2:chunks[["time"]]) { + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("time_1", + paste0("time_", i), + metadata_files[1])))) + metadata$attrs$Dates <- c(metadata$attrs$Dates, metadata_chunk$attrs$Dates) + } + time_dim_length <- array_dims[["time"]] + dim(metadata$attrs$Dates) <- c(dates_dims[names(dates_dims) != "time"], + time = time_dim_length) + metadata$coords$time <- seq(1, time_dim_length) + dim(metadata$coords$time) <- c(time = time_dim_length) + attr(metadata$coords$time, "indices") <- TRUE + #$dims + metadata$dims["time"] <- array_dims["time"] + } + # Piece together lon/lat info + if ("latitude" %in% names(chunks)) { + # $attrs + ## TODO: Preserve attributes + for (i in 2:chunks[["latitude"]]) { + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("latitude_1", + paste0("latitude_", i), + metadata_files[1])))) + metadata$attrs$Variable$metadata$latitude <- c(metadata$attrs$Variable$metadata$latitude, + metadata_chunk$attrs$Variable$metadata$latitude) + } + # $coords + metadata$coords$latitude <- metadata$attrs$Variable$metadata$latitude + dim(metadata$coords$latitude) <- c(latitude = length(metadata$coords$latitude)) + # $dims + metadata$dims["latitude"] <- array_dims["latitude"] + } + + if ("longitude" %in% names(chunks)) { + # $attrs + ## TODO: Preserve attributes + for (i in 2:chunks[["longitude"]]) { + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("longitude_1", + paste0("longitude_", i), + metadata_files[1])))) + metadata$attrs$Variable$metadata$longitude <- c(metadata$attrs$Variable$metadata$longitude, + metadata_chunk$attrs$Variable$metadata$longitude) + } + # $coords + metadata$coords$longitude <- metadata$attrs$Variable$metadata$longitude + dim(metadata$coords$longitude) <- c(longitude = length(metadata$coords$longitude)) + # $dims + metadata$dims["longitude"] <- array_dims["longitude"] + } + return(metadata) +} diff --git a/tools/tmp/as.s2dv_cube.R b/tools/tmp/as.s2dv_cube.R new file mode 100644 index 0000000000000000000000000000000000000000..1b4c42751b1845bf90afedb6bbad4b6dd90c5156 --- /dev/null +++ b/tools/tmp/as.s2dv_cube.R @@ -0,0 +1,363 @@ +#'Conversion of 'startR_array' 
or 'list' objects to 's2dv_cube' +#' +#'This function converts data loaded using Start function from startR package or +#'Load from s2dv into an 's2dv_cube' object. +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#'@author Nicolau Manubens, \email{nicolau.manubens@bsc.es} +#' +#'@param object An object of class 'startR_array' generated from function +#' \code{Start} from startR package or a list output from function \code{Load} +#' from s2dv package. Any other object class will not be accepted. +#'@param remove_attrs_coords A logical value indicating whether to remove the +#' attributes of the coordinates (TRUE) or not (FALSE). The default value is +#' FALSE. +#'@param remove_null Optional. A logical value indicating whether to remove the +#' elements that are NULL (TRUE) or not (FALSE) of the output object. It is +#' only used when the object is an output from function \code{Load}. The +#' default value is FALSE. +#' +#'@return The function returns an 's2dv_cube' object to be easily used with +#'functions with the prefix \code{CST} from CSTools and CSIndicators packages. +#'The object is mainly a list with the following elements:\cr +#'\itemize{ +#' \item{'data', array with named dimensions;} +#' \item{'dims', named vector of the data dimensions;} +#' \item{'coords', list of named vectors with the coordinates corresponding to +#' the dimensions of the data parameter;} +#' \item{'attrs', named list with elements: +#' \itemize{ +#' \item{'Dates', array with named temporal dimensions of class 'POSIXct' +#' from time values in the data;} +#' \item{'Variable', has the following components: +#' \itemize{ +#' \item{'varName', character vector of the short variable name. It is +#' usually specified in the parameter 'var' from the functions +#' Start and Load;} +#' \item{'metadata', named list of elements with variable metadata. +#' They can be from coordinates variables (e.g. longitude) or +#' main variables (e.g. 
'var');} +#' } +#' } +#' \item{'Datasets', character strings indicating the names of the +#' datasets;} +#' \item{'source_files', a vector of character strings with complete paths +#' to all the found files involved in loading the data;} +#' \item{'when', a time stamp of the date issued by the Start() or Load() +#' call to obtain the data;} +#' \item{'load_parameters', it contains the components used in the +#' arguments to load the data from Start() or Load() functions.} +#' } +#' } +#'} +#' +#'@seealso \code{\link{s2dv_cube}}, \code{\link{CST_Start}}, +#'\code{\link[startR]{Start}} and \code{\link{CST_Load}} +#'@examples +#'\dontrun{ +#'# Example 1: convert an object from startR::Start function to 's2dv_cube' +#'library(startR) +#'repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +#'data <- Start(dat = repos, +#' var = 'tas', +#' sdate = c('20170101', '20180101'), +#' ensemble = indices(1:5), +#' time = 'all', +#' latitude = indices(1:5), +#' longitude = indices(1:5), +#' return_vars = list(latitude = 'dat', longitude = 'dat', time = 'sdate'), +#' retrieve = TRUE) +#'data <- as.s2dv_cube(data) +#'# Example 2: convert an object from s2dv::Load function to 's2dv_cube' +#'startDates <- c('20001101', '20011101', '20021101', +#' '20031101', '20041101', '20051101') +#'data <- Load(var = 'tas', exp = 'system5c3s', +#' nmember = 2, sdates = startDates, +#' leadtimemax = 3, latmin = 10, latmax = 30, +#' lonmin = -10, lonmax = 10, output = 'lonlat') +#'data <- as.s2dv_cube(data) +#'} +#'@export +as.s2dv_cube <- function(object, remove_attrs_coords = FALSE, + remove_null = FALSE) { + + if (is.list(object) & length(object) == 11) { + if (is.null(object) || (is.null(object$mod) && is.null(object$obs))) { + stop("The s2dv::Load call did not return any data.") + } + obs <- object + obs$mod <- NULL + object$obs <- NULL + names(object)[[1]] <- 'data' # exp + names(obs)[[1]] <- 'data' # obs + # obs + if (!is.null(obs$data)) { + obs_exist <- TRUE + obs$Datasets$exp <- NULL + obs$Datasets <- obs$Datasets$obs + } else { + obs_exist <- FALSE + } + # object + if (!is.null(object$data)) { + exp_exist <- TRUE + object$Datasets$obs <- NULL + object$Datasets <- object$Datasets$exp + } else { + exp_exist <- FALSE + } + result <- list() + # obs and exp + if (obs_exist & exp_exist) { + obs_exp = list(exp = object, obs = obs) + } else if (obs_exist & !exp_exist) { + obs_exp = list(obs = obs) + } else { + obs_exp = list(exp = object) + } + i <- 0 + for (obj_i in obs_exp) { + i <- i + 1 + # attrs + obj_i$attrs <- within(obj_i, rm(list = c('data'))) + obj_i <- within(obj_i, rm(list = names(obj_i$attrs))) + dates <- obj_i$attrs$Dates$start + attr(dates, 'end') <- obj_i$attrs$Dates$end + if (!is.null(dates)) { + dim(dates) <- dim(obj_i$data)[c('ftime', 'sdate')] + obj_i$attrs$Dates <- dates + } + # Variable + varname <- obj_i$attrs$Variable$varName + varmetadata <- NULL + varmetadata[[varname]] <- attributes(obj_i$attrs$Variable)[-1] + obj_i$attrs$Variable <- list(varName = varname, metadata = varmetadata) + # dims + obj_i$dims <- dim(obj_i$data) + # coords + obj_i$coords <- sapply(names(dim(obj_i$data)), function(x) NULL) + # sdate + obj_i$coords$sdate <- obj_i$attrs$load_parameters$sdates + if (!remove_attrs_coords) attr(obj_i$coords$sdate, 'indices') <- FALSE + # lon + if (!is.null(obj_i$attrs$lon)) { + if (remove_attrs_coords) { + obj_i$coords$lon <- as.vector(obj_i$attrs$lon) + } else { + obj_i$coords$lon <- obj_i$attrs$lon + dim(obj_i$coords$lon) <- NULL + attr(obj_i$coords$lon, 
'indices') <- FALSE + } + obj_i$attrs$Variable$metadata$lon <- obj_i$attrs$lon + obj_i$attrs <- within(obj_i$attrs, rm(list = 'lon')) + } + # lat + if (!is.null(obj_i$attrs$lat)) { + if (remove_attrs_coords) { + obj_i$coords$lat <- as.vector(obj_i$attrs$lat) + } else { + obj_i$coords$lat <- obj_i$attrs$lat + dim(obj_i$coords$lat) <- NULL + attr(obj_i$coords$lat, 'indices') <- FALSE + } + obj_i$attrs$Variable$metadata$lat <- obj_i$attrs$lat + obj_i$attrs <- within(obj_i$attrs, rm(list = 'lat')) + } + # member + obj_i$coords$member <- 1:obj_i$dims['member'] + if (!remove_attrs_coords) attr(obj_i$coords$member, 'indices') <- TRUE + # dataset + if (!is.null(names(obj_i$attrs$Datasets))) { + obj_i$coords$dataset <- names(obj_i$attrs$Datasets) + if (!remove_attrs_coords) attr(obj_i$coords$dataset, 'indices') <- FALSE + obj_i$attrs$Datasets <- names(obj_i$attrs$Datasets) + } else { + obj_i$coords$dataset <- 1:obj_i$dims['dataset'] + if (!remove_attrs_coords) attr(obj_i$coords$dataset, 'indices') <- TRUE + } + # ftime + obj_i$coords$ftime <- 1:obj_i$dims['ftime'] + if (!remove_attrs_coords) attr(obj_i$coords$ftime, 'indices') <- TRUE + # remove NULL values + if (isTRUE(remove_null)) { + obj_i$attrs$load_parameters <- .rmNullObs(obj_i$attrs$load_parameters) + } + obj_i <- obj_i[c('data', 'dims', 'coords', 'attrs')] + class(obj_i) <- 's2dv_cube' + if (names(obs_exp)[[i]] == 'exp') { + result$exp <- obj_i + } else { + result$obs <- obj_i + } + } + if (is.list(result)) { + if (is.null(result$exp)) { + result <- result$obs + } else if (is.null(result$obs)) { + result <- result$exp + } else { + warning("The output is a list of two 's2dv_cube' objects", + " corresponding to 'exp' and 'obs'.") + } + } + + } else if (inherits(object, 'startR_array')) { + # From Start: + result <- list() + result$data <- as.vector(object) + ## dims + dims <- dim(object) + dim(result$data) <- dims + result$dims <- dims + ## coords + result$coords <- sapply(names(dims), function(x) NULL) + # Find coordinates + FileSelector <- attributes(object)$FileSelectors + VariablesCommon <- names(attributes(object)$Variables$common) + dat <- names(FileSelector)[1] + VariablesDat <- names(attributes(object)$Variables[[dat]]) + varName <- NULL + for (i_coord in names(dims)) { + if (i_coord %in% names(FileSelector[[dat]])) { # coords in FileSelector + coord_in_fileselector <- FileSelector[[dat]][which(i_coord == names(FileSelector[[dat]]))] + if (length(coord_in_fileselector) == 1) { + if (length(coord_in_fileselector[[i_coord]][[1]]) == dims[i_coord]) { + # TO DO: add var_dim parameter + if (i_coord %in% c('var', 'vars')) { + varName <- as.vector(coord_in_fileselector[[i_coord]][[1]]) + } + if (remove_attrs_coords) { + result$coords[[i_coord]] <- as.vector(coord_in_fileselector[[i_coord]][[1]]) + } else { + result$coords[[i_coord]] <- coord_in_fileselector[[i_coord]][[1]] + attr(result$coords[[i_coord]], 'indices') <- FALSE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } + } else if (i_coord %in% VariablesCommon) { # coords in common + coord_in_common <- attributes(object)$Variables$common[[which(i_coord == VariablesCommon)]] + if (inherits(coord_in_common, "POSIXct")) { + result$attrs$Dates <- coord_in_common + } + if (length(coord_in_common) == dims[i_coord]) { + if (remove_attrs_coords) { + if (inherits(coord_in_common, "POSIXct")) { + result$coords[[i_coord]] <- 1:dims[i_coord] + attr(result$coords[[i_coord]], 'indices') <- TRUE + } else 
{ + result$coords[[i_coord]] <- as.vector(coord_in_common) + } + } else { + result$coords[[i_coord]] <- coord_in_common + attr(result$coords[[i_coord]], 'indices') <- FALSE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } else if (!is.null(VariablesDat)) { # coords in dat + if (i_coord %in% VariablesDat) { + coord_in_dat <- attributes(object)$Variables[[dat]][[which(i_coord == VariablesDat)]] + if (inherits(coord_in_dat, "POSIXct")) { + result$attrs$Dates <- coord_in_dat + } + if (length(coord_in_dat) == dims[i_coord]) { + if (remove_attrs_coords) { + if (inherits(coord_in_dat, "POSIXct")) { + result$coords[[i_coord]] <- coord_in_dat + } else { + result$coords[[i_coord]] <- as.vector(coord_in_dat) + } + } else { + result$coords[[i_coord]] <- coord_in_dat + attr(result$coords[[i_coord]], 'indices') <- FALSE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } else { # missing other dims + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + dim(result$coords[[i_coord]]) <- NULL + } + + # attrs + ## varName + if (!is.null(varName)) { + result$attrs$Variable$varName <- varName + } + ## Variables + for (var_type in names(attributes(object)$Variables)) { + if (!is.null(attributes(object)$Variables[[var_type]])) { + for (var in names(attributes(object)$Variables[[var_type]])) { + attr_variable <- attributes(object)$Variables[[var_type]][[var]] + if (is.null(result$attrs$Dates)) { + if (inherits(attr_variable, "POSIXct")) { + result$attrs$Dates <- attr_variable + } + } + if (is.null(result$attrs$Variable$metadata[[var]])) { + result$attrs$Variable$metadata[[var]] <- attr_variable + } + } + } + } + ## Datasets + if (length(names(FileSelector)) > 1) { + # lon name + known_lon_names <- .KnownLonNames() + lon_name_dat <- names(dims)[which(names(dims) %in% known_lon_names)] + # lat name + known_lat_names <- .KnownLatNames() + lat_name_dat <- names(dims)[which(names(dims) %in% known_lat_names)] + result$attrs$Datasets <- names(FileSelector) + # TO DO: add dat_dim parameter + if (any(names(dims) %in% c('dat', 'dataset'))) { + dat_dim <- names(dims)[which(names(dims) %in% c('dat', 'dataset'))] + result$coords[[dat_dim]] <- names(FileSelector) + if (!remove_attrs_coords) attr(result$coords[[dat_dim]], 'indices') <- FALSE + } + for (i in 2:length(names(FileSelector))) { + if (!(length(lon_name_dat) == 0L)) { + if (any(result$coords[[lon_name_dat]] != as.vector(attributes(object)$Variables[[names(FileSelector)[i]]][[lon_name_dat]]))) { + warning("'lon' values are different for different datasets. ", + "Only values from the first will be used.") + } + } + if (!(length(lat_name_dat) == 0L)) { + if (any(result$coords[[lat_name_dat]] != as.vector(attributes(object)$Variables[[names(FileSelector)[i]]][[lat_name_dat]]))) { + warning("'lat' values are different for different datasets. 
", + "Only values from the first will be used.") + } + } + } + } else { + result$attrs$Datasets <- names(FileSelector) + } + ## when + result$attrs$when <- Sys.time() + ## source_files + result$attrs$source_files <- attributes(object)$Files + ## load_parameters + result$attrs$load_parameters <- attributes(object)$FileSelectors + class(result) <- 's2dv_cube' + } else if (inherits(object, 'startR_cube')) { + stop("Unsupported object class: 'startR_cube'. ", + "When using startR::Start() or CSTools::CST_Start(), set ", + "'retrieve = TRUE' to ensure the data is retrieved into ", + "memory and can be converted into a 's2dv_cube' object.") + } else { + stop("The class of parameter 'object' is not implemented", + " to be converted into 's2dv_cube' class yet.") + } + return(result) +}