diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml
index 2e0a1b296c14cdf1230e872ad94fd3d1e556829d..1694c11c5b168135293c47fc6c142ea73a0489bc 100644
--- a/conf/archive_decadal.yml
+++ b/conf/archive_decadal.yml
@@ -228,8 +228,8 @@ esarchive:
   # ----
   MIROC6:
-    name:
-    institution:
+    name: "MIROC6"
+    institution: "MIROC"
     src:
       hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/"
       fcst:
diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R
index edb6a8beb96279ba166a2703605829c0757f0da3..d189d862d361750c6019092a0536d22d91020c06 100644
--- a/modules/Anomalies/Anomalies.R
+++ b/modules/Anomalies/Anomalies.R
@@ -22,6 +22,23 @@ Anomalies <- function(recipe, data) {
   }
   original_dims <- data$hcst$dim
+<<<<<<< HEAD
+  # Compute anomalies
+  ## TODO: CST_Anomaly currently returns all NAs; investigate before merging
+  anom <- CST_Anomaly(data$hcst, data$obs,
+                      cross = cross,
+                      memb = TRUE,
+                      memb_dim = 'ensemble',
+                      dim_anom = 'syear',
+                      dat_dim = c('dat', 'ensemble'),
+                      ftime_dim = 'time',
+                      ncores = recipe$Analysis$ncores)
+  # Reorder dims
+  anom$exp$data <- Reorder(anom$exp$data, names(original_dims))
+  anom$obs$data <- Reorder(anom$obs$data, names(original_dims))
+
+=======
+>>>>>>> b92645e34fd3f703145f77d37f7defd98fb3a6d2

   # Save full fields
   hcst_fullvalue <- data$hcst
   obs_fullvalue <- data$obs
diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 989d9b94519ee298dc6d76a3ecaf254f71927aaa..1097ac39f5a532b6204cfc3b02b8b6ae8ad1e1bc 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -10,7 +10,7 @@ Calibration <- function(recipe, data) {
   # recipe: object obtained when passing the .yml recipe file to read_yaml()

   method <- tolower(recipe$Analysis$Workflow$Calibration$method)
-  
+
   if (method == "raw") {
     warn(recipe$Run$logger,
          paste("The Calibration module has been called, but the calibration",
@@ -28,7 +28,10 @@ Calibration <- function(recipe, data) {
   } else {
     ## TODO: Calibrate full fields when present
     # Calibration function params
-    mm <- recipe$Analysis$Datasets$Multimodel
+    # Coerce the recipe's Multimodel entry ("yes"/"no"/"both") to a logical:
+    # anything other than FALSE enables multi-model handling.
+    mm <- !isFALSE(recipe$Analysis$Datasets$Multimodel)
+
     if (is.null(recipe$Analysis$ncores)) {
       ncores <- 1
     } else {
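(Editor's note on the Calibration change above.) The tightened flag logic relies on base R's isFALSE(), which is TRUE only for a literal length-one FALSE; every other recipe value keeps multi-model handling enabled. A minimal standalone sketch of the behaviour assumed here:

# isFALSE() is TRUE only for a single literal FALSE, so recipe values
# parsed as TRUE ("yes") or left as the string "both" enable multimodel.
mm <- !isFALSE(FALSE)   # FALSE -> multimodel disabled
mm <- !isFALSE(TRUE)    # TRUE  -> multimodel enabled
mm <- !isFALSE("both")  # TRUE  -> individual models plus multimodel
mm <- !isFALSE(NULL)    # TRUE  -> caveat: a missing entry also enables it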
diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 43dea0651fc7eb02cb8cb69a1d44d580903a4666..0801e71c7cfb00b52ff6a54a0bd937bb464ffbe1 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -1,7 +1,466 @@
+<<<<<<< HEAD
+## TODO: remove paths to personal scratch directories
+source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R")
+# Load required libraries/funs
+source('modules/Loading/R/join_datasets.R')
+source("modules/Loading/R/dates2load.R")
+source("modules/Loading/R/get_timeidx.R")
+source("modules/Loading/R/check_latlon.R")
+## TODO: Move to prepare_outputs.R
+=======
+>>>>>>> b92645e34fd3f703145f77d37f7defd98fb3a6d2
 source("tools/libs.R")
 ## TODO: Remove with the next release
 source("modules/Loading/load_datasets.R")
+<<<<<<< HEAD
+load_datasets <- function(recipe, retrieve = T) {
+
+  if (length(recipe$Analysis$Datasets$System$name) == 1) { ## Only one model
+
+    output <- load_single_datasets(recipe = recipe, retrieve = retrieve)
+
+  } else { ## Several models and/or Multimodel
+
+    multimodel <- recipe$Analysis$Datasets$Multimodel ## "TRUE", "FALSE" or "both"
+
+    # Loading each model individually
+
+    ## NOTE:
+    ## if retrieve = T --> data is a list of models; each model contains a list of s2dv_cube objects. Try str(data, max.level = 3, give.attr = F).
+    ## if retrieve = F --> data must be a list of startR_cube objects --> unlist(data, recursive = F). 'dat' and 'ensemble' need to be target_dims.
+    data <- list()
+    n_members <- list(hcst = c(), fcst = c())
+    for (model in 1:length(recipe$Analysis$Datasets$System$name)) {
+      recipe_single <- recipe
+      recipe_single$Analysis$Datasets$System$name <- recipe_single$Analysis$Datasets$System$name[model]
+      data[[model]] <- load_single_datasets(recipe = recipe_single, retrieve = retrieve)
+
+      if (retrieve) {
+        n_members$hcst[model] <- data[[model]]$hcst$dims['ensemble']
+        if (!is.null(data[[model]]$fcst)) {
+          n_members$fcst[model] <- data[[model]]$fcst$dims['ensemble']
+        }
+      } else {
+        n_members$hcst[model] <- attr(data[[model]]$hcst, 'Dimensions')['ensemble']
+        if (!is.null(data[[model]]$fcst)) {
+          n_members$fcst[model] <- attr(data[[model]]$fcst, 'Dimensions')['ensemble']
+        }
+      }
+    }
+
+    if (retrieve) {
+      # Joining models and/or creating multimodel
+      output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members)
+
+      # Adjust s2dv_cube
+      # hcst:
+      ## $dims
+      output$hcst$dims <- dim(output$hcst$data)
+      ## $coords
+      if (isFALSE(multimodel)) {
+        output$hcst$coords$dat <- recipe$Analysis$Datasets$System$name
+      } else if (isTRUE(multimodel)) {
+        output$hcst$coords$dat <- 'multimodel'
+      } else if (multimodel == 'both') {
+        output$hcst$coords$dat <- c(recipe$Analysis$Datasets$System$name, 'multimodel')
+      }
+      output$hcst$coords$ensemble <- 1:dim(output$hcst$data)['ensemble']
+      ## $attrs
+      output$hcst$attrs$Datasets <- recipe$Analysis$Datasets$System$name
+      tmp <- array(c(sapply(data, "[[", c('hcst', 'attrs', 'source_files'))),
+                   dim = c(dim(output$hcst$attrs$source_files)[-1],
+                           dat = length(recipe$Analysis$Datasets$System$name)))
+      output$hcst$attrs$source_files <- aperm(tmp, c(length(dim(tmp)), 1:(length(dim(tmp)) - 1)))
+
+      # fcst:
+      if (!is.null(output$fcst)) {
+        ## $dims
+        output$fcst$dims <- dim(output$fcst$data)
+        ## $coords
+        if (isFALSE(multimodel)) {
+          output$fcst$coords$dat <- recipe$Analysis$Datasets$System$name
+        } else if (isTRUE(multimodel)) {
+          output$fcst$coords$dat <- 'multimodel'
+        } else if (multimodel == 'both') {
+          output$fcst$coords$dat <- c(recipe$Analysis$Datasets$System$name, 'multimodel')
+        }
+        output$fcst$coords$ensemble <- 1:dim(output$fcst$data)['ensemble']
+        ## $attrs
+        output$fcst$attrs$Datasets <- recipe$Analysis$Datasets$System$name
+        tmp <- array(c(sapply(data, "[[", c('fcst', 'attrs', 'source_files'))),
+                     dim = c(dim(output$fcst$attrs$source_files)[-1],
+                             dat = length(recipe$Analysis$Datasets$System$name)))
+        output$fcst$attrs$source_files <- aperm(tmp, c(length(dim(tmp)), 1:(length(dim(tmp)) - 1)))
+      }
+
+    } else { # retrieve = F
+      # Unlist data to be a list of s2dv_cube objects
+      names(data) <- recipe$Analysis$Datasets$System$name
+      output <- unlist(data, recursive = F)
+      output$n_members <- n_members
+    }
+
+  }
+
+  return(output) # list(hcst = hcst, fcst = fcst, obs = obs)
+}
+
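For orientation before load_single_datasets() is defined below: join_datasets() (added later in this diff under modules/Loading/R/join_datasets.R) is expected to pad systems with unequal ensemble sizes up to the largest one. A minimal, self-contained sketch of that padding scheme, with invented member counts and values:

# Two hypothetical systems with 5 and 10 members: the joined 'ensemble'
# dimension is NA-padded up to the largest ensemble, and 'dat' indexes
# the systems (plus one extra slot when Multimodel: 'both' is requested).
n_members <- c(sysA = 5, sysB = 10)
max_members <- max(n_members)
joined <- array(NA_real_, dim = c(dat = length(n_members), ensemble = max_members))
for (i in seq_along(n_members)) {
  joined[i, 1:n_members[i]] <- rnorm(n_members[i])  # fake member values
}
# The multimodel "member pool" then simply drops the padding:
multimodel_members <- joined[!is.na(joined)]  # length 15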
+load_single_datasets <- function(recipe, retrieve = T) {
+
+  # -------------------------------------------
+  # Set params -----------------------------------------
+
+  hcst.inityear <- recipe$Analysis$Time$hcst_start
+  hcst.endyear <- recipe$Analysis$Time$hcst_end
+  lats.min <- recipe$Analysis$Region$latmin
+  lats.max <- recipe$Analysis$Region$latmax
+  lons.min <- recipe$Analysis$Region$lonmin
+  lons.max <- recipe$Analysis$Region$lonmax
+  ref.name <- recipe$Analysis$Datasets$Reference$name
+  exp.name <- recipe$Analysis$Datasets$System$name
+
+  variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
+  store.freq <- recipe$Analysis$Variables$freq
+
+  # Get sdates array
+  ## LOGGER: Change dates2load to extract logger from recipe?
+  sdates <- dates2load(recipe, recipe$Run$logger)
+
+  idxs <- NULL
+  idxs$hcst <- get_timeidx(sdates$hcst,
+                           recipe$Analysis$Time$ftime_min,
+                           recipe$Analysis$Time$ftime_max,
+                           time_freq = store.freq)
+
+  if (!(is.null(sdates$fcst))) {
+    idxs$fcst <- get_timeidx(sdates$fcst,
+                             recipe$Analysis$Time$ftime_min,
+                             recipe$Analysis$Time$ftime_max,
+                             time_freq = store.freq)
+  }
+
+  ## TODO: Examine this verification part and check whether it is necessary
+  # stream <- verifications$stream
+  # sdates <- verifications$fcst.sdate
+
+  ## TODO: define fcst.name
+  ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name
+
+  # Get esarchive datasets dict:
+  ## TODO: Adapt to 'filesystem' option in recipe
+  archive <- read_yaml("conf/archive.yml")$esarchive
+  exp_descrip <- archive$System[[exp.name]]
+
+  freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]])
+  reference_descrip <- archive$Reference[[ref.name]]
+  freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]])
+  obs.dir <- reference_descrip$src
+  fcst.dir <- exp_descrip$src
+  hcst.dir <- exp_descrip$src
+  fcst.nmember <- exp_descrip$nmember$fcst
+  hcst.nmember <- exp_descrip$nmember$hcst
+
+  ## TODO: is this necessary?
+  ##if ("accum" %in% names(reference_descrip)) {
+  ##  accum <- unlist(reference_descrip$accum[store.freq][[1]])
+  ##} else {
+  ##  accum <- FALSE
+  ##}
+
+  var_dir_obs <- reference_descrip[[store.freq]][variable]
+  var_dir_exp <- exp_descrip[[store.freq]][variable]
+
+  # -----------
+  obs.path <- paste0(archive$src,
+                     obs.dir, store.freq, "/$var$", "$var_dir$",
+                     "/$var$_$file_date$.nc")
+
+  hcst.path <- paste0(archive$src,
+                      hcst.dir, store.freq, "/$var$", "$var_dir$",
+                      "/$var$_$file_date$.nc")
+
+  fcst.path <- paste0(archive$src,
+                      hcst.dir, store.freq, "/$var$", "$var_dir$",
+                      "/$var$_$file_date$.nc")
+
+  # Define regrid parameters:
+  #-------------------------------------------------------------------
+  regrid_params <- get_regrid_params(recipe, archive)
+
+  # Longitude circular sort and latitude check
+  #-------------------------------------------------------------------
+  circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max)
+
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
+    split_multiselected_dims = TRUE
+  } else {
+    split_multiselected_dims = FALSE
+  }
+
+  # Load hindcast
+  #-------------------------------------------------------------------
+  hcst <- Start(dat = list(list(name = recipe$Analysis$Datasets$System$name, path = hcst.path)),
+                var = variable,
+                var_dir = var_dir_exp,
+                file_date = sdates$hcst,
+                time = idxs$hcst,
+                var_dir_depends = 'var',
+                latitude = values(list(lats.min, lats.max)),
+                latitude_reorder = Sort(),
+                longitude = values(list(lons.min, lons.max)),
+                longitude_reorder = circularsort,
+                transform = regrid_params$fcst.transform,
+                transform_params = list(grid = regrid_params$fcst.gridtype,
+                                        method = regrid_params$fcst.gridmethod),
+                transform_vars = c('latitude', 'longitude'),
+                synonims = list(latitude = c('lat', 'latitude'),
+                                longitude = c('lon', 'longitude'),
+                                ensemble = c('member', 'ensemble')),
+                ensemble = indices(1:hcst.nmember),
+                metadata_dims = 'var',
+ return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = retrieve) + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(hcst))) { + hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + + # Convert hcst to s2dv_cube object + ## TODO: Give correct dimensions to $Dates + ## (sday, sweek, syear instead of file_date) + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + # the call uses file_date instead of fcst_syear so that it can work + # with the daily case and the current version of startR not allowing + # multiple dims split + + fcst <- Start(dat = list(list(name = recipe$Analysis$Datasets$System$name, path = fcst.path)), + var = variable, + var_dir = var_dir_exp, + var_dir_depends = 'var', + file_date = sdates$fcst, + time = idxs$fcst, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = retrieve) + + if ("var_dir" %in% names(dim(fcst))) { + fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + 
default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + + # Convert fcst to s2dv_cube + fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$attrs$Dates[] <- + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. + dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Separate Start() call for monthly vs daily data + if (store.freq == "monthly_mean") { + + dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") + dim(dates_file) <- dim(dates) + + obs <- Start(dat = list(list(name = recipe$Analysis$Datasets$Reference$name, path = obs.path)), + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = retrieve) + + } else if (store.freq %in% c("daily_mean", "daily")) { + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = list(list(name = recipe$Analysis$Datasets$Reference$name, path = obs.path)), + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = retrieve) + } + + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(obs))) { + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") + } + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube 
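(Editor's illustration of the singleton-dimension padding just applied to obs; the array values are invented, and the original flow resumes below with the s2dv_cube conversion.)

obs <- array(rnorm(6), dim = c(dat = 1, var = 1, syear = 3, time = 2))
default_dims <- c(dat = 1, var = 1, sday = 1,
                  sweek = 1, syear = 1, time = 1,
                  latitude = 1, longitude = 1, ensemble = 1)
default_dims[names(dim(obs))] <- dim(obs)  # keep the lengths that exist
dim(obs) <- default_dims  # all missing dims become length 1, incl. ensemble
# dim(obs): dat=1 var=1 sday=1 sweek=1 syear=3 time=2 latitude=1 longitude=1 ensemble=1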
+ obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!(recipe$Analysis$Regrid$type == 'none')) { + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + + } + } + + # Remove negative values in accumulative variables + dictionary <- read_yaml("conf/variable-dictionary.yml") + for (var_idx in 1:length(variable)) { + var_name <- variable[var_idx] + if (dictionary$vars[[var_name]]$accum) { + info(recipe$Run$logger, + paste0("Accumulated variable ", var_name, + ": setting negative values to zero.")) + # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, + # along = "var", + # indices = var_idx, F), 0) + obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 + hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 + } +======= Loading <- function(recipe) { # Source correct function depending on filesystem and time horizon # Case: CERISE (Mars) @@ -24,6 +483,7 @@ Loading <- function(recipe) { data <- load_decadal(recipe) } else { stop("Incorrect time horizon.") +>>>>>>> b92645e34fd3f703145f77d37f7defd98fb3a6d2 } } # Display data summary diff --git a/modules/Loading/R/join_datasets.R b/modules/Loading/R/join_datasets.R new file mode 100644 index 0000000000000000000000000000000000000000..b7a5d17c78c6854613043aae2fecf775276e92e4 --- /dev/null +++ b/modules/Loading/R/join_datasets.R @@ -0,0 +1,91 @@ +join_datasets <- function(data, multimodel, n_members) { + + ## Taking the first model as "template" + output <- data[[1]] + + if (is.null(output$fcst$data)) { + exps <- 'hcst' + } else {exps <- c('hcst', 'fcst')} + + for (exp in exps) { + + # New dimensions of the arrays + if (isFALSE(multimodel)) { ## Individual models + n_models <- length(data) + max_members <- max(n_members[[exp]]) + } else if (isTRUE(multimodel)) { ## Multimodel + n_models <- length(data) + max_members <- sum(n_members[[exp]]) + } else if (identical(multimodel, 'both')) { ## Individual models and multimodel + n_models <- length(data) + 1 + max_members <- sum(n_members[[exp]]) + } else { + stop('Incorrect "Multimodel" in the recipe. 
It must be "yes", "no" or "both".') + } + dims <- dim(data[[1]][[exp]]$data) + dims['dat'] <- n_models + dims['ensemble'] <- max_members + + # Creating array for models and/or multimodel + output[[exp]]$data <- array(data = NA, dim = dims) + + ## Filling the models + fill_data <- function(data, n_members, dims) { + # data: a list of hcst/fcst$data from each model + # n_members: a vector of members of each model + # dims: c(dat = n_models, ensemble = max_members) + + .fill_data <- function(data, n_members, dims) { + output <- rep(NA, length = dims['ensemble']) + output[1:n_members] <- data + return(output) + } + + data_list <- vector('list', length = length(data)) + for (i_model in 1:length(data)) { + data_list[[i_model]] <- multiApply::Apply(data = data[[i_model]], + target_dims = 'ensemble', output_dims = 'ensemble', + fun = .fill_data, n_members = n_members[i_model], + dims = dims)$output1 + } + if (dims['dat'] > length(data)) { # Multimodel = 'both' + data_list[[as.numeric(dims['dat'])]] <- array(as.numeric(NA), dim = dim(data_list[[1]])) + } + output <- array(unlist(data_list), dim = c(dims['ensemble'], dim(data_list[[1]])[-c(1, 2)], dims['dat'])) + return(output) + } + + data_exp_data <- lapply(data, '[[', c(exp, 'data')) + output[[exp]]$data <- fill_data(data = (data_exp_data), n_members = n_members[[exp]], + dims = dims[c('dat', 'ensemble')]) + + ## Filling the multimodel + if (!isFALSE(multimodel)) { + data_multi <- CSTools::MergeDims(data = output[[exp]]$data, + merge_dims = c('ensemble', 'dat'), + rename_dim = 'ensemble', + na.rm = TRUE) + + if (isTRUE(multimodel)) { ## Multimodel + output[[exp]]$data <- data_multi + output[[exp]]$data <- s2dv::InsertDim(data = output[[exp]]$data, + posdim = 1, + lendim = 1, + name = 'dat') + + } else if (identical(multimodel, 'both')) { ## Individual models and multimodel + tmp <- asplit(output[[exp]]$data, which(names(dim(output[[exp]]$data)) == 'dat')) + tmp[[n_models]] <- data_multi + output[[exp]]$data <- array(unlist(tmp), dim = c(dim(tmp[[1]]), dat = n_models)) + } + } + + # Reorder data back to standard + tmp <- match(c('dat', 'var', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble'), + names(dim(output[[exp]]$data))) + output[[exp]]$data <- aperm(output[[exp]]$data, tmp[!is.na(tmp)]) + + } + + return(output) +} diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index a0e85e15a970d4cdd9610033fbb25b544021058b..2085b24ab9dfac300d8b0df9fa3a8d262a057f47 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -8,6 +8,10 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") # Load required libraries/funs source("modules/Loading/R/helper_loading_decadal.R") +<<<<<<< HEAD:modules/Loading/Loading_decadal.R +source('modules/Loading/R/join_datasets.R') +======= +>>>>>>> b92645e34fd3f703145f77d37f7defd98fb3a6d2:modules/Loading/R/load_decadal.R source("modules/Loading/R/dates2load.R") source("modules/Loading/R/check_latlon.R") source("modules/Loading/R/get_timeidx.R") @@ -17,13 +21,50 @@ source("modules/Loading/R/get_timeidx.R") # recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" # recipe_file <- "recipes/atomic_recipes/recipe_decadal_daily.yml" +<<<<<<< HEAD:modules/Loading/Loading_decadal.R +load_datasets <- function(recipe) { + + if (length(recipe$Analysis$Datasets$System$name) == 1){ ## Only one model + + output <- load_single_datasets(recipe = recipe) + + } else { ## Several models and/or Multimodel + + multimodel <- 
recipe$Analysis$Datasets$Multimodel ## "TRUE", "FALSE" or "both" + + ## Loading each model individually + data <- list() + n_members <- list(hcst = c(), fcst = c()) + for (model in 1:length(recipe$Analysis$Datasets$System$name)){ + recipe_single <- recipe + recipe_single$Analysis$Datasets$System$name <- recipe_single$Analysis$Datasets$System$name[model] + recipe_single$Analysis$Datasets$System$member <- recipe_single$Analysis$Datasets$System$member[model][[1]] + n_members$hcst[model] <- length(recipe_single$Analysis$Datasets$System$member) + n_members$fcst[model] <- length(recipe_single$Analysis$Datasets$System$member) + data[[model]] <- load_single_datasets(recipe = recipe_single) + } + + ## Joining models and/or creating multimodel + output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members) + + } + + return(output) # list(hcst = hcst, fcst = fcst, obs = obs) +} + +load_single_datasets <- function(recipe) { + + archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive + +======= load_decadal <- function(recipe) { ## archive <- read_yaml(paste0("conf/archive_decadal.yml"))$esarchive +>>>>>>> b92645e34fd3f703145f77d37f7defd98fb3a6d2:modules/Loading/R/load_decadal.R # Print Start() info or not DEBUG <- FALSE - + ## TODO: this should come from the main script # Create output folder and log: @@ -40,14 +81,14 @@ load_decadal <- function(recipe) { lats.max <- as.numeric(recipe$Analysis$Region$latmax) #10 lons.min <- as.numeric(recipe$Analysis$Region$lonmin) #0 lons.max <- as.numeric(recipe$Analysis$Region$lonmax) #10 - + # change to: sdates <- dates2load(recipe, logger) sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015 sdates_fcst <- recipe$Analysis$Time$fcst - + if (store.freq == "monthly_mean") { time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) - + } else if (store.freq == "daily_mean") { time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), @@ -55,10 +96,10 @@ load_decadal <- function(recipe) { sdates = sdates_hcst, calendar = archive$System[[exp.name]]$calendar) } - -#NOTE: May be used in the future -# season <- recipe$Analysis$Time$season - + + #NOTE: May be used in the future + # season <- recipe$Analysis$Time$season + #------------------------- # Read from archive: #------------------------- @@ -72,20 +113,20 @@ load_decadal <- function(recipe) { if (identical(member, 'all')) { member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]] } - + #------------------------- # derived from above: #------------------------- # Check lat and lon and decide CircularSort - circularsort <- check_latlon(latmin = lats.min, latmax = lats.max, lonmin = lons.min, lonmax = lons.max) - + circularsort <- check_latlon(latmin = lats.min, latmax = lats.max, lonmin = lons.min, lonmax = lons.max) + # generate transform params for system and ref regrid_params <- get_regrid_params(recipe, archive) - + # Only if the time length in each chunk may differ that we need largest_dims_length to be TRUE. Otherwise, set FALSE to increase efficiency. 
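(Editor's note on the comment above.) largest_dims_length = TRUE asks startR's Start() to probe every file for the largest length of each inner dimension instead of trusting the first file, which matters when a system's files hold unequal numbers of time steps. A hedged sketch of how the flag selection below could be kept extensible; the vector of affected systems is hypothetical beyond EC-Earth3-i2:

# Systems whose file chunks may differ in time length; extend as needed.
uneven_chunk_systems <- c('EC-Earth3-i2')
exp.name <- 'EC-Earth3-i2'  # as read from the recipe in the code above
need_largest_dims_length <- exp.name %in% uneven_chunk_systems  # TRUE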
need_largest_dims_length <- ifelse(exp.name == 'EC-Earth3-i2', TRUE, FALSE) - - + + #------------------------------------------- # Step 1: Load the hcst #------------------------------------------- @@ -138,21 +179,21 @@ load_decadal <- function(recipe) { if (!multi_path) { Start_hcst_arg_list <- Start_default_arg_list hcst <- do.call(Start, Start_hcst_arg_list) - + } else { Start_hcst_arg_list <- Start_default_arg_list Start_hcst_arg_list[['syear']] <- NULL Start_hcst_arg_list[['chunk_depends']] <- NULL remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear') Start_hcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] - + hcst <- do.call(Start, Start_hcst_arg_list) - + # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) - + # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only tmp <- array(dim = c(dim(hcst)[c('syear', 'time')])) @@ -162,120 +203,121 @@ load_decadal <- function(recipe) { tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) } attr(hcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') - + } - + tmp_time_attr <- attr(hcst, 'Variables')$common$time - + # change syear to c(sday, sweek, syear) # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { error(recipe$Run$logger, - "hcst has problem in matching data and time attr dimension.") + "hcst has problem in matching data and time attr dimension.") stop() } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) - + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { attributes(hcst)$Variables$common[[variable]] <- attributes(hcst)$Variables$dat1[[variable]] } - + # Change class from startR_array to s2dv_cube suppressWarnings( - hcst <- as.s2dv_cube(hcst) + hcst <- as.s2dv_cube(hcst) ) - -#------------------------------------------- -# Step 2: Load the fcst -#------------------------------------------- + + #------------------------------------------- + # Step 2: Load the fcst + #------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst)) { - tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, - version = version, sdates = sdates_fcst) - path_list <- tmp$path_list - multi_path <- tmp$multi_path - - #TODO: to make this case work; enhance Start() if it's possible - if (multi_path & length(variable) > 1) { - stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") - } - - # monthly & daily - if (!multi_path) { - #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst - Start_fcst_arg_list <- Start_default_arg_list - Start_fcst_arg_list[['dat']] <- path_list - Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) - fcst <- do.call(Start, Start_fcst_arg_list) - - - } else { # multi_path - - #TODO: time attribute is not correct. Improve Start(). 
-      Start_fcst_arg_list <- Start_default_arg_list
-      Start_fcst_arg_list[['dat']] <- path_list
-      Start_fcst_arg_list[['syear']] <- NULL
-      Start_fcst_arg_list[['chunk_depends']] <- NULL
-      remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear')
-      Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind]
-      fcst <- do.call(Start, Start_fcst_arg_list)
-
-      # Reshape and reorder dimensions
-      ## dat should be 1, syear should be length of dat; reorder dimensions
-      ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble]
-      dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6])
-      fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble'))
-
-      # Manipulate time attr because Start() cannot read it correctly
-      wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only
-      tmp <- array(dim = c(dim(fcst)[c('syear', 'time')]))
-      tmp[1, ] <- wrong_time_attr
-      yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst)
-      for (i_syear in 1:length(yr_diff)) {
-        tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear])
+    tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid,
+                         version = version, sdates = sdates_fcst)
+    path_list <- tmp$path_list
+    multi_path <- tmp$multi_path
+
+    #TODO: to make this case work; enhance Start() if it's possible
+    if (multi_path & length(variable) > 1) {
+      stop("The recipe requests multiple variables and start dates from both dcppA-hindcast and dcppB-forecast. This case is not available for now.")
      }
-      attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC')
-
-    }
-
-    tmp_time_attr <- attr(fcst, 'Variables')$common$time
-
-    # change syear to c(sday, sweek, syear)
-    # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble]
-    dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7])
-    if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) {
-      error(recipe$Run$logger,
-            "fcst has problem in matching data and time attr dimension.")
-      stop()
-    }
-    dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr))
-
-    #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1
-    if (multi_path) {
-      attributes(fcst)$Variables$common[[variable]] <- attributes(fcst)$Variables$dat1[[variable]]
-    }
-
-    # Change class from startR_array to s2dv_cube
-    suppressWarnings(
-      fcst <- as.s2dv_cube(fcst)
-    )
-
-    # Only syear could be different
-    if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) {
-      error(recipe$Run$logger,
-            "hcst and fcst do not share the same dimension structure.")
-      stop()
-    }
+    # monthly & daily
+    if (!multi_path) {
+      #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst
+      Start_fcst_arg_list <- Start_default_arg_list
+      Start_fcst_arg_list[['dat']] <- path_list
+      Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst)
+      fcst <- do.call(Start, Start_fcst_arg_list)
+
+
+    } else { # multi_path
+
+      #TODO: time attribute is not correct. Improve Start().
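(Editor's elaboration of the TODO above; the removed lines show the old multi_path branch, and the re-indented replacement resumes below.) The workaround rebuilds the [syear, time] time attribute from the stamps of the first start year by shifting whole years. A self-contained sketch with invented dates:

library(lubridate)
# Start() reportedly returns time stamps for the first start year only;
# replicate them for the remaining start years.
sdates_fcst <- c(2019, 2020, 2021)  # hypothetical start years
first_year_time <- ymd_hms("2019-11-16 00:00:00") + months(0:2)  # 3 lead months
tmp <- array(dim = c(syear = 3, time = 3))
tmp[1, ] <- first_year_time
yr_diff <- (sdates_fcst - sdates_fcst[1])[-1]
for (i_syear in seq_along(yr_diff)) {
  tmp[(i_syear + 1), ] <- first_year_time + years(yr_diff[i_syear])
}
time_attr <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC')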
+ Start_fcst_arg_list <- Start_default_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- NULL + Start_fcst_arg_list[['chunk_depends']] <- NULL + remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') + Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] + fcst <- do.call(Start, Start_fcst_arg_list) + + # Reshape and reorder dimensions + ## dat should be 1, syear should be length of dat; reorder dimensions + ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] + dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + + # Manipulate time attr because Start() cannot read it correctly + wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only + tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) + tmp[1, ] <- wrong_time_attr + yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) + for (i_syear in 1:length(yr_diff)) { + tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) + } + attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') + + } + + tmp_time_attr <- attr(fcst, 'Variables')$common$time + + # change syear to c(sday, sweek, syear) + # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] + dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { + error(recipe$Run$logger, + "fcst has problem in matching data and time attr dimension.") + stop() + } + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) + + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 + if (multi_path) { + attributes(fcst)$Variables$common[[variable]] <- attributes(fcst)$Variables$dat1[[variable]] + } + + # Change class from startR_array to s2dv_cube + suppressWarnings( + fcst <- as.s2dv_cube(fcst) + ) + + # Only syear could be different + if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { + error(recipe$Run$logger, + "hcst and fcst do not share the same dimension structure.") + stop() + } + } else { fcst <- NULL } + -#------------------------------------------- -# Step 3. Load the reference -#------------------------------------------- + #------------------------------------------- + # Step 3. 
Load the reference + #------------------------------------------- obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, "$var$$var_dir$", "$var$_$file_date$.nc") var_dir_obs <- archive$Reference[[ref.name]][[store.freq]][variable] # list(tas = "_f1h-r1440x721cds", tos = "_f1h-r1440x721cds") @@ -290,70 +332,70 @@ load_decadal <- function(recipe) { dates <- hcst$attrs$Dates dates_file <- sapply(dates, format, '%Y%m') dim(dates_file) <- dim(dates) - + if (store.freq == "daily_mean") { -#//////////////// -# Method 1: use hcst time attr as obs time selector -#//////////////// - - # Set hour to 12:00 to ensure correct date retrieval for daily data - lubridate::hour(dates) <- 12 - lubridate::minute(dates) <- 00 - # Restore correct dimensions - dim(dates) <- dim(dates_file) - - obs <- Start(dat = obs.path, - var = variable, - var_dir = var_dir_obs, - var_dir_depends = 'var', - file_date = unique(format(dates, '%Y%m')), - time = dates, # [sday, sweek, syear, time] - time_across = 'file_date', - merge_across_dims = TRUE, - split_multiselected_dims = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_extra_cells = 2, - transform_params = list(grid = regrid_params$obs.gridtype, #nc file - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = 'file_date'), - silent = !DEBUG, - retrieve = TRUE) + #//////////////// + # Method 1: use hcst time attr as obs time selector + #//////////////// + + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = unique(format(dates, '%Y%m')), + time = dates, # [sday, sweek, syear, time] + time_across = 'file_date', + merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$obs.gridtype, #nc file + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + silent = !DEBUG, + retrieve = TRUE) } else if (store.freq == "monthly_mean") { -#//////////////// -# Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector -#//////////////// - - obs <- Start(dat = obs.path, - var = variable, - var_dir = var_dir_obs, - var_dir_depends = 'var', - file_date = dates_file, #dates_arr, # [sday, sweek, syear, time] - split_multiselected_dims = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_extra_cells = 2, - transform_params = list(grid 
= regrid_params$obs.gridtype, #nc file - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = 'file_date'), - metadata_dims = 'var', - silent = !DEBUG, - retrieve = TRUE) + #//////////////// + # Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector + #//////////////// + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = dates_file, #dates_arr, # [sday, sweek, syear, time] + split_multiselected_dims = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$obs.gridtype, #nc file + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + metadata_dims = 'var', + silent = !DEBUG, + retrieve = TRUE) } @@ -367,91 +409,92 @@ load_decadal <- function(recipe) { # Only ensemble dim could be different if (!identical(dim(obs), dim(hcst$data)[-9])) { error(recipe$Run$logger, - "obs and hcst dimensions do not match.") + "obs and hcst dimensions do not match.") stop() } # Add ensemble dim to obs dim(obs) <- c(dim(obs), ensemble = 1) - + # Change class from startR_array to s2dv_cube suppressWarnings( - obs <- as.s2dv_cube(obs) + obs <- as.s2dv_cube(obs) ) - -#------------------------------------------- -# Step 4. Verify the consistance between data -#------------------------------------------- + + + #------------------------------------------- + # Step 4. 
Verify the consistency between data
+  #-------------------------------------------
   # dimension
   if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) {
     error(recipe$Run$logger,
-          "hcst and obs don't share the same dimension names.")
+            "hcst and obs don't share the same dimension names.")
     stop()
   } else {
     ens_ind <- which(names(dim(obs$data)) == 'ensemble')
     match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data)))
     if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) {
       error(recipe$Run$logger,
-            "hcst and obs don't share the same dimension length.")
+              "hcst and obs don't share the same dimension length.")
       stop()
     }
   }
-  
+
   # time attribute
   if (!identical(format(hcst$attrs$Dates, '%Y%m'),
                  format(obs$attrs$Dates, '%Y%m'))) {
     error(recipe$Run$logger,
-          "hcst and obs don't share the same time.")
+            "hcst and obs don't share the same time.")
     stop()
   }
-  
+
   # lat and lon attributes
   if (!(recipe$Analysis$Regrid$type == 'none')) {
     if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
       lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
-                             "Please check the original grids and the",
-                             "regrid parameters in your recipe.")
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
       error(recipe$Run$logger, lat_error_msg)
       hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
-                             "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
+                              "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
       info(recipe$Run$logger, hcst_lat_msg)
       obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
-                            "; Last obs lat: ", obs$lat[length(obs$lat)])
+                             "; Last obs lat: ", obs$lat[length(obs$lat)])
       info(recipe$Run$logger, obs_lat_msg)
       stop("hcst and obs don't share the same latitudes.")
     }
-    
+
     if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
       lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
-                             "Please check the original grids and the",
-                             "regrid parameters in your recipe.")
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
       error(recipe$Run$logger, lon_error_msg)
       hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
-                             "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
+                              "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
       info(recipe$Run$logger, hcst_lon_msg)
       obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
-                            "; Last obs lon: ", obs$lon[length(obs$lon)])
+                             "; Last obs lon: ", obs$lon[length(obs$lon)])
       info(recipe$Run$logger, obs_lon_msg)
       stop("hcst and obs don't share the same longitudes.")
     }
-  }
-  
+  }
+
   # Check fcst
   if (!is.null(fcst)) {
     # dimension
     if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) {
       error(recipe$Run$logger,
-            "hcst and fcst don't share the same dimension names.")
+              "hcst and fcst don't share the same dimension names.")
       stop()
     } else {
       ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear'))
       match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data)))
       if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) {
         error(recipe$Run$logger,
-              "hcst and fcst don't share the same dimension length.")
+                "hcst and fcst don't share the same dimension length.")
         stop()
       }
     }
-    
+
     # lat and lon attributes
     if (!(recipe$Analysis$Regrid$type == 'none')) {
       if (!identical(as.vector(hcst$lat), as.vector(fcst$lat))) {
@@ -467,7 +510,7 @@ load_decadal <- function(recipe) {
         info(recipe$Run$logger, fcst_lat_msg)
         stop("hcst and fcst don't share the same latitudes.")
       }
-      
+
       if (!identical(as.vector(hcst$lon), as.vector(fcst$lon))) {
         lon_error_msg <- paste("Longitude mismatch
between hcst and fcst.", "Please check the original grids and the", @@ -482,8 +525,74 @@ load_decadal <- function(recipe) { stop("hcst and fcst don't share the same longitudes.") } } - + } +<<<<<<< HEAD:modules/Loading/Loading_decadal.R + + + #------------------------------------------- + # Step 5. Tune data + #------------------------------------------- + # Remove negative values in accumulative variables + dictionary <- read_yaml("conf/variable-dictionary.yml") + for (var_idx in 1:length(variable)) { + var_name <- variable[var_idx] + if (dictionary$vars[[var_name]]$accum) { + info(recipe$Run$logger, + paste0("Accumulated variable ", var_name, + ": setting negative values to zero.")) + # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, + # along = "var", + # indices = var_idx, F), 0) + obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 + hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 + } + } + + # Convert prlr from m/s to mm/day + ## TODO: Make a unit conversion function + if (variable[[var_idx]] == "prlr") { + # Verify that the units are m/s and the same in obs and hcst + if (((obs$attrs$Variable$metadata[[var_name]]$units == "m s-1") || + (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) && + ((hcst$attrs$Variable$metadata[[var_name]]$units == "m s-1") || + (hcst$attrs$Variable$metadata[[var_name]]$units == "m s**-1"))) { + info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") + obs$data[, var_idx, , , , , , , ] <- + obs$data[, var_idx, , , , , , , ]*86400*1000 + obs$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + hcst$data[, var_idx, , , , , , , ] <- + hcst$data[, var_idx, , , , , , , ]*86400*1000 + hcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + if (!is.null(fcst)) { + fcst$data[, var_idx, , , , , , , ] <- + fcst$data[, var_idx, , , , , , , ]*86400*1000 + fcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + } + } + } + } + + #------------------------------------------- + # Step 6. Print summary + #------------------------------------------- + + # Print a summary of the loaded data for the user, for each object + if (recipe$Run$logger$threshold <= 2) { + data_summary(hcst, recipe) + data_summary(obs, recipe) + if (!is.null(fcst)) { + data_summary(fcst, recipe) + } + } + + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + +======= #------------------------------------------- # Step 5. 
Tune data @@ -514,5 +623,6 @@ load_decadal <- function(recipe) { "##### DATA LOADING COMPLETED SUCCESSFULLY #####") .log_memory_usage(recipe$Run$logger, when = "After loading") +>>>>>>> b92645e34fd3f703145f77d37f7defd98fb3a6d2:modules/Loading/R/load_decadal.R return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/R/load_seasonal_calibrated.R b/modules/Loading/R/load_seasonal_calibrated.R new file mode 100644 index 0000000000000000000000000000000000000000..f8e3ea63be9f93203cc69f1cfada2e563f283432 --- /dev/null +++ b/modules/Loading/R/load_seasonal_calibrated.R @@ -0,0 +1,265 @@ +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") + +#TODO: Get 'output_module' from recipe (e.g., multimodel$readFrom) +load_seasonal_calibrated <- function(recipe, output_module) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + store.freq <- recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", output_module) + hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), + "$var$_$file_date$.nc") + hcst.path <- gsub(variable[1], "$var$", hcst.path) + fcst.path <- obs.path <- hcst.path + obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) + + # Load hindcast + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, 
"Variables")$common$time) <- default_time_dims + } + ############################### + + # Convert hcst to s2dv_cube object + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- Start(dat = fcst.path, + var = variable, + file_date = sdates$fcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + ############################# + + # Convert fcst to s2dv_cube + fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$attrs$Dates[] <- + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + } else { + fcst <- NULL + } + + + + # Load reference + #------------------------------------------------------------------- + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. 
+ dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + if (store.freq == "monthly_mean") { + + dates_file <- paste0(format(as.Date(dates, '%Y%m%d'), "%Y%m"), "01") + dim(dates_file) <- dim(dates) + + obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq %in% c("daily_mean", "daily")) { + + ############################# + #NOTE: NOT TESTED YET + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + ############################# + + } + + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + + # Check for consistency between hcst and obs grid + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + + # Print a summary of the loaded data for the user, for each object + if (recipe$Run$logger$threshold <= 2) { + data_summary(hcst, recipe) + data_summary(obs, recipe) + if (!is.null(fcst)) { + data_summary(fcst, recipe) + } + } + + 
+  info(recipe$Run$logger,
+       "##### DATA LOADING COMPLETED SUCCESSFULLY #####")
+
+
+  return(list(hcst = hcst, fcst = fcst, obs = obs))
+
+}
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index fd54f17f3471cd0ca880712eee91f64f304666cf..189d1701c07d415524f40060de9a383b9a698229 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -43,6 +43,7 @@ Skill <- function(recipe, data, agg = 'global') {
 #                correct.")
 #   }
   time_dim <- 'syear'
+  dat_dim <- 'dat'
   memb_dim <- 'ensemble'
   metrics <- tolower(recipe$Analysis$Workflow$Skill$metric)
   if (is.null(recipe$Analysis$ncores)) {
@@ -81,6 +82,7 @@ Skill <- function(recipe, data, agg = 'global') {
     if (metric %in% c('rps', 'frps')) {
       skill <- RPS(data$hcst$data, data$obs$data,
                    time_dim = time_dim,
+                   dat_dim = dat_dim,
                    memb_dim = memb_dim,
                    Fair = Fair,
                    cross.val = cross.val,
@@ -96,6 +98,7 @@ Skill <- function(recipe, data, agg = 'global') {
     } else if (metric %in% c('rpss', 'frpss')) {
       skill <- RPSS(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
+                    dat_dim = dat_dim,
                     memb_dim = memb_dim,
                     Fair = Fair,
                     cross.val = cross.val,
@@ -108,6 +111,7 @@ Skill <- function(recipe, data, agg = 'global') {
     } else if (metric == 'bss10') {
       skill <- RPSS(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
+                    dat_dim = dat_dim,
                     memb_dim = memb_dim,
                     prob_thresholds = 0.1,
                     cross.val = cross.val,
@@ -121,6 +125,7 @@ Skill <- function(recipe, data, agg = 'global') {
     } else if (metric == 'bss90') {
       skill <- RPSS(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
+                    dat_dim = dat_dim,
                     memb_dim = memb_dim,
                     prob_thresholds = 0.9,
                     cross.val = cross.val,
@@ -134,6 +139,7 @@ Skill <- function(recipe, data, agg = 'global') {
     } else if (metric %in% c('crps', 'fcrps')) {
       skill <- CRPS(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
+                    dat_dim = dat_dim,
                     memb_dim = memb_dim,
                     Fair = Fair,
                     ncores = ncores)
@@ -147,6 +153,7 @@ Skill <- function(recipe, data, agg = 'global') {
     } else if (metric %in% c('crpss', 'fcrpss')) {
       skill <- CRPSS(data$hcst$data, data$obs$data,
                      time_dim = time_dim,
+                     dat_dim = dat_dim,
                      memb_dim = memb_dim,
                      clim.cross.val = cross.val,
                      Fair = Fair,
@@ -175,11 +182,13 @@ Skill <- function(recipe, data, agg = 'global') {
         (recipe$Analysis$Workflow$Anomalies$compute)) {
         skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data,
                       time_dim = time_dim,
+                      dat_dim = dat_dim,
                       memb_dim = memb_dim,
                       ncores = ncores)
       } else {
         skill <- Bias(data$hcst$data, data$obs$data,
                       time_dim = time_dim,
+                      dat_dim = dat_dim,
                       memb_dim = memb_dim,
                       ncores = ncores)
       }
@@ -191,11 +200,13 @@ Skill <- function(recipe, data, agg = 'global') {
         (recipe$Analysis$Workflow$Anomalies$compute)) {
         skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data,
                            time_dim = time_dim,
+                           dat_dim = dat_dim,
                            memb_dim = memb_dim,
                            ncores = ncores)
       } else {
         skill <- AbsBiasSS(data$hcst$data, data$obs$data,
                            time_dim = time_dim,
+                           dat_dim = dat_dim,
                            memb_dim = memb_dim,
                            ncores = ncores)
       }
@@ -211,6 +222,7 @@ Skill <- function(recipe, data, agg = 'global') {
-                          dat_dim = 'dat',
+                          dat_dim = dat_dim,
                           time_dim = time_dim,
                           method = 'pearson',
                           memb_dim = memb_dim,
                           memb = memb,
                           conf = F,
@@ -227,6 +239,7 @@ Skill <- function(recipe, data, agg = 'global') {
       skill <- RMSSS(data$hcst$data, data$obs$data,
-                     dat_dim = 'dat',
+                     dat_dim = dat_dim,
                      time_dim = time_dim,
                      memb_dim = memb_dim,
                      pval = FALSE,
                      sign = TRUE,
@@ -240,6 +253,7 @@ Skill <- function(recipe, data, agg = 'global') {
     } else if (metric == 'mse') {
       skill <- MSE(data$hcst$data, data$obs$data,
                    time_dim = time_dim,
+                   dat_dim = dat_dim,
                   memb_dim = memb_dim,
-                  dat_dim = 'dat',
                   comp_dim = NULL,
@@ -252,6 +266,7 @@ Skill <- function(recipe, data, agg = 'global') {
     } else if (metric == 'msss') {
       skill <- MSSS(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
+                    dat_dim = dat_dim,
                     memb_dim = memb_dim,
-                    dat_dim = 'dat',
                     sign = TRUE,
diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R
index 3d00742dd4de5443c9cafddd13a7990ffafc1dd4..422db28de308d0e92d16660f9aaccc449ce08372 100644
--- a/modules/Visualization/R/plot_ensemble_mean.R
+++ b/modules/Visualization/R/plot_ensemble_mean.R
@@ -22,8 +22,18 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
   } else {
     projection <- "cylindrical_equidistant"
   }
+
+  ## TODO: Add a for loop for dat
   # Compute ensemble mean
-  ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble')
+  if (length(recipe$Analysis$Datasets$System$name) == 1) {
+    ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble')
+  } else {
+    # multi-model: the joined ensemble dimension may contain NAs
+    ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble', na.rm = TRUE)
+  }
   # Loop over variable dimension
   for (var in 1:fcst$dims[['var']]) {
     variable <- fcst$attrs$Variable$varName[[var]]
@@ -77,10 +87,15 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
     for (i_syear in start_date) {
       if (length(start_date) == 1) {
+        ## TODO: Consider multimodel here
         i_var_ens_mean <- ClimProjDiags::Subset(var_ens_mean,
                                                 along = c("syear"),
                                                 indices = which(start_date == i_syear),
                                                 drop = 'selected')
         outfile <- paste0(outdir[[var]], "forecast_ensemble_mean-", start_date)
       } else {
         i_var_ens_mean <- ClimProjDiags::Subset(var_ens_mean,
diff --git a/recipes/tests/recipe_model_decadal.yml b/recipes/tests/recipe_model_decadal.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d7d5339d66c71de4b8ac018014926ba08c70fbff
--- /dev/null
+++ b/recipes/tests/recipe_model_decadal.yml
@@ -0,0 +1,51 @@
+Description:
+  Author: Carlos Delgado Torres
+  Info: Test for one decadal forecast system (only hindcast)
+Analysis:
+  Horizon: Decadal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: EC-Earth3-i4
+      member: r1i4p1f1,r2i4p1f1 #'all'
+    Multimodel: no
+    Reference:
+      name: JRA-55
+  Time:
+    fcst_year:
+    hcst_start: 2015 # 2015-2016 in dcppA, 2017-2018 in dcppB
+    hcst_end: 2018
+#    season: 'Annual'
+    ftime_min: 6 # Apr
+    ftime_max: 8
+  Region:
+    latmin: 40
+    latmax: 55
+    lonmin: 0
+    lonmax: 10
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    Anomalies:
+      compute: no
+      cross_validation:
+    Calibration:
+      method: 'bias'
+    Skill:
+      metric: EnsCorr RPSS
+    Probabilities:
+      percentiles: [[1/3, 2/3]]
+    Indicators:
+      index: FALSE
+  ncores: 8 # Optional, int: number of cores, defaults to 1
+  remove_NAs: FALSE # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs/
+  code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/
+
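Note: in the decadal recipes, `member` is a comma-separated string per system
(or a list of such strings for multi-model). A minimal sketch of turning that
field into a character vector for Start(); the helper name is hypothetical and
the actual parsing in the loader may differ:

members_from_recipe <- function(member_field) {
  # 'all' is passed through so Start() selects every available member
  if (identical(member_field, 'all')) {
    return('all')
  }
  # "r1i4p1f1,r2i4p1f1" -> c("r1i4p1f1", "r2i4p1f1")
  trimws(strsplit(member_field, ",")[[1]])
}
# members_from_recipe("r1i4p1f1,r2i4p1f1")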
diff --git a/recipes/tests/recipe_model_seasonal.yml b/recipes/tests/recipe_model_seasonal.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a45ff2c88a799f027c9153ced6cbdcee1cf08e34
--- /dev/null
+++ b/recipes/tests/recipe_model_seasonal.yml
@@ -0,0 +1,55 @@
+Description:
+  Author: Carlos Delgado Torres
+  Info: Test for one seasonal forecast system (hindcast and 2020 forecast)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'
+  Variables:
+    name: tas # Mandatory, str: variable name in /esarchive/
+    freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean'
+  Datasets:
+    System:
+      name: DWD-GCFS2.1 # ECMWF-SEAS5 # Mandatory, str: System name.
+    Multimodel: no # Mandatory, bool: Either yes/true or no/false
+    Reference:
+      name: ERA5 # Mandatory, str: Reference name.
+  Time:
+    sdate: '1101' # Mandatory, int: Start date, 'mmdd'
+    fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY'
+    hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at "1".
+    ftime_max: 2 # Mandatory, int: Last forecast time step in months. Starts at "1".
+  Region:
+    latmin: 0 # Mandatory, int: minimum latitude
+    latmax: 3 # Mandatory, int: maximum latitude
+    lonmin: 0 # Mandatory, int: minimum longitude
+    lonmax: 2 # Mandatory, int: maximum longitude
+  Regrid:
+    method: bilinear # Mandatory, str: Interpolation method.
+    type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none',
+                    # or a CDO-accepted grid.
+  Workflow:
+    Calibration:
+      method: mse_min # Mandatory, str: Calibration method.
+    Anomalies:
+      compute: no # Mandatory, bool: Either yes/true or no/false
+      cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false
+    Skill:
+      metric: RPSS CRPSS # Mandatory, str: List of skill metrics.
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds
+      # for quantiles and probability categories. Each set of thresholds should
+      # be enclosed within brackets.
+    Indicators:
+      index: no # This feature is not implemented yet
+  ncores: 10 # Optional, int: number of cores to be used in parallel computation.
+             # If left empty, defaults to 1.
+  remove_NAs: TRUE # Optional, bool: Whether to remove NAs.
+                   # If left empty, defaults to FALSE.
+  Output_format: S2S4E # This feature is not implemented yet
+Run:
+  Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'.
+  Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'.
+  output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs # Mandatory, str: output directory
+  code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/
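Note: YAML has no fraction type, so the `percentiles` entries above reach R as
character strings ("1/3", "2/3", ...). A minimal sketch of the conversion,
assuming the recipe is read with yaml::read_yaml():

percentiles <- list(c("1/3", "2/3"), c("1/10", "9/10"), c("1/4", "2/4", "3/4"))
thresholds <- lapply(percentiles,
                     function(x) sapply(x, function(y) eval(parse(text = y))))
# thresholds[[1]] is now c(1/3, 2/3) as numerics, usable as prob_thresholds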
diff --git a/recipes/tests/recipe_multimodel_decadal.yml b/recipes/tests/recipe_multimodel_decadal.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2ec31fb39fa6c10ce70c93b15001a7f76ca1c756
--- /dev/null
+++ b/recipes/tests/recipe_multimodel_decadal.yml
@@ -0,0 +1,51 @@
+Description:
+  Author: Carlos Delgado Torres
+  Info: Test for multi-model decadal forecast (only hindcast)
+Analysis:
+  Horizon: Decadal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: [EC-Earth3-i4, FGOALS-f3-L]
+      member: [[r1i4p1f1,r2i4p1f1], [r1i1p1f1,r2i1p1f1,r3i1p1f1]]
+    Multimodel: both ## yes, no, both
+    Reference:
+      name: JRA-55
+  Time:
+    fcst_year:
+    hcst_start: 2015 # 2015-2016 in dcppA, 2017-2018 in dcppB
+    hcst_end: 2018
+#    season: 'Annual'
+    ftime_min: 6 # Apr
+    ftime_max: 8
+  Region:
+    latmin: 40
+    latmax: 55
+    lonmin: 0
+    lonmax: 10
+  Regrid:
+    method: bilinear
+    type: to_reference
+  Workflow:
+    Anomalies:
+      compute: no
+      cross_validation:
+    Calibration:
+      method: 'bias'
+    Skill:
+      metric: EnsCorr RPSS
+    Probabilities:
+      percentiles: [[1/3, 2/3]]
+    Indicators:
+      index: FALSE
+  ncores: 8 # Optional, int: number of cores, defaults to 1
+  remove_NAs: FALSE # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs/
+  code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/
+
diff --git a/recipes/tests/recipe_multimodel_seasonal.yml b/recipes/tests/recipe_multimodel_seasonal.yml
new file mode 100644
index 0000000000000000000000000000000000000000..87a5e31e6f8e01cc5468c57db5b72cff9742e5d4
--- /dev/null
+++ b/recipes/tests/recipe_multimodel_seasonal.yml
@@ -0,0 +1,55 @@
+Description:
+  Author: Carlos Delgado Torres
+  Info: Test for a multi-model seasonal forecast (hindcast and 2020 forecast)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'
+  Variables:
+    name: tas # Mandatory, str: variable name in /esarchive/
+    freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean'
+  Datasets:
+    System:
+      name: [ECMWF-SEAS5, DWD-GCFS2.1] # Mandatory, str: System names.
+    Multimodel: both # Mandatory: either yes/true, no/false, or both
+    Reference:
+      name: ERA5 # Mandatory, str: Reference name.
+  Time:
+    sdate: '1101' # Mandatory, int: Start date, 'mmdd'
+    fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY'
+    hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at "1".
+    ftime_max: 2 # Mandatory, int: Last forecast time step in months. Starts at "1".
+  Region:
+    latmin: 0 # Mandatory, int: minimum latitude
+    latmax: 3 # Mandatory, int: maximum latitude
+    lonmin: 0 # Mandatory, int: minimum longitude
+    lonmax: 2 # Mandatory, int: maximum longitude
+  Regrid:
+    method: bilinear # Mandatory, str: Interpolation method.
+    type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none',
+                    # or a CDO-accepted grid.
+  Workflow:
+    Calibration:
+      method: mse_min # Mandatory, str: Calibration method.
+    Anomalies:
+      compute: no # Mandatory, bool: Either yes/true or no/false
+      cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false
+    Skill:
+      metric: RPSS CRPSS # Mandatory, str: List of skill metrics.
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds
+      # for quantiles and probability categories. Each set of thresholds should
+      # be enclosed within brackets.
+    Indicators:
+      index: no # This feature is not implemented yet
+  ncores: 10 # Optional, int: number of cores to be used in parallel computation.
+             # If left empty, defaults to 1.
+  remove_NAs: TRUE # Optional, bool: Whether to remove NAs.
+                   # If left empty, defaults to FALSE.
+  Output_format: S2S4E # This feature is not implemented yet
+Run:
+  Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'.
+  Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'.
+  output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs # Mandatory, str: output directory
+  code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/
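Note: `metric` in these recipes is a single space-separated string
("RPSS CRPSS"), matching the tolower() call added in modules/Skill/Skill.R
above. A minimal sketch of iterating over it; the surrounding loop in Skill()
may differ:

metrics <- tolower("RPSS CRPSS")
for (metric in strsplit(metrics, " ")[[1]]) {
  message("Computing ", metric)  # "rpss", then "crpss"
}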
diff --git a/recipes/tests/recipe_multimodel_seasonal_aho.yml b/recipes/tests/recipe_multimodel_seasonal_aho.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f56119fc400d21e1e6c4b75cb860f44e412bd1a1
--- /dev/null
+++ b/recipes/tests/recipe_multimodel_seasonal_aho.yml
@@ -0,0 +1,61 @@
+Description:
+  Author: An-Chi Ho
+  Info: Test for a multi-model seasonal forecast (hindcast and 2020 forecast)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'
+  Variables:
+    name: tas # Mandatory, str: variable name in /esarchive/
+    freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean'
+  Datasets:
+    System:
+      name: [ECMWF-SEAS5, DWD-GCFS2.1] # Mandatory, str: System names.
+    Multimodel: both # Mandatory: either yes/true, no/false, or both
+    Reference:
+      name: ERA5 # Mandatory, str: Reference name.
+  Time:
+    sdate: '1101' # Mandatory, int: Start date, 'mmdd'
+    fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY'
+    hcst_start: '2013' # Mandatory, int: Hindcast initialization start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at "1".
+    ftime_max: 5 # Mandatory, int: Last forecast time step in months. Starts at "1".
+  Region:
+    latmin: 0 # Mandatory, int: minimum latitude
+    latmax: 3 # Mandatory, int: maximum latitude
+    lonmin: 0 # Mandatory, int: minimum longitude
+    lonmax: 2 # Mandatory, int: maximum longitude
+  Regrid:
+    method: bilinear # Mandatory, str: Interpolation method.
+    type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none',
+                    # or a CDO-accepted grid.
+  Workflow:
+    Calibration:
+      method: mse_min # Mandatory, str: Calibration method.
+      save: none #all
+    Anomalies:
+      compute: yes # Mandatory, bool: Either yes/true or no/false
+      cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false
+      save: none
+    Skill:
+      metric: RPSS CRPSS # Mandatory, str: List of skill metrics.
+      save: none #all
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds
+      # for quantiles and probability categories. Each set of thresholds should
+      # be enclosed within brackets.
+      save: none #all
+    Visualization:
+      plots: forecast_ensemble_mean most_likely_terciles #skill_metrics
+    Indicators:
+      index: no # This feature is not implemented yet
+  ncores: 1 # Optional, int: number of cores to be used in parallel computation.
+            # If left empty, defaults to 1.
+  remove_NAs: TRUE # Optional, bool: Whether to remove NAs.
+                   # If left empty, defaults to FALSE.
+  Output_format: S2S4E # This feature is not implemented yet
+Run:
+  Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'.
+  Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'.
+  output_dir: /esarchive/scratch/aho/tmp/auto-s2s_outputs # Mandatory, str: output directory
+  code_dir: /esarchive/scratch/aho/git/auto-s2s/
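Note: the na.rm = TRUE branch added in plot_ensemble_mean.R above assumes that
when several systems are joined along 'ensemble', the smaller ensembles are
NA-padded up to the largest one. A toy check of that behaviour (the array
values are illustrative):

library(s2dv)
x <- array(c(1, 2, 4, NA), dim = c(syear = 1, ensemble = 4))  # 3 members + 1 NA pad
MeanDims(x, 'ensemble', na.rm = TRUE)  # 2.33..., the padded NA is ignored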
diff --git a/recipes/tests/recipe_multimodel_seasonal_aho_single.yml b/recipes/tests/recipe_multimodel_seasonal_aho_single.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5a8d2d3f3ab533826281b95cfe8f8ba599ea14fe
--- /dev/null
+++ b/recipes/tests/recipe_multimodel_seasonal_aho_single.yml
@@ -0,0 +1,61 @@
+Description:
+  Author: An-Chi Ho
+  Info: Test for one seasonal forecast system (hindcast and 2020 forecast)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'
+  Variables:
+    name: tas # Mandatory, str: variable name in /esarchive/
+    freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean'
+  Datasets:
+    System:
+      name: ECMWF-SEAS5 # Mandatory, str: System name.
+    Multimodel: no # Mandatory: either yes/true, no/false, or both
+    Reference:
+      name: ERA5 # Mandatory, str: Reference name.
+  Time:
+    sdate: '1101' # Mandatory, int: Start date, 'mmdd'
+    fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY'
+    hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at "1".
+    ftime_max: 5 # Mandatory, int: Last forecast time step in months. Starts at "1".
+  Region:
+    latmin: 0 # Mandatory, int: minimum latitude
+    latmax: 3 # Mandatory, int: maximum latitude
+    lonmin: 0 # Mandatory, int: minimum longitude
+    lonmax: 2 # Mandatory, int: maximum longitude
+  Regrid:
+    method: bilinear # Mandatory, str: Interpolation method.
+    type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none',
+                    # or a CDO-accepted grid.
+  Workflow:
+    Calibration:
+      method: mse_min # Mandatory, str: Calibration method.
+      save: none
+    Anomalies:
+      compute: yes # Mandatory, bool: Either yes/true or no/false
+      cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false
+      save: none
+    Skill:
+      metric: RPSS CRPSS # Mandatory, str: List of skill metrics.
+      save: none
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds
+      # for quantiles and probability categories. Each set of thresholds should
+      # be enclosed within brackets.
+      save: none
+    Visualization:
+      plots: forecast_ensemble_mean
+    Indicators:
+      index: no # This feature is not implemented yet
+  ncores: 10 # Optional, int: number of cores to be used in parallel computation.
+             # If left empty, defaults to 1.
+  remove_NAs: TRUE # Optional, bool: Whether to remove NAs.
+                   # If left empty, defaults to FALSE.
+  Output_format: S2S4E # This feature is not implemented yet
+Run:
+  Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'.
+  Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'.
+  output_dir: /esarchive/scratch/aho/tmp/auto-s2s_outputs # Mandatory, str: output directory
+  code_dir: /esarchive/scratch/aho/git/auto-s2s/
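Note: a minimal driver for exercising these test recipes, assuming the usual
entry points of this repository (prepare_outputs() to read and check the
recipe, then the module functions patched above); exact return shapes may
differ per branch:

source("tools/libs.R")
source("modules/Loading/Loading.R")
source("modules/Calibration/Calibration.R")
source("modules/Skill/Skill.R")

recipe <- prepare_outputs("recipes/tests/recipe_multimodel_seasonal.yml")
data <- load_datasets(recipe)      # list(hcst, fcst, obs) of s2dv_cube objects
data <- Calibration(recipe, data)  # calibrated hindcast/forecast
skill <- Skill(recipe, data)       # metrics now receive dat_dim = 'dat'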