diff --git a/conf/archive.yml b/conf/archive.yml
index a982b84f821ed5a92ca765f36384f2384d188a5a..88872cb85e4bda71134af98417c64720df05a28c 100644
--- a/conf/archive.yml
+++ b/conf/archive.yml
@@ -16,7 +16,7 @@ esarchive:
                      "tasmin":"_f24h/", "tasmax":"_f24h/",
                      "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/",
                      "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/",
-                     "tdps":"_f6h/"}
+                     "tdps":"_f6h/", "tos":"_f6h/"}
     nmember:
       fcst: 51
       hcst: 25
@@ -156,7 +156,9 @@ esarchive:
                      "tasmin":"_f1h-r1440x721cds/",
                      "ta300":"_f1h-r1440x721cds/",
                      "ta500":"_f1h-r1440x721cds/",
-                     "ta850":"_f1h-r1440x721cds/"}
+                     "ta850":"_f1h-r1440x721cds/",
+                     "tos":"_f1h-r1440x721cds/"
+                     }
     calendar: "standard"
     reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc"
   ERA5-Land:
@@ -197,6 +199,22 @@ esarchive:
       monthly_mean: {"prlr":"_f6h-r2631x1113/"}
     calendar: "proleptic_gregorian"
     reference_grid: "/esarchive/recon/ecmwf/cerraland/monthly_mean/prlr_f6h-r2631x1113/prlr_200412.nc"
+  HadCRUT5:
+    name: "HadCRUT5"
+    institution: "Met Office"
+    src: "obs/ukmo/hadcrut_v5.0_analysis/"
+    monthly_mean: {"tasanomaly":"/"}
+    calendar: "proleptic_gregorian"
+    reference_grid: "/esarchive/obs/ukmo/hadcrut_v5.0_analysis/monthly_mean/tasanomaly/tasanomaly_202001.nc"
+  BEST:
+    name: "BEST"
+    institution: "Berkeley Earth"
+    src: "obs/berkeleyearth/berkeleyearth/"
+    daily_mean: {"tas":"/"}
+    monthly_mean: {"tas":"/"}
+    calendar: "proleptic_gregorian"
+    reference_grid: "/esarchive/obs/berkeleyearth/berkeleyearth/monthly_mean/tas/tas_201805.nc"
+
diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml
index 91637024e099c9141898a17651d07cc7a0c61c24..2e0a1b296c14cdf1230e872ad94fd3d1e556829d 100644
--- a/conf/archive_decadal.yml
+++ b/conf/archive_decadal.yml
@@ -91,9 +91,9 @@ esarchive:
       first_dcppB_syear: 2019
       monthly_mean:
         table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"}
-        grid: {"tas":"gr", "psl":"gr", "pr":"gr", "ts":"gr", "tos":"gr"}
+        grid: {"tas":"gn", "psl":"gr", "pr":"gr", "ts":"gr", "tos":"gr"}
         #version depends on member and variable
-        version: {"tas":"v20200316", "psl":"v20200316", "pr":"v20200316", "ts":"v20200316", "tos":"v20200417"}
+        version: {"tas":"v20200417", "psl":"v20200316", "pr":"v20200316", "ts":"v20200316", "tos":"v20200417"}
       daily_mean:
         grid: {"tas":"gn"}
         version: {"tasmin":"v20200101", "tasmax":"v20200101", "pr":"v20200417"}
@@ -132,10 +132,10 @@ esarchive:
       fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/"
       first_dcppB_syear: 2020
      monthly_mean:
-        table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"}
+        table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon", "tos":"Omon"}
-        grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"}
-        version: {"tas":"v20190429", "pr":"v20190429", "psl":"v20190429", "tasmin":"v20190429", "tasmax":"v20190429"}
+        grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn", "tos":"gr"}
+        version: {"tas":"v20190429", "pr":"v20190429", "psl":"v20190429", "tasmin":"v20190429", "tasmax":"v20190429", "tos":"v20190429"}
       daily_mean:
         grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"}
         version: {"pr":"v20190429", "tas":"v20190429", "tasmax":"v20190429", "tasmin":"v20190429"}
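For context, these archive entries are consumed through `yaml::read_yaml()`, exactly as the Loading modules below do; a minimal sketch (the lookups shown are assumptions based on the keys defined above, not part of the patch):

```r
library(yaml)

# Read the esarchive configuration and pick up the new reference datasets
archive <- read_yaml("conf/archive.yml")$esarchive
reference_descrip <- archive$Reference[["HadCRUT5"]]

reference_descrip$monthly_mean[["tasanomaly"]]  # "/": files sit directly under monthly_mean/tasanomaly/
file.path(archive$src, reference_descrip$src)   # base directory of the new reference
```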
diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml
index 917abc64405d7a3297b557ac71547679b371cf6c..0bfbffe002c81d389207a54b8fb8687f991fd33e 100644
--- a/conf/variable-dictionary.yml
+++ b/conf/variable-dictionary.yml
@@ -204,6 +204,14 @@ vars:
     long_name: "Surface Upward Sensible Heat Flux"
     standard_name: "surface_upward_sensible_heat_flux"
     accum: no
+## Adding new variable
+  tasanomaly:
+    units: "K"
+    long_name: "Near-Surface Air Temperature Anomaly"
+    standard_name: "air_temperature_anomaly"
+    accum: no
+
+
 
 # Coordinates
 coords:
diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R
index 97ce1f51ea976bc7da561bb1d968fbf6dbb37dfa..28092625686e5136f05f7d14e10c7f1f1a62b93a 100644
--- a/modules/Anomalies/Anomalies.R
+++ b/modules/Anomalies/Anomalies.R
@@ -18,7 +18,7 @@ compute_anomalies <- function(recipe, data) {
     cross <- FALSE
     cross_msg <- "without"
   }
-  original_dims <- dim(data$hcst$data)
+  original_dims <- data$hcst$dims
   # Compute anomalies
   anom <- CST_Anomaly(data$hcst, data$obs,
@@ -45,10 +45,10 @@ compute_anomalies <- function(recipe, data) {
     # Change variable metadata
     for (var in data$hcst$attrs$Variable$varName) {
       # Change hcst longname
-      data$hcst$attrs$Variable$variables[[var]]$long_name <-
+      data$hcst$attrs$Variable$metadata[[var]]$long_name <-
         paste(data$hcst$attrs$Variable$metadata[[var]]$long_name, "anomaly")
       # Change obs longname
-      data$obs$attrs$Variable$variables[[var]]$long_name <-
+      data$obs$attrs$Variable$metadata[[var]]$long_name <-
        paste(data$obs$attrs$Variable$metadata[[var]]$long_name, "anomaly")
     }
     # Compute forecast anomaly field
@@ -63,10 +63,13 @@ compute_anomalies <- function(recipe, data) {
                        ncores = recipe$Analysis$ncores)
       clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1,
                              name = "syear")
+      # Store original dimensions
       dims <- dim(clim_hcst)
-      clim_hcst <- rep(clim_hcst, dim(data$fcst$data)[['ensemble']])
-      dim(clim_hcst) <- c(dims, ensemble = dim(data$fcst$data)[['ensemble']])
-      clim_hcst <- Reorder(clim_hcst, order = names(dim(data$fcst$data)))
+      # Repeat the array as many times as ensemble members
+      clim_hcst <- rep(clim_hcst, data$fcst$dims[['ensemble']])
+      # Rename and reorder dimensions
+      dim(clim_hcst) <- c(dims, ensemble = data$fcst$dims[['ensemble']])
+      clim_hcst <- Reorder(clim_hcst, order = names(data$fcst$dims))
       # Get fcst anomalies
       data$fcst$data <- data$fcst$data - clim_hcst
       # Change metadata
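The `rep()`/`dim()`/`Reorder()` idiom above broadcasts the hindcast climatology to every ensemble member of the forecast. A self-contained sketch with toy dimensions (the dimension names mirror the suite's cubes; only `s2dv::Reorder()` is assumed):

```r
library(s2dv)  # Reorder()

# Toy climatology with a singleton syear dimension, as returned by InsertDim()
clim_hcst <- array(rnorm(24),
                   dim = c(syear = 1, time = 2, latitude = 3, longitude = 4))
n_members <- 5

# Store original dimensions, repeat once per member, restore named dims
dims <- dim(clim_hcst)
clim_hcst <- rep(clim_hcst, n_members)
dim(clim_hcst) <- c(dims, ensemble = n_members)

# Reorder to the forecast array's dimension order before subtracting
clim_hcst <- Reorder(clim_hcst, c("syear", "ensemble", "time",
                                  "latitude", "longitude"))
dim(clim_hcst)
```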
diff --git a/modules/Loading/Dev_Loading.R b/modules/Loading/Dev_Loading.R
new file mode 100644
index 0000000000000000000000000000000000000000..fb456eb31aee1f0ba2b0eded1839ca2dd8d4c723
--- /dev/null
+++ b/modules/Loading/Dev_Loading.R
@@ -0,0 +1,501 @@
+## TODO: remove paths to personal scratches
+source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R")
+# Load required libraries/funs
+source("modules/Loading/R/dates2load.R")
+source("modules/Loading/R/get_timeidx.R")
+source("modules/Loading/R/check_latlon.R")
+## TODO: Move to prepare_outputs.R
+source("tools/libs.R")
+## TODO: remove these two lines when new as.s2dv_cube() is in CSTools
+source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R')
+source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R')
+
+## TODO: Source new s2dv_cube version
+## TODO: Eliminate dim_var dimension (merge_across_dims?)
+
+load_datasets <- function(recipe) {
+
+  # -------------------------------------------
+  # Set params -----------------------------------------
+
+  hcst.inityear <- recipe$Analysis$Time$hcst_start
+  hcst.endyear <- recipe$Analysis$Time$hcst_end
+  lats.min <- recipe$Analysis$Region$latmin
+  lats.max <- recipe$Analysis$Region$latmax
+  lons.min <- recipe$Analysis$Region$lonmin
+  lons.max <- recipe$Analysis$Region$lonmax
+  ref.name <- recipe$Analysis$Datasets$Reference$name
+  exp.name <- recipe$Analysis$Datasets$System$name
+
+  variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]][1]
+  vars <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
+  store.freq <- recipe$Analysis$Variables$freq
+
+  # get sdates array
+  ## LOGGER: Change dates2load to extract logger from recipe?
+  sdates <- dates2load(recipe, recipe$Run$logger)
+
+  idxs <- NULL
+  idxs$hcst <- get_timeidx(sdates$hcst,
+                           recipe$Analysis$Time$ftime_min,
+                           recipe$Analysis$Time$ftime_max,
+                           time_freq=store.freq)
+
+  if (!(is.null(sdates$fcst))) {
+    idxs$fcst <- get_timeidx(sdates$fcst,
+                             recipe$Analysis$Time$ftime_min,
+                             recipe$Analysis$Time$ftime_max,
+                             time_freq=store.freq)
+  }
+
+  ## TODO: Examine this verifications part, verify if it's necessary
+  # stream <- verifications$stream
+  # sdates <- verifications$fcst.sdate
+
+  ## TODO: define fcst.name
+  ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name
+
+  # get esarchive datasets dict:
+  ## TODO: Adapt to 'filesystem' option in recipe
+  archive <- read_yaml("conf/archive.yml")$esarchive
+  exp_descrip <- archive$System[[exp.name]]
+
+  freq.hcst <- unlist(exp_descrip[[store.freq]][variable])
+  reference_descrip <- archive$Reference[[ref.name]]
+  freq.obs <- unlist(reference_descrip[[store.freq]][variable])
+  obs.dir <- reference_descrip$src
+  fcst.dir <- exp_descrip$src
+  hcst.dir <- exp_descrip$src
+  fcst.nmember <- exp_descrip$nmember$fcst
+  hcst.nmember <- exp_descrip$nmember$hcst
+
+  ## TODO: is it necessary?
+ ##if ("accum" %in% names(reference_descrip)) { + ## accum <- unlist(reference_descrip$accum[store.freq][[1]]) + ##} else { + ## accum <- FALSE + ##} + + var_dir_obs <- reference_descrip[[store.freq]][vars] + var_dir_exp <- exp_descrip[[store.freq]][vars] + + # ----------- + obs.path <- paste0(archive$src, + obs.dir, store.freq, "/$var$", "$var_dir$", + "/$var$_$file_date$.nc") + + hcst.path <- paste0(archive$src, + hcst.dir, store.freq, "/$var$", "$var_dir$", + "$var$_$file_date$.nc") + + fcst.path <- paste0(archive$src, + hcst.dir, store.freq, "/$var$", "$var_dir$", + "/$var$_$file_date$.nc") + + # Define regrid parameters: + #------------------------------------------------------------------- + regrid_params <- get_regrid_params(recipe, archive) + + # Longitude circular sort and latitude check + #------------------------------------------------------------------- + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean"){ + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Load hindcast + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = vars, + var_dir = var_dir_exp, + file_date = sdates$hcst, + time = idxs$hcst, + var_dir_depends = 'var', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:hcst.nmember), + metadata_dims = 'var', # change to just 'var'? 
+                return_vars = list(latitude = 'dat',
+                                   longitude = 'dat',
+                                   time = 'file_date'),
+                split_multiselected_dims = split_multiselected_dims,
+                retrieve = TRUE)
+
+  # Remove var_dir dimension
+  if ("var_dir" %in% names(dim(hcst))) {
+    hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected")
+  }
+
+  if (recipe$Analysis$Variables$freq == "daily_mean") {
+    # Adjusts dims for daily case, could be removed if startR allows
+    # multidim split
+    names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear"
+    default_dims <- c(dat = 1, var = 1, sday = 1,
+                      sweek = 1, syear = 1, time = 1,
+                      latitude = 1, longitude = 1, ensemble = 1)
+    default_dims[names(dim(hcst))] <- dim(hcst)
+    dim(hcst) <- default_dims
+    # Change time attribute dimensions
+    default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
+    names(dim(attr(hcst, "Variables")$common$time))[which(names(
+      dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear"
+    default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <-
+      dim(attr(hcst, "Variables")$common$time)
+    dim(attr(hcst, "Variables")$common$time) <- default_time_dims
+  }
+
+  # Convert hcst to s2dv_cube object
+  ## TODO: Give correct dimensions to $Dates
+  ## (sday, sweek, syear instead of file_date)
+  hcst <- as.s2dv_cube(hcst)
+  # Adjust dates for models where the time stamp goes into the next month
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
+    hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag)
+  }
+
+  ## Combine tas and tos data into one variable: tas-tos
+  if (recipe$Analysis$Variables$name == 'tas tos') {
+    #if(recipe$Analysis$Datasets$Reference$name == 'HadCRUT5' || recipe$Analysis$Datasets$Reference$name == 'BEST') {
+    source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R')
+    hcst <- mask_tas_tos(input_data = hcst,
+                         region = c(lons.min, lons.max, lats.min, lats.max),
+                         grid = 'r360x181',
+                         lon = hcst$coords$longitude,
+                         lat = hcst$coords$latitude,
+                         lon_dim = 'longitude', lat_dim = 'latitude',
+                         ncores = NULL)
+
+    hcst$dims[['var']] <- dim(hcst$data)[['var']]
+    #}
+  }
+
+  # Load forecast
+  #-------------------------------------------------------------------
+  if (!is.null(recipe$Analysis$Time$fcst_year)) {
+    # the call uses file_date instead of fcst_syear so that it can work
+    # with the daily case and the current version of startR not allowing
+    # multiple dims split
+
+    fcst <- Start(dat = fcst.path,
+                  var = vars,
+                  var_dir = var_dir_exp,
+                  var_dir_depends = 'var',
+                  file_date = sdates$fcst,
+                  time = idxs$fcst,
+                  latitude = values(list(lats.min, lats.max)),
+                  latitude_reorder = Sort(),
+                  longitude = values(list(lons.min, lons.max)),
+                  longitude_reorder = circularsort,
+                  transform = regrid_params$fcst.transform,
+                  transform_params = list(grid = regrid_params$fcst.gridtype,
+                                          method = regrid_params$fcst.gridmethod),
+                  transform_vars = c('latitude', 'longitude'),
+                  synonims = list(latitude = c('lat', 'latitude'),
+                                  longitude = c('lon', 'longitude'),
+                                  ensemble = c('member', 'ensemble')),
+                  ensemble = indices(1:fcst.nmember),
+                  metadata_dims = 'var',
+                  return_vars = list(latitude = 'dat',
+                                     longitude = 'dat',
+                                     time = 'file_date'),
+                  split_multiselected_dims = split_multiselected_dims,
+                  retrieve = TRUE)
+
+    if ("var_dir" %in% names(dim(fcst))) {
+      fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected")
+    }
+
+    if (recipe$Analysis$Variables$freq == "daily_mean") {
+      # Adjusts dims for daily case, could be removed if startR allows
+      # multidim split
+      names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear"
+      default_dims <- c(dat = 1, var = 1, sday = 1,
+                        sweek = 1, syear = 1, time = 1,
+                        latitude = 1, longitude = 1, ensemble = 1)
+      default_dims[names(dim(fcst))] <- dim(fcst)
+      dim(fcst) <- default_dims
+      # Change time attribute dimensions
+      default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
+      names(dim(attr(fcst, "Variables")$common$time))[which(names(
+        dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear"
+      default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <-
+        dim(attr(fcst, "Variables")$common$time)
+      dim(attr(fcst, "Variables")$common$time) <- default_time_dims
+    }
+
+    # Convert fcst to s2dv_cube
+    fcst <- as.s2dv_cube(fcst)
+    # Adjust dates for models where the time stamp goes into the next month
+    if (recipe$Analysis$Variables$freq == "monthly_mean") {
+      fcst$attrs$Dates[] <-
+        fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag)
+    }
+
+  } else {
+    fcst <- NULL
+  }
+
+  # Load reference
+  #-------------------------------------------------------------------
+
+  # Obtain dates and date dimensions from the loaded hcst data to make sure
+  # the corresponding observations are loaded correctly.
+  dates <- hcst$attrs$Dates
+  dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")]
+
+  # Separate Start() call for monthly vs daily data
+  if (store.freq == "monthly_mean") {
+
+    dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m")
+    dim(dates_file) <- dim(dates)
+
+    ## tas tos mask
+    if (recipe$Analysis$Variables$name == 'tas tos') {
+      if (recipe$Analysis$Datasets$Reference$name == 'HadCRUT5') {
+        vars <- 'tasanomaly'
+        var_dir_obs <- reference_descrip[[store.freq]][vars]
+      }
+    }
+
+    if (recipe$Analysis$Variables$name == 'tas tos') {
+      if (recipe$Analysis$Datasets$Reference$name == 'BEST') {
+        vars <- 'tas'
+        var_dir_obs <- reference_descrip[[store.freq]][vars]
+      }
+    }
+
+    obs <- Start(dat = obs.path,
+                 var = vars,
+                 var_dir = var_dir_obs,
+                 var_dir_depends = 'var',
+                 file_date = dates_file,
+                 latitude = values(list(lats.min, lats.max)),
+                 latitude_reorder = Sort(),
+                 longitude = values(list(lons.min, lons.max)),
+                 longitude_reorder = circularsort,
+                 transform = regrid_params$obs.transform,
+                 transform_params = list(grid = regrid_params$obs.gridtype,
+                                         method = regrid_params$obs.gridmethod),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = 'var',
+                 return_vars = list(latitude = 'dat',
+                                    longitude = 'dat',
+                                    time = 'file_date'),
+                 split_multiselected_dims = TRUE,
+                 retrieve = TRUE)
+
+  } else if (store.freq == "daily_mean") {
+
+    # Get year and month for file_date
+    dates_file <- sapply(dates, format, '%Y%m')
+    dim(dates_file) <- dim(dates)
+    # Set hour to 12:00 to ensure correct date retrieval for daily data
+    lubridate::hour(dates) <- 12
+    lubridate::minute(dates) <- 00
+    # Restore correct dimensions
+    dim(dates) <- dim(dates_file)
+
+    obs <- Start(dat = obs.path,
+                 var = vars,
+                 var_dir = var_dir_obs,
+                 var_dir_depends = 'var',
+                 file_date = sort(unique(dates_file)),
+                 time = dates,
+                 time_var = 'time',
+                 time_across = 'file_date',
+                 merge_across_dims = TRUE,
+                 merge_across_dims_narm = TRUE,
+                 latitude = values(list(lats.min, lats.max)),
+                 latitude_reorder = Sort(),
+                 longitude = values(list(lons.min, lons.max)),
+                 longitude_reorder = circularsort,
+                 transform = regrid_params$obs.transform,
+                 transform_params = list(grid = regrid_params$obs.gridtype,
+                                         method = regrid_params$obs.gridmethod),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = 'var',
+                 return_vars = list(latitude = 'dat',
+                                    longitude = 'dat',
+                                    time = 'file_date'),
+                 split_multiselected_dims = TRUE,
+                 retrieve = TRUE)
+  }
+
+  # Remove var_dir dimension
+  if ("var_dir" %in% names(dim(obs))) {
+    obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected")
+  }
+  # Adds ensemble dim to obs (for consistency with hcst/fcst)
+  default_dims <- c(dat = 1, var = 1, sday = 1,
+                    sweek = 1, syear = 1, time = 1,
+                    latitude = 1, longitude = 1, ensemble = 1)
+  default_dims[names(dim(obs))] <- dim(obs)
+  dim(obs) <- default_dims
+
+  # Convert obs to s2dv_cube
+  obs <- as.s2dv_cube(obs)
+
+  ## Combine tas and tos data into one variable: tas-tos
+  if (recipe$Analysis$Variables$name == 'tas tos') {
+    if (recipe$Analysis$Datasets$Reference$name != 'HadCRUT5' &
+        recipe$Analysis$Datasets$Reference$name != 'BEST') {
+      source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R')
+      obs <- mask_tas_tos(input_data = obs,
+                          region = c(lons.min, lons.max, lats.min, lats.max),
+                          grid = 'r360x181',
+                          lon = obs$coords$longitude,
+                          lat = obs$coords$latitude,
+                          lon_dim = 'longitude', lat_dim = 'latitude',
+                          ncores = NULL)
+
+      obs$dims[['var']] <- dim(obs$data)[['var']]
+    }
+  }
+
+  # Check for consistency between hcst and obs grid
+  if (!(recipe$Analysis$Regrid$type == 'none')) {
+    if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
+      lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
+                             "Please check the original grids and the",
+                             "regrid parameters in your recipe.")
+      error(recipe$Run$logger, lat_error_msg)
+      hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
+                             "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
+      info(recipe$Run$logger, hcst_lat_msg)
+      obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
+                            "; Last obs lat: ", obs$lat[length(obs$lat)])
+      info(recipe$Run$logger, obs_lat_msg)
+      stop("hcst and obs don't share the same latitudes.")
+    }
+    if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
+      lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
+                             "Please check the original grids and the",
+                             "regrid parameters in your recipe.")
+      error(recipe$Run$logger, lon_error_msg)
+      hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
+                             "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
+      info(recipe$Run$logger, hcst_lon_msg)
+      obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
+                            "; Last obs lon: ", obs$lon[length(obs$lon)])
+      info(recipe$Run$logger, obs_lon_msg)
+      stop("hcst and obs don't share the same longitudes.")
+    }
+  }
+
+  # Remove negative values in accumulative variables
+  dictionary <- read_yaml("conf/variable-dictionary.yml")
+  for (var_idx in 1:length(vars)) {
+    var_name <- vars[var_idx]
+    if (dictionary$vars[[var_name]]$accum) {
+      info(recipe$Run$logger,
+           paste0("Accumulated variable ", var_name,
+                  ": setting negative values to zero."))
+      # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data,
+      #                                           along = "var",
+      #                                           indices = var_idx, F), 0)
+      obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0
+      hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0
+      if (!is.null(fcst)) {
+        fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0
+      }
+    }
+
+    # Convert prlr from m/s to mm/day
+    ## TODO: Make a unit conversion function
+    if (vars[[var_idx]] == "prlr") {
+      # Verify that the units are m/s and the same in obs and hcst
+      if (((obs$attrs$Variable$metadata[[var_name]]$units == "m s-1") ||
+           (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) &&
+          ((hcst$attrs$Variable$metadata[[var_name]]$units == "m s-1") ||
+           (hcst$attrs$Variable$metadata[[var_name]]$units == "m s**-1"))) {
+        info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.")
+        obs$data[, var_idx, , , , , , , ] <-
+          obs$data[, var_idx, , , , , , , ]*86400*1000
+        obs$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+        hcst$data[, var_idx, , , , , , , ] <-
+          hcst$data[, var_idx, , , , , , , ]*86400*1000
+        hcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+        if (!is.null(fcst)) {
+          fcst$data[, var_idx, , , , , , , ] <-
+            fcst$data[, var_idx, , , , , , , ]*86400*1000
+          fcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+        }
+      }
+    }
+  }
+  # Compute anomalies if requested
+  # Print a summary of the loaded data for the user, for each object
+  if (recipe$Run$logger$threshold <= 2) {
+    data_summary(hcst, recipe)
+    data_summary(obs, recipe)
+    if (!is.null(fcst)) {
+      data_summary(fcst, recipe)
+    }
+  }
+
+  info(recipe$Run$logger,
+       "##### DATA LOADING COMPLETED SUCCESSFULLY #####")
+
+  ############################################################################
+  #
+  # CHECKS ON MISSING FILES
+  #
+  ############################################################################
+
+  #obs.NA_dates.ind <- Apply(obs,
+  #                          fun=(function(x){ all(is.na(x))}),
+  #                          target_dims=c('time', 'latitude', 'longitude'))[[1]]
+  #obs.NA_dates <- dates_file[obs.NA_dates.ind]
+  #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)]
+  #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_",
+  #                       freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc")
+  #
+  #if (any(is.na(hcst))){
+  #  fatal(recipe$Run$logger,
+  #        paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ",
+  #              " ################################################# ",
+  #              " ###### MISSING FILES #### ",
+  #              " ################################################# ",
+  #              "hcst files:",
+  #              hcst.NA_files,
+  #              " ################################################# ",
+  #              " ################################################# ",
+  #              sep="\n"))
+  #  quit(status = 1)
+  #}
+  #
+  #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){
+  #  fatal(recipe$logger,
+  #        paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ",
+  #              " ################################################# ",
+  #              " ###### MISSING FILES #### ",
+  #              " ################################################# ",
+  #              "obs files:",
+  #              obs.NA_files,
+  #              " ################################################# ",
+  #              " ################################################# ",
+  #              sep="\n"))
+  #  quit(status=1)
+  #}
+  #
+  #info(recipe$logger,
+  #     "######### DATA LOADING COMPLETED SUCCESSFULLY ##############")
+
+  ############################################################################
+  ############################################################################
+
+  return(list(hcst = hcst, fcst = fcst, obs = obs))
+
+}
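Multi-variable support rests on the `strsplit()` pattern used throughout the new code, which accepts comma- and/or space-separated names in the recipe:

```r
# The regex ", | |," matches ", ", " " or "," as separators
parse_vars <- function(name) strsplit(name, ", | |,")[[1]]

parse_vars("tas tos")    # "tas" "tos"
parse_vars("tas, tos")   # "tas" "tos"
parse_vars("tas,tos")    # "tas" "tos"
parse_vars("prlr")       # "prlr"
```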
diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index c5eb41e6aed1740c11dfc99eae751240947c5dcf..ebeadbf2b2ac4c372c6f99bd04d776eeb16fc92b 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -6,12 +6,18 @@ source("modules/Loading/R/get_timeidx.R")
 source("modules/Loading/R/check_latlon.R")
 ## TODO: Move to prepare_outputs.R
 source("tools/libs.R")
+## TODO: remove these two lines when new as.s2dv_cube() is in CSTools
+source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R')
+source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R')
 
-load_datasets <- function(recipe) {
+## TODO: Source new s2dv_cube version
+## TODO: Eliminate dim_var dimension (merge_across_dims?)
+load_datasets <- function(recipe) {
+
   # -------------------------------------------
   # Set params -----------------------------------------
-  
+
   hcst.inityear <- recipe$Analysis$Time$hcst_start
   hcst.endyear <- recipe$Analysis$Time$hcst_end
   lats.min <- recipe$Analysis$Region$latmin
@@ -20,13 +26,14 @@ load_datasets <- function(recipe) {
   lons.max <- recipe$Analysis$Region$lonmax
   ref.name <- recipe$Analysis$Datasets$Reference$name
   exp.name <- recipe$Analysis$Datasets$System$name
-  variable <- recipe$Analysis$Variables$name
+
+  variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
   store.freq <- recipe$Analysis$Variables$freq
-  
+
   # get sdates array
   ## LOGGER: Change dates2load to extract logger from recipe?
   sdates <- dates2load(recipe, recipe$Run$logger)
-  
+
   idxs <- NULL
   idxs$hcst <- get_timeidx(sdates$hcst,
                            recipe$Analysis$Time$ftime_min,
@@ -39,7 +46,7 @@ load_datasets <- function(recipe) {
                            recipe$Analysis$Time$ftime_max,
                            time_freq=store.freq)
   }
-  
+
   ## TODO: Examine this verifications part, verify if it's necessary
   # stream <- verifications$stream
   # sdates <- verifications$fcst.sdate
@@ -52,9 +59,9 @@ load_datasets <- function(recipe) {
   archive <- read_yaml("conf/archive.yml")$esarchive
   exp_descrip <- archive$System[[exp.name]]
 
-  freq.hcst <- unlist(exp_descrip[[store.freq]][variable])
+  freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]])
   reference_descrip <- archive$Reference[[ref.name]]
-  freq.obs <- unlist(reference_descrip[[store.freq]][variable])
+  freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]])
   obs.dir <- reference_descrip$src
   fcst.dir <- exp_descrip$src
   hcst.dir <- exp_descrip$src
@@ -67,22 +74,22 @@ load_datasets <- function(recipe) {
   ##} else {
   ##  accum <- FALSE
   ##}
+
+  var_dir_obs <- reference_descrip[[store.freq]][variable]
+  var_dir_exp <- exp_descrip[[store.freq]][variable]
 
   # -----------
   obs.path <- paste0(archive$src,
-                     obs.dir, store.freq, "/$var$",
-                     reference_descrip[[store.freq]][[variable]],
-                     "$var$_$file_date$.nc")
+                     obs.dir, store.freq, "/$var$", "$var_dir$",
+                     "/$var$_$file_date$.nc")
 
   hcst.path <- paste0(archive$src,
-                      hcst.dir, store.freq, "/$var$",
-                      exp_descrip[[store.freq]][[variable]],
+                      hcst.dir, store.freq, "/$var$", "$var_dir$",
                       "$var$_$file_date$.nc")
 
   fcst.path <- paste0(archive$src,
-                      hcst.dir, store.freq, "/$var$",
-                      exp_descrip[[store.freq]][[variable]],
-                      "$var$_$file_date$.nc")
+                      hcst.dir, store.freq, "/$var$", "$var_dir$",
+                      "/$var$_$file_date$.nc")
 
   # Define regrid parameters:
   #-------------------------------------------------------------------
@@ -91,7 +98,7 @@ load_datasets <- function(recipe) {
   # Longitude circular sort and latitude check
   #-------------------------------------------------------------------
   circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max)
-  
+
   if (recipe$Analysis$Variables$freq == "monthly_mean"){
     split_multiselected_dims = TRUE
   } else {
@@ -102,8 +109,10 @@ load_datasets <- function(recipe) {
   #-------------------------------------------------------------------
   hcst <- Start(dat = hcst.path,
                 var = variable,
+                var_dir = var_dir_exp,
                 file_date = sdates$hcst,
                 time = idxs$hcst,
+                var_dir_depends = 'var',
                latitude = values(list(lats.min, lats.max)),
                latitude_reorder = Sort(),
                longitude = values(list(lons.min, lons.max)),
@@ -116,12 +125,18 @@ load_datasets <- function(recipe) {
                                longitude = c('lon', 'longitude'),
                                ensemble = c('member', 'ensemble')),
                ensemble = indices(1:hcst.nmember),
+                metadata_dims = 'var', # change to just 'var'?
                return_vars = list(latitude = 'dat',
                                   longitude = 'dat',
                                   time = 'file_date'),
                split_multiselected_dims = split_multiselected_dims,
                retrieve = TRUE)
-  
+
+  # Remove var_dir dimension
+  if ("var_dir" %in% names(dim(hcst))) {
+    hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected")
+  }
+
   if (recipe$Analysis$Variables$freq == "daily_mean") {
     # Adjusts dims for daily case, could be removed if startR allows
     # multidim split
@@ -134,12 +149,12 @@ load_datasets <- function(recipe) {
     # Change time attribute dimensions
     default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
     names(dim(attr(hcst, "Variables")$common$time))[which(names(
-        dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear"
+      dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear"
     default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <-
       dim(attr(hcst, "Variables")$common$time)
     dim(attr(hcst, "Variables")$common$time) <- default_time_dims
   }
-  
+
   # Convert hcst to s2dv_cube object
   ## TODO: Give correct dimensions to $Dates
   ## (sday, sweek, syear instead of file_date)
@@ -148,16 +163,18 @@ load_datasets <- function(recipe) {
   if (recipe$Analysis$Variables$freq == "monthly_mean") {
     hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag)
   }
-  
+
   # Load forecast
   #-------------------------------------------------------------------
   if (!is.null(recipe$Analysis$Time$fcst_year)) {
     # the call uses file_date instead of fcst_syear so that it can work
     # with the daily case and the current version of startR not allowing
     # multiple dims split
-    
-    fcst <- Start(dat = fcst.path,
+
+    fcst <- Start(dat = fcst.path,
                   var = variable,
+                  var_dir = var_dir_exp,
+                  var_dir_depends = 'var',
                   file_date = sdates$fcst,
                   time = idxs$fcst,
                   latitude = values(list(lats.min, lats.max)),
@@ -172,12 +189,17 @@ load_datasets <- function(recipe) {
                                  longitude = c('lon', 'longitude'),
                                  ensemble = c('member', 'ensemble')),
                  ensemble = indices(1:fcst.nmember),
+                  metadata_dims = 'var',
                  return_vars = list(latitude = 'dat',
                                     longitude = 'dat',
                                     time = 'file_date'),
                  split_multiselected_dims = split_multiselected_dims,
                  retrieve = TRUE)
 
+    if ("var_dir" %in% names(dim(fcst))) {
+      fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected")
+    }
+
     if (recipe$Analysis$Variables$freq == "daily_mean") {
       # Adjusts dims for daily case, could be removed if startR allows
       # multidim split
@@ -190,12 +212,12 @@ load_datasets <- function(recipe) {
       # Change time attribute dimensions
       default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
       names(dim(attr(fcst, "Variables")$common$time))[which(names(
-        dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear"
+        dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear"
       default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <-
         dim(attr(fcst, "Variables")$common$time)
       dim(attr(fcst, "Variables")$common$time) <- default_time_dims
     }
-    
+
     # Convert fcst to s2dv_cube
     fcst <- as.s2dv_cube(fcst)
     # Adjust dates for models where the time stamp goes into the next month
@@ -203,30 +225,29 @@ load_datasets <- function(recipe) {
       fcst$attrs$Dates[] <-
         fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag)
     }
-    
+
   } else {
     fcst <- NULL
   }
 
   # Load reference
   #-------------------------------------------------------------------
-  
+
   # Obtain dates and date dimensions from the loaded hcst data to make sure
   # the corresponding observations are loaded correctly.
   dates <- hcst$attrs$Dates
-  dim(dates) <- dim(Subset(hcst$data,
-                           along=c('dat', 'var',
-                                   'latitude', 'longitude', 'ensemble'),
-                           list(1,1,1,1,1), drop="selected"))
-  
+  dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")]
+
   # Separate Start() call for monthly vs daily data
   if (store.freq == "monthly_mean") {
-    
+
     dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m")
     dim(dates_file) <- dim(dates)
-    
+
     obs <- Start(dat = obs.path,
                  var = variable,
+                 var_dir = var_dir_obs,
+                 var_dir_depends = 'var',
                  file_date = dates_file,
                  latitude = values(list(lats.min, lats.max)),
                  latitude_reorder = Sort(),
@@ -236,16 +257,17 @@ load_datasets <- function(recipe) {
                  transform_params = list(grid = regrid_params$obs.gridtype,
                                          method = regrid_params$obs.gridmethod),
                  transform_vars = c('latitude', 'longitude'),
-                 synonims = list(latitude = c('lat', 'y', 'latitude'),
-                                 longitude = c('lon', 'x', 'longitude')),
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = 'var',
                  return_vars = list(latitude = 'dat',
                                     longitude = 'dat',
                                     time = 'file_date'),
                  split_multiselected_dims = TRUE,
                  retrieve = TRUE)
-    
+
   } else if (store.freq == "daily_mean") {
-    
+
     # Get year and month for file_date
     dates_file <- sapply(dates, format, '%Y%m')
     dim(dates_file) <- dim(dates)
@@ -254,9 +276,11 @@ load_datasets <- function(recipe) {
     lubridate::minute(dates) <- 00
     # Restore correct dimensions
     dim(dates) <- dim(dates_file)
-    
+
     obs <- Start(dat = obs.path,
                  var = variable,
+                 var_dir = var_dir_obs,
+                 var_dir_depends = 'var',
                  file_date = sort(unique(dates_file)),
                  time = dates,
                  time_var = 'time',
@@ -269,24 +293,30 @@ load_datasets <- function(recipe) {
                  longitude_reorder = circularsort,
                  transform = regrid_params$obs.transform,
                  transform_params = list(grid = regrid_params$obs.gridtype,
-                                         method = regrid_params$obs.gridmethod),
+                                         method = regrid_params$obs.gridmethod),
                  transform_vars = c('latitude', 'longitude'),
                  synonims = list(latitude = c('lat','latitude'),
-                                 longitude = c('lon','longitude')),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = 'var',
                  return_vars = list(latitude = 'dat',
-                                    longitude = 'dat',
-                                    time = 'file_date'),
+                                    longitude = 'dat',
+                                    time = 'file_date'),
                  split_multiselected_dims = TRUE,
                  retrieve = TRUE)
   }
+
+  # Remove var_dir dimension
+  if ("var_dir" %in% names(dim(obs))) {
+    obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected")
+  }
   # Adds ensemble dim to obs (for consistency with hcst/fcst)
   default_dims <- c(dat = 1, var = 1, sday = 1,
                     sweek = 1, syear = 1, time = 1,
                     latitude = 1, longitude = 1, ensemble = 1)
   default_dims[names(dim(obs))] <- dim(obs)
   dim(obs) <- default_dims
-  
+
   # Convert obs to s2dv_cube
   obs <- as.s2dv_cube(obs)
 
@@ -301,7 +331,7 @@ load_datasets <- function(recipe) {
                            "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
     info(recipe$Run$logger, hcst_lat_msg)
     obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
-                         "; Last obs lat: ", obs$lat[length(obs$lat)])
+                          "; Last obs lat: ", obs$lat[length(obs$lat)])
     info(recipe$Run$logger, obs_lat_msg)
     stop("hcst and obs don't share the same latitudes.")
   }
@@ -314,47 +344,54 @@ load_datasets <- function(recipe) {
                           "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
     info(recipe$Run$logger, hcst_lon_msg)
     obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
-                         "; Last obs lon: ", obs$lon[length(obs$lon)])
+                          "; Last obs lon: ", obs$lon[length(obs$lon)])
     info(recipe$Run$logger, obs_lon_msg)
     stop("hcst and obs don't share the same longitudes.")
-    
+
   }
   }
-  
+
   # Remove negative values in accumulative variables
   dictionary <- read_yaml("conf/variable-dictionary.yml")
-  if (dictionary$vars[[variable]]$accum) {
-    info(recipe$Run$logger,
-         "Accumulated variable: setting negative values to zero.")
-    obs$data[obs$data < 0] <- 0
-    hcst$data[hcst$data < 0] <- 0
-    if (!is.null(fcst)) {
-      fcst$data[fcst$data < 0] <- 0
-    }
-  }
-
-  # Convert prlr from m/s to mm/day
-  ## TODO: Revise this
-  ## TODO: Make a unit conversion function?
-  if (variable == "prlr") {
-    # Verify that the units are m/s and the same in obs and hcst
-    if (((obs$attrs$Variable$metadata[[variable]]$units == "m s-1") ||
-         (obs$attrs$Variable$metadata[[variable]]$units == "m s**-1")) &&
-        ((hcst$attrs$Variable$metadata[[variable]]$units == "m s-1") ||
-         (hcst$attrs$Variable$metadata[[variable]]$units == "m s**-1"))) {
-
-      info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.")
-      obs$data <- obs$data*86400*1000
-      obs$attrs$Variable$metadata[[variable]]$units <- "mm/day"
-      hcst$data <- hcst$data*86400*1000
-      hcst$attrs$Variable$metadata[[variable]]$units <- "mm/day"
+  for (var_idx in 1:length(variable)) {
+    var_name <- variable[var_idx]
+    if (dictionary$vars[[var_name]]$accum) {
+      info(recipe$Run$logger,
+           paste0("Accumulated variable ", var_name,
+                  ": setting negative values to zero."))
+      # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data,
+      #                                           along = "var",
+      #                                           indices = var_idx, F), 0)
+      obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0
+      hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0
       if (!is.null(fcst)) {
-        fcst$data <- fcst$data*86400*1000
-        fcst$attrs$Variable$metadata[[variable]]$units <- "mm/day"
+        fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0
+      }
+    }
+
+    # Convert prlr from m/s to mm/day
+    ## TODO: Make a unit conversion function
+    if (variable[[var_idx]] == "prlr") {
+      # Verify that the units are m/s and the same in obs and hcst
+      if (((obs$attrs$Variable$metadata[[var_name]]$units == "m s-1") ||
+           (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) &&
+          ((hcst$attrs$Variable$metadata[[var_name]]$units == "m s-1") ||
+           (hcst$attrs$Variable$metadata[[var_name]]$units == "m s**-1"))) {
+        info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.")
+        obs$data[, var_idx, , , , , , , ] <-
+          obs$data[, var_idx, , , , , , , ]*86400*1000
+        obs$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+        hcst$data[, var_idx, , , , , , , ] <-
+          hcst$data[, var_idx, , , , , , , ]*86400*1000
+        hcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+        if (!is.null(fcst)) {
+          fcst$data[, var_idx, , , , , , , ] <-
+            fcst$data[, var_idx, , , , , , , ]*86400*1000
+          fcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+        }
       }
     }
   }
-  # Compute anomalies if requested
   # Print a summary of the loaded data for the user, for each object
   if (recipe$Run$logger$threshold <= 2) {
@@ -364,7 +401,7 @@ load_datasets <- function(recipe) {
       data_summary(fcst, recipe)
     }
   }
-  
+
   info(recipe$Run$logger,
        "##### DATA LOADING COMPLETED SUCCESSFULLY #####")
 
@@ -415,7 +452,7 @@ load_datasets <- function(recipe) {
   ############################################################################
   ############################################################################
-  
+
   return(list(hcst = hcst, fcst = fcst, obs = obs))
-  
+
 }
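The prlr block repeats the same scale-and-relabel logic for obs, hcst and fcst; the TODO above asks for a unit conversion function. A hedged sketch of what that helper could look like (the name `convert_prlr_units()` is illustrative, and the nine-dimensional indexing assumes the suite's [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] arrays):

```r
# Hypothetical helper condensing the repeated block above:
# 1 m/s = 1000 mm/m * 86400 s/day = 8.64e7 mm/day
convert_prlr_units <- function(cube, var_idx, var_name) {
  units <- cube$attrs$Variable$metadata[[var_name]]$units
  if (units %in% c("m s-1", "m s**-1")) {
    cube$data[, var_idx, , , , , , , ] <-
      cube$data[, var_idx, , , , , , , ] * 86400 * 1000
    cube$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
  }
  return(cube)
}
```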
diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R
index b9a145e3029f176b5f2f51f067444f76591567a8..43b3c54fef12287133915203af83bb036c5356d5 100644
--- a/modules/Loading/Loading_decadal.R
+++ b/modules/Loading/Loading_decadal.R
@@ -15,8 +15,8 @@ source("tools/libs.R")
 
 #====================================================================
 
-# recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml"
-# recipe_file <- "modules/Loading/testing_recipes/recipe_decadal_daily.yml"
+# recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml"
+# recipe_file <- "recipes/atomic_recipes/recipe_decadal_daily.yml"
 
 load_datasets <- function(recipe) {
 
@@ -35,7 +35,8 @@ load_datasets <- function(recipe) {
   exp.name <- recipe$Analysis$Datasets$System$name  #'HadGEM3'
   ref.name <- recipe$Analysis$Datasets$Reference$name  #'era5'
   member <- strsplit(recipe$Analysis$Datasets$System$member, ',')[[1]]  #c("r1i1p1f2", "r2i1p1f2")
-  variable <- recipe$Analysis$Variables$name  #'tas'
+#  variable <- recipe$Analysis$Variables$name  #'tas'
+  variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
   store.freq <- recipe$Analysis$Variables$freq  #monthly_mean
   lats.min <- as.numeric(recipe$Analysis$Region$latmin)  #0
   lats.max <- as.numeric(recipe$Analysis$Region$latmax)  #10
@@ -64,12 +65,12 @@ load_datasets <- function(recipe) {
   # Read from archive:
   #-------------------------
   if (store.freq == "monthly_mean") {
-    table <- archive$System[[exp.name]][[store.freq]]$table[[variable]]  #'Amon'
+    table <- archive$System[[exp.name]][[store.freq]]$table[variable]  #list(tas = 'Amon')
   } else {
     table <- 'day'
   }
-  grid <- archive$System[[exp.name]][[store.freq]]$grid[[variable]]
-  version <- archive$System[[exp.name]][[store.freq]]$version[[variable]]
+  grid <- archive$System[[exp.name]][[store.freq]]$grid[variable]  #list(tas = 'gr')
+  version <- archive$System[[exp.name]][[store.freq]]$version[variable]  #list(tas = 'v20210910')
   if (identical(member, 'all')) {
     member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]]
   }
@@ -95,13 +96,14 @@ load_datasets <- function(recipe) {
                        version = version, sdates = sdates_hcst)
   path_list <- tmp$path_list
   multi_path <- tmp$multi_path
-#  hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst,
-#                         '$ensemble$', table, '$var$', grid, version)
-#  hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
 
+  #TODO: to make this case work; enhance Start() if it's possible
+  if (multi_path & length(variable) > 1) {
+    stop("The recipe requests multiple variables and start dates from both dcppA-hindcast and dcppB-forecast. This case is not available for now.")
+  }
 
   Start_default_arg_list <- list(
-    dat = path_list, #file.path(hcst.path, hcst.files),
+    dat = path_list,
     var = variable,
     syear = paste0(sdates_hcst),
     chunk = 'all',
@@ -120,7 +122,7 @@ load_datasets <- function(recipe) {
     transform_params = list(grid = regrid_params$fcst.gridtype,
                             method = regrid_params$fcst.gridmethod),
     transform_vars = c('latitude', 'longitude'),
-    path_glob_permissive = 2, # for version
+#    path_glob_permissive = 2, # for version
     synonims = list(longitude = c('lon', 'longitude'),
                     latitude = c('lat', 'latitude')),
     return_vars = list(latitude = NULL, longitude = NULL,
@@ -128,6 +130,12 @@ load_datasets <- function(recipe) {
     silent = !DEBUG,
     retrieve = T)
 
+  if (length(variable) > 1) {
+    Start_default_arg_list <- c(Start_default_arg_list,
+                                list(table = table, grid = grid, version = version,
+                                     table_depends = 'var', grid_depends = 'var',
+                                     version_depends = 'var'))
+  }
+
   if (!multi_path) {
     Start_hcst_arg_list <- Start_default_arg_list
     hcst <- do.call(Start, Start_hcst_arg_list)
@@ -189,10 +197,11 @@ load_datasets <- function(recipe) {
                        version = version, sdates = sdates_fcst)
     path_list <- tmp$path_list
     multi_path <- tmp$multi_path
-#    fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst,
-#                           '$ensemble$', table, '$var$', grid, version)
-#    fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
 
+    #TODO: to make this case work; enhance Start() if it's possible
+    if (multi_path & length(variable) > 1) {
+      stop("The recipe requests multiple variables and start dates from both dcppA-hindcast and dcppB-forecast. This case is not available for now.")
+    }
 
     # monthly & daily
     if (!multi_path) {
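The switch from `[[variable]]` to `[variable]` above is what turns single strings into named lists when the recipe requests several variables:

```r
table <- list(tas = "Amon", tos = "Omon", pr = "Amon")
variable <- c("tas", "tos")

table[[variable[1]]]  # "Amon": a single, unnamed element
table[variable]       # list(tas = "Amon", tos = "Omon"): keeps names,
                      # as required by table_depends = 'var' in Start()
```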
diff --git a/modules/Loading/R/mask_tas_tos.R b/modules/Loading/R/mask_tas_tos.R
new file mode 100644
index 0000000000000000000000000000000000000000..a2eeb0b610e3cdbd5173ef6770442913dfab03e0
--- /dev/null
+++ b/modules/Loading/R/mask_tas_tos.R
@@ -0,0 +1,84 @@
+library(multiApply)
+library(startR)
+library(s2dv)
+
+mask_tas_tos <- function(input_data, grid, lon, lat, region,
+                         lon_dim = 'lon', lat_dim = 'lat', ncores = NULL) {
+
+  mask <- .load_mask(grid = grid, lon_dim = lon_dim, lat_dim = lat_dim,
+                     sea_value = 1, land_value = 0, region = region)
+
+  ## TO DO: improve the check and correct lats
+  stopifnot(all(lon == mask$lon))
+  stopifnot(max(abs(as.numeric(round(lat,2) - round(mask$lat,2)))) < 0.1) # stopifnot(all(lat == mask$lat))
+
+  tas <- Subset(input_data$data, along = 'var', indices = 1)
+  tos <- Subset(input_data$data, along = 'var', indices = 2)
+
+  tas_tos <- multiApply::Apply(data = list(tas, tos),
+                               target_dims = c(lon_dim, lat_dim),
+                               fun = .mask_tas_tos,
+                               mask = mask$mask,
+                               sea_value = 1,
+                               ncores = ncores)$output1
+  input_data$data <- tas_tos
+
+  return(input_data)
+}
+
+.mask_tas_tos <- function(data_tas, data_tos, mask, sea_value) {
+  data_tas[mask == sea_value] <- data_tos[mask == sea_value]
+  return(data_tas)
+}
+
+.load_mask <- function(grid, mask_path = NULL, land_value = 0, sea_value = 1,
+                       lon_dim = 'lon', lat_dim = 'lat', region) {
+
+  if (is.null(mask_path)) {
+    mask_sea_land_path <- '/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc' ## /esarchive/recon/ecmwf/era5land/constant/lsm-r3600x1801cds/lsm.nc'
+  } else if (is.character(mask_path)) {
+    mask_sea_land_path <- mask_path
+  } else {
+    stop("mask_path must be NULL (to use the default mask and interpolate it to
+         the specified grid) or a string with the mask's path you want to load")
+  }
+
+  lons.min <- region[1]
+  lons.max <- region[2]
+  lats.min <- region[3]
+  lats.max <- region[4]
+
+  ## TO DO:
+  ## Fix region filter for lat and lon
+  ## Fix 'number' parameter for mask
+
+  data <- startR::Start(dat = mask_sea_land_path,
+                        var = 'lsm',
+                        lon = 'all',
+                        lat = 'all',
+                        number = 1, ## needed to add for ensemble member dimension of lsm.nc
+                        # lon = values(list(lons.min, lons.max)),
+                        # lat = values(list(lats.min, lats.max)),
+                        transform = CDORemapper, transform_extra_cells = 2,
+                        transform_params = list(grid = grid, method = 'con', crop = region),
+                        transform_vars = c('lat','lon'),
+                        return_vars = list(lat = NULL, lon = NULL),
+                        synonims = list(lon = c('lon','longitude'),
+                                        lat = c('lat','latitude')),
+                        lat_reorder = Sort(decreasing = FALSE),
+                        lon_reorder = CircularSort(0,359.9),
+                        num_procs = 1, retrieve = TRUE)
+
+  mask <- list(mask = drop(data),
+               lon = as.numeric(attr(data,'Variables')$common$lon),
+               lat = as.numeric(attr(data,'Variables')$common$lat))
+
+  mask$mask[data <= 0.5] <- sea_value
+  mask$mask[data > 0.5] <- land_value
+  names(dim(mask$mask)) <- c(lon_dim, lat_dim)
+
+  return(mask)
+}
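The core of `.mask_tas_tos()` is a plain logical replacement: wherever the interpolated land-sea mask takes the sea value, tas is overwritten with tos. A toy illustration:

```r
tas  <- matrix(20, nrow = 3, ncol = 3)  # near-surface air temperature
tos  <- matrix(15, nrow = 3, ncol = 3)  # sea surface temperature
mask <- matrix(c(0, 1, 0,
                 1, 1, 0,
                 0, 0, 1), nrow = 3)    # 1 = sea, 0 = land

tas[mask == 1] <- tos[mask == 1]
tas  # 15 over sea points, 20 over land points
```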
diff --git a/modules/Loading/helper_loading_decadal.R b/modules/Loading/helper_loading_decadal.R
index f4f1ec3246d675d7eb39809c8665fb13a19fe123..b93f32792359739042af1613b8ba4b2e26aff92c 100644
--- a/modules/Loading/helper_loading_decadal.R
+++ b/modules/Loading/helper_loading_decadal.R
@@ -106,22 +106,36 @@ correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) {
 
 #==========================================
 # This function generates the path for Start() call. It shouldn't be needed when Start() is improved.
+# table, grid, version: A list with variables as name. E.g., list(tas = 'Amon')
 get_dcpp_path <- function(archive, exp.name, table, grid, version, sdates) {
 
   # Define path (monthly and daily)
   multi_path <- FALSE
 
   if (is.null(archive$System[[exp.name]]$src$first_dcppB_syear) | isTRUE(all(sdates < archive$System[[exp.name]]$src$first_dcppB_syear))) {  # only dcppA
-    fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst,
-                           '$ensemble$', table, '$var$', grid, version)
-    fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+    if (length(table) == 1) {  # only one variable
+      fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst,
+                             '$ensemble$', table, '$var$', grid, version)
+      fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+    } else {  # multiple vars
+      fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst,
+                             '$ensemble$', '$table$', '$var$', '$grid$', '$version$')
+      fcst.files <- paste0('$var$_', '$table$', '_*_dcppA-hindcast_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc')
+    }
     path_list <- file.path(fcst.path, fcst.files)
   } else {
     if (all(sdates >= archive$System[[exp.name]]$src$first_dcppB_syear)) {  # only dcppB
-      fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst,
-                             '$ensemble$', table, '$var$', grid)  #, version)
-      fcst.files <- paste0('v*/$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+      if (length(table) == 1) {  # only one variable
+        fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst,
+                               '$ensemble$', table, '$var$', grid, version)
+
+        fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+      } else {
+        fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst,
+                               '$ensemble$', '$table$', '$var$', '$grid$', '$version$')
+        fcst.files <- paste0('$var$_', '$table$', '_*_dcppB-forecast_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc')
+      }
       path_list <- file.path(fcst.path, fcst.files)
     } else {  # have both dcppA and dcppB
diff --git a/modules/Saving/R/Utils.R b/modules/Saving/R/Utils.R
index 9ff695a649f6ee8a202aecb56013b70ee74ca8cc..a5bd5d0c9d6d299f2b4fa8340a529a9eecb05459 100644
--- a/modules/Saving/R/Utils.R
+++ b/modules/Saving/R/Utils.R
@@ -34,7 +34,7 @@
   dim(time) <- length(time)
   sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting
   metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'),
-                              calendar = calendar))
+                               calendar = calendar))
   attr(time, 'variables') <- metadata
   names(dim(time)) <- 'time'
diff --git a/modules/Saving/R/get_dir.R b/modules/Saving/R/get_dir.R
index ac63396815957fa71933f4731bb5931327eeaeb5..915c3f11c5005dec8cb79b4dc1df8969e1e97ec5 100644
--- a/modules/Saving/R/get_dir.R
+++ b/modules/Saving/R/get_dir.R
@@ -1,16 +1,14 @@
 ## TODO: Separate by time aggregation
-## TODO: Build a default path that accounts for:
-## variable, system, reference, start date and region name
-get_dir <- function(recipe, agg = "global") {
+get_dir <- function(recipe, variable, agg = "global") {
   # This function builds the path for the output directory. The output
   # directories will be subdirectories within outdir, organized by variable,
   # startdate, and aggregation.
   ## TODO: Get aggregation from recipe
+  ## TODO: Change variable name to s2dv_cube name
+  # variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
   outdir <- recipe$Run$output_dir
-  ## TODO: multivar case
-  variable <- recipe$Analysis$Variables$name
   system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T)
 
   if (tolower(recipe$Analysis$Output_format) == 'scorecards') {
@@ -23,18 +21,18 @@ get_dir <- function(recipe, agg = "global") {
   # Get startdate or hindcast period
   if (!is.null(recipe$Analysis$Time$fcst_year)) {
     if (tolower(recipe$Analysis$Horizon) == 'decadal') {
-      # decadal doesn't have sdate
+      ## PROBLEM: decadal doesn't have sdate
       fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_')
     } else {
       fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year,
-                          recipe$Analysis$Time$sdate)
+                           recipe$Analysis$Time$sdate)
     }
   } else {
     if (tolower(recipe$Analysis$Horizon) == 'decadal') {
-      # decadal doesn't have sdate
+      ## PROBLEM: decadal doesn't have sdate
       fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start,
-                                         recipe$Analysis$Time$hcst_end,
-                                         sep = '_'))
+                                          recipe$Analysis$Time$hcst_end,
+                                          sep = '_'))
     } else {
       fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate)
     }
@@ -43,15 +41,18 @@ get_dir <- function(recipe, agg = "global") {
   calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method)
   store.freq <- recipe$Analysis$Variables$freq
   ## TODO: Change "_country"
+  if (!is.null(recipe$Analysis$Region$name)) {
+    outdir <- paste0(outdir, "/", recipe$Analysis$Region$name)
+  }
   switch(tolower(agg),
-         "country" = {dir <- paste0(outdir, "/", system, "/", calib.method,
-                                    "-", store.freq, "/", variable,
-                                    "_country/", fcst.sdate, "/")},
-         "global" = {dir <- paste0(outdir, "/", system, "/", calib.method,
-                                   "-", store.freq, "/", variable, "/",
-                                   fcst.sdate, "/")})
+         "country" = {dir <- paste0(outdir, "/", system, "/", calib.method,
+                                    "-", store.freq, "/", variable,
+                                    "_country/", fcst.sdate, "/")},
+         "global" = {dir <- paste0(outdir, "/", system, "/", calib.method,
+                                   "-", store.freq, "/", variable, "/",
+                                   fcst.sdate, "/")})
   }
-  ## TODO: Multivar case
-  dir.create(dir, showWarnings = FALSE, recursive = TRUE)
+  ## TODO: Dir creation?
+
+  return(dir)
 }
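With the new signature, `get_dir()` no longer creates the directory; each caller does it per variable. The calling convention, condensed from the Saving functions below (`recipe`, `fcst.sdate` and `agg` are assumed to be in scope as they are there):

```r
variable <- "tas"  # one element of the parsed multi-variable vector
outdir <- get_dir(recipe = recipe, variable = variable)
if (!dir.exists(outdir)) {
  dir.create(outdir, recursive = TRUE)
}
outfile <- get_filename(outdir, recipe, variable, fcst.sdate, agg, "exp")
```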
diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R
index 8b9453186578e1610bae65bba9f7cb578782a69a..65349acd9eb52b6d9f17332c37752c2114ba406c 100644
--- a/modules/Saving/R/save_corr.R
+++ b/modules/Saving/R/save_corr.R
@@ -10,11 +10,6 @@ save_corr <- function(recipe,
   dictionary <- read_yaml("conf/variable-dictionary.yml")
   # Define grid dimensions and names
   lalo <- c('longitude', 'latitude')
-  # Remove singleton dimensions and rearrange lon, lat and time dims
-  if (tolower(agg) == "global") {
-    skill <- lapply(skill, function(x) {
-      Reorder(x, c(lalo, 'ensemble', 'time'))})
-  }
   # Add global and variable attributes
   global_attributes <- .get_global_attributes(recipe, archive)
   ## TODO: Sort out the logic once default behavior is decided
@@ -26,27 +21,6 @@ save_corr <- function(recipe,
     global_attributes <- c(global_attributes,
                            list(from_anomalies = "No"))
   }
-  attr(skill[[1]], 'global_attrs') <- global_attributes
-
-  for (i in 1:length(skill)) {
-    metric <- names(skill[i])
-    long_name <- dictionary$metrics[[metric]]$long_name
-    missing_val <- -9.e+33
-    skill[[i]][is.na(skill[[i]])] <- missing_val
-    if (tolower(agg) == "country") {
-      sdname <- paste0(metric, " region-aggregated metric")
-      dims <- c('Country', 'ensemble', 'time')
-    } else {
-      sdname <- paste0(metric) #, " grid point metric") # formerly names(metric)
-      dims <- c(lalo, 'ensemble', 'time')
-    }
-    metadata <- list(metric = list(name = metric,
-                                   standard_name = sdname,
-                                   long_name = long_name,
-                                   missing_value = missing_val))
-    attr(skill[[i]], 'variables') <- metadata
-    names(dim(skill[[i]])) <- dims
-  }
 
   # Time indices and metadata
   fcst.horizon <- tolower(recipe$Analysis$Horizon)
@@ -55,7 +29,7 @@ save_corr <- function(recipe,
 
   # Generate vector containing leadtimes
   dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1),
-                   cal = calendar)
+                    cal = calendar)
   if (fcst.horizon == 'decadal') {
     init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month
     init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-',
@@ -92,26 +66,62 @@ save_corr <- function(recipe,
   times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar)
   time <- times$time
 
-  # Generate name of output file
-  if (is.null(outdir)) {
-    outdir <- get_dir(recipe)
-  }
-  outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName,
-                          fcst.sdate, agg, "corr")
+  # Loop over variable dimension
+  for (var in 1:data_cube$dims[['var']]) {
+    # Subset skill arrays
+    subset_skill <- lapply(skill, function(x) {
+      ClimProjDiags::Subset(x, along = 'var',
+                            indices = var,
+                            drop = 'selected')})
 
-  # Get grid data and metadata and export to netCDF
-  if (tolower(agg) == "country") {
-    country <- .get_countries(grid)
-    ArrayToNc(append(country, time, skill), outfile)
-  } else {
-    latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
-    longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
-    latlon <- .get_latlon(latitude, longitude)
-    # Compile variables into a list and export to netCDF
-    vars <- list(latlon$lat, latlon$lon, time)
-    vars <- c(vars, skill)
-    ArrayToNc(vars, outfile)
+    # Generate name of output file
+    variable <- data_cube$attrs$Variable$varName[[var]]
+    outdir <- get_dir(recipe = recipe, variable = variable)
+    if (!dir.exists(outdir)) {
+      dir.create(outdir, recursive = T)
+    }
+    outfile <- get_filename(outdir, recipe, variable,
+                            fcst.sdate, agg, "corr")
+    # Remove singleton dimensions and rearrange lon, lat and time dims
+    if (tolower(agg) == "global") {
+      subset_skill <- lapply(subset_skill, function(x) {
+        Reorder(x, c(lalo, 'ensemble', 'time'))})
+    }
+    attr(subset_skill[[1]], 'global_attrs') <- global_attributes
+
+    for (i in 1:length(subset_skill)) {
+      metric <- names(subset_skill[i])
+      long_name <- dictionary$metrics[[metric]]$long_name
+      missing_val <- -9.e+33
+      subset_skill[[i]][is.na(subset_skill[[i]])] <- missing_val
+      if (tolower(agg) == "country") {
+        sdname <- paste0(metric, " region-aggregated metric")
+        dims <- c('Country', 'ensemble', 'time')
+      } else {
+        sdname <- paste0(metric) #, " grid point metric") # formerly names(metric)
+        dims <- c(lalo, 'ensemble', 'time')
+      }
+      metadata <- list(metric = list(name = metric,
+                                     standard_name = sdname,
+                                     long_name = long_name,
+                                     missing_value = missing_val))
+      attr(subset_skill[[i]], 'variables') <- metadata
+      names(dim(subset_skill[[i]])) <- dims
+    }
+    # Get grid data and metadata and export to netCDF
+    if (tolower(agg) == "country") {
+      country <- .get_countries(grid)
+      ArrayToNc(append(country, time, subset_skill), outfile)
+    } else {
+      latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
+      longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
+      latlon <- .get_latlon(latitude, longitude)
+      # Compile variables into a list and export to netCDF
+      vars <- list(latlon$lat, latlon$lon, time)
+      vars <- c(vars, subset_skill)
+      ArrayToNc(vars, outfile)
+    }
+    info(recipe$Run$logger,
+         "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####")
   }
-  info(recipe$Run$logger,
-       "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####")
 }
diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R
index 9ec8499c181e96afc4f5580080232f34ca64dcd6..07fdddf76f4f917c294629555cf3b7084604fe0d 100644
--- a/modules/Saving/R/save_forecast.R
+++ b/modules/Saving/R/save_forecast.R
@@ -13,17 +13,11 @@ save_forecast <- function(recipe,
   lalo <- c('longitude', 'latitude')
   archive <- get_archive(recipe)
   dictionary <- read_yaml("conf/variable-dictionary.yml")
-  variable <- data_cube$attrs$Variable$varName
-  var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name
   global_attributes <- .get_global_attributes(recipe, archive)
   fcst.horizon <- tolower(recipe$Analysis$Horizon)
   store.freq <- recipe$Analysis$Variables$freq
   calendar <- archive$System[[global_attributes$system]]$calendar
 
-  if (is.null(outdir)) {
-    outdir <- get_dir(recipe)
-  }
-
   # Generate vector containing leadtimes
   dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1),
                     cal = calendar)
@@ -55,83 +49,99 @@ save_forecast <- function(recipe,
   }
   # Get time difference in hours
   leadtimes <- as.numeric(dates - init_date)/3600
-  
+
   syears <- seq(1:dim(data_cube$data)['syear'][[1]])
   # expect dim = [sday = 1, sweek = 1, syear, time]
   syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1])
-  for (i in syears) {
-    # Select year from array and rearrange dimensions
-    fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T)
+
+  # Loop over variables
+  for (var in 1:data_cube$dims[['var']]) {
+    subset_cube <- CST_Subset(data_cube, along = 'var', indices = var,
+                              drop = F, var_dim = 'var', dat_dim = 'dat')
+    variable <- subset_cube$attrs$Variable$varName
+    var.longname <- subset_cube$attrs$Variable$metadata[[variable]]$long_name
 
-    if (!("time" %in% names(dim(fcst)))) {
-      dim(fcst) <- c("time" = 1, dim(fcst))
-    }
-    if (tolower(agg) == "global") {
-      fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time')))
-    } else {
list(Reorder(fcst, c('country', 'ensemble', 'time'))) + # Create output directory + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) } - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'ensemble', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- attr(data_cube$Variable, 'variable')$units - } else { - dims <- c(lalo, 'ensemble', 'time') - var.expname <- variable - var.sdname <- var.sdname - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } + # Loop over each year in the data and save independently + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(subset_cube$data, 'syear', i, drop = T) - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'ensemble', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- subset_cube$attrs$Variable$metadata[[variable]]$units + } else { + dims <- c(lalo, 'ensemble', 'time') + var.expname <- variable + var.sdname <- var.sdname + var.units <- subset_cube$attrs$Variable$metadata[[variable]]$units + } - # Select start date - if (fcst.horizon == 'decadal') { - ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal - ## data has been reshaped - # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') - - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } - - # Get time dimension values and metadata - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "exp") + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal + ## data has been reshaped + # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- 
data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, variable, fcst.sdate, + agg, "exp") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } } + info(recipe$Run$logger, paste("#####", toupper(type), + "SAVED TO NETCDF FILE #####")) } - info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index d48d9d8d6e83b24606f68e664151225bfd602dba..609537c0226b39c1ac4058eec7029fb9098720f6 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -11,12 +11,6 @@ save_metrics <- function(recipe, archive <- get_archive(recipe) dictionary <- read_yaml("conf/variable-dictionary.yml") - - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x) { - Reorder(x, c(lalo, 'time'))}) - } # Add global and variable attributes global_attributes <- .get_global_attributes(recipe, archive) ## TODO: Sort out the logic once default behavior is decided @@ -28,27 +22,6 @@ save_metrics <- function(recipe, global_attributes <- c(list(from_anomalies = "No"), global_attributes) } - attr(skill[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(skill)) { - metric <- names(skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - skill[[i]][is.na(skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'time') - } else { - sdname <- paste0(metric) #, " grid point metric") - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) @@ -57,13 +30,13 @@ save_metrics <- function(recipe, # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + cal = calendar) if (fcst.horizon == 'decadal') { init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal 
= calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -95,25 +68,60 @@ save_metrics <- function(recipe, times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) time <- times$time - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "skill") + # Loop over variable dimension + for (var in 1:data_cube$dims[['var']]) { + # Subset skill arrays + subset_skill <- lapply(skill, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + # Generate name of output file + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "skill") + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + subset_skill <- lapply(subset_skill, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + attr(subset_skill[[1]], 'global_attrs') <- global_attributes - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, skill), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill) - ArrayToNc(vars, outfile) + for (i in 1:length(subset_skill)) { + metric <- names(subset_skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + subset_skill[[i]][is.na(subset_skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'time') + } else { + sdname <- paste0(metric) + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(subset_skill[[i]], 'variables') <- metadata + names(dim(subset_skill[[i]])) <- dims + } + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, subset_skill), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, subset_skill) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") } - info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R index dcaf0765b783e6ebe1f4e5061ea69b2d469b84a3..fcf91d2fe1cb890ba715ae43f7bb26efc1a5a6c7 100644 --- a/modules/Saving/R/save_observations.R +++ b/modules/Saving/R/save_observations.R @@ -12,30 +12,24 @@ save_observations <- function(recipe, lalo <- c('longitude', 'latitude') archive <- get_archive(recipe) dictionary <- read_yaml("conf/variable-dictionary.yml") - variable <- 
data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name global_attributes <- .get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$Reference[[global_attributes$reference]]$calendar - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - # Generate vector containing leadtimes ## TODO: Move to a separate function? dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + cal = calendar) if (fcst.horizon == 'decadal') { ## Method 1: Use the first date as init_date. But it may be better to use ## the real initialized date (ask users) -# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') ## Method 2: use initial month init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, @@ -48,90 +42,104 @@ save_observations <- function(recipe, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'time'))) - } + # Loop over variables + for (var in 1:data_cube$dims[['var']]) { + subset_cube <- CST_Subset(data_cube, along = 'var', indices = var, + drop = F, var_dim = 'var', dat_dim = 'dat') + variable <- subset_cube$attrs$Variable$varName + var.longname <- subset_cube$attrs$Variable$metadata[[variable]]$long_name - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } else { - dims <- c(lalo, 'time') - var.expname <- variable - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + # Create output directory + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date. The date is computed for each year, and adapted for - # consistency with the hcst/fcst dates, so that both sets of files have - # the same name pattern. - ## Because observations are loaded differently in the daily vs. monthly - ## cases, different approaches are necessary. 
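## For reference, a minimal standalone sketch of the calendar-aware leadtime
## computation shared by these save functions (toy dates; PCICt is already a
## dependency here, and subtracting two PCICt objects yields a difference in
## seconds, which is what the modules assume):
library(PCICt)
init_date <- as.PCICt("1993-11-01", cal = "proleptic_gregorian")
dates <- as.PCICt(c("1993-11-16", "1993-12-16"), cal = "proleptic_gregorian")
leadtimes <- as.numeric(dates - init_date) / 3600  # time difference in hours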
- if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - } else { + + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(subset_cube$data, 'syear', i, drop = T) + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'time'))) + } - if (store.freq == "monthly_mean") { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units } else { - fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) + dims <- c(lalo, 'time') + var.expname <- variable + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units } - } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date. The date is computed for each year, and adapted for + # consistency with the hcst/fcst dates, so that both sets of files have + # the same name pattern. + ## Because observations are loaded differently in the daily vs. monthly + ## cases, different approaches are necessary. 
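## A toy sketch of the decadal start-date computation used in the branch just
## below: the initialization date is shifted to each hindcast year before
## being formatted as YYYYMMDD (all values here are made up).
library(lubridate)
init_date <- as.POSIXct("1990-11-01", tz = "UTC")
syears_val <- 1990:1993  # years extracted from the Dates array
fcst.sdate <- init_date + years(syears_val[3] - year(init_date))
format(fcst.sdate, "%Y%m%d")  # "19921101"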
+ if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + } else { - # Ensure the year is correct if the first leadtime goes to the next year - init_date <- as.POSIXct(init_date) - if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { - lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 - } - # Ensure that the initialization month is consistent with the hindcast - lubridate::month(fcst.sdate) <- lubridate::month(init_date) - fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) + } + } - # Get time dimension values and metadata - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + # Ensure the year is correct if the first leadtime goes to the next year + init_date <- as.POSIXct(init_date) + if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { + lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 + } + # Ensure that the initialization month is consistent with the hindcast + lubridate::month(fcst.sdate) <- lubridate::month(init_date) + fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "obs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "obs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } } + info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") } - info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index b4aab60550000a5a930e764632cff9de0326905a..6809567170002ce96c6085f7990ee5176c0c2201 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -6,15 +6,8 @@ save_percentiles <- function(recipe, # This function adds metadata to the percentiles # and exports them to a netCDF file inside 'outdir'. 
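## Toy illustration of the easyNCDF export convention these functions follow
## (the coordinate variables added by .get_latlon() are omitted here): named
## array dimensions become netCDF dimensions, the 'variables' attribute
## supplies the variable metadata, and 'global_attrs' on the first array
## becomes the file's global attributes.
library(easyNCDF)
field <- array(rnorm(24), dim = c(longitude = 4, latitude = 3, time = 2))
attr(field, 'variables') <- list(metric = list(name = "percentile_90",
                                               long_name = "90th percentile"))
attr(field, 'global_attrs') <- list(system = "toy-system")
ArrayToNc(list(field), "/tmp/toy_percentiles.nc")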
archive <- get_archive(recipe) - # Define grid dimensions and names lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - percentiles <- lapply(percentiles, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - # Add global and variable attributes global_attributes <- .get_global_attributes(recipe, archive) ## TODO: Sort out the logic once default behavior is decided @@ -26,21 +19,6 @@ save_percentiles <- function(recipe, global_attributes <- c(list(from_anomalies = "No"), global_attributes) } - attr(percentiles[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(percentiles)) { - ## TODO: replace with proper standard names - percentile <- names(percentiles[i]) - long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = percentile, long_name = long_name)) - attr(percentiles[[i]], 'variables') <- metadata - names(dim(percentiles[[i]])) <- dims - } # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) @@ -83,25 +61,57 @@ save_percentiles <- function(recipe, } times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) time <- times$time + + for (var in 1:data_cube$dims[['var']]) { + # Subset arrays + subset_percentiles <- lapply(percentiles, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + # Generate name of output file + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "percentiles") - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "percentiles") - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, percentiles), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, percentiles) - ArrayToNc(vars, outfile) + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + subset_percentiles <- lapply(subset_percentiles, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + + attr(subset_percentiles[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(subset_percentiles)) { + ## TODO: replace with proper standard names + percentile <- names(subset_percentiles[i]) + long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = percentile, long_name = long_name)) + attr(subset_percentiles[[i]], 'variables') <- metadata + names(dim(subset_percentiles[[i]])) <- dims + } + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, subset_percentiles), outfile) + } else { + latitude 
<- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, subset_percentiles) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") } - info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index 20af9be7858388c08f8d7de62d90092134599495..974ef7dc80db98542d88ad9886f67b90e76fa2c0 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -16,12 +16,7 @@ save_probabilities <- function(recipe, lalo <- c('longitude', 'latitude') archive <- get_archive(recipe) - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name global_attributes <- .get_global_attributes(recipe, archive) - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } # Add anomaly computation to global attributes ## TODO: Sort out the logic once default behavior is decided if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && @@ -39,7 +34,7 @@ save_probabilities <- function(recipe, # Generate vector containing leadtimes ## TODO: Move to a separate function? dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + cal = calendar) if (fcst.horizon == 'decadal') { init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', @@ -57,68 +52,82 @@ save_probabilities <- function(recipe, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') - if (tolower(agg) == "global") { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c(lalo, 'time'))}) - } else { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c('country', 'time'))}) + + # Loop over variable dimension + for (var in 1:data_cube$dims[['var']]) { + subset_probs <- lapply(probs, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + # Create output directory + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) } - ## TODO: Replace for loop with something more efficient? 
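## Toy illustration of the per-variable subsetting applied to every metric and
## probability array in this refactor: 'var' is a named array dimension, and
## drop = 'selected' removes only that dimension after indexing.
library(ClimProjDiags)
probs <- array(runif(2 * 3 * 4), dim = c(var = 2, longitude = 3, latitude = 4))
probs_var1 <- Subset(probs, along = 'var', indices = 1, drop = 'selected')
dim(probs_var1)  # longitude = 3, latitude = 4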
-    for (bin in 1:length(probs_syear)) {
-      prob_bin <- names(probs_syear[bin])
-      long_name <- paste0(prob_bin, " probability category")
-      if (tolower(agg) == "country") {
-        dims <- c('Country', 'time')
+    # Loop over each year in the data and save independently
+    for (i in syears) {
+      # Select year from array and rearrange dimensions
+      probs_syear <- lapply(subset_probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected')
+      if (tolower(agg) == "global") {
+        probs_syear <- lapply(probs_syear, function(x) {
+                              Reorder(x, c(lalo, 'time'))})
       } else {
-        dims <- c(lalo, 'time')
+        probs_syear <- lapply(probs_syear, function(x) {
+                              Reorder(x, c('country', 'time'))})
       }
-      metadata <- list(metric = list(name = prob_bin, long_name = long_name))
-      attr(probs_syear[[bin]], 'variables') <- metadata
-      names(dim(probs_syear[[bin]])) <- dims # is this necessary?
-    }
-    # Add global attributes
-    attr(probs_syear[[1]], 'global_attrs') <- global_attributes
-
-    # Select start date
-    if (fcst.horizon == 'decadal') {
-      # init_date is like "1990-11-01"
-      init_date <- as.POSIXct(init_date)
-      fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date))
-      fcst.sdate <- format(fcst.sdate, '%Y%m%d')
-    } else {
-      fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i]
-    }
+      for (bin in 1:length(probs_syear)) {
+        prob_bin <- names(probs_syear[bin])
+        long_name <- paste0(prob_bin, " probability category")
+        if (tolower(agg) == "country") {
+          dims <- c('Country', 'time')
+        } else {
+          dims <- c(lalo, 'time')
+        }
+        metadata <- list(metric = list(name = prob_bin, long_name = long_name))
+        attr(probs_syear[[bin]], 'variables') <- metadata
+        names(dim(probs_syear[[bin]])) <- dims # is this necessary?
+      }
-    # Get time dimension values and metadata
-    times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar)
-    time <- times$time
+      # Add global attributes
+      attr(probs_syear[[1]], 'global_attrs') <- global_attributes
+
+      # Select start date
+      if (fcst.horizon == 'decadal') {
+        # init_date is like "1990-11-01"
+        init_date <- as.POSIXct(init_date)
+        fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date))
+        fcst.sdate <- format(fcst.sdate, '%Y%m%d')
+      } else {
+        fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i]
+      }
+
+      # Get time dimension values and metadata
+      times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar)
+      time <- times$time
-    # Generate name of output file
-    outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName,
-                            fcst.sdate, agg, "probs")
-
-    # Get grid data and metadata and export to netCDF
-    if (tolower(agg) == "country") {
-      country <- get_countries(grid)
-      ArrayToNc(append(country, time, probs_syear), outfile)
-    } else {
-      latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
-      longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
-      latlon <- .get_latlon(latitude, longitude)
-      # Compile variables into a list and export to netCDF
-      vars <- list(latlon$lat, latlon$lon, time)
-      vars <- c(vars, probs_syear)
-      ArrayToNc(vars, outfile)
+      # Generate name of output file
+      outfile <- get_filename(outdir, recipe, variable,
+                              fcst.sdate, agg, "probs")
+
+      # Get grid data and metadata and export to netCDF
+      if (tolower(agg) == "country") {
+        country <- get_countries(grid)
+        ArrayToNc(append(country, time, probs_syear), outfile)
+      } else {
+        latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
+        longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
+        latlon <- .get_latlon(latitude, longitude)
+        # Compile variables into a list and export to netCDF
+        vars <- list(latlon$lat, latlon$lon, time)
+        vars <- c(vars, probs_syear)
+        ArrayToNc(vars, outfile)
+      }
     }
+    info(recipe$Run$logger,
+         paste("#####", toupper(type),
+               "PROBABILITIES SAVED TO NETCDF FILE #####"))
   }
-
-  info(recipe$Run$logger,
-       paste("#####", toupper(type),
-             "PROBABILITIES SAVED TO NETCDF FILE #####"))
 }
diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R
index 75c7908cc83fd3999173f7b1bd4c0bd9cd68577c..c52991d38337c59a40a422b4b5fd526a5365e761 100644
--- a/modules/Saving/Saving.R
+++ b/modules/Saving/Saving.R
@@ -1,4 +1,5 @@
 ## TODO: Save obs percentiles
+## TODO: Insert vardim to simplify the code?
 
 source("modules/Saving/R/get_dir.R")
 source("modules/Saving/R/get_filename.R")
@@ -31,24 +32,9 @@ save_data <- function(recipe, data,
   if (is.null(data)) {
     error(recipe$Run$logger,
           paste("The 'data' parameter is mandatory. It should be a list",
-                "of at least two s2dv_cubes containing the hcst and obs.")) 
+                "of at least two s2dv_cubes containing the hcst and obs."))
     stop()
   }
-  # Create output directory
-  outdir <- get_dir(recipe)
-  dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
-
-  # Export hindcast, forecast and observations onto outfile
-  save_forecast(recipe = recipe, data_cube = data$hcst,
-                type = 'hcst',
-                outdir = outdir)
-  if (!is.null(data$fcst)) {
-    save_forecast(recipe = recipe, data_cube = data$fcst,
-                  type = 'fcst',
-                  outdir = outdir)
-  }
-  save_observations(recipe = recipe, data_cube = data$obs,
-                    outdir = outdir)
 
   # Separate ensemble correlation from the rest of the metrics, as it has one
   # extra dimension "ensemble" and must be saved to a different file
@@ -63,102 +49,51 @@ save_data <- function(recipe, data,
     corr_metrics <- NULL
   }
 
-  # Export skill metrics onto outfile
+  # Export hindcast, forecast and observations; each save function now loops
+  # over the variables in the s2dv_cube and builds its own output directories
+  save_forecast(recipe = recipe,
+                data_cube = data$hcst,
+                outdir = NULL,
+                type = 'hcst')
+  if (!is.null(data$fcst)) {
+    save_forecast(recipe = recipe,
+                  data_cube = data$fcst,
+                  outdir = NULL,
+                  type = 'fcst')
+  }
+  save_observations(recipe = recipe,
+                    data_cube = data$obs,
+                    outdir = NULL)
+
+  # Export skill metrics
   if (!is.null(skill_metrics)) {
-    save_metrics(recipe = recipe, skill = skill_metrics,
-                 data_cube = data$hcst,
-                 outdir = outdir)
+    save_metrics(recipe = recipe,
+                 skill = skill_metrics,
+                 data_cube = data$hcst,
+                 outdir = NULL)
   }
   if (!is.null(corr_metrics)) {
-    save_corr(recipe = recipe, skill = corr_metrics,
+    save_corr(recipe = recipe,
+              skill = corr_metrics,
               data_cube = data$hcst,
-              outdir = outdir)
-  }
-
+              outdir = NULL)
+  }
   # Export probabilities onto outfile
   if (!is.null(probabilities)) {
-    save_percentiles(recipe = recipe, percentiles = probabilities$percentiles,
-                     data_cube = data$hcst,
-                     outdir = outdir)
-    save_probabilities(recipe = recipe, probs = probabilities$probs,
+    save_percentiles(recipe = recipe,
+                     percentiles = probabilities$percentiles,
                      data_cube = data$hcst,
-                     type = "hcst",
-                     outdir = outdir)
-    if (!is.null(probabilities$probs_fcst)) {
-      save_probabilities(recipe = recipe, probs = probabilities$probs_fcst,
-                         data_cube = data$fcst,
-                         type = "fcst",
-                         outdir = outdir)
+                     outdir = NULL)
+    save_probabilities(recipe = recipe,
+                       probs = probabilities$probs,
+                       data_cube = data$hcst,
+                       outdir = NULL,
+                       type = "hcst")
+    if (!is.null(probabilities$probs_fcst)) {
+      save_probabilities(recipe = recipe,
+                         probs = probabilities$probs_fcst,
+                         data_cube = data$fcst,
+                         outdir = NULL,
+                         type = "fcst")
+    }
   }
 }
 
-get_global_attributes <- function(recipe, archive) {
-  # Generates metadata of interest to add to the global attributes of the
-  # netCDF files.
-  parameters <- recipe$Analysis
-  hcst_period <- paste0(parameters$Time$hcst_start, " to ",
-                        parameters$Time$hcst_end)
-  current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone())
-  system_name <- parameters$Datasets$System$name
-  reference_name <- parameters$Datasets$Reference$name
-
-  attrs <- list(reference_period = hcst_period,
-                institution_system = archive$System[[system_name]]$institution,
-                institution_reference = archive$Reference[[reference_name]]$institution,
-                system = system_name,
-                reference = reference_name,
-                calibration_method = parameters$Workflow$Calibration$method,
-                computed_on = current_time)
-
-  return(attrs)
-}
-
-get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) {
-  # Generates time dimensions and the corresponding metadata.
-  ## TODO: Subseasonal
-
-  switch(fcst.horizon,
-         "seasonal" = {time <- leadtimes; ref <- 'hours since ';
-                       stdname <- paste(strtoi(leadtimes), collapse=", ")},
-         "subseasonal" = {len <- 4; ref <- 'hours since ';
-                          stdname <- ''},
-         "decadal" = {time <- leadtimes; ref <- 'hours since ';
-                      stdname <- paste(strtoi(leadtimes), collapse=", ")})
-
-  dim(time) <- length(time)
-  sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting
-  metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'),
-                               calendar = calendar))
-  attr(time, 'variables') <- metadata
-  names(dim(time)) <- 'time'
-
-  sdate <- 1:length(sdate)
-  dim(sdate) <- length(sdate)
-  metadata <- list(sdate = list(standard_name = paste(strtoi(sdate),
-                                                      collapse=", "),
-                                units = paste0('Init date')))
-  attr(sdate, 'variables') <- metadata
-  names(dim(sdate)) <- 'sdate'
-
-  return(list(time=time))
-}
-
-get_latlon <- function(latitude, longitude) {
-  # Adds dimensions and metadata to lat and lon
-  # latitude: array containing the latitude values
-  # longitude: array containing the longitude values
-
-  dim(longitude) <- length(longitude)
-  metadata <- list(longitude = list(units = 'degrees_east'))
-  attr(longitude, 'variables') <- metadata
-  names(dim(longitude)) <- 'longitude'
-
-  dim(latitude) <- length(latitude)
-  metadata <- list(latitude = list(units = 'degrees_north'))
-  attr(latitude, 'variables') <- metadata
-  names(dim(latitude)) <- 'latitude'
-
-  return(list(lat=latitude, lon=longitude))
-
-}
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 7eb443215af8bdf26947dc5d5be9825435cd3aaf..f22d6b9b8bc75bd6639471705b90a5deb03a86f0 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -425,11 +425,11 @@ compute_probabilities <- function(recipe, data) {
   named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)})
   if (!is.null(data$fcst)) {
     fcst_years <- dim(data$fcst$data)[['syear']]
-    named_probs_fcst <- lapply(named_probs_fcst,
-                               function(x) {Subset(x,
-                                                   along = 'syear',
-                                                   indices = 1:fcst_years,
-                                                   drop = 'non-selected')})
+    named_probs_fcst <- lapply(named_probs_fcst, function(x) {.drop_dims(x)})
+    # function(x) {Subset(x,
+    #                     along = 'syear',
+    #                     indices = 1:fcst_years,
+    #                     drop = 'non-selected')})
     results <- list(probs = named_probs,
                     probs_fcst = named_probs_fcst,
                     percentiles = named_quantiles)
@@ -464,20 +464,21 @@ compute_probabilities <- function(recipe, data) {
   }
 }
-## TODO: Replace with ClimProjDiags::Subset
 .drop_dims <- function(metric_array) {
-  # Drop all singleton dimensions
-  metric_array <- drop(metric_array)
-  # If time happened to be a singleton dimension, add it back in the array
-  if (!("time" %in% names(dim(metric_array)))) {
-    dim(metric_array) <- c("time" = 1, dim(metric_array))
-  }
+  # Define dimensions that are not essential for saving
+  droppable_dims <- c("dat", "sday", "sweek", "ensemble", "nobs",
+                      "nexp", "exp_memb", "obs_memb", "bin")
+  # Select non-essential dimensions of length 1 (exact name matching, so that
+  # nothing is dropped when none of the droppable dimensions are present)
+  dims_to_drop <- intersect(names(which(dim(metric_array) == 1)),
+                            droppable_dims)
+  drop_indices <- which(names(dim(metric_array)) %in% dims_to_drop)
+  # Drop selected dimensions
+  metric_array <- abind::adrop(metric_array, drop = drop_indices)
   # If array has memb dim (Corr case), change name to 'ensemble'
   if ("exp_memb" %in% names(dim(metric_array))) {
-    names(dim(metric_array))[which(names(dim(metric_array)) ==
+    names(dim(metric_array))[which(names(dim(metric_array)) == 
                                      "exp_memb")] <- "ensemble"
-  # } else {
-  #   dim(metric_array) <- c(dim(metric_array), "ensemble" = 1)
   }
   return(metric_array)
 }
diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R
index e0fa8b8432dcd1972752313634d7461504afc14f..e3d75138973205eaf77425618227c90c929e88fa 100644
--- a/modules/Visualization/R/plot_ensemble_mean.R
+++ b/modules/Visualization/R/plot_ensemble_mean.R
@@ -10,80 +10,72 @@ plot_ensemble_mean <- function(recipe, fcst, outdir) {
   longitude <- fcst$coords$lon
   archive <- get_archive(recipe)
   system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name
-  variable <- recipe$Analysis$Variables$name
-  units <- attr(fcst$Variable, "variable")$units
   start_date <- paste0(recipe$Analysis$Time$fcst_year,
                        recipe$Analysis$Time$sdate)
   # Compute ensemble mean
   ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble')
-  # Drop extra dims, add time dim if missing:
-  ensemble_mean <- drop(ensemble_mean)
+  # Loop over variable dimension
+  for (var in 1:fcst$dims[['var']]) {
+    variable <- fcst$attrs$Variable$varName[[var]]
+    units <- fcst$attrs$Variable$metadata[[variable]]$units
+    var_ens_mean <- ClimProjDiags::Subset(ensemble_mean,
+                                          along = c("dat", "var",
+                                                    "sday", "sweek"),
+                                          indices = list(1, var, 1, 1),
+                                          drop = 'selected')
-  if (!("time" %in% names(dim(ensemble_mean)))) {
-    dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean))
-  }
-  if (!'syear' %in% names(dim(ensemble_mean))) {
-    ensemble_mean <- Reorder(ensemble_mean, c("time",
-                                              "longitude",
-                                              "latitude"))
-  } else {
-    ensemble_mean <- Reorder(ensemble_mean, c("syear",
-                                              "time",
-                                              "longitude",
-                                              "latitude"))
-  }
-  ## TODO: Redefine column colors, possibly depending on variable
-  if (variable == 'prlr') {
-    palette = "BrBG"
-    rev = F
-  } else {
-    palette = "RdBu"
-    rev = T
-  }
-  # Define brks, centered on in the case of anomalies
-  ##
-  if (grepl("anomaly",
-            fcst$attrs$Variable$metadata[[variable]]$long_name)) {
-    variable <- paste(variable, "anomaly")
-    max_value <- max(abs(ensemble_mean))
-    ugly_intervals <- seq(-max_value, max_value, max_value/20)
-    brks <- pretty(ugly_intervals, n = 12, min.n = 8)
-  } else {
-    brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8)
-  }
-  cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev)
-  options(bitmapType = "cairo")
-
-  for (i_syear in start_date) {
-    # Define name of output file and titles
-    if (length(start_date) == 1) {
-      i_ensemble_mean <- ensemble_mean
-      outfile <- paste0(outdir, "forecast_ensemble_mean-", start_date, 
".png") + var_ens_mean <- Reorder(var_ens_mean, c("syear", + "time", + "longitude", + "latitude")) + ## TODO: Redefine column colors, possibly depending on variable + if (variable == 'prlr') { + palette = "BrBG" + rev = F } else { - i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ] - outfile <- paste0(outdir, "forecast_ensemble_mean-", i_syear, ".png") + palette = "RdBu" + rev = T + } + # Define brks, centered on in the case of anomalies + ## + if (grepl("anomaly", + fcst$attrs$Variable$metadata[[variable]]$long_name)) { + variable <- paste(variable, "anomaly") + max_value <- max(abs(var_ens_mean)) + ugly_intervals <- seq(-max_value, max_value, max_value/20) + brks <- pretty(ugly_intervals, n = 12, min.n = 8) + } else { + brks <- pretty(range(var_ens_mean, na.rm = T), n = 15, min.n = 8) + } + cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) + options(bitmapType = "cairo") + + for (i_syear in start_date) { + # Define name of output file and titles + i_var_ens_mean <- var_ens_mean[which(start_date == i_syear), , , ] + outfile <- paste0(outdir[[var]], "forecast_ensemble_mean-", i_syear, ".png") + toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, + "- Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + titles <- as.vector(months) + # Plots + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + i_var_ens_mean, longitude, latitude, + filled.continents = F, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + units = units, + cols = cols, + brks = brks, + fileout = outfile, + bar_label_digits = 4, + bar_extra_margin = rep(0.7, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) } - toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, - "- Initialization:", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - titles <- as.vector(months) - # Plots - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - i_ensemble_mean, longitude, latitude, - filled.continents = F, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - units = units, - cols = cols, - brks = brks, - fileout = outfile, - bar_label_digits = 4, - bar_extra_margin = rep(0.7, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) + info(recipe$Run$logger, + "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") } - info(recipe$Run$logger, - "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index 9ab0199e2026be5ab9e5e6487a83d206fd68c4f9..f912e24968a917142cb2270fce9d680f1c2c2e9c 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -1,3 +1,4 @@ +## TODO: Change name plot_most_likely_terciles <- function(recipe, fcst, probabilities, @@ -34,55 +35,47 @@ plot_most_likely_terciles <- function(recipe, ## TODO: Improve this section # Drop extra dims, add time dim if missing: - probs_fcst <- drop(probs_fcst) - if (!("time" %in% names(dim(probs_fcst)))) { - dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) - } - if (!'syear' %in% names(dim(probs_fcst))) { - probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) - } else { - probs_fcst <- Reorder(probs_fcst, - c("syear", "time", "bin", "longitude", "latitude")) - } + for (var in 
1:fcst$dims[['var']]) { + variable <- fcst$attrs$Variable$varName[[var]] + var_probs <- ClimProjDiags::Subset(probs_fcst, + along = c("var"), + indices = var, + drop = 'selected') - for (i_syear in start_date) { - # Define name of output file and titles - if (length(start_date) == 1) { - i_probs_fcst <- probs_fcst - outfile <- paste0(outdir, "forecast_most_likely_tercile-", start_date, - ".png") - } else { - i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] - outfile <- paste0(outdir, "forecast_most_likely_tercile-", i_syear, ".png") + var_probs <- Reorder(var_probs, + c("syear", "time", "bin", "longitude", "latitude")) + for (i_syear in start_date) { + # Define name of output file and titles + i_var_probs <- var_probs[which(start_date == i_syear), , , , ] + outfile <- paste0(outdir[[var]], "forecast_most_likely_tercile-", + i_syear, ".png") + toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", + "Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + ## TODO: Ensure this works for daily and sub-daily cases + titles <- as.vector(months) + # Plots + ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked + ## on. + suppressWarnings( + PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), + cat_dim = 'bin', + i_var_probs, longitude, latitude, + coast_width = 1.5, + title_scale = 0.6, + legend_scale = 0.8, #cex_bar_titles = 0.6, + toptitle = toptitle, + titles = titles, + fileout = outfile, + bar_label_digits = 2, + bar_scale = rep(0.7, 4), + bar_label_scale = 1.2, + axes_label_scale = 1.3, + triangle_ends = c(F, F), width = 11, height = 8) + ) } - toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", - "Initialization:", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - ## TODO: Ensure this works for daily and sub-daily cases - titles <- as.vector(months) - - # Plots - ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked - ## on. 
- suppressWarnings( - PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), - cat_dim = 'bin', - i_probs_fcst, longitude, latitude, - coast_width = 1.5, - title_scale = 0.6, - legend_scale = 0.8, #cex_bar_titles = 0.6, - toptitle = toptitle, - titles = titles, - fileout = outfile, - bar_label_digits = 2, - bar_scale = rep(0.7, 4), - bar_label_scale = 1.2, - axes_label_scale = 1.3, - triangle_ends = c(F, F), width = 11, height = 8) - ) - } - info(recipe$Run$logger, "##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") + } } diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index f8be19d9055f483f8cfe8dea391c29eee645e540..e62496f6d48cf5cc0c48615de671eda0d92c613e 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -47,116 +47,123 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, scores <- c("rps", "frps", "crps", "frps_specs") # Assign colorbar to each metric type ## TODO: Triangle ends - for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { - if (name %in% names(skill_metrics)) { - # Define plot characteristics and metric name to display in plot - if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90_specs", "bss10_specs", - "rmsss")) { - display_name <- toupper(strsplit(name, "_")[[1]][1]) - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name == "mean_bias_ss") { - display_name <- "Mean Bias Skill Score" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name %in% c("enscorr", "enscorr_specs")) { - display_name <- "Ensemble Mean Correlation" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - cols <- clim.colors(length(brks) - 1, diverging_palette) - col_inf <- NULL - col_sup <- NULL - } else if (name %in% scores) { - skill <- skill_metrics[[name]] - display_name <- toupper(strsplit(name, "_")[[1]][1]) - brks <- seq(0, 1, by = 0.1) - colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) - cols <- colorbar[1:(length(colorbar) - 1)] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "enssprerr") { - skill <- skill_metrics[[name]] - display_name <- "Spread-to-Error Ratio" - brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) - colorbar <- clim.colors(length(brks), diverging_palette) - cols <- colorbar[1:length(colorbar) - 1] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "mean_bias") { - skill <- skill_metrics[[name]] - display_name <- "Mean Bias" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) - brks <- max_value * seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- colorbar[length(colorbar)] - } - options(bitmapType = "cairo") - # Reorder dimensions - skill <- Reorder(skill, c("time", "longitude", "latitude")) - # If the significance has been requested and the variable has it, - # retrieve it and reorder its dimensions. 
- significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(skill_metrics))) { - skill_significance <- skill_metrics[[significance_name]] - skill_significance <- Reorder(skill_significance, c("time", - "longitude", - "latitude")) - # Split skill significance into list of lists, along the time dimension - # This allows for plotting the significance dots correctly. - skill_significance <- ClimProjDiags::ArrayToList(skill_significance, - dim = 'time', - level = "sublist", - names = "dots") - } else { - skill_significance <- NULL - } - # Define output file name and titles - if (tolower(recipe$Analysis$Horizon) == "seasonal") { - outfile <- paste0(outdir, name, "-", month_label, ".png") - } else { - outfile <- paste0(outdir, name, ".png") + for (var in 1:data_cube$dims[['var']]) { + var_skill <- lapply(skill_metrics, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + + for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { + if (name %in% names(skill_metrics)) { + # Define plot characteristics and metric name to display in plot + if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", + "rpss_specs", "bss90_specs", "bss10_specs", + "rmsss")) { + display_name <- toupper(strsplit(name, "_")[[1]][1]) + skill <- var_skill[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } else if (name == "mean_bias_ss") { + display_name <- "Mean Bias Skill Score" + skill <- var_skill[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } else if (name %in% c("enscorr", "enscorr_specs")) { + display_name <- "Ensemble Mean Correlation" + skill <- var_skill[[name]] + brks <- seq(-1, 1, by = 0.2) + cols <- clim.colors(length(brks) - 1, diverging_palette) + col_inf <- NULL + col_sup <- NULL + } else if (name %in% scores) { + skill <- var_skill[[name]] + display_name <- toupper(strsplit(name, "_")[[1]][1]) + brks <- seq(0, 1, by = 0.1) + colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) + cols <- colorbar[1:(length(colorbar) - 1)] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name == "enssprerr") { + skill <- var_skill[[name]] + display_name <- "Spread-to-Error Ratio" + brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) + colorbar <- clim.colors(length(brks), diverging_palette) + cols <- colorbar[1:length(colorbar) - 1] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name == "mean_bias") { + skill <- var_skill[[name]] + display_name <- "Mean Bias" + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + } + options(bitmapType = "cairo") + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. 
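## Sketched with a toy array: ArrayToList() splits the significance field
## along 'time' into a list of named sublists, which PlotLayout() forwards to
## each panel as the 'dots' argument of PlotEquiMap().
library(ClimProjDiags)
sig <- array(runif(24) < 0.05, dim = c(time = 2, longitude = 4, latitude = 3))
sig_dots <- ArrayToList(sig, dim = 'time', level = "sublist", names = "dots")
length(sig_dots)  # 2: one element per time step, each holding a 'dots' field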
+ significance_name <- paste0(name, "_significance") + if ((significance) && (significance_name %in% names(skill_metrics))) { + skill_significance <- var_skill[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) + # Split skill significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. + skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + dim = 'time', + level = "sublist", + names = "dots") + } else { + skill_significance <- NULL + } + # Define output file name and titles + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + outfile <- paste0(outdir[var], name, "-", month_label, ".png") + } else { + outfile <- paste0(outdir[var], name, ".png") + } + toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName[var], + "-", system_name, "-", month_abbreviation, + hcst_period) + months <- unique(lubridate::month(data_cube$attrs$Dates, + label = T, abb = F)) + titles <- as.vector(months) + # Plot + suppressWarnings( + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + asplit(skill, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = skill_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents=F, + brks = brks, + cols = cols, + col_inf = col_inf, + col_sup = col_sup, + fileout = outfile, + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + ) } - toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, - "-", system_name, "-", month_abbreviation, - hcst_period) - months <- unique(lubridate::month(data_cube$attrs$Dates, - label = T, abb = F)) - titles <- as.vector(months) - # Plot - suppressWarnings( - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - asplit(skill, MARGIN=1), # Splitting array into a list - longitude, latitude, - special_args = skill_significance, - dot_symbol = 20, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - filled.continents=F, - brks = brks, - cols = cols, - col_inf = col_inf, - col_sup = col_sup, - fileout = outfile, - bar_label_digits = 3, - bar_extra_margin = rep(0.9, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) - ) } + info(recipe$Run$logger, + "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") } - info(recipe$Run$logger, - "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 1ae481093f30e7b7fbdcc6f384390d311cdef0c1..446400550cfac32aa8f3bdb918398245465ed10a 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,11 +1,12 @@ ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Adapt to multi-model case +## TODO: Adapt to multi-variable case ## TODO: Add param 'raw'? 
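## The recipe fields that hold several values (variable names, requested
## plots) are plain strings; the modules tokenize them with a tolerant
## separator pattern, for example:
plots <- strsplit("skill_metrics, forecast_ensemble_mean, most_likely_terciles",
                  ", | |,")[[1]]
plots  # "skill_metrics" "forecast_ensemble_mean" "most_likely_terciles"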
## TODO: Decadal plot names -source("modules/Visualization/R/plot_ensemble_mean.R") -source("modules/Visualization/R/plot_most_likely_terciles_map.R") source("modules/Visualization/R/plot_skill_metrics.R") +source("modules/Visualization/R/plot_most_likely_terciles_map.R") +source("modules/Visualization/R/plot_ensemble_mean.R") plot_data <- function(recipe, data, @@ -22,8 +23,12 @@ plot_data <- function(recipe, plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/plots/") - outdir <- paste0(get_dir(recipe)) - dir.create(outdir, showWarnings = FALSE, recursive = TRUE) + ## TODO: Sort this out + outdir <- get_dir(recipe = recipe, + variable = data$hcst$attrs$Variable$varName) + for (directory in outdir) { + dir.create(directory, showWarnings = FALSE, recursive = TRUE) + } if ((is.null(skill_metrics)) && (is.null(data$fcst))) { error(recipe$Run$logger, "The Visualization module has been called, diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 4535d41bccab6628aeedc12c147df5a3e57fdad8..575585dbd01667a7477cd8186e6d140990e23624 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -5,7 +5,7 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "recipes/atomic_recipes/recipe_system7c3s-tas.yml" +recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml" recipe <- prepare_outputs(recipe_file) # Load datasets @@ -19,7 +19,8 @@ skill_metrics <- compute_skill_metrics(recipe, data) # Compute percentiles and probability bins probabilities <- compute_probabilities(recipe, data) # Export all data to netCDF +## TODO: Fix plotting # save_data(recipe, data, skill_metrics, probabilities) # Plot data -plot_data(recipe, data, skill_metrics, probabilities, - significance = T) +# plot_data(recipe, calibrated_data, skill_metrics, probabilities, +# significance = T) diff --git a/recipes/atomic_recipes/recipe_seasonal-tests.yml b/recipes/atomic_recipes/recipe_seasonal-tests.yml index 2e1ac394f05dbb62e0ff275ce7c66abc834723b2..3da81a34d533b098d27131b332a10ecbf32e1792 100644 --- a/recipes/atomic_recipes/recipe_seasonal-tests.yml +++ b/recipes/atomic_recipes/recipe_seasonal-tests.yml @@ -4,7 +4,7 @@ Description: Analysis: Horizon: Seasonal Variables: - name: tas + name: tas prlr freq: monthly_mean Datasets: System: diff --git a/recipes/atomic_recipes/recipe_test_multivar.yml b/recipes/atomic_recipes/recipe_test_multivar.yml new file mode 100644 index 0000000000000000000000000000000000000000..94e41223aed39ffd5757d8f636e8a260ce3fe7b0 --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_multivar.yml @@ -0,0 +1,53 @@ +Description: + Author: V. 
diff --git a/recipes/atomic_recipes/recipe_test_multivar.yml b/recipes/atomic_recipes/recipe_test_multivar.yml
new file mode 100644
index 0000000000000000000000000000000000000000..94e41223aed39ffd5757d8f636e8a260ce3fe7b0
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_test_multivar.yml
@@ -0,0 +1,53 @@
+Description:
+  Author: V. Agudetse
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: tas prlr
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: ECMWF-SEAS5
+    Multimodel: False
+    Reference:
+      name: ERA5
+  Time:
+    sdate: '1101'
+    fcst_year: '2020'
+    hcst_start: '1999'
+    hcst_end: '2010'
+    ftime_min: 1
+    ftime_max: 2
+  Region:
+    latmin: -10
+    latmax: 10
+    lonmin: 0
+    lonmax: 20
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    Calibration:
+      method: raw
+      save: 'exp_only'
+    Anomalies:
+      compute: yes
+      cross_validation: yes
+      save: 'none'
+    Skill:
+      metric: RPS RPSS CRPS CRPSS BSS10 BSS90 EnsCorr mean_bias mean_bias_SS
+      save: 'all'
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10]]
+      save: 'all'
+    Indicators:
+      index: no
+  ncores: 7
+  remove_NAs: yes
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
diff --git a/recipes/atomic_recipes/recipe_test_multivar_decadal.yml b/recipes/atomic_recipes/recipe_test_multivar_decadal.yml
new file mode 100644
index 0000000000000000000000000000000000000000..00563fe2525844898975b308f89f6781ac0f730d
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_test_multivar_decadal.yml
@@ -0,0 +1,57 @@
+Description:
+  Author: An-Chi Ho
+  '': split version
+Analysis:
+  Horizon: Decadal
+  Variables:
+    name: tas tos
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: CanESM5
+      member: r1i1p2f1,r2i1p2f1,r3i1p2f1  #'all'
+    Multimodel: no
+    Reference:
+      name: ERA5  #JRA-55
+  Time:
+    fcst_year: [2020,2021]
+    hcst_start: 1990
+    hcst_end: 1993
+#    season: 'Annual'
+    ftime_min: 2
+    ftime_max: 14
+  Region:
+    latmin: 10
+    latmax: 20
+    lonmin: 150
+    lonmax: 170
+  Regrid:
+    method: bilinear
+    type: to_system  #to_reference
+  Workflow:
+    Anomalies:
+      compute: no
+      cross_validation:
+      save:
+    Calibration:
+      method: bias
+      save: 'all'
+    Skill:
+      metric: RPSS Corr
+      save: 'all'
+    Probabilities:
+      percentiles: [[1/3, 2/3]]
+      save: 'all'
+    Indicators:
+      index: FALSE
+    Visualization:
+      plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles
+  ncores:  # Optional, int: number of cores, defaults to 1
+  remove_NAs:  # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/
+  code_dir: /esarchive/scratch/aho/git/auto-s2s/
+
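The member entry in the decadal recipe above is a single comma-separated string rather than a YAML list. A hypothetical sketch of how such an entry can be expanded into individual ensemble members (the actual parsing lives in the decadal Loading code, which this patch does not touch):

    member_entry <- "r1i1p2f1,r2i1p2f1,r3i1p2f1"
    members <- strsplit(member_entry, ",")[[1]]
    members          # "r1i1p2f1" "r2i1p2f1" "r3i1p2f1"
    length(members)  # 3 ensemble members; 'all' would instead select every member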
diff --git a/recipes/atomic_recipes/recipe_test_multivar_decadal_multipath.yml b/recipes/atomic_recipes/recipe_test_multivar_decadal_multipath.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a38f81b8dccb75fdee9d67de5f4b47b2c6a64596
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_test_multivar_decadal_multipath.yml
@@ -0,0 +1,57 @@
+Description:
+  Author: An-Chi Ho
+  '': split version
+Analysis:
+  Horizon: Decadal
+  Variables:
+    name: tas pr
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: EC-Earth3-i4
+      member: r1i4p1f1,r2i4p1f1,r3i4p1f1  #'all'
+    Multimodel: no
+    Reference:
+      name: ERA5  #JRA-55
+  Time:
+    fcst_year: [2020,2021]
+    hcst_start: 1990
+    hcst_end: 1993
+#    season: 'Annual'
+    ftime_min: 2
+    ftime_max: 14
+  Region:
+    latmin: 10  #-90
+    latmax: 20  #90
+    lonmin: 0
+    lonmax: 15  #359.9
+  Regrid:
+    method: bilinear
+    type: to_system  #to_reference
+  Workflow:
+    Anomalies:
+      compute: no
+      cross_validation:
+      save:
+    Calibration:
+      method: bias
+      save: 'all'
+    Skill:
+      metric: RPSS Corr
+      save: 'all'
+    Probabilities:
+      percentiles: [[1/3, 2/3]]
+      save: 'all'
+    Indicators:
+      index: FALSE
+    Visualization:
+      plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles
+  ncores:  # Optional, int: number of cores, defaults to 1
+  remove_NAs:  # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/
+  code_dir: /esarchive/scratch/aho/git/auto-s2s/
+
diff --git a/recipes/atomic_recipes/recipe_test_multivar_nadia.yml b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml
new file mode 100644
index 0000000000000000000000000000000000000000..50cc62f7bae05e3af1e0af600186b13b183004d2
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml
@@ -0,0 +1,49 @@
+Description:
+  Author: V. Agudetse
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: tas tos
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: ECMWF-SEAS5
+    Multimodel: False
+    Reference:
+      name: BEST
+  Time:
+    sdate: '0101'
+    fcst_year:
+    hcst_start: '1993'
+    hcst_end: '2016'
+    ftime_min: 1
+    ftime_max: 6
+  Region:
+    latmin: -90
+    latmax: 90
+    lonmin: 0
+    lonmax: 359.9
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    Calibration:
+      method: raw
+    Anomalies:
+      compute: yes
+      cross_validation: yes
+    Skill:
+      metric: mean_bias EnsCorr RPS RPSS CRPS CRPSS enssprerr
+    Probabilities:
+      percentiles: [[1/3, 2/3]]
+    Indicators:
+      index: no
+  ncores: 7
+  remove_NAs: yes
+  Output_format: scorecards
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/nmilders/scorecards_data/to_system/tas-tos/ECMWF-SEAS5/tas-tos/
+  code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/
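In the recipes above, percentile thresholds such as [[1/3, 2/3]] are read by read_yaml() as character strings ("1/3"), not numbers, so they must be evaluated downstream. A sketch of that conversion (assumption: the suite uses an eval/parse idiom along these lines):

    # Convert the recipe's fraction strings into numeric thresholds.
    percentile_strings <- c("1/3", "2/3")
    thresholds <- sapply(percentile_strings,
                         function(x) eval(parse(text = x)))
    thresholds  # 0.3333333 0.6666667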
diff --git a/recipes/recipe_scorecards_s2s-suite.yml b/recipes/recipe_scorecards_s2s-suite.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ae17f9cd1dbbcf2ada5b47a68219272d983aff6d
--- /dev/null
+++ b/recipes/recipe_scorecards_s2s-suite.yml
@@ -0,0 +1,50 @@
+Description:
+  Author: nmilders
+  Info: scorecards data
+
+Analysis:
+  Horizon: seasonal  # Mandatory, str: either subseasonal, seasonal, or decadal
+  Variables:
+    name: prlr  # Mandatory, str: tas prlr psl sfcWind
+    freq: monthly_mean  # Mandatory, str: either monthly_mean or daily_mean
+  Datasets:
+    System:
+      name: ECMWF-SEAS5  # Mandatory, str: system5c3s system21_m1 system35c3s system3_m1-c3s system2_m1 system7c3s
+    Multimodel: no  # Mandatory, bool: Either yes/true or no/false
+    Reference:
+      name: ERA5  # Mandatory, str: Reference codename. See docu.
+  Time:
+    sdate: '0101'  ## MMDD
+    fcst_year:  # Optional, int: Forecast year 'YYYY'
+    hcst_start: '1993'  # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_end: '2016'  # Mandatory, int: Hindcast end year 'YYYY'
+    ftime_min: 1  # Mandatory, int: First leadtime time step in months
+    ftime_max: 6  # Mandatory, int: Last leadtime time step in months
+  Region:
+    latmin: -90  # Mandatory, int: minimum latitude
+    latmax: 90  # Mandatory, int: maximum latitude
+    lonmin: 0  # Mandatory, int: minimum longitude
+    lonmax: 359.9  # Mandatory, int: maximum longitude
+  Regrid:
+    method: conservative  # conservative for prlr, bilinear for tas, psl, sfcWind
+    type: to_system
+  Workflow:
+    Calibration:
+      method: raw  # Mandatory, str: Calibration method. See docu.
+    Anomalies:
+      compute: yes
+      cross_validation: no
+    Skill:
+      metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr  # str: Skill metric or list of skill metrics. See docu.
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10], [9/10]]  # frac: Quantile thresholds.
+    Indicators:
+      index: no
+  ncores: 15  # Optional, int: number of cores, defaults to 1
+  remove_NAs:  # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: Scorecards  #S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/nmilders/scorecards_data/to_system/cross_validation/tercile_cross_val/ECMWF-SEAS5/prlr/
+  code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/
diff --git a/tas-tos_scorecards_data_loading.R b/tas-tos_scorecards_data_loading.R
new file mode 100644
index 0000000000000000000000000000000000000000..da124cd4af12fab20f7e4cde01def646310bea84
--- /dev/null
+++ b/tas-tos_scorecards_data_loading.R
@@ -0,0 +1,63 @@
+
+rm(list = ls()); gc()
+
+args <- commandArgs(trailingOnly = TRUE)
+
+setwd("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/")
+
+#source("modules/Loading/Loading.R")
+source("modules/Loading/Dev_Loading.R")
+source("modules/Anomalies/Anomalies.R")
+#source("modules/Calibration/Calibration.R")
+source("modules/Skill/Skill.R")
+source("modules/Saving/Saving.R")
+#source("modules/Visualization/Visualization.R")
+source("tools/prepare_outputs.R")
+
+recipe_file <- "recipes/atomic_recipes/recipe_test_multivar_nadia.yml"
+recipe <- prepare_outputs(recipe_file)
+
+## Run one job for each start month
+recipe$Analysis$Time$sdate <- paste0(sprintf("%02d", as.numeric(args)), '01')
+
+## Load datasets
+data <- load_datasets(recipe)
+
+
+################################################################################
+
+### For Testing ###
+#
+# lon <- attributes(data$hcst$coords$longitude)
+# lat <- attributes(data$hcst$coords$latitude)
+#
+#
+# source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R')
+# tas_tos_hcst <- mask_tas_tos(input_data = data$hcst, region = c(20, 40, 30, 50),
+#                              grid = 'r360x181', lon = lon, lat = lat,
+#                              lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL)
+#
+# tas_tos_obs <- mask_tas_tos(input_data = data$obs, region = c(0.1, 359.95, -90, 90),
+#                             grid = 'r360x181', lon = lon, lat = lat,
+#                             lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL)
+#
+#
+# source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R')
+# mask <- .load_mask(grid = 'r360x181', mask_path = NULL,
+#                    land_value = 0, sea_value = 1,
+#                    lon_dim = 'lon', lat_dim = 'lat', region = NULL)
+
+################################################################################
+
+## Compute anomalies
+anomalies <- compute_anomalies(recipe, data)
+
+## Compute skill metrics of the data
+skill_metrics <- compute_skill_metrics(recipe, anomalies)
+
+## Save data
+save_data(recipe, data, skill_metrics = skill_metrics)
+gc()
+
+## Plot metric maps
+## plot_data(recipe, anomalies, skill_metrics, significance = T)
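tas-tos_scorecards_data_loading.R takes the start month as a trailing command-line argument and builds the recipe start date from it, so one job can be launched per month. An illustrative invocation (the scheduler and loop around it are up to the user):

    # Rscript tas-tos_scorecards_data_loading.R 2
    # Inside the script, "2" becomes the sdate "0201":
    args <- "2"
    paste0(sprintf("%02d", as.numeric(args)), '01')  # "0201"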
diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R
index 9b46cce8e0d80e0727b137a32add29196229d54f..a5c95e0cd5330f50691e01b7e3d997ddc951003c 100644
--- a/tests/testthat/test-decadal_monthly_1.R
+++ b/tests/testthat/test-decadal_monthly_1.R
@@ -187,7 +187,7 @@ class(skill_metrics$rpss),
 )
 expect_equal(
 dim(skill_metrics$rpss),
-c(time = 3, latitude = 5, longitude = 4)
+c(var = 1, time = 3, latitude = 5, longitude = 4)
 )
 expect_equal(
 dim(skill_metrics$rpss_significance),
@@ -218,27 +218,27 @@ c('percentile_33', 'percentile_66', 'percentile_10', 'percentile_90')
 )
 expect_equal(
 dim(probs$probs$prob_b33),
-c(syear = 4, time = 3, latitude = 5, longitude = 4)
+c(var = 1, syear = 4, time = 3, latitude = 5, longitude = 4)
 )
 expect_equal(
 dim(probs$percentiles$percentile_33),
-c(time = 3, latitude = 5, longitude = 4)
+c(var = 1, time = 3, latitude = 5, longitude = 4)
 )
 expect_equal(
-as.vector(probs$probs$prob_b33[, 1, 2, 2]),
+as.vector(probs$probs$prob_b33[, , 1, 2, 2]),
 c(0.0, 0.5, 0.0, 1.0)
 )
 expect_equal(
-as.vector(probs$probs$prob_10_to_90[, 1, 2, 2]),
+as.vector(probs$probs$prob_10_to_90[, , 1, 2, 2]),
 c(1.0, 1.0, 0.5, 0.5)
 )
 expect_equal(
-as.vector(probs$percentiles$percentile_33[, 1, 2]),
+as.vector(probs$percentiles$percentile_33[, , 1, 2]),
 c(293.7496, 287.4263, 285.8295),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(probs$percentiles$percentile_10[, 1, 2]),
+as.vector(probs$percentiles$percentile_10[, , 1, 2]),
 c(293.1772, 286.9533, 284.7887),
 tolerance = 0.0001
 )
diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R
index 01fe0440c3f218b8c78389c48ac3b489e3e51793..2b6051097abc9807266fe7920211f31e26d57f2a 100644
--- a/tests/testthat/test-decadal_monthly_2.R
+++ b/tests/testthat/test-decadal_monthly_2.R
@@ -37,7 +37,7 @@ plot_data(recipe = recipe, data = calibrated_data,
 ))})
-outdir <- get_dir(recipe)
+outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName)
 #======================================
@@ -171,11 +171,11 @@ class(skill_metrics$rpss_specs),
 "array"
 )
 expect_equal(
-all(unlist(lapply(lapply(skill_metrics, dim), all.equal, c(time = 14, latitude = 8, longitude = 5)))),
+all(unlist(lapply(lapply(skill_metrics, dim), all.equal, c(var = 1, time = 14, latitude = 8, longitude = 5)))),
 TRUE
 )
 expect_equal(
-as.vector(skill_metrics$rpss_specs[6:8, 1, 2]),
+as.vector(skill_metrics$rpss_specs[, 6:8, 1, 2]),
 c(-0.3333333, 0.1666667, -0.3333333),
 tolerance = 0.0001
 )
@@ -184,26 +184,26 @@ tolerance = 0.0001
 #TRUE
 #)
 expect_equal(
-as.vector(skill_metrics$enscorr_specs[6:8, 1, 2]),
+as.vector(skill_metrics$enscorr_specs[, 6:8, 1, 2]),
 c(0.4474382, 0.1026333, 0.4042823),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(skill_metrics$frps_specs[6:8, 1, 2]),
+as.vector(skill_metrics$frps_specs[, 6:8, 1, 2]),
 c(0.4444444, 0.2222222, 0.4444444),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(skill_metrics$frpss_specs[4:7, 1, 5]),
+as.vector(skill_metrics$frpss_specs[, 4:7, 1, 5]),
 c( 1.0, -0.5, -0.5, 0.5),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(skill_metrics$bss10_specs[6:8, 1, 2]),
+as.vector(skill_metrics$bss10_specs[, 6:8, 1, 2]),
 c(0.5, -0.5, -0.5),
 )
 expect_equal(
-as.vector(skill_metrics$frps[6:8, 1, 2]),
+as.vector(skill_metrics$frps[, 6:8, 1, 2]),
 c(0.4444444, 0.2222222, 0.4444444),
 tolerance = 0.0001
 )
@@ -223,19 +223,19 @@ c('percentile_33', 'percentile_66')
 )
 expect_equal(
 dim(probs$probs$prob_b33),
-c(syear = 3, time = 14, latitude = 8, longitude = 5)
+c(var = 1, syear = 3, time = 14, latitude = 8, longitude = 5)
 )
 expect_equal(
 dim(probs$percentiles$percentile_33),
-c(time = 14, latitude = 8, longitude = 5)
+c(var = 1, time = 14, latitude = 8, longitude = 5)
 )
 expect_equal(
-as.vector(probs$probs$prob_b33[, 1, 2, 2]),
+as.vector(probs$probs$prob_b33[, , 1, 2, 2]),
 c(0.0, 0.3333333, 0.6666667),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(probs$percentiles$percentile_33[1:3, 1, 2]),
+as.vector(probs$percentiles$percentile_33[, 1:3, 1, 2]),
 c(271.7508, 273.1682, 274.1937),
 tolerance = 0.0001
 )
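The test changes above and below all follow from a single cause: skill metric and probability arrays now carry a leading var dimension of length 1, so every expected dimension vector gains var = 1 and every positional subset gains a leading comma. A self-contained illustration:

    # Prepending var = 1 leaves the data order untouched; only the
    # subscripts shift by one position.
    x_old <- array(1:24, dim = c(time = 4, latitude = 3, longitude = 2))
    x_new <- array(x_old, dim = c(var = 1, time = 4, latitude = 3, longitude = 2))
    identical(as.vector(x_old[2:3, 1, 2]),
              as.vector(x_new[, 2:3, 1, 2]))  # TRUE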
diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R
index 988172c6ad6ed3a21571816874fc9aa7c023568a..7232ebfdcc8ac6ce38da9e9dbc8328b79fe38703 100644
--- a/tests/testthat/test-decadal_monthly_3.R
+++ b/tests/testthat/test-decadal_monthly_3.R
@@ -140,15 +140,15 @@ class(skill_metrics[[1]]),
 "array"
 )
 expect_equal(
-all(unlist(lapply(lapply(skill_metrics, dim)[1:2], all.equal, c(time = 3, latitude = 25, longitude = 16)))),
+all(unlist(lapply(lapply(skill_metrics, dim)[1:2], all.equal, c(var = 1, time = 3, latitude = 25, longitude = 16)))),
 TRUE
 )
 expect_equal(
-all(unlist(lapply(lapply(skill_metrics, dim)[3:4], all.equal, c(ensemble = 3, time = 3, latitude = 25, longitude = 16)))),
+all(unlist(lapply(lapply(skill_metrics, dim)[3:4], all.equal, c(ensemble = 3, var = 1, time = 3, latitude = 25, longitude = 16)))),
 TRUE
 )
 expect_equal(
-as.vector(skill_metrics$bss10[, 1, 2]),
+as.vector(skill_metrics$bss10[, , 1, 2]),
 c(-0.1904762, -0.1904762, -0.1904762),
 tolerance = 0.0001
 )
@@ -157,7 +157,7 @@ any(as.vector(skill_metrics$bss10_significance)),
 FALSE
 )
 expect_equal(
-as.vector(skill_metrics$corr[2, , 1, 2]),
+as.vector(skill_metrics$corr[2, , , 1, 2]),
 c(-0.2015265, 0.4635463, -0.1019575),
 tolerance = 0.0001
 )
@@ -177,19 +177,19 @@ c('percentile_33', 'percentile_66')
 )
 expect_equal(
 dim(probs$probs$prob_b33),
-c(syear = 4, time = 3, latitude = 25, longitude = 16)
+c(var = 1, syear = 4, time = 3, latitude = 25, longitude = 16)
 )
 expect_equal(
 dim(probs$percentiles$percentile_33),
-c(time = 3, latitude = 25, longitude = 16)
+c(var = 1, time = 3, latitude = 25, longitude = 16)
 )
 expect_equal(
-as.vector(probs$probs$prob_b33[, 1, 2, 2]),
+as.vector(probs$probs$prob_b33[, , 1, 2, 2]),
 c(0.0, 0.3333333, 0.3333333, 0.6666667),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(probs$percentiles$percentile_33[1:3, 1, 2]),
+as.vector(probs$percentiles$percentile_33[, 1:3, 1, 2]),
 c(278.1501, 279.5226, 282.0237),
 tolerance = 0.0001
 )
diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R
index da0e789bd0e7acd50ca0556b5e9387c9e04d58aa..8a51cc86af6f2c91e92b70da1a2847a069e33af0 100644
--- a/tests/testthat/test-seasonal_daily.R
+++ b/tests/testthat/test-seasonal_daily.R
@@ -155,10 +155,10 @@ class(skill_metrics$enscorr_specs),
 )
 expect_equal(
 dim(skill_metrics$enscorr_specs),
-c(time = 31, latitude = 4, longitude = 4)
+c(var = 1, time = 31, latitude = 4, longitude = 4)
 )
 expect_equal(
-skill_metrics$enscorr_specs[1:3, 1, 1],
+skill_metrics$enscorr_specs[, 1:3, 1, 1],
 c(0.7509920, 0.6514916, 0.5118371),
 tolerance=0.0001
 )
diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R
index 83b5ceab15c0ccc2c4dafa7ba593aa6a66be6e97..0e311424b5ef4db3d49f80e9a980a273e42fa8e0 100644
--- a/tests/testthat/test-seasonal_monthly.R
+++ b/tests/testthat/test-seasonal_monthly.R
@@ -41,7 +41,7 @@ plot_data(recipe = recipe, data = calibrated_data,
 skill_metrics = skill_metrics, probabilities = probs, significance = T)
 ))})
-outdir <- get_dir(recipe)
+outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName)
 # ------- TESTS --------
@@ -193,19 +193,19 @@ class(skill_metrics$rpss),
 )
 expect_equal(
 dim(skill_metrics$rpss),
-c(time = 3, latitude = 3, longitude = 3)
+c(var = 1, time = 3, latitude = 3, longitude = 3)
 )
 expect_equal(
 dim(skill_metrics$rpss_significance),
 dim(skill_metrics$rpss)
 )
 expect_equal(
-as.vector(skill_metrics$rpss[, 2, 3]),
+as.vector(skill_metrics$rpss[, , 2, 3]),
 c(-0.2918857, -1.4809143, -1.3842286),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(skill_metrics$rpss_significance[, 2, 3]),
+as.vector(skill_metrics$rpss_significance[, , 2, 3]),
 rep(FALSE, 3)
 )
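Both updated test files now pass the full varName vector to get_dir(), matching the per-variable output directories introduced in Visualization.R (and the outdir[var] indexing in plot_skill_metrics.R). A hypothetical illustration of the assumed return shape; the real paths depend on the recipe's output_dir:

    # Assumed: one directory per variable, created recursively as in plot_data().
    outdir <- c(tas = "/path/to/outputs/plots/tas/",
                tos = "/path/to/outputs/plots/tos/")
    for (directory in outdir) {
      dir.create(directory, showWarnings = FALSE, recursive = TRUE)
    }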
diff --git a/tools/data_summary.R b/tools/data_summary.R
index 92dcb353c751abfae966868e9f6d1b8549eabd68..714c5a0b39b61631fc0f0bab1abd31e0c661aec1 100644
--- a/tools/data_summary.R
+++ b/tools/data_summary.R
@@ -1,5 +1,7 @@
 # Print a summary of the loaded data for the user, for each object.
-# object: hindcast, forecast or reference data in s2dv_cube format.
+# data_cube: An s2dv_cube returned by one of the functions in Auto-S2S
+# recipe: The Auto-S2S recipe, as returned by read_yaml()
+
 ## TODO: Adapt to daily/subseasonal cases
 ## TODO: Add check for missing files/NAs by dimension
@@ -13,9 +15,11 @@ data_summary <- function(data_cube, recipe) {
   }
   months <- unique(format(as.Date(data_cube$attrs$Dates), format = '%B'))
   months <- paste(as.character(months), collapse=", ")
-  sdate_min <- format(min(as.Date(data_cube$attrs$Dates)), format = date_format)
-  sdate_max <- format(max(as.Date(data_cube$attrs$Dates)), format = date_format)
-  # Create log instance and sink output to logfile and terminal
+  sdate_min <- format(min(as.Date(data_cube$attrs$Dates)),
+                      format = date_format)
+  sdate_max <- format(max(as.Date(data_cube$attrs$Dates)),
+                      format = date_format)
+  # Log the summary
   info(recipe$Run$logger, "DATA SUMMARY:")
   info(recipe$Run$logger, paste(object_name, "months:", months))
   info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to",
@@ -26,11 +30,18 @@ data_summary <- function(data_cube, recipe) {
   for (i in output_string) {
     info(recipe$Run$logger, i)
   }
+  # Print statistical summary of the data for every variable
   info(recipe$Run$logger,
       paste0("Statistical summary of the data in ", object_name, ":"))
-  output_string <- capture.output(summary(data_cube$data))
-  for (i in output_string) {
-    info(recipe$Run$logger, i)
+  for (var_index in 1:data_cube$dims[['var']]) {
+    info(recipe$Run$logger,
+         paste("Variable:", data_cube$attrs$Variable$varName[var_index]))
+    output_string <- capture.output(summary(Subset(data_cube$data,
+                                                   along = "var",
+                                                   indices = var_index)))
+    for (i in output_string) {
+      info(recipe$Run$logger, i)
+    }
   }
   info(recipe$Run$logger, "---------------------------------------------")
   invisible(gc())
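The reworked summary loop in data_summary() assumes the cube exposes dims[['var']] and relies on ClimProjDiags::Subset() to slice one variable at a time. A minimal standalone sketch of the same pattern, with a toy array standing in for a real s2dv_cube:

    library(ClimProjDiags)
    # Toy stand-in for data_cube$data with two variables.
    cube_data <- array(rnorm(2 * 5 * 3),
                       dim = c(var = 2, syear = 5, ensemble = 3))
    for (var_index in 1:dim(cube_data)[["var"]]) {
      # One summary block per variable, as logged by data_summary().
      print(summary(Subset(cube_data, along = "var", indices = var_index)))
    }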