diff --git a/conf/archive.yml b/conf/archive.yml index 0b643ae92d93bf55306b59c815c8daeddb8a3598..cca68c22bd5d2d42b2af9964a1f0cbc44d77874b 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -21,13 +21,14 @@ esarchive: "ta850":"monthly_mean/ta850_f12h/", "g300":"monthly_mean/g300_f12h/", "g500":"monthly_mean/g500_f12h/", "g850":"monthly_mean/g500_f12h/", "tdps":"monthly_mean/tdps_f6h/", "psl":"monthly_mean/psl_f6h/", - "tos":"monthly_mean/tos_f6h/"} + "tos":"monthly_mean/tos_f6h/", "sic":"monthly_mean/sic_f24h/"} nmember: fcst: 51 hcst: 25 calendar: "proleptic_gregorian" time_stamp_lag: "0" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + land_sea_mask: "/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc" ECMWF-SEAS5.1: name: "ECMWF SEAS5 (v5.1)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -165,9 +166,11 @@ esarchive: "ta300":"montly_mean/ta300_f1h-r1440x721cds/", "ta500":"monthly_mean/ta500_f1h-r1440x721cds/", "ta850":"monthly_mean/ta850_f1h-r1440x721cds/", - "tos":"monthly_mean/tos_f1h-r1440x721cds/"} + "tos":"monthly_mean/tos_f1h-r1440x721cds/", + "sic":"monthly_mean/sic_f1h-r1440x721cds/"} calendar: "standard" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" ERA5-Land: name: "ERA5-Land" institution: "European Centre for Medium-Range Weather Forecasts" diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index 0bfbffe002c81d389207a54b8fb8687f991fd33e..c440eac1baa910973cf45e6918802135d2310bd4 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -204,13 +204,16 @@ vars: long_name: "Surface Upward Sensible Heat Flux" standard_name: "surface_upward_sensible_heat_flux" accum: no -## Adding new variable - tasanomaly: + tas-tos: units: "K" - long_name: "Near-Surface Air Temperature Anomaly" - standard_name: "air_temperature_anom" 
+ long_name: "Blended air - sea temperature" + standard_name: "air_sea_temperature" + accum: no + sic: + units: "1" + long_name: "Sea Ice Concentration" + standard_name: "sea_ice_concentration" accum: no - # Coordinates diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 315ef78dae696035ac94e09db5baaf391cb62a79..43dea0651fc7eb02cb8cb69a1d44d580903a4666 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -12,8 +12,13 @@ Loading <- function(recipe) { # Case: esarchive time_horizon <- tolower(recipe$Analysis$Horizon) if (time_horizon == "seasonal") { - source("modules/Loading/R/load_seasonal.R") - data <- load_seasonal(recipe) + if(recipe$Analysis$Variables$name == 'tas-tos') { + source("modules/Loading/R/load_tas_tos.R") + data <- load_tas_tos(recipe) + } else { + source("modules/Loading/R/load_seasonal.R") + data <- load_seasonal(recipe) + } } else if (time_horizon == "decadal") { source("modules/Loading/R/load_decadal.R") data <- load_decadal(recipe) diff --git a/modules/Loading/R/load_tas_tos.R b/modules/Loading/R/load_tas_tos.R new file mode 100644 index 0000000000000000000000000000000000000000..5e6b88370da9183aa6c18aa9bd5ff9e8cd8ff1ff --- /dev/null +++ b/modules/Loading/R/load_tas_tos.R @@ -0,0 +1,513 @@ +source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") +# Load required libraries/funs +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source('modules/Loading/R/mask_tas_tos.R') + + +load_tas_tos <- function(recipe) { + + # ------------------------------------------- + # Set params ----------------------------------------- + + hcst.inityear <- recipe$Analysis$Time$hcst_start + hcst.endyear <- recipe$Analysis$Time$hcst_end + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + ref.name <- 
recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + + variable <- c("tas", "tos", "sic") + store.freq <- recipe$Analysis$Variables$freq + + if(is.null(recipe$Analysis$Variables$sic_threshold)){ + sic.threshold = 0.15 + } else { + sic.threshold <- recipe$Analysis$Variables$sic_threshold + } + + data_order <- c('dat', 'var', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble') + + + # get sdates array + ## LOGGER: Change dates2load to extract logger from recipe? + sdates <- dates2load(recipe, recipe$Run$logger) + + idxs <- NULL + idxs$hcst <- get_timeidx(sdates$hcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + + if (!(is.null(sdates$fcst))) { + idxs$fcst <- get_timeidx(sdates$fcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + } + + # get esarchive datasets dict: + archive <- read_yaml("conf/archive.yml")[[recipe$Run$filesystem]] + exp_descrip <- archive$System[[exp.name]] + + freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]]) + reference_descrip <- archive$Reference[[ref.name]] + freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]]) + obs.dir <- reference_descrip$src + fcst.dir <- exp_descrip$src + hcst.dir <- exp_descrip$src + fcst.nmember <- exp_descrip$nmember$fcst + hcst.nmember <- exp_descrip$nmember$hcst + + var_dir_obs <- reference_descrip[[store.freq]][variable] + var_dir_exp <- exp_descrip[[store.freq]][variable] + + # ----------- + obs.path <- paste0(archive$src, obs.dir, "$var_dir$", + "$var$_$file_date$.nc") + + hcst.path <- paste0(archive$src, hcst.dir, "$var_dir$", + "$var$_$file_date$.nc") + + fcst.path <- paste0(archive$src, hcst.dir, "$var_dir$", + "/$var$_$file_date$.nc") + + # Define regrid parameters: + #------------------------------------------------------------------- + regrid_params <- get_regrid_params(recipe, archive) + + # Longitude circular sort and latitude check 
+ #------------------------------------------------------------------- + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean"){ + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Load hindcast data without regrid + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = variable, + var_dir = var_dir_exp, + file_date = sdates$hcst, + time = idxs$hcst, + var_dir_depends = 'var', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:hcst.nmember), + metadata_dims = 'var', # change to just 'var'? + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(hcst))) { + hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (recipe$Analysis$Variables$freq == "daily_mean") { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, 
"Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + + # Define sea-ice grid points based of sea-ice concentration threshold + ice_hcst <- hcst[,3,,,,,,,] >= sic.threshold + + # Replace Tos with Tas for data points with sea ice + hcst[,2,,,,,,,][ice_hcst] <- hcst[,1,,,,,,,][ice_hcst] + + + # Convert hcst to s2dv_cube object + ## TODO: Give correct dimensions to $Dates + ## (sday, sweek, syear instead of file_date) + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + # Combine hcst tas and tos data + #------------------------------------------------------------------- + + hcst <- mask_tas_tos(input_data = hcst, region = c(lons.min, lons.max,lats.min, lats.max), + mask_path = archive$System[[exp.name]]$land_sea_mask, lsm_var_name = 'lsm', + lon = hcst$coords$longitude, lat = hcst$coords$latitude, + lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) + + hcst$dims[['var']] <- dim(hcst$data)[['var']] + hcst$attrs$Variable$varName <- 'tas-tos' + + hcst$data <- Reorder(hcst$data, data_order) + + # Load forecast data without regrid + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + # the call uses file_date instead of fcst_syear so that it can work + # with the daily case and the current version of startR not allowing + # multiple dims split + + fcst <- Start(dat = fcst.path, + var = variable, + var_dir = var_dir_exp, + var_dir_depends = 'var', + file_date = sdates$fcst, + time = idxs$fcst, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + 
ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + if ("var_dir" %in% names(dim(fcst))) { + fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (recipe$Analysis$Variables$freq == "daily_mean") { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + + # Define sea-ice grid points based of sea-ice concentration threshold + ice_fcst <- fcst[,3,,,,,,,] >= sic.threshold + + # Replace Tos with Tas for datapoints with sea ice + fcst[,2,,,,,,,][ice_fcst] <- fcst[,1,,,,,,,][ice_fcst] + + + # Convert fcst to s2dv_cube + fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$attrs$Dates[] <- + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + # Combine fcst tas and tos data + #------------------------------------------------------------------- + + fcst <- mask_tas_tos(input_data = fcst, region = c(lons.min, lons.max,lats.min, lats.max), + mask_path = archive$System[[exp.name]]$land_sea_mask, lsm_var_name 
= 'lsm', + lon = fcst$coords$longitude, lat = fcst$coords$latitude, + lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) + + fcst$dims[['var']] <- dim(fcst$data)[['var']] + fcst$attrs$Variable$varName <- 'tas-tos' + + fcst$data <- Reorder(fcst$data, data_order) + + } else { + fcst <- NULL + } + + # Load obs data without regrid + #------------------------------------------------------------------- + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. + dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Separate Start() call for monthly vs daily data + if (store.freq == "monthly_mean") { + + dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") + dim(dates_file) <- dim(dates) + + + # Define variables for blended tas-tos datasets + if (recipe$Analysis$Datasets$Reference$name == 'BEST'){ + variable <- 'tas' + var_dir_obs <- reference_descrip[[store.freq]][variable] + } + + obs <- Start(dat = obs.path, + var = variable, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + + } else if (store.freq == "daily_mean") { + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + var_dir = 
var_dir_obs, + var_dir_depends = 'var', + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(obs))) { + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") + } + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + if(!recipe$Analysis$Datasets$Reference$name %in% c('HadCRUT4','HadCRUT5','BEST','GISTEMPv4')){ + + # Define sea-ice grid points based of sea-ice concentration threshold + ice_obs <- (obs[,3,,,,,,,]) >= sic.threshold + + # Replace NA values with False + ice_obs[is.na(ice_obs)] <- FALSE + + # Replace Tos with Tas for datapoints with sea ice + obs[,2,,,,,,,][ice_obs] <- obs[,1,,,,,,,][ice_obs] + } + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + + # Combine obs tas and tos data + #------------------------------------------------------------------- + ## TODO: Ask about this list + if(!recipe$Analysis$Datasets$Reference$name %in% c('HadCRUT4','HadCRUT5','BEST','GISTEMPv4')){ + + obs <- mask_tas_tos(input_data = obs, region = c(lons.min, lons.max,lats.min, lats.max), + mask_path = archive$Reference[[ref.name]]$land_sea_mask, lsm_var_name = 'sftof', + lon = obs$coords$longitude, lat = obs$coords$latitude, + lon_dim = 'longitude', 
lat_dim = 'latitude', ncores = NULL) + + obs$dims[['var']] <- dim(obs$data)[['var']] + obs$attrs$Variable$varName <- 'tas-tos' + + } ## close if on reference name + + obs$data <- Reorder(obs$data, data_order) + + + # Regrid data + #------------------------------------------------------------------- + + # Regrid reference to system grid: + if(recipe$Analysis$Regrid$type == 'to_system'){ + + aux <- CDORemap(data_array = obs$data, ## Not regridding to desired grid when latitudes are ordered descending + lons = obs$coords$longitude, + lats = obs$coords$latitude, + grid = regrid_params$obs.gridtype, + method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, + crop = c(lons.min, lons.max,lats.min, lats.max), + force_remap = TRUE) + + obs$data <- aux$data_array + obs$coords$longitude <- aux$lons + obs$coords$latitude <- aux$lats + obs$dims['longitude'] <- dim(aux$data_array)['longitude'] + obs$dims['latitude'] <- dim(aux$data_array)['latitude'] + rm(aux) + } + + # Regrid system to reference grid: + if(recipe$Analysis$Regrid$type == 'to_reference'){ + + aux <- CDORemap(data_array = hcst$data, + lons = hcst$coords$longitude, lats = hcst$coords$latitude, + grid = regrid_params$fcst.gridtype, method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, crop = TRUE, + force_remap = TRUE) + + hcst$data <- aux$data_array + hcst$coords$longitude <- aux$lons + hcst$coords$latitude <- aux$lats + hcst$dims['longitude'] <- dim(aux$data_array)['longitude'] + hcst$dims['latitude'] <- dim(aux$data_array)['latitude'] + rm(aux) + + if (!is.null(recipe$Analysis$Time$fcst_year)) { + aux <- CDORemap(data_array = fcst$data, + lons = fcst$coords$longitude, + lats = fcst$coords$latitude, + grid = regrid_params$fcst.gridtype, + method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, crop = TRUE, + force_remap = TRUE) + + fcst$data <- aux$data_array + fcst$coords$longitude <- aux$lons + fcst$coords$latitude <- aux$lats + fcst$dims['longitude'] <- dim(aux$data_array)['longitude'] + 
fcst$dims['latitude'] <- dim(aux$data_array)['latitude'] + rm(aux) + } + } + + # Regrid all data to user defined grid: + if(!recipe$Analysis$Regrid$type %in% c('to_system','to_reference')){ + + aux <- CDORemap(data_array = hcst$data, + lons = hcst$coords$longitude, lats = hcst$coords$latitude, + grid = regrid_params$fcst.gridtype, method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, crop = TRUE, + force_remap = TRUE) + + hcst$data <- aux$data_array + hcst$coords$longitude <- aux$lons + hcst$coords$latitude <- aux$lats + hcst$dims['longitude'] <- dim(aux$data_array)['longitude'] + hcst$dims['latitude'] <- dim(aux$data_array)['latitude'] + rm(aux) + + if (!is.null(recipe$Analysis$Time$fcst_year)) { + aux <- CDORemap(data_array = fcst$data, + lons = fcst$coords$longitude, + lats = fcst$coords$latitude, + grid = regrid_params$fcst.gridtype, + method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, crop = TRUE, + force_remap = TRUE) + + fcst$data <- aux$data_array + fcst$coords$longitude <- aux$lons + fcst$coords$latitude <- aux$lats + fcst$dims['longitude'] <- dim(aux$data_array)['longitude'] + fcst$dims['latitude'] <- dim(aux$data_array)['latitude'] + rm(aux) + } + + aux <- CDORemap(data_array = obs$data, + lons = obs$coords$longitude, + lats = obs$coords$latitude, + grid = regrid_params$obs.gridtype, + method = recipe$Analysis$Regrid$method, + avoid_writes = TRUE, crop = TRUE, + force_remap = TRUE) + + obs$data <- aux$data_array + obs$coords$longitude <- aux$lons + obs$coords$latitude <- aux$lats + obs$dims['longitude'] <- dim(aux$data_array)['longitude'] + obs$dims['latitude'] <- dim(aux$data_array)['latitude'] + rm(aux) + } + + + # Check for consistency between hcst and obs grid + if (!(recipe$Analysis$Regrid$type == 'none')) { + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + 
error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + + } + } + + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/modules/Loading/R/mask_tas_tos.R b/modules/Loading/R/mask_tas_tos.R index a2eeb0b610e3cdbd5173ef6770442913dfab03e0..cc6937957a4aa88993b95b905765622e852f9560 100644 --- a/modules/Loading/R/mask_tas_tos.R +++ b/modules/Loading/R/mask_tas_tos.R @@ -2,29 +2,28 @@ library(multiApply) library(startR) library(s2dv) -mask_tas_tos <- function(input_data, grid, lon, lat, region = region , - lon_dim = 'lon', lat_dim = 'lat', ncores = NULL){ +mask_tas_tos <- function(input_data, mask_path, lsm_var_name = lsm_var_name, lon, lat, region = region, + lon_dim = 'lon', lat_dim = 'lat', + ncores = NULL){ - - mask <- .load_mask(grid = grid, lon_dim = lon_dim, lat_dim = lat_dim, + + mask <- .load_mask(mask_path = mask_path, lsm_var_name = lsm_var_name, lon_dim = lon_dim, lat_dim = lat_dim, sea_value = 1, land_value = 0, region = region) - ## TO 
DO: improve the check and correct lats stopifnot(all(lon == mask$lon)) stopifnot(max(abs(as.numeric(round(lat,2) - round(mask$lat,2)))) < 0.1) # stopifnot(all(lat == mask$lat)) - + tas <- Subset(input_data$data, along = 'var', indices = 1) tos <- Subset(input_data$data, along = 'var', indices = 2) - tas_tos <- multiApply::Apply(data = list(tas, tos), - target_dims = c(lon_dim, lat_dim), - fun = .mask_tas_tos, - mask = mask$mask, - sea_value = 1, - ncores = ncores)$output1 - input_data$data <- tas_tos - + input_data$data <- multiApply::Apply(data = list(tas, tos), + target_dims = c(lon_dim, lat_dim), + fun = .mask_tas_tos, + mask = mask$mask, + sea_value = 1, + ncores = ncores)$output1 + return(input_data) } @@ -33,52 +32,32 @@ mask_tas_tos <- function(input_data, grid, lon, lat, region = region , return(data_tas) } -.load_mask <- function(grid, mask_path = NULL, land_value = 0, sea_value = 1, +.load_mask <- function(mask_path = mask_path, lsm_var_name = lsm_var_name, land_value = 0, sea_value = 1, lon_dim = 'lon', lat_dim = 'lat', region = region){ - if (is.null(mask_path)){ - mask_sea_land_path <- '/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc' ## /esarchive/recon/ecmwf/era5land/constant/lsm-r3600x1801cds/lsm.nc' - } else if (is.character(mask_path)){ - mask_sea_land_path <- mask_path - } else { - stop("mask_path must be NULL (to use the default mask and interpolate it to - the specified grid) or a string with the mask's path you want to load") - } - lons.min <- region[1] lons.max <- region[2] lats.min <- region[3] lats.max <- region[4] - - ## TO DO: - ## Fix region filter for lat and lon - ## Fix 'number' parameter for mask - - data <- startR::Start(dat = mask_sea_land_path, - var = 'lsm', - lon = 'all', - lat = 'all', - number = 1, ## needed to add for ensemble member dimension of lsm.nc - # lon = values(list(lons.min, lons.max)), - # lat = values(list(lats.min, lats.max)), - transform = CDORemapper, transform_extra_cells = 2, - transform_params = 
list(grid = grid, method = 'con', crop = region), - transform_vars = c('lat','lon'), + data <- startR::Start(dat = mask_path, + var = lsm_var_name, + lon = values(list(lons.min, lons.max)), + lat = values(list(lats.min, lats.max)), return_vars = list(lat = NULL, lon = NULL), synonims = list(lon = c('lon','longitude'), lat = c('lat','latitude')), - lat_reorder = Sort(decreasing = FALSE), - lon_reorder = CircularSort(0,359.9), + lat_reorder = Sort(decreasing = TRUE), + lon_reorder = CircularSort(0,360), num_procs = 1, retrieve = TRUE) - + mask <- list(mask = drop(data), lon = as.numeric(attr(data,'Variables')$common$lon), lat = as.numeric(attr(data,'Variables')$common$lat)) mask$mask[data <= 0.5] <- sea_value mask$mask[data > 0.5] <- land_value - names(dim(mask$mask)) <- c(lon_dim, lat_dim) + names(dim(mask$mask)) <- c(lon_dim, lat_dim) return(mask) } diff --git a/recipes/atomic_recipes/recipe_tas-tos_nadia.yml b/recipes/atomic_recipes/recipe_tas-tos_nadia.yml new file mode 100644 index 0000000000000000000000000000000000000000..466659a228e2a881f8a7418db8bbae099fefb724 --- /dev/null +++ b/recipes/atomic_recipes/recipe_tas-tos_nadia.yml @@ -0,0 +1,55 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas-tos + sic_threshold: 0.15 ## sea ice threshold for tas-tos blending, default = 0.15 + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 + Multimodel: False + Reference: + name: BEST + Time: + sdate: '0101' + #fcst_year: + hcst_start: '2014' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 1 + Region: + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: raw + save: 'none' + Anomalies: + compute: yes + cross_validation: no + save: 'none' + Skill: + metric: mean_bias EnsCorr RPS RPSS CRPS CRPSS enssprerr + cross_validation: no + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'none' + Indicators: + index: no + ncores: 15 + remove_NAs: yes + Output_format: scorecards +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nmilders/scorecards_data/test/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite_tas-tos/ diff --git a/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml b/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml new file mode 100644 index 0000000000000000000000000000000000000000..41048db127759ba614115fdc77eeb9ab398a95f6 --- /dev/null +++ b/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml @@ -0,0 +1,56 @@ +Description: + Author: N. 
Milders + +Analysis: + Horizon: Seasonal + Variables: + name: tas-tos + sic_threshold: 0.15 + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '0101' + fcst_year: '2018' + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + save: 'none' + Calibration: + method: mse_min + save: 'all' + Skill: + metric: RPSS CRPSS EnsCorr Corr_individual_members Enscorr_specs + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean + multi_panel: yes + projection: cylindrical_equidistant + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/ ##/esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/testthat/test-seasonal_monthly_tas-tos.R b/tests/testthat/test-seasonal_monthly_tas-tos.R new file mode 100644 index 0000000000000000000000000000000000000000..1345f63dfb1190988413905221cbda99c2f37c74 --- /dev/null +++ b/tests/testthat/test-seasonal_monthly_tas-tos.R @@ -0,0 +1,98 @@ +context("Seasonal monthly data") + +source("./modules/Loading/Loading.R") + +recipe_file <- "tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + + +# ------- TESTS -------- + +test_that("1. 
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 4, longitude = 4, ensemble = 51) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 1, sweek = 1, syear = 4, time = 3) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1:2,1,2,3]), +c(285.5869, 287.8836, 285.9362, 289.0483), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +290.1099, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(283.2845, 299.7845), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("1993-01-31 18:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("1994-01-31 18:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("1993-02-28 18:00:00", tz = 'UTC') +) +expect_equal( +(data$obs$attrs$Dates)[10], +as.POSIXct("1994-03-15 12:00:00", tz = 'UTC') +) + +}) + +# Delete files +unlink(recipe$Run$output_dir, recursive = T) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index e4c59a5592724837a65f71ff1fe475f8e33780d5..69a7dd4cd4c411e3acd5dcf7ae49ce2c12991362 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -62,6 +62,18 @@ check_recipe <- function(recipe) { } else { archive <- NULL } + # Check variable parameters + variables <- tolower(strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]) + # Sea-ice threshold check + if 
(("tas-tos" %in% variables) && + (!is.null(recipe$Analysis$Variables$sic_threshold))) { + if (!is.numeric(recipe$Analysis$Variables$sic_threshold) || + !dplyr::between(recipe$Analysis$Variables$sic_threshold, 0, 1)) { + error(recipe$Run$logger, + paste("The element Analysis:Variables:sic_threshold must be a", + "numeric value between 0 and 1.")) + } + } # Check system names if (!is.null(archive)) { if (!all(recipe$Analysis$Datasets$System$name %in% names(archive$System))) { diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index fa4d23ed13190021478009dee6a44582f451d9fa..54e8209569afd34a4b14bbf5b49bcb069ede1c87 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -89,6 +89,10 @@ prepare_outputs <- function(recipe_file, warn(recipe$Run$logger, "Filesystem not specified in the recipe. Setting it to 'esarchive'.") } + # Restructure the recipe to make the atomic recipe more readable + if (restructure) { + recipe <- restructure_recipe(recipe) + } # Run recipe checker if (disable_checks) { warn(recipe$Run$logger, @@ -96,9 +100,5 @@ prepare_outputs <- function(recipe_file, } else { check_recipe(recipe) } - # Restructure the recipe to make the atomic recipe more readable - if (restructure) { - recipe <- restructure_recipe(recipe) - } return(recipe) }