diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index e41d46a035c8f4dd30da56544ca62c3343792e5c..28ff6028f04083362ed749c807e8f2f582ad86af 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -44,6 +44,9 @@ archive: src: hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" + first_dcppB_syear: 2021 +# hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" +# fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" # src: {"1960:2020": "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/", # "2021:2021": "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/"} monthly_mean: @@ -56,8 +59,7 @@ archive: grid: {"tas":"gr", "pr":"gr", "psl":"gr", "clt":"gr", "hfls":"gr", "hurs":"gr", "huss":"gr", "rsds":"gr", "rsut":"gr", "ta":"gr", "tasmax":"gr", "tosa":"gr", "ua":"gr", "va":"gr", "zg":"gr", - "evspsbl":"gr", "hfss":"gr", "hursmin":"gr", "rlut":"gr", - "rsdt":"gr", "sfcWind":"gr", "tasmin":"gr", "ts":"gr", "uas":"gr", + "vas":"gr"} version: {"tas":"v20210910", "pr":"v20210910", "psl":"v20210910", "clt":"v20210910", "hurs":"v20210910", "huss":"v20210910", "rsds":"v20210910", "rsut":"v20210910", "ta":"v20210910", @@ -78,6 +80,7 @@ archive: src: hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast/" + first_dcppB_syear: 2019 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"} grid: {"tas":"gr", "psl":"gr", "pr":"gr", "ts":"gr", "tos":"gr"} @@ -115,6 +118,7 @@ archive: src: hcst: "exp/canesm5/cmip6-dcppA-hindcast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/" fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/" + first_dcppB_syear: 2020 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} @@ -150,6 +154,7 @@ archive: src: hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppB-forecast/" + first_dcppB_syear: 2020 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "prc":"Amon", "ts":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn", "prc":"gn", "ts":"gn"} @@ -283,7 +288,9 @@ archive: # ---- NorCPM1-i2: - src: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" + src: + hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" + fcst: monthly_mean: table: {"pr":"Amon", "psl":"Amon"} grid: {"pr":"gn", "psl":"gn"} diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index f94d134315bfc8f144a738d2d8a645a6ebc14f12..6ff9023a52d14dff562d182f8c61ca1df8e3c774 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -9,6 +9,7 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") #source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R") # Load required libraries/funs source("modules/Loading/get_daily_time_ind.R") +source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") source("tools/libs.R") @@ -38,17 +39,18 @@ 
load_datasets <- function(recipe_file) { # change to: sdates <- dates2load(recipe, logger) sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015 - sdates_fcst <- as.numeric(recipe$Analysis$Time$fcst) + sdates_fcst <- recipe$Analysis$Time$fcst #NOTE: time in recipe starts from 0, but in Start() it starts from 1 if (store.freq == "monthly_mean") { - time <- (as.numeric(recipe$Analysis$Time$leadtimemin):as.numeric(recipe$Analysis$Time$leadtimemax)) + 1 + time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + 1 + } else if (store.freq == "daily_mean") { - time <- get_daily_time_ind(leadtimemin = as.numeric(recipe$Analysis$Time$leadtimemin), - leadtimemax = as.numeric(recipe$Analysis$Time$leadtimemax), - initial_month = archive$System[[exp.name]]$initial_month, - sdates = sdates_hcst, - calendar = archive$System[[exp.name]][[store.freq]]$calendar) + time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), + ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), + initial_month = archive$System[[exp.name]]$initial_month, + sdates = sdates_hcst, + calendar = archive$System[[exp.name]][[store.freq]]$calendar) } #NOTE: May be used in the future @@ -80,63 +82,45 @@ load_datasets <- function(recipe_file) { # largest_dims_length needs to be TRUE only if the time length may differ from chunk to chunk; otherwise, set it to FALSE to increase efficiency. need_largest_dims_length <- ifelse(exp.name == 'EC-Earth3-i2', TRUE, FALSE) - #time for daily data. If recipe asks for time step, we don't need to calculate here -# fdays <- get_days_dcpp(fyears = fyears, season = season, init = 'Nov', calendar = '360-days') -#------------------------------------------- -# Step 1: Load the hcst -#------------------------------------------- + #------------------------------------------- + # Step 1: Load the hcst + #------------------------------------------- #monthly and daily hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, '$ensemble$', table, '$var$', grid, version) hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - # monthly & daily - hcst <- Start(dat = file.path(hcst.path, hcst.files), - var = variable, - ensemble = member, - syear = paste0(sdates_hcst), -#split_multiselected_dims = T, - chunk = 'all', - chunk_depends = 'syear', - time = indices(time), - time_across = 'chunk', - merge_across_dims = TRUE, - largest_dims_length = need_largest_dims_length, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_extra_cells = 2, - transform_params = list(grid = regrid_params$fcst.gridtype, #nc file - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(longitude = c('lon', 'longitude'), - latitude = c('lat', 'latitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = c('syear', 'chunk')), - retrieve = T) + Start_hcst_arg_list <- list( + dat = file.path(hcst.path, hcst.files), + var = variable, + ensemble = member, + syear = paste0(sdates_hcst), + chunk = 'all', + chunk_depends = 'syear', + time = indices(time_ind), + time_across = 'chunk', + merge_across_dims = TRUE, + largest_dims_length = need_largest_dims_length, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = 
Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = c('syear', 'chunk')), + retrieve = T) + + hcst <- do.call(Start, Start_hcst_arg_list) tmp_time_attr <- attr(hcst, 'Variables')$common$time - # If daily data with 0229, need to remove 0229 and extra time - #TODO: Make Start() able to read the correct time indices for each sdate, so we - # don't need this awkward correction. - if (store.freq == "daily_mean" & any(format(tmp_time_attr, "%m%d") == "0229")) { - # Save hcst attributes - hcst_attr <- attributes(hcst)[-1] # without $dim - new_hcst <- Apply(hcst, - target_dims = c('syear', 'time'), - output_dims = list(data = c('syear', 'time')), - fun = correct_daily_for_leap, - time_attr = tmp_time_attr, return_time = F)$data - hcst <- Reorder(new_hcst, names(dim(hcst))) - attributes(hcst) <- c(attributes(hcst), hcst_attr) - tmp_time_attr <- correct_daily_for_leap(data = NULL, time_attr = tmp_time_attr, return_time = T)$time_attr - attr(hcst, 'Variables')$common$time <- tmp_time_attr - } # change syear to c(sday, sweek, syear) dim(hcst) <- c(dim(hcst)[1:3], sday = 1, sweek = 1, dim(hcst)[4:7]) @@ -155,45 +139,87 @@ load_datasets <- function(recipe_file) { #------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst)) { - #monthly and daily - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, - '$ensemble$', table, '$var$', grid, version) - fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + # Define path (monthly and daily) + multi_path <- FALSE + if (is.null(archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppA + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', table, '$var$', grid, version) + fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + + } else { + if (all(sdates_fcst >= archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppB + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, + '$ensemble$', table, '$var$', grid, version) + fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + + } else { + # Create one path for each sdate + #TODO: When *_depends = 'syear' can be more than one, use one path with $dcpp$ + multi_path <- TRUE + path_list <- vector('list', length = length(sdates_fcst)) + for (i_sdate in 1:length(sdates_fcst)) { + if (sdates_fcst[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear) { + path_list[[i_sdate]] <- + list(path = file.path(archive$src, archive$System[[exp.name]]$src$fcst, + '$ensemble$', table, '$var$', grid, version, + paste0('$var$_', table, '_*_dcppB-forecast_s', sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc'))) + } else { + path_list[[i_sdate]] <- + list(path = file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', table, '$var$', grid, version, + paste0('$var$_', table, '_*_dcppA-hindcast_s', sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc'))) + } + } + } + } + +# fcst.path <- file.path(archive$src, 
archive$System[[exp.name]]$src$fcst, +# '$ensemble$', table, '$var$', grid, version) +# fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + # monthly & daily - fcst <- Start(dat = file.path(fcst.path, fcst.files), - var = variable, - ensemble = member, - syear = paste0(sdates_fcst), -#split_multiselected_dims = T, - chunk = 'all', - chunk_depends = 'syear', - time = indices(time), - time_across = 'chunk', - merge_across_dims = TRUE, - largest_dims_length = need_largest_dims_length, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_extra_cells = 2, - transform_params = list(grid = regrid_params$fcst.gridtype, #nc file - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(longitude = c('lon', 'longitude'), - latitude = c('lat', 'latitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = c('syear', 'chunk')), - retrieve = T) + if (!multi_path) { + #NOTE: the adjustment for the two cases (multiple files per sdate or not) has already been made in the hcst call + Start_fcst_arg_list <- Start_hcst_arg_list + Start_fcst_arg_list[['dat']] <- file.path(fcst.path, fcst.files) + Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) + fcst <- do.call(Start, Start_fcst_arg_list) + + + } else { # multi_path + + #TODO: time attribute is not correct + Start_fcst_arg_list <- Start_hcst_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- NULL + Start_fcst_arg_list[['chunk_depends']] <- NULL + if ('syear' %in% Start_fcst_arg_list[['return_vars']][['time']]) { + remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') + Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] + } + + fcst <- do.call(Start, Start_fcst_arg_list) + + # Reshape and reorder dimensions + ## dat should be 1 and syear should take the original dat length; then reorder dimensions + dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'ensemble', 'syear', 'time', 'latitude', 'longitude')) + + } + + tmp_time_attr <- attr(fcst, 'Variables')$common$time # change syear to c(sday, sweek, syear) dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7]) tmp_time_attr <- attr(fcst, 'Variables')$common$time - if (!identical(dim(tmp_time_attr), dim(fcst)[6:7])) { - stop("fcst has problem in matching data and time attr dimension.") + #TODO: This check cannot pass in the multi_path case + if (!multi_path) { + if (!identical(dim(tmp_time_attr), dim(fcst)[6:7])) { + stop("fcst data and time attribute dimensions do not match.") + } + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(fcst)[6:7]) } - dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(fcst)[6:7]) # Change class from startR_array to s2dv_cube suppressWarnings( diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/get_daily_time_ind.R index 3e41cddb6e64c923825cd9297aba103477b51e6e..f7625690f192053a577a72d26609c64b888859e5 100644 --- a/modules/Loading/get_daily_time_ind.R +++ b/modules/Loading/get_daily_time_ind.R @@ -1,68 +1,73 @@ # This function is for decadal-daily data loading. -# The time input (leadtimemin & leadtimemax) units in the recipe is month. 
For example, -# leadtimemin = 0 means the first month, and leadtimemax = 2 means the 3rd month. -# For daily data, if initial month is November, leadtimemin = 0 equals to c(1:30), -# and leadtimemax = 2, which means January next year, equals to c(31+31:31+31+30). -get_daily_time_ind <- function(leadtimemin, leadtimemax, initial_month, sdates, calendar) { - #TODO: calendar & leap year - # E.g., time = 1:4 means Dec to March, then indices will be c(31:(31+30+31+28+31)) for non-leap yr and c(31:(31+30+31+28), (31+30+31+28+2):(31+30+31+28+2+30)) for leap yr. Calendar needs to be considered too. +# The time input (ftimemin & ftimemax) unit in the recipe is months. For example, +# ftimemin = 0 means the first month, and ftimemax = 2 means the 3rd month. +# For daily data, if the initial month is November, ftimemin = 0 corresponds to c(1:30), +# and ftimemax = 2, which means January of the next year, corresponds to c(31+31:31+31+30). +# +#NOTE: For datasets that have one file per sdate, we could use an array [syear, time] to define time, since +# the Start() call doesn't need time_across = 'chunk' (there is only one chunk). BUT to +# keep the script simple, we always use indices and ignore the leap-year difference. +# Only the 360-day and non-360-day calendars are distinguished. + +get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calendar) { +#NOTE: "sdates" is not needed if leap years are not considered if (!calendar %in% c('360-day', '365_day', 'noleap', 'standard', 'proleptic_gregorian', 'gregorian')) stop("The calendar is not recognized. Please contact maintainers.") - total_months <- (initial_month + leadtimemin):(initial_month + leadtimemax) + total_months <- (initial_month + ftimemin):(initial_month + ftimemax) total_months <- ifelse(total_months > 12, total_months %% 12, total_months) total_months <- ifelse(total_months == 0, 12, total_months) if (calendar == '360-day') { - daily_time_ind <- 1:(30 * length(total_months)) + daily_time_ind <- (ftimemin * 30 + 1):((ftimemin * 30) + (ftimemax - ftimemin + 1) * 30) } else { total_days_sum <- sum(days_in_month(total_months)) - if (leadtimemin == 0) { + if (ftimemin == 0) { daily_time_ind <- 1:total_days_sum } else { - before_start_months <- initial_month:(initial_month + leadtimemin - 1) + before_start_months <- initial_month:(initial_month + ftimemin - 1) before_start_months <- ifelse(before_start_months > 12, before_start_months %% 12, before_start_months) before_start_months <- ifelse(before_start_months == 0, 12, before_start_months) before_start_total_days_sum <- sum(days_in_month(before_start_months)) daily_time_ind <- (before_start_total_days_sum + 1):(before_start_total_days_sum + total_days_sum) } - - if (calendar %in% c('365_day', 'noleap')) { - # daily_time_ind - } else if (calendar %in% c('standard', 'proleptic_gregorian', 'gregorian')) { - -# leap <- leap_year(sdates) - sdates_full <- ymd(paste0(sdates, sprintf('%02d',initial_month), '01')) - idx_min <- sdates_full + months(leadtimemin) - idx_max <- sdates_full + months(leadtimemax + 1) - days(1) - - day0229 <- rep(NA, length(sdates)) - for (i_sdate in 1:length(sdates_full)) { - days <- seq(idx_min[i_sdate],idx_max[i_sdate], by = 'days') - day0229[i_sdate] <- sum(format(days, "%m%d") == "0229") - } - - if (all(day0229 == 0)) { # no 0229 - # daily_time_ind - } else { - # get extra indices and remove 0229 & extra indices after retrieval - #TODO: Make Start() can handle time as a list or array for this case - daily_time_ind <- c(daily_time_ind, - (tail(daily_time_ind, 1) + 
1):((tail(daily_time_ind, 1) + 1) + (max(day0229) - 1))) - } - - } +# if (calendar %in% c('365_day', 'noleap')) { +# # daily_time_ind +# } else if (calendar %in% c('standard', 'proleptic_gregorian', 'gregorian')) { +# +## leap <- leap_year(sdates) +# sdates_full <- ymd(paste0(sdates, sprintf('%02d', initial_month), '01')) +# idx_min <- sdates_full + months(ftimemin) +# idx_max <- sdates_full + months(ftimemax + 1) - days(1) +# +# day0229 <- rep(NA, length(sdates)) +# for (i_sdate in 1:length(sdates_full)) { +# days <- seq(idx_min[i_sdate],idx_max[i_sdate], by = 'days') +# day0229[i_sdate] <- sum(format(days, "%m%d") == "0229") +# } +# +# if (all(day0229 == 0)) { # no 0229 +# # daily_time_ind +# } else { +# # get extra indices and remove 0229 & extra indices after retrieval +# #TODO: Make Start() able to handle time as a list or array for this case +# daily_time_ind <- c(daily_time_ind, +# (tail(daily_time_ind, 1) + 1):((tail(daily_time_ind, 1) + 1) + (max(day0229) - 1))) +# } +# +# } } return(daily_time_ind) } - +# This function removes 0229 (leap days) from the data after loading. It is not used in the +# script now. correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) { # data & time_attr: [syear, time] #NOTE: return_time = TRUE, only return time_attr; FALSE, only return data diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index 0661fb7bf9e2f65a3afee2bb5fdc581ec9be42b1..d4e568d17237b518d226127bda7553c5bcb59296 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -9,19 +9,17 @@ Analysis: Datasets: System: name: EC-Earth3-i4 #CanESM5 - member: 'all' #r1i1p1f1,r2i1p1f1,r3i1p1f1 + member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' Multimodel: no Reference: name: ERA5 #JRA-55 Time: - sdate: - fcst: 2021 -# fcst_sday: '1101' - hcst_start: 1990 #'1993' - hcst_end: 1992 #'2016' + fcst: [2020,2021] + hcst_start: 1990 + hcst_end: 1992 # season: 'Annual' - leadtimemin: 0 - leadtimemax: 13 + ftime_min: 0 + ftime_max: 13 Region: latmin: 10 #-90 latmax: 20 #90 diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index a025dca345e030f6d5c94797ad327cabcd1db313..91a957dfb57d9308e8dcfddd17ab61acda9b45cd 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -14,14 +14,12 @@ Analysis: Reference: name: ERA5 Time: - sdate: - fcst: -# fcst_sday: '1101' - hcst_start: 1990 - hcst_end: 1992 + fcst: [2020,2021] + hcst_start: 2016 + hcst_end: 2019 season: 'Annual' - leadtimemin: 2 - leadtimemax: 4 + ftime_min: 2 + ftime_max: 4 Region: latmin: 10 #-90 latmax: 20 #90 diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml new file mode 100644 index 0000000000000000000000000000000000000000..ed81146fc259d6e5b93677350083617bb32419d0 --- /dev/null +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -0,0 +1,44 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: MIROC6 #EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 + member: r1i1p1f1,r2i1p1f1,r3i1p1f1 #'all' + Multimodel: no + Reference: + name: ERA5 + Time: + fcst: [2017,2018] + hcst_start: 1990 + hcst_end: 1992 + season: 'Annual' + ftime_min: 2 + ftime_max: 4 + Region: + latmin: 10 #-90 + latmax: 20 #90 + lonmin: 0 + lonmax: 15 #359.9 + 
Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Calibration: + method: qmap + Skill: + metric: RPSS + Indicators: + index: FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index 5acd642d1e6b1b6ad8512bf4d394671de2b5f8bb..8312f81a26c07729c80e0cae78f2782d8b47cf0f 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -8,7 +8,7 @@ Analysis: freq: monthly_mean Datasets: System: - name: EC-Earth3-i4 #CanESM5 + name: EC-Earth3-i4 member: r1i4p1f1,r2i4p1f1 Multimodel: no Reference: @@ -16,17 +16,16 @@ Analysis: Time: sdate: fcst: 2021 -# fcst_sday: '1101' - hcst_start: 1991 #'1993' - hcst_end: 1994 #'2016' + hcst_start: 1991 + hcst_end: 1994 # season: 'Annual' - leadtimemin: 0 - leadtimemax: 2 + ftime_min: 0 + ftime_max: 2 Region: - latmin: 17 #-90 - latmax: 20 #90 + latmin: 17 + latmax: 20 lonmin: 12 - lonmax: 15 #359.9 + lonmax: 15 Regrid: method: bilinear type: to_system #to_reference diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml new file mode 100644 index 0000000000000000000000000000000000000000..040bbe3e4efc5b52ef599ccdb706d7a8542d04b4 --- /dev/null +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -0,0 +1,44 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 #CanESM5 + member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst: [2020,2021] + hcst_start: 1990 + hcst_end: 1992 +# season: 'Annual' + ftime_min: 0 + ftime_max: 13 + Region: + latmin: -60 #-90 + latmax: -55 #90 + lonmin: -2 + lonmax: 2 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Calibration: + method: bias + Skill: + metric: RPSS + Indicators: + index: FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R new file mode 100644 index 0000000000000000000000000000000000000000..150205915e2c57c1defb8669b5c549e064562b5b --- /dev/null +++ b/tests/testthat/test-decadal_daily_1.R @@ -0,0 +1,221 @@ +context("Decadal daily data - 1") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Save_data.R") + +recipe_file <- "tests/recipes/recipe-decadal_daily_1.yml" + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe_file) +))}) + +#recipe <- read_yaml(recipe_file) +## Calibrate datasets +#suppressWarnings({invisible(capture.output( +# calibrated_data <- calibrate_datasets(data, recipe) +#))}) +# +## Compute skill metrics +#suppressWarnings({invisible(capture.output( +#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, +# recipe, na.rm = T, ncores = 4) +#))}) + +#====================================== + +test_that("1. 
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 90, latitude = 7, longitude = 11) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 90, latitude = 7, longitude = 11) +) +expect_equal( +dim(data$hcst$Dates$start), +c(sday = 1, sweek = 1, syear = 3, time = 90) +) +# hcst data +expect_equal( +as.vector(drop(data$hcst$data)[1, 2:3, 1:3, 1, 1]), +c(298.5787, 293.6479, 298.5042, 293.7802, 297.8072, 293.0764), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(data$hcst$data)[2, , 89:90, 1, 1]), +c(301.6978, 308.9792, 308.4501, 302.1620, 307.6034, 307.6388), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +301.2666, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(285.9326, 314.9579), +tolerance = 0.0001 +) + +# fcst data +expect_equal( +as.vector(drop(data$fcst$data)[1, , 1:3, 1, 1]), +c(295.0745, 291.1006, 296.2279, 291.6309, 295.3123, 290.8995), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(data$fcst$data)[2, , 89:90, 1, 1]), +c(305.3428, 305.0657, 305.5445, 305.5681), +tolerance = 0.0001 +) + +# time value +expect_equal( +(data$hcst$Dates$start)[1], +as.POSIXct("1991-01-01 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[2], +as.POSIXct("1992-01-01 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[5], +as.POSIXct("1992-01-02 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[1, 1, 3, 90], +as.POSIXct("1993-03-31 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[1, 1, 2, 90], +as.POSIXct("1992-03-30 12:00:00", tz = 'UTC') +) + +}) + +##====================================== +#test_that("2. Calibration", { +# +#expect_equal( +#is.list(calibrated_data), +#TRUE +#) +#expect_equal( +#names(calibrated_data), +#c("hcst", "fcst") +#) +#expect_equal( +#class(calibrated_data$hcst), +#"s2dv_cube" +#) +#expect_equal( +#class(calibrated_data$fcst), +#"s2dv_cube" +#) +#expect_equal( +#dim(calibrated_data$hcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(calibrated_data$fcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#mean(calibrated_data$fcst$data), +#291.8375, +#tolerance = 0.0001 +#) +#expect_equal( +#mean(calibrated_data$hcst$data), +#289.6679, +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +#c(286.3895, 286.6408, 290.6652, 288.3759), +#tolerance = 0.0001 +#) +#expect_equal( +#range(calibrated_data$fcst$data), +#c(287.2173, 297.4578), +#tolerance = 0.0001 +#) +# +#}) +# +# +##====================================== +#test_that("3. 
Metrics", { +# +#expect_equal( +#is.list(skill_metrics), +#TRUE +#) +#expect_equal( +#names(skill_metrics), +#c("rpss", "rpss_significance") +#) +#expect_equal( +#class(skill_metrics$rpss[[1]]), +#"array" +#) +#expect_equal( +#dim(skill_metrics$rpss[[1]]), +#c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(skill_metrics$rpss_significance[[1]]), +#dim(skill_metrics$rpss[[1]]) +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), +#c(-0.2857143, -1.2500000, -1.8928571), +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), +#rep(FALSE, 3) +#) +# +#}) + + diff --git a/tests/testthat/test-decadal_monthly.R b/tests/testthat/test-decadal_monthly_1.R similarity index 100% rename from tests/testthat/test-decadal_monthly.R rename to tests/testthat/test-decadal_monthly_1.R diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R new file mode 100644 index 0000000000000000000000000000000000000000..ac80fe7758d3dc63c1f7e7ace11c0c53b92d66d8 --- /dev/null +++ b/tests/testthat/test-decadal_monthly_2.R @@ -0,0 +1,214 @@ +context("Decadal monthly data - 2") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Save_data.R") + +recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml" + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe_file) +))}) + +#recipe <- read_yaml(recipe_file) +## Calibrate datasets +#suppressWarnings({invisible(capture.output( +# calibrated_data <- calibrate_datasets(data, recipe) +#))}) +# +## Compute skill metrics +#suppressWarnings({invisible(capture.output( +#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, +# recipe, na.rm = T, ncores = 4) +#))}) + +#====================================== + +test_that("1. 
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 14, latitude = 8, longitude = 5) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 14, latitude = 8, longitude = 5) +) +expect_equal( +dim(data$hcst$Dates$start), +c(sday = 1, sweek = 1, syear = 3, time = 14) +) +#expect_equal( +#dim(data$fcst$Dates$start), +#c(time = 14) +#) +# hcst data +expect_equal( +as.vector(drop(data$hcst$data)[1, , 1:2, 2, 2]), +c(272.8613, 271.0689, 270.8007, 273.5594, 272.1561, 272.8729), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +269.8822, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(253.8541, 276.6805), +tolerance = 0.0001 +) +# fcst data +expect_equal( +as.vector(drop(data$fcst$data)[1, , 1:2, 2, 2]), +c(271.7708, 271.8424, 272.4980, 273.5842), +tolerance = 0.0001 +) +expect_equal( +mean(data$fcst$data), +271.2158, +tolerance = 0.0001 +) + +expect_equal( +(data$hcst$Dates$start)[1], +as.POSIXct("1990-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[2], +as.POSIXct("1991-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[5], +as.POSIXct("1991-12-16 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[10], +as.POSIXct("1991-02-15", tz = 'UTC') +) + +}) + +#====================================== +#test_that("2. Calibration", { +# +#expect_equal( +#is.list(calibrated_data), +#TRUE +#) +#expect_equal( +#names(calibrated_data), +#c("hcst", "fcst") +#) +#expect_equal( +#class(calibrated_data$hcst), +#"s2dv_cube" +#) +#expect_equal( +#class(calibrated_data$fcst), +#"s2dv_cube" +#) +#expect_equal( +#dim(calibrated_data$hcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(calibrated_data$fcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#mean(calibrated_data$fcst$data), +#291.8375, +#tolerance = 0.0001 +#) +#expect_equal( +#mean(calibrated_data$hcst$data), +#289.6679, +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +#c(286.3895, 286.6408, 290.6652, 288.3759), +#tolerance = 0.0001 +#) +#expect_equal( +#range(calibrated_data$fcst$data), +#c(287.2173, 297.4578), +#tolerance = 0.0001 +#) +# +#}) +# +# +##====================================== +#test_that("3. 
Metrics", { +# +#expect_equal( +#is.list(skill_metrics), +#TRUE +#) +#expect_equal( +#names(skill_metrics), +#c("rpss", "rpss_significance") +#) +#expect_equal( +#class(skill_metrics$rpss[[1]]), +#"array" +#) +#expect_equal( +#dim(skill_metrics$rpss[[1]]), +#c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(skill_metrics$rpss_significance[[1]]), +#dim(skill_metrics$rpss[[1]]) +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), +#c(-0.2857143, -1.2500000, -1.8928571), +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), +#rep(FALSE, 3) +#) +# +#}) +# +# diff --git a/tools/libs.R b/tools/libs.R index 317286508f6929bb3258443ebf5eec29867dbd51..248ada450574cfbcaf6615068989e6884b0d0e46 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -10,6 +10,7 @@ library(abind) # library(easyVerification) library(easyNCDF) library(CSTools) +library(lubridate) # # library(parallel) # library(pryr) # To check mem usage.