From e9905f51d3bdf9ca2271f75eec898b215a396c6d Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 6 Jul 2022 17:58:41 +0200 Subject: [PATCH 1/7] Improve daily loading if dataset is not ec-earth --- modules/Loading/Loading_decadal.R | 59 +++++++++++++++++-- modules/Loading/get_daily_time_ind.R | 5 +- .../testing_recipes/recipe_decadal_daily.yml | 4 +- tools/libs.R | 1 + 4 files changed, 60 insertions(+), 9 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index fe615594..35b95b67 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -9,6 +9,7 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R") # Load required libraries/funs source("modules/Loading/get_daily_time_ind.R") +source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") source("tools/libs.R") @@ -44,11 +45,25 @@ load_datasets <- function(recipe_file) { if (store.freq == "monthly_mean") { time <- (as.numeric(recipe$Analysis$Time$leadtimemin):as.numeric(recipe$Analysis$Time$leadtimemax)) + 1 } else if (store.freq == "daily_mean") { - time <- get_daily_time_ind(leadtimemin = as.numeric(recipe$Analysis$Time$leadtimemin), - leadtimemax = as.numeric(recipe$Analysis$Time$leadtimemax), - initial_month = archive$System[[exp.name]]$initial_month, - sdates = sdates_hcst, - calendar = archive$System[[exp.name]][[store.freq]]$calendar) + if (exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4')) { + # time selector uses indices, and tune the data afterward to remove 0229 + time <- get_daily_time_ind(leadtimemin = as.numeric(recipe$Analysis$Time$leadtimemin), + leadtimemax = as.numeric(recipe$Analysis$Time$leadtimemax), + initial_month = archive$System[[exp.name]]$initial_month, + sdates = sdates_hcst, + calendar = archive$System[[exp.name]][[store.freq]]$calendar) + } else { + # use an array [syear, time] as time selector since time_across = 'chunk' is not needed + initial_month_char <- sprintf('%02d', archive$System[[exp.name]]$initial_month) + + sdates_hcst_full <- paste0(sdates_hcst, initial_month_char, '01') + time_arr <- get_timeidx(sdates = sdates_hcst_full, + ltmin = as.numeric(recipe$Analysis$Time$leadtimemin), + ltmax = as.numeric(recipe$Analysis$Time$leadtimemax), + time_freq = "daily_mean") + names(dim(time_arr)) <- c('syear', 'time') + + } } #NOTE: May be used in the future @@ -92,7 +107,38 @@ load_datasets <- function(recipe_file) { '$ensemble$', table, '$var$', grid, version) hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - # monthly & daily + # daily non-EC-Earth data + if (store.freq == "daily_mean" & !exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4')) { + hcst <- Start(dat = file.path(hcst.path, hcst.files), + var = variable, + ensemble = member, + syear = paste0(sdates_hcst), +#split_multiselected_dims = T, + chunk = 'all', + chunk_depends = 'syear', + time = time_arr, #indices(time), +# time_across = 'chunk', +# merge_across_dims = TRUE, + largest_dims_length = need_largest_dims_length, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$fcst.gridtype, #nc file + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = c('syear')), #, 'chunk')), + retrieve = T) + if (dim(hcst)['chunk'] != 1) stop("hcst loading has problem. The chunk number is > 1") + dim(hcst) <- dim(hcst)[-5] #remove chunk dim + + } else { + # monthly & daily if EC-Earth hcst <- Start(dat = file.path(hcst.path, hcst.files), var = variable, ensemble = member, @@ -118,6 +164,7 @@ load_datasets <- function(recipe_file) { return_vars = list(latitude = NULL, longitude = NULL, time = c('syear', 'chunk')), retrieve = T) + } tmp_time_attr <- attr(hcst, 'Variables')$common$time diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/get_daily_time_ind.R index 3e41cddb..d8bf9a37 100644 --- a/modules/Loading/get_daily_time_ind.R +++ b/modules/Loading/get_daily_time_ind.R @@ -3,6 +3,9 @@ # leadtimemin = 0 means the first month, and leadtimemax = 2 means the 3rd month. # For daily data, if initial month is November, leadtimemin = 0 equals to c(1:30), # and leadtimemax = 2, which means January next year, equals to c(31+31:31+31+30). +# +#NOTE: For the datasets not EC-Earth, we can use array [syear, time] to define time since +# the Start() call doesn't need time_across = 'chunk' (there is only one chunk) get_daily_time_ind <- function(leadtimemin, leadtimemax, initial_month, sdates, calendar) { #TODO: calendar & leap year # E.g., time = 1:4 means Dec to March, then indices will be c(31:(31+30+31+28+31)) for non-leap yr and c(31:(31+30+31+28), (31+30+31+28+2):(31+30+31+28+2+30)) for leap yr. Calendar needs to be considered too. @@ -37,7 +40,7 @@ get_daily_time_ind <- function(leadtimemin, leadtimemax, initial_month, sdates, } else if (calendar %in% c('standard', 'proleptic_gregorian', 'gregorian')) { # leap <- leap_year(sdates) - sdates_full <- ymd(paste0(sdates, sprintf('%02d',initial_month), '01')) + sdates_full <- ymd(paste0(sdates, sprintf('%02d', initial_month), '01')) idx_min <- sdates_full + months(leadtimemin) idx_max <- sdates_full + months(leadtimemax + 1) - days(1) diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index a025dca3..565ce6a8 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -8,8 +8,8 @@ Analysis: freq: daily_mean Datasets: System: - name: EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 - member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' + name: MIROC6 #EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 + member: r1i1p1f1,r2i1p1f1,r3i1p1f1 #'all' Multimodel: no Reference: name: ERA5 diff --git a/tools/libs.R b/tools/libs.R index 31728650..248ada45 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -10,6 +10,7 @@ library(abind) # library(easyVerification) library(easyNCDF) library(CSTools) +library(lubridate) # # library(parallel) # library(pryr) # To check mem usage. -- GitLab From 679265fcbc06de8372286287353f2a7fbdcadf0c Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 11 Jul 2022 18:07:32 +0200 Subject: [PATCH 2/7] Allow more than one fcst_syear; change leadtime to ftime. --- conf/archive_decadal.yml | 10 +-- modules/Loading/Loading_decadal.R | 82 +++++++++++++++++-- modules/Loading/get_daily_time_ind.R | 2 +- .../testing_recipes/recipe_decadal.yml | 14 ++-- 4 files changed, 85 insertions(+), 23 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index e41d46a0..aff4330a 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -41,9 +41,10 @@ archive: # ---- EC-Earth3-i4: - src: - hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" - fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" + src: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/" + first_dcppB_syear: 2021 +# hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" +# fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" # src: {"1960:2020": "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/", # "2021:2021": "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/"} monthly_mean: @@ -56,8 +57,7 @@ archive: grid: {"tas":"gr", "pr":"gr", "psl":"gr", "clt":"gr", "hfls":"gr", "hurs":"gr", "huss":"gr", "rsds":"gr", "rsut":"gr", "ta":"gr", "tasmax":"gr", "tosa":"gr", "ua":"gr", "va":"gr", "zg":"gr", - "evspsbl":"gr", "hfss":"gr", "hursmin":"gr", "rlut":"gr", - "rsdt":"gr", "sfcWind":"gr", "tasmin":"gr", "ts":"gr", "uas":"gr", + "vas":"gr"} version: {"tas":"v20210910", "pr":"v20210910", "psl":"v20210910", "clt":"v20210910", "hurs":"v20210910", "huss":"v20210910", "rsds":"v20210910", "rsut":"v20210910", "ta":"v20210910", diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 35b95b67..6daaf1d5 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -39,16 +39,16 @@ load_datasets <- function(recipe_file) { # change to: sdates <- dates2load(recipe, logger) sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015 - sdates_fcst <- as.numeric(recipe$Analysis$Time$fcst) + sdates_fcst <- as.numeric(strsplit(recipe$Analysis$Time$fcst, ' ')[[1]]) #NOTE: time in recipe starts from 0, but in Start() it starts from 1 if (store.freq == "monthly_mean") { - time <- (as.numeric(recipe$Analysis$Time$leadtimemin):as.numeric(recipe$Analysis$Time$leadtimemax)) + 1 + time <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + 1 } else if (store.freq == "daily_mean") { if (exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4')) { # time selector uses indices, and tune the data afterward to remove 0229 - time <- get_daily_time_ind(leadtimemin = as.numeric(recipe$Analysis$Time$leadtimemin), - leadtimemax = as.numeric(recipe$Analysis$Time$leadtimemax), + time <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), + ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), initial_month = archive$System[[exp.name]]$initial_month, sdates = sdates_hcst, calendar = archive$System[[exp.name]][[store.freq]]$calendar) @@ -58,8 +58,8 @@ load_datasets <- function(recipe_file) { sdates_hcst_full <- paste0(sdates_hcst, initial_month_char, '01') time_arr <- get_timeidx(sdates = sdates_hcst_full, - ltmin = as.numeric(recipe$Analysis$Time$leadtimemin), - ltmax = as.numeric(recipe$Analysis$Time$leadtimemax), + ltmin = as.numeric(recipe$Analysis$Time$ftime_min), + ltmax = as.numeric(recipe$Analysis$Time$ftime_max), time_freq = "daily_mean") names(dim(time_arr)) <- c('syear', 'time') @@ -203,11 +203,44 @@ load_datasets <- function(recipe_file) { if (!is.null(recipe$Analysis$Time$fcst)) { #monthly and daily - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, - '$ensemble$', table, '$var$', grid, version) - fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + multi_path <- FALSE + if (is.null(archive$System[[exp.name]]$first_dcppB_syear)) { # only dcppA + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src, "dcppA-hindcast", + '$ensemble$', table, '$var$', grid, version) + fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + } else { + if (all(sdates_fcst >= archive$System[[exp.name]]$first_dcppB_syear)) { # only dcppB + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src, "dcppB-forecast", + '$ensemble$', table, '$var$', grid, version) + fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + } else { + # Create one path for each sdate + #TODO: When *_depends = 'syear' can be more than one, use one path with $dcpp$ + multi_path <- TRUE + path_list <- vector('list', length = length(sdates_fcst)) + for (i_sdate in 1:length(sdates_fcst)) { + if (sdates_fcst[i_sdate] >= archive$System[[exp.name]]$first_dcppB_syear) { + path_list[[i_sdate]] <- + list(path = file.path(archive$src, archive$System[[exp.name]]$src, "dcppB-forecast", + '$ensemble$', table, '$var$', grid, version, + paste0('$var$_', table, '_*_dcppB-forecast_s', sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc'))) + } else { + path_list[[i_sdate]] <- + list(path = file.path(archive$src, archive$System[[exp.name]]$src, "dcppA-hindcast", + '$ensemble$', table, '$var$', grid, version, + paste0('$var$_', table, '_*_dcppA-hindcast_s', sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc'))) + } + } + } + } + +# fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, +# '$ensemble$', table, '$var$', grid, version) +# fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') # monthly & daily + if (!multi_path) { + fcst <- Start(dat = file.path(fcst.path, fcst.files), var = variable, ensemble = member, @@ -234,6 +267,37 @@ load_datasets <- function(recipe_file) { time = c('syear', 'chunk')), retrieve = T) + } else { # multi_path + #TODO: time attribute is not correct + fcst <- Start(dat = path_list, + var = variable, + ensemble = member, + chunk = 'all', + time = indices(time), + time_across = 'chunk', + merge_across_dims = TRUE, + largest_dims_length = need_largest_dims_length, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$fcst.gridtype, #nc file + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = c('chunk')), + retrieve = T) + # Reshape dimensions + ## Insert syear at 3rd in [dat, var, ensemble, time, latitude, longitude] + ## dat should be 1, syear should be length of dat + dim(fcst) <- c(dat = 1, var = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[3:6]) + + } + # change syear to c(sday, sweek, syear) dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7]) tmp_time_attr <- attr(fcst, 'Variables')$common$time diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/get_daily_time_ind.R index d8bf9a37..b574544f 100644 --- a/modules/Loading/get_daily_time_ind.R +++ b/modules/Loading/get_daily_time_ind.R @@ -6,7 +6,7 @@ # #NOTE: For the datasets not EC-Earth, we can use array [syear, time] to define time since # the Start() call doesn't need time_across = 'chunk' (there is only one chunk) -get_daily_time_ind <- function(leadtimemin, leadtimemax, initial_month, sdates, calendar) { +get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calendar) { #TODO: calendar & leap year # E.g., time = 1:4 means Dec to March, then indices will be c(31:(31+30+31+28+31)) for non-leap yr and c(31:(31+30+31+28), (31+30+31+28+2):(31+30+31+28+2+30)) for leap yr. Calendar needs to be considered too. diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index 0661fb7b..2ec20856 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -9,19 +9,17 @@ Analysis: Datasets: System: name: EC-Earth3-i4 #CanESM5 - member: 'all' #r1i1p1f1,r2i1p1f1,r3i1p1f1 + member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' Multimodel: no Reference: name: ERA5 #JRA-55 Time: - sdate: - fcst: 2021 -# fcst_sday: '1101' - hcst_start: 1990 #'1993' - hcst_end: 1992 #'2016' + fcst: 2020 2021 + hcst_start: 1990 + hcst_end: 1992 # season: 'Annual' - leadtimemin: 0 - leadtimemax: 13 + ftimemin: 0 + ftimemax: 13 Region: latmin: 10 #-90 latmax: 20 #90 -- GitLab From da31596562fb82b4938c304e071758d161229da6 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Jul 2022 22:24:13 +0200 Subject: [PATCH 3/7] Deal with 0229 version (going to remove it later) --- modules/Loading/Loading_decadal.R | 265 ++++++++++-------- modules/Loading/get_daily_time_ind.R | 23 +- .../testing_recipes/recipe_decadal.yml | 6 +- .../testing_recipes/recipe_decadal_daily.yml | 16 +- 4 files changed, 166 insertions(+), 144 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index e2ce9809..b29b30c5 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -39,13 +39,13 @@ load_datasets <- function(recipe_file) { # change to: sdates <- dates2load(recipe, logger) sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015 - sdates_fcst <- as.numeric(strsplit(recipe$Analysis$Time$fcst, ' ')[[1]]) + sdates_fcst <- recipe$Analysis$Time$fcst #NOTE: time in recipe starts from 0, but in Start() it starts from 1 if (store.freq == "monthly_mean") { time <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + 1 } else if (store.freq == "daily_mean") { - if (exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4')) { + if (exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4', 'HadGEM3-GC31-MM')) { # time selector uses indices, and tune the data afterward to remove 0229 time <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), @@ -55,7 +55,7 @@ load_datasets <- function(recipe_file) { } else { # use an array [syear, time] as time selector since time_across = 'chunk' is not needed initial_month_char <- sprintf('%02d', archive$System[[exp.name]]$initial_month) - + # hcst sdates_hcst_full <- paste0(sdates_hcst, initial_month_char, '01') time_arr <- get_timeidx(sdates = sdates_hcst_full, ltmin = as.numeric(recipe$Analysis$Time$ftime_min), @@ -63,6 +63,16 @@ load_datasets <- function(recipe_file) { time_freq = "daily_mean") names(dim(time_arr)) <- c('syear', 'time') + # fcst + if (!is.null(recipe$Analysis$Time$fcst)) { + sdates_fcst_full <- paste0(sdates_fcst, initial_month_char, '01') + time_arr_fcst <- get_timeidx(sdates = sdates_fcst_full, + ltmin = as.numeric(recipe$Analysis$Time$ftime_min), + ltmax = as.numeric(recipe$Analysis$Time$ftime_max), + time_freq = "daily_mean") + names(dim(time_arr_fcst)) <- c('syear', 'time') + } + } } @@ -95,10 +105,7 @@ load_datasets <- function(recipe_file) { # Only if the time length in each chunk may differ that we need largest_dims_length to be TRUE. Otherwise, set FALSE to increase efficiency. need_largest_dims_length <- ifelse(exp.name == 'EC-Earth3-i2', TRUE, FALSE) - #time for daily data. If recipe asks for time step, we don't need to calculate here -# fdays <- get_days_dcpp(fyears = fyears, season = season, init = 'Nov', calendar = '360-days') - #------------------------------------------- # Step 1: Load the hcst #------------------------------------------- @@ -106,71 +113,60 @@ load_datasets <- function(recipe_file) { hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, '$ensemble$', table, '$var$', grid, version) hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - - # daily non-EC-Earth data - if (store.freq == "daily_mean" & !exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4')) { - hcst <- Start(dat = file.path(hcst.path, hcst.files), - var = variable, - ensemble = member, - syear = paste0(sdates_hcst), -#split_multiselected_dims = T, - chunk = 'all', - chunk_depends = 'syear', - time = time_arr, #indices(time), -# time_across = 'chunk', -# merge_across_dims = TRUE, - largest_dims_length = need_largest_dims_length, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_extra_cells = 2, - transform_params = list(grid = regrid_params$fcst.gridtype, #nc file - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(longitude = c('lon', 'longitude'), - latitude = c('lat', 'latitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = c('syear')), #, 'chunk')), - retrieve = T) - if (dim(hcst)['chunk'] != 1) stop("hcst loading has problem. The chunk number is > 1") - dim(hcst) <- dim(hcst)[-5] #remove chunk dim + Start_hcst_arg_list <- list( + dat = file.path(hcst.path, hcst.files), + var = variable, + ensemble = member, + syear = paste0(sdates_hcst), + chunk = 'all', + chunk_depends = 'syear', + time = indices(time), + time_across = 'chunk', + merge_across_dims = TRUE, + largest_dims_length = need_largest_dims_length, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_extra_cells = 2, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = c('syear', 'chunk')), + retrieve = T) + + # daily & multiple files per sdate + if (store.freq == "daily_mean" & !exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4', 'HadGEM3-GC31-MM')) { + Start_hcst_arg_list[['time_across']] <- NULL + Start_hcst_arg_list[['merge_across_dims']] <- NULL + Start_hcst_arg_list[['time']] <- time_arr + Start_hcst_arg_list[['return_vars']][['time']] <- 'syear' + + hcst <- do.call(Start, Start_hcst_arg_list) + + if (dim(hcst)['chunk'] != 1) { + stop("hcst loading has problem. The chunk number is > 1") + } else { + dim(hcst) <- dim(hcst)[-5] #remove chunk dim + } } else { - # monthly & daily if EC-Earth - hcst <- Start(dat = file.path(hcst.path, hcst.files), - var = variable, - ensemble = member, - syear = paste0(sdates_hcst), -#split_multiselected_dims = T, - chunk = 'all', - chunk_depends = 'syear', - time = indices(time), - time_across = 'chunk', - merge_across_dims = TRUE, - largest_dims_length = need_largest_dims_length, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_extra_cells = 2, - transform_params = list(grid = regrid_params$fcst.gridtype, #nc file - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(longitude = c('lon', 'longitude'), - latitude = c('lat', 'latitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = c('syear', 'chunk')), - retrieve = T) + # monthly & daily if EC-Earth + hcst <- do.call(Start, Start_hcst_arg_list) } tmp_time_attr <- attr(hcst, 'Variables')$common$time +#------------------------------------------------------------------ # If daily data with 0229, need to remove 0229 and extra time - #TODO: Make Start() able to read the correct time indices for each sdate, so we - # don't need this awkward correction. + #TODO: Make Start() able to read the correct time indices for each sdate (for case1), + # so we don't need this awkward correction. + #NOTE: It is not critical though. Having one more or one less day is acceptable. + # If we need to use Compute(), just ignore this part. if (store.freq == "daily_mean" & any(format(tmp_time_attr, "%m%d") == "0229")) { # Save hcst attributes hcst_attr <- attributes(hcst)[-1] # without $dim @@ -184,6 +180,7 @@ load_datasets <- function(recipe_file) { tmp_time_attr <- correct_daily_for_leap(data = NULL, time_attr = tmp_time_attr, return_time = T)$time_attr attr(hcst, 'Variables')$common$time <- tmp_time_attr } +#------------------------------------------------------------------ # change syear to c(sday, sweek, syear) dim(hcst) <- c(dim(hcst)[1:3], sday = 1, sweek = 1, dim(hcst)[4:7]) @@ -202,31 +199,33 @@ load_datasets <- function(recipe_file) { #------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst)) { - #monthly and daily + # Define path (monthly and daily) multi_path <- FALSE - if (is.null(archive$System[[exp.name]]$first_dcppB_syear)) { # only dcppA - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src, "dcppA-hindcast", + if (is.null(archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppA + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, '$ensemble$', table, '$var$', grid, version) fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + } else { - if (all(sdates_fcst >= archive$System[[exp.name]]$first_dcppB_syear)) { # only dcppB - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src, "dcppB-forecast", + if (all(sdates_fcst >= archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppB + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, '$ensemble$', table, '$var$', grid, version) fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + } else { # Create one path for each sdate #TODO: When *_depends = 'syear' can be more than one, use one path with $dcpp$ multi_path <- TRUE path_list <- vector('list', length = length(sdates_fcst)) for (i_sdate in 1:length(sdates_fcst)) { - if (sdates_fcst[i_sdate] >= archive$System[[exp.name]]$first_dcppB_syear) { + if (sdates_fcst[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear) { path_list[[i_sdate]] <- - list(path = file.path(archive$src, archive$System[[exp.name]]$src, "dcppB-forecast", + list(path = file.path(archive$src, archive$System[[exp.name]]$src$fcst, '$ensemble$', table, '$var$', grid, version, paste0('$var$_', table, '_*_dcppB-forecast_s', sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc'))) } else { path_list[[i_sdate]] <- - list(path = file.path(archive$src, archive$System[[exp.name]]$src, "dcppA-hindcast", + list(path = file.path(archive$src, archive$System[[exp.name]]$src$hcst, '$ensemble$', table, '$var$', grid, version, paste0('$var$_', table, '_*_dcppA-hindcast_s', sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc'))) } @@ -238,66 +237,92 @@ load_datasets <- function(recipe_file) { # '$ensemble$', table, '$var$', grid, version) # fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + # monthly & daily if (!multi_path) { + #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst + Start_fcst_arg_list <- Start_hcst_arg_list + Start_fcst_arg_list[['dat']] <- file.path(fcst.path, fcst.files) + Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) + if (is.array(Start_fcst_arg_list[['time']])) { + Start_fcst_arg_list[['time']] <- time_arr_fcst + } + fcst <- do.call(Start, Start_fcst_arg_list) - fcst <- Start(dat = file.path(fcst.path, fcst.files), - var = variable, - ensemble = member, - syear = paste0(sdates_fcst), -#split_multiselected_dims = T, - chunk = 'all', - chunk_depends = 'syear', - time = indices(time), - time_across = 'chunk', - merge_across_dims = TRUE, - largest_dims_length = need_largest_dims_length, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_extra_cells = 2, - transform_params = list(grid = regrid_params$fcst.gridtype, #nc file - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(longitude = c('lon', 'longitude'), - latitude = c('lat', 'latitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = c('syear', 'chunk')), - retrieve = T) + if ('chunk' %in% names(dim(fcst))) { + if (dim(fcst)['chunk'] != 1) { + stop("fcst loading has problem. The chunk number is > 1") + } else { + dim(fcst) <- dim(fcst)[-5] #remove chunk dim + } + } } else { # multi_path #TODO: time attribute is not correct - fcst <- Start(dat = path_list, - var = variable, - ensemble = member, - chunk = 'all', - time = indices(time), - time_across = 'chunk', - merge_across_dims = TRUE, - largest_dims_length = need_largest_dims_length, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(decreasing = TRUE), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_extra_cells = 2, - transform_params = list(grid = regrid_params$fcst.gridtype, #nc file - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(longitude = c('lon', 'longitude'), - latitude = c('lat', 'latitude')), - return_vars = list(latitude = NULL, longitude = NULL, - time = c('chunk')), - retrieve = T) + + Start_fcst_arg_list <- Start_hcst_arg_list + Start_fcst_arg_list[['dat']] <- path_list + Start_fcst_arg_list[['syear']] <- NULL + Start_fcst_arg_list[['chunk_depends']] <- NULL + if ('syear' %in% Start_hcst_arg_list[['return_vars']][['time']]) { + remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear') + Start_fcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] + } + + fcst <- do.call(Start, Start_fcst_arg_list) + +# fcst <- Start(dat = path_list, +# var = variable, +# ensemble = member, +# chunk = 'all', +# time = indices(time), +# time_across = 'chunk', +# merge_across_dims = TRUE, +# largest_dims_length = need_largest_dims_length, +# latitude = values(list(lats.min, lats.max)), +# latitude_reorder = Sort(decreasing = TRUE), +# longitude = values(list(lons.min, lons.max)), +# longitude_reorder = circularsort, +# transform = regrid_params$fcst.transform, +# transform_extra_cells = 2, +# transform_params = list(grid = regrid_params$fcst.gridtype, #nc file +# method = regrid_params$fcst.gridmethod), +# transform_vars = c('latitude', 'longitude'), +# synonims = list(longitude = c('lon', 'longitude'), +# latitude = c('lat', 'latitude')), +# return_vars = list(latitude = NULL, longitude = NULL, +# time = c('chunk')), +# retrieve = T) + # Reshape dimensions ## Insert syear at 3rd in [dat, var, ensemble, time, latitude, longitude] ## dat should be 1, syear should be length of dat - dim(fcst) <- c(dat = 1, var = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[3:6]) + dim(fcst) <- c(dat = 1, var = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[3:6]) } + tmp_time_attr <- attr(fcst, 'Variables')$common$time +#------------------------------------------------------------------ + # If daily data with 0229, need to remove 0229 and extra time + #TODO: Make Start() able to read the correct time indices for each sdate (for case1), + # so we don't need this awkward correction. + #NOTE: It is not critical though. Having one more or one less day is acceptable. + # If we need to use Compute(), just ignore this part. + if (store.freq == "daily_mean" & any(format(tmp_time_attr, "%m%d") == "0229")) { + # Save fcst attributes + fcst_attr <- attributes(fcst)[-1] # without $dim + new_fcst <- Apply(fcst, + target_dims = c('syear', 'time'), + output_dims = list(data = c('syear', 'time')), + fun = correct_daily_for_leap, + time_attr = tmp_time_attr, return_time = F)$data + fcst <- Reorder(new_fcst, names(dim(fcst))) + attributes(fcst) <- c(attributes(fcst), fcst_attr) + tmp_time_attr <- correct_daily_for_leap(data = NULL, time_attr = tmp_time_attr, return_time = T)$time_attr + attr(fcst, 'Variables')$common$time <- tmp_time_attr + } +#------------------------------------------------------------------ + # change syear to c(sday, sweek, syear) dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7]) tmp_time_attr <- attr(fcst, 'Variables')$common$time diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/get_daily_time_ind.R index b574544f..59f60268 100644 --- a/modules/Loading/get_daily_time_ind.R +++ b/modules/Loading/get_daily_time_ind.R @@ -1,33 +1,32 @@ # This function is for decadal-daily data loading. -# The time input (leadtimemin & leadtimemax) units in the recipe is month. For example, -# leadtimemin = 0 means the first month, and leadtimemax = 2 means the 3rd month. -# For daily data, if initial month is November, leadtimemin = 0 equals to c(1:30), -# and leadtimemax = 2, which means January next year, equals to c(31+31:31+31+30). +# The time input (ftimemin & ftimemax) units in the recipe is month. For example, +# ftimemin = 0 means the first month, and ftimemax = 2 means the 3rd month. +# For daily data, if initial month is November, ftimemin = 0 equals to c(1:30), +# and ftimemax = 2, which means January next year, equals to c(31+31:31+31+30). # -#NOTE: For the datasets not EC-Earth, we can use array [syear, time] to define time since +#NOTE: For the datasets that having one file per sdate, we can use array [syear, time] to define time since # the Start() call doesn't need time_across = 'chunk' (there is only one chunk) get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calendar) { - #TODO: calendar & leap year # E.g., time = 1:4 means Dec to March, then indices will be c(31:(31+30+31+28+31)) for non-leap yr and c(31:(31+30+31+28), (31+30+31+28+2):(31+30+31+28+2+30)) for leap yr. Calendar needs to be considered too. if (!calendar %in% c('360-day', '365_day', 'noleap', 'standard', 'proleptic_gregorian', 'gregorian')) stop("The calendar is not recognized. Please contact maintainers.") - total_months <- (initial_month + leadtimemin):(initial_month + leadtimemax) + total_months <- (initial_month + ftimemin):(initial_month + ftimemax) total_months <- ifelse(total_months > 12, total_months %% 12, total_months) total_months <- ifelse(total_months == 0, 12, total_months) if (calendar == '360-day') { - daily_time_ind <- 1:(30 * length(total_months)) + daily_time_ind <- (ftimemin * 30 + 1): ((ftimemin * 30) + (ftimemax - ftimemin + 1) * 30) } else { total_days_sum <- sum(days_in_month(total_months)) - if (leadtimemin == 0) { + if (ftimemin == 0) { daily_time_ind <- 1:total_days_sum } else { - before_start_months <- initial_month:(initial_month + leadtimemin - 1) + before_start_months <- initial_month:(initial_month + ftimemin - 1) before_start_months <- ifelse(before_start_months > 12, before_start_months %% 12, before_start_months) before_start_months <- ifelse(before_start_months == 0, 12, before_start_months) before_start_total_days_sum <- sum(days_in_month(before_start_months)) @@ -41,8 +40,8 @@ get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calend # leap <- leap_year(sdates) sdates_full <- ymd(paste0(sdates, sprintf('%02d', initial_month), '01')) - idx_min <- sdates_full + months(leadtimemin) - idx_max <- sdates_full + months(leadtimemax + 1) - days(1) + idx_min <- sdates_full + months(ftimemin) + idx_max <- sdates_full + months(ftimemax + 1) - days(1) day0229 <- rep(NA, length(sdates)) for (i_sdate in 1:length(sdates_full)) { diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index 2ec20856..d4e568d1 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -14,12 +14,12 @@ Analysis: Reference: name: ERA5 #JRA-55 Time: - fcst: 2020 2021 + fcst: [2020,2021] hcst_start: 1990 hcst_end: 1992 # season: 'Annual' - ftimemin: 0 - ftimemax: 13 + ftime_min: 0 + ftime_max: 13 Region: latmin: 10 #-90 latmax: 20 #90 diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index 565ce6a8..91a957df 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -8,20 +8,18 @@ Analysis: freq: daily_mean Datasets: System: - name: MIROC6 #EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 - member: r1i1p1f1,r2i1p1f1,r3i1p1f1 #'all' + name: EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 + member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' Multimodel: no Reference: name: ERA5 Time: - sdate: - fcst: -# fcst_sday: '1101' - hcst_start: 1990 - hcst_end: 1992 + fcst: [2020,2021] + hcst_start: 2016 + hcst_end: 2019 season: 'Annual' - leadtimemin: 2 - leadtimemax: 4 + ftime_min: 2 + ftime_max: 4 Region: latmin: 10 #-90 latmax: 20 #90 -- GitLab From 1800a477c4c040c2bf89cc09382727db22853404 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Jul 2022 22:43:00 +0200 Subject: [PATCH 4/7] decadal monthly loading done (modification for multiple fcst years and flexibility in dcppA & dcppB --- conf/archive_decadal.yml | 13 +- modules/Loading/Loading_decadal.R | 13 +- tests/recipes/recipe-decadal_daily_1.yml | 44 +++++ tests/recipes/recipe-decadal_monthly_1.yml | 17 +- tests/testthat/test-decadal_daily.R | 197 +++++++++++++++++++++ 5 files changed, 267 insertions(+), 17 deletions(-) create mode 100644 tests/recipes/recipe-decadal_daily_1.yml create mode 100644 tests/testthat/test-decadal_daily.R diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index aff4330a..28ff6028 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -41,8 +41,10 @@ archive: # ---- EC-Earth3-i4: - src: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/" - first_dcppB_syear: 2021 + src: + hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" + fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" + first_dcppB_syear: 2021 # hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" # fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" # src: {"1960:2020": "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/", @@ -78,6 +80,7 @@ archive: src: hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast/" + first_dcppB_syear: 2019 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"} grid: {"tas":"gr", "psl":"gr", "pr":"gr", "ts":"gr", "tos":"gr"} @@ -115,6 +118,7 @@ archive: src: hcst: "exp/canesm5/cmip6-dcppA-hindcast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/" fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/" + first_dcppB_syear: 2020 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} @@ -150,6 +154,7 @@ archive: src: hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppB-forecast/" + first_dcppB_syear: 2020 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "prc":"Amon", "ts":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn", "prc":"gn", "ts":"gn"} @@ -283,7 +288,9 @@ archive: # ---- NorCPM1-i2: - src: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" + src: + hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" + fcst: monthly_mean: table: {"pr":"Amon", "psl":"Amon"} grid: {"pr":"gn", "psl":"gn"} diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index b29b30c5..d5ec1fcf 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -295,9 +295,9 @@ load_datasets <- function(recipe_file) { # retrieve = T) # Reshape dimensions - ## Insert syear at 3rd in [dat, var, ensemble, time, latitude, longitude] + ## Insert syear at 4th in [dat, var, ensemble, time, latitude, longitude] ## dat should be 1, syear should be length of dat - dim(fcst) <- c(dat = 1, var = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[3:6]) + dim(fcst) <- c(dat = 1, dim(fcst)[2:3], syear = as.numeric(dim(fcst))[1], dim(fcst)[4:6]) } @@ -326,10 +326,13 @@ load_datasets <- function(recipe_file) { # change syear to c(sday, sweek, syear) dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7]) tmp_time_attr <- attr(fcst, 'Variables')$common$time - if (!identical(dim(tmp_time_attr), dim(fcst)[6:7])) { - stop("fcst has problem in matching data and time attr dimension.") + #TODO: This check cannot pass if multi_path + if (!multi_path) { + if (!identical(dim(tmp_time_attr), dim(fcst)[6:7])) { + stop("fcst has problem in matching data and time attr dimension.") + } + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(fcst)[6:7]) } - dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(fcst)[6:7]) # Change class from startR_array to s2dv_cube suppressWarnings( diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml new file mode 100644 index 00000000..ed81146f --- /dev/null +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -0,0 +1,44 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: MIROC6 #EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 + member: r1i1p1f1,r2i1p1f1,r3i1p1f1 #'all' + Multimodel: no + Reference: + name: ERA5 + Time: + fcst: [2017,2018] + hcst_start: 1990 + hcst_end: 1992 + season: 'Annual' + ftime_min: 2 + ftime_max: 4 + Region: + latmin: 10 #-90 + latmax: 20 #90 + lonmin: 0 + lonmax: 15 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Calibration: + method: qmap + Skill: + metric: RPSS + Indicators: + index: FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index 5acd642d..8312f81a 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -8,7 +8,7 @@ Analysis: freq: monthly_mean Datasets: System: - name: EC-Earth3-i4 #CanESM5 + name: EC-Earth3-i4 member: r1i4p1f1,r2i4p1f1 Multimodel: no Reference: @@ -16,17 +16,16 @@ Analysis: Time: sdate: fcst: 2021 -# fcst_sday: '1101' - hcst_start: 1991 #'1993' - hcst_end: 1994 #'2016' + hcst_start: 1991 + hcst_end: 1994 # season: 'Annual' - leadtimemin: 0 - leadtimemax: 2 + ftime_min: 0 + ftime_max: 2 Region: - latmin: 17 #-90 - latmax: 20 #90 + latmin: 17 + latmax: 20 lonmin: 12 - lonmax: 15 #359.9 + lonmax: 15 Regrid: method: bilinear type: to_system #to_reference diff --git a/tests/testthat/test-decadal_daily.R b/tests/testthat/test-decadal_daily.R new file mode 100644 index 00000000..27e29d6c --- /dev/null +++ b/tests/testthat/test-decadal_daily.R @@ -0,0 +1,197 @@ +context("Decadal daily data - 1") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Save_data.R") + +recipe_file <- "tests/recipes/recipe-decadal_daily_1.yml" + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe_file) +))}) + +#recipe <- read_yaml(recipe_file) +## Calibrate datasets +#suppressWarnings({invisible(capture.output( +# calibrated_data <- calibrate_datasets(data, recipe) +#))}) +# +## Compute skill metrics +#suppressWarnings({invisible(capture.output( +#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, +# recipe, na.rm = T, ncores = 4) +#))}) + +#====================================== + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 90, latitude = 7, longitude = 11) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 90, latitude = 7, longitude = 11) +) +expect_equal( +dim(data$hcst$Dates$start), +c(sday = 1, sweek = 1, syear = 3, time = 90) +) +expect_equal( +as.vector(drop(data$hcst$data)[1, 2:3, 1:3, 1, 1]), +c(298.5787, 293.6479, 298.5042, 293.7802, 297.8072, 293.0764), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +301.2666, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(285.9326, 314.9579), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$Dates$start)[1], +as.POSIXct("1991-01-01 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[2], +as.POSIXct("1992-01-01 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[5], +as.POSIXct("1992-01-02 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[1, 1, 3, 90], +as.POSIXct("1993-03-31 12:00:00", tz = 'UTC') +) + +}) + +##====================================== +#test_that("2. Calibration", { +# +#expect_equal( +#is.list(calibrated_data), +#TRUE +#) +#expect_equal( +#names(calibrated_data), +#c("hcst", "fcst") +#) +#expect_equal( +#class(calibrated_data$hcst), +#"s2dv_cube" +#) +#expect_equal( +#class(calibrated_data$fcst), +#"s2dv_cube" +#) +#expect_equal( +#dim(calibrated_data$hcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(calibrated_data$fcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#mean(calibrated_data$fcst$data), +#291.8375, +#tolerance = 0.0001 +#) +#expect_equal( +#mean(calibrated_data$hcst$data), +#289.6679, +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +#c(286.3895, 286.6408, 290.6652, 288.3759), +#tolerance = 0.0001 +#) +#expect_equal( +#range(calibrated_data$fcst$data), +#c(287.2173, 297.4578), +#tolerance = 0.0001 +#) +# +#}) +# +# +##====================================== +#test_that("3. Metrics", { +# +#expect_equal( +#is.list(skill_metrics), +#TRUE +#) +#expect_equal( +#names(skill_metrics), +#c("rpss", "rpss_significance") +#) +#expect_equal( +#class(skill_metrics$rpss[[1]]), +#"array" +#) +#expect_equal( +#dim(skill_metrics$rpss[[1]]), +#c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(skill_metrics$rpss_significance[[1]]), +#dim(skill_metrics$rpss[[1]]) +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), +#c(-0.2857143, -1.2500000, -1.8928571), +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), +#rep(FALSE, 3) +#) +# +#}) + + -- GitLab From 2cacb4ff481d0ee6478fd373e65b23f2734325b6 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 12 Jul 2022 23:18:24 +0200 Subject: [PATCH 5/7] Add one monthly case --- tests/recipes/recipe-decadal_monthly_2.yml | 44 +++++ tests/testthat/test-decadal_monthly_2.R | 214 +++++++++++++++++++++ 2 files changed, 258 insertions(+) create mode 100644 tests/recipes/recipe-decadal_monthly_2.yml create mode 100644 tests/testthat/test-decadal_monthly_2.R diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml new file mode 100644 index 00000000..040bbe3e --- /dev/null +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -0,0 +1,44 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 #CanESM5 + member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst: [2020,2021] + hcst_start: 1990 + hcst_end: 1992 +# season: 'Annual' + ftime_min: 0 + ftime_max: 13 + Region: + latmin: -60 #-90 + latmax: -55 #90 + lonmin: -2 + lonmax: 2 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Calibration: + method: bias + Skill: + metric: RPSS + Indicators: + index: FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R new file mode 100644 index 00000000..24aeac6b --- /dev/null +++ b/tests/testthat/test-decadal_monthly_2.R @@ -0,0 +1,214 @@ +context("Decadal monthly data - 2") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Save_data.R") + +recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml" + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe_file) +))}) + +#recipe <- read_yaml(recipe_file) +## Calibrate datasets +#suppressWarnings({invisible(capture.output( +# calibrated_data <- calibrate_datasets(data, recipe) +#))}) +# +## Compute skill metrics +#suppressWarnings({invisible(capture.output( +#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, +# recipe, na.rm = T, ncores = 4) +#))}) + +#====================================== + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 14, latitude = 8, longitude = 5) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 14, latitude = 8, longitude = 5) +) +expect_equal( +dim(data$hcst$Dates$start), +c(sday = 1, sweek = 1, syear = 3, time = 14) +) +#expect_equal( +#dim(data$fcst$Dates$start), +#c(time = 14) +#) +# hcst data +expect_equal( +as.vector(drop(data$hcst$data)[1, , 1:2, 2, 2]), +c(272.8613, 271.0689, 270.8007, 273.5594, 272.1561, 272.8729), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +269.8822, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(253.8541, 276.6805), +tolerance = 0.0001 +) +# fcst data +expect_equal( +as.vector(drop(data$fcst$data)[1, , 1:2, 2, 2]), +c(271.7708, 272.2929, 272.4980, 273.3616), +tolerance = 0.0001 +) +expect_equal( +mean(data$fcst$data), +271.2158, +tolerance = 0.0001 +) + +expect_equal( +(data$hcst$Dates$start)[1], +as.POSIXct("1990-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[2], +as.POSIXct("1991-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[5], +as.POSIXct("1991-12-16 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[10], +as.POSIXct("1991-02-15", tz = 'UTC') +) + +}) + +#====================================== +#test_that("2. Calibration", { +# +#expect_equal( +#is.list(calibrated_data), +#TRUE +#) +#expect_equal( +#names(calibrated_data), +#c("hcst", "fcst") +#) +#expect_equal( +#class(calibrated_data$hcst), +#"s2dv_cube" +#) +#expect_equal( +#class(calibrated_data$fcst), +#"s2dv_cube" +#) +#expect_equal( +#dim(calibrated_data$hcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(calibrated_data$fcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#mean(calibrated_data$fcst$data), +#291.8375, +#tolerance = 0.0001 +#) +#expect_equal( +#mean(calibrated_data$hcst$data), +#289.6679, +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +#c(286.3895, 286.6408, 290.6652, 288.3759), +#tolerance = 0.0001 +#) +#expect_equal( +#range(calibrated_data$fcst$data), +#c(287.2173, 297.4578), +#tolerance = 0.0001 +#) +# +#}) +# +# +##====================================== +#test_that("3. Metrics", { +# +#expect_equal( +#is.list(skill_metrics), +#TRUE +#) +#expect_equal( +#names(skill_metrics), +#c("rpss", "rpss_significance") +#) +#expect_equal( +#class(skill_metrics$rpss[[1]]), +#"array" +#) +#expect_equal( +#dim(skill_metrics$rpss[[1]]), +#c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(skill_metrics$rpss_significance[[1]]), +#dim(skill_metrics$rpss[[1]]) +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), +#c(-0.2857143, -1.2500000, -1.8928571), +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), +#rep(FALSE, 3) +#) +# +#}) +# +# -- GitLab From 6ffacf542a45e557480a66725a279a9281239f20 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 13 Jul 2022 11:00:02 +0200 Subject: [PATCH 6/7] Remove leap year control and refine unit tests --- modules/Loading/Loading_decadal.R | 155 +++--------------- modules/Loading/get_daily_time_ind.R | 61 +++---- ...decadal_daily.R => test-decadal_daily_1.R} | 23 +++ ...dal_monthly.R => test-decadal_monthly_1.R} | 0 tests/testthat/test-decadal_monthly_2.R | 2 +- 5 files changed, 77 insertions(+), 164 deletions(-) rename tests/testthat/{test-decadal_daily.R => test-decadal_daily_1.R} (88%) rename tests/testthat/{test-decadal_monthly.R => test-decadal_monthly_1.R} (100%) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index d5ec1fcf..6ff9023a 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -43,37 +43,14 @@ load_datasets <- function(recipe_file) { #NOTE: time in recipe starts from 0, but in Start() it starts from 1 if (store.freq == "monthly_mean") { - time <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + 1 - } else if (store.freq == "daily_mean") { - if (exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4', 'HadGEM3-GC31-MM')) { - # time selector uses indices, and tune the data afterward to remove 0229 - time <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), - ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), - initial_month = archive$System[[exp.name]]$initial_month, - sdates = sdates_hcst, - calendar = archive$System[[exp.name]][[store.freq]]$calendar) - } else { - # use an array [syear, time] as time selector since time_across = 'chunk' is not needed - initial_month_char <- sprintf('%02d', archive$System[[exp.name]]$initial_month) - # hcst - sdates_hcst_full <- paste0(sdates_hcst, initial_month_char, '01') - time_arr <- get_timeidx(sdates = sdates_hcst_full, - ltmin = as.numeric(recipe$Analysis$Time$ftime_min), - ltmax = as.numeric(recipe$Analysis$Time$ftime_max), - time_freq = "daily_mean") - names(dim(time_arr)) <- c('syear', 'time') - - # fcst - if (!is.null(recipe$Analysis$Time$fcst)) { - sdates_fcst_full <- paste0(sdates_fcst, initial_month_char, '01') - time_arr_fcst <- get_timeidx(sdates = sdates_fcst_full, - ltmin = as.numeric(recipe$Analysis$Time$ftime_min), - ltmax = as.numeric(recipe$Analysis$Time$ftime_max), - time_freq = "daily_mean") - names(dim(time_arr_fcst)) <- c('syear', 'time') - } + time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + 1 - } + } else if (store.freq == "daily_mean") { + time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), + ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), + initial_month = archive$System[[exp.name]]$initial_month, + sdates = sdates_hcst, + calendar = archive$System[[exp.name]][[store.freq]]$calendar) } #NOTE: May be used in the future @@ -106,13 +83,14 @@ load_datasets <- function(recipe_file) { need_largest_dims_length <- ifelse(exp.name == 'EC-Earth3-i2', TRUE, FALSE) -#------------------------------------------- -# Step 1: Load the hcst -#------------------------------------------- + #------------------------------------------- + # Step 1: Load the hcst + #------------------------------------------- #monthly and daily hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, '$ensemble$', table, '$var$', grid, version) hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + Start_hcst_arg_list <- list( dat = file.path(hcst.path, hcst.files), var = variable, @@ -120,7 +98,7 @@ load_datasets <- function(recipe_file) { syear = paste0(sdates_hcst), chunk = 'all', chunk_depends = 'syear', - time = indices(time), + time = indices(time_ind), time_across = 'chunk', merge_across_dims = TRUE, largest_dims_length = need_largest_dims_length, @@ -139,48 +117,10 @@ load_datasets <- function(recipe_file) { time = c('syear', 'chunk')), retrieve = T) - # daily & multiple files per sdate - if (store.freq == "daily_mean" & !exp.name %in% c('EC-Earth3-i1', 'EC-Earth3-i2', 'EC-Earth3-i4', 'HadGEM3-GC31-MM')) { - Start_hcst_arg_list[['time_across']] <- NULL - Start_hcst_arg_list[['merge_across_dims']] <- NULL - Start_hcst_arg_list[['time']] <- time_arr - Start_hcst_arg_list[['return_vars']][['time']] <- 'syear' - - hcst <- do.call(Start, Start_hcst_arg_list) - - if (dim(hcst)['chunk'] != 1) { - stop("hcst loading has problem. The chunk number is > 1") - } else { - dim(hcst) <- dim(hcst)[-5] #remove chunk dim - } - - } else { - # monthly & daily if EC-Earth - hcst <- do.call(Start, Start_hcst_arg_list) - } + hcst <- do.call(Start, Start_hcst_arg_list) tmp_time_attr <- attr(hcst, 'Variables')$common$time -#------------------------------------------------------------------ - # If daily data with 0229, need to remove 0229 and extra time - #TODO: Make Start() able to read the correct time indices for each sdate (for case1), - # so we don't need this awkward correction. - #NOTE: It is not critical though. Having one more or one less day is acceptable. - # If we need to use Compute(), just ignore this part. - if (store.freq == "daily_mean" & any(format(tmp_time_attr, "%m%d") == "0229")) { - # Save hcst attributes - hcst_attr <- attributes(hcst)[-1] # without $dim - new_hcst <- Apply(hcst, - target_dims = c('syear', 'time'), - output_dims = list(data = c('syear', 'time')), - fun = correct_daily_for_leap, - time_attr = tmp_time_attr, return_time = F)$data - hcst <- Reorder(new_hcst, names(dim(hcst))) - attributes(hcst) <- c(attributes(hcst), hcst_attr) - tmp_time_attr <- correct_daily_for_leap(data = NULL, time_attr = tmp_time_attr, return_time = T)$time_attr - attr(hcst, 'Variables')$common$time <- tmp_time_attr - } -#------------------------------------------------------------------ # change syear to c(sday, sweek, syear) dim(hcst) <- c(dim(hcst)[1:3], sday = 1, sweek = 1, dim(hcst)[4:7]) @@ -244,84 +184,31 @@ load_datasets <- function(recipe_file) { Start_fcst_arg_list <- Start_hcst_arg_list Start_fcst_arg_list[['dat']] <- file.path(fcst.path, fcst.files) Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) - if (is.array(Start_fcst_arg_list[['time']])) { - Start_fcst_arg_list[['time']] <- time_arr_fcst - } fcst <- do.call(Start, Start_fcst_arg_list) - if ('chunk' %in% names(dim(fcst))) { - if (dim(fcst)['chunk'] != 1) { - stop("fcst loading has problem. The chunk number is > 1") - } else { - dim(fcst) <- dim(fcst)[-5] #remove chunk dim - } - } } else { # multi_path - #TODO: time attribute is not correct + #TODO: time attribute is not correct Start_fcst_arg_list <- Start_hcst_arg_list Start_fcst_arg_list[['dat']] <- path_list Start_fcst_arg_list[['syear']] <- NULL Start_fcst_arg_list[['chunk_depends']] <- NULL - if ('syear' %in% Start_hcst_arg_list[['return_vars']][['time']]) { - remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear') - Start_fcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] + if ('syear' %in% Start_fcst_arg_list[['return_vars']][['time']]) { + remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') + Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] } fcst <- do.call(Start, Start_fcst_arg_list) -# fcst <- Start(dat = path_list, -# var = variable, -# ensemble = member, -# chunk = 'all', -# time = indices(time), -# time_across = 'chunk', -# merge_across_dims = TRUE, -# largest_dims_length = need_largest_dims_length, -# latitude = values(list(lats.min, lats.max)), -# latitude_reorder = Sort(decreasing = TRUE), -# longitude = values(list(lons.min, lons.max)), -# longitude_reorder = circularsort, -# transform = regrid_params$fcst.transform, -# transform_extra_cells = 2, -# transform_params = list(grid = regrid_params$fcst.gridtype, #nc file -# method = regrid_params$fcst.gridmethod), -# transform_vars = c('latitude', 'longitude'), -# synonims = list(longitude = c('lon', 'longitude'), -# latitude = c('lat', 'latitude')), -# return_vars = list(latitude = NULL, longitude = NULL, -# time = c('chunk')), -# retrieve = T) - - # Reshape dimensions - ## Insert syear at 4th in [dat, var, ensemble, time, latitude, longitude] - ## dat should be 1, syear should be length of dat - dim(fcst) <- c(dat = 1, dim(fcst)[2:3], syear = as.numeric(dim(fcst))[1], dim(fcst)[4:6]) + # Reshape and reorder dimensions + ## dat should be 1, syear should be length of dat; reorder dimensions + dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'ensemble', 'syear', 'time', 'latitude', 'longitude')) } tmp_time_attr <- attr(fcst, 'Variables')$common$time -#------------------------------------------------------------------ - # If daily data with 0229, need to remove 0229 and extra time - #TODO: Make Start() able to read the correct time indices for each sdate (for case1), - # so we don't need this awkward correction. - #NOTE: It is not critical though. Having one more or one less day is acceptable. - # If we need to use Compute(), just ignore this part. - if (store.freq == "daily_mean" & any(format(tmp_time_attr, "%m%d") == "0229")) { - # Save fcst attributes - fcst_attr <- attributes(fcst)[-1] # without $dim - new_fcst <- Apply(fcst, - target_dims = c('syear', 'time'), - output_dims = list(data = c('syear', 'time')), - fun = correct_daily_for_leap, - time_attr = tmp_time_attr, return_time = F)$data - fcst <- Reorder(new_fcst, names(dim(fcst))) - attributes(fcst) <- c(attributes(fcst), fcst_attr) - tmp_time_attr <- correct_daily_for_leap(data = NULL, time_attr = tmp_time_attr, return_time = T)$time_attr - attr(fcst, 'Variables')$common$time <- tmp_time_attr - } -#------------------------------------------------------------------ # change syear to c(sday, sweek, syear) dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7]) diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/get_daily_time_ind.R index 59f60268..f7625690 100644 --- a/modules/Loading/get_daily_time_ind.R +++ b/modules/Loading/get_daily_time_ind.R @@ -5,9 +5,12 @@ # and ftimemax = 2, which means January next year, equals to c(31+31:31+31+30). # #NOTE: For the datasets that having one file per sdate, we can use array [syear, time] to define time since -# the Start() call doesn't need time_across = 'chunk' (there is only one chunk) +# the Start() call doesn't need time_across = 'chunk' (there is only one chunk). BUT to +# keep the script simple, we use indices always and ignore leap year difference. +# Only distinguish 360-day and non 360-day calendar. + get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calendar) { - # E.g., time = 1:4 means Dec to March, then indices will be c(31:(31+30+31+28+31)) for non-leap yr and c(31:(31+30+31+28), (31+30+31+28+2):(31+30+31+28+2+30)) for leap yr. Calendar needs to be considered too. +#NOTE: "sdates" is not needed if leap year is not considered if (!calendar %in% c('360-day', '365_day', 'noleap', 'standard', 'proleptic_gregorian', 'gregorian')) @@ -33,38 +36,38 @@ get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calend daily_time_ind <- (before_start_total_days_sum + 1):(before_start_total_days_sum + total_days_sum) } - - if (calendar %in% c('365_day', 'noleap')) { - # daily_time_ind - } else if (calendar %in% c('standard', 'proleptic_gregorian', 'gregorian')) { - -# leap <- leap_year(sdates) - sdates_full <- ymd(paste0(sdates, sprintf('%02d', initial_month), '01')) - idx_min <- sdates_full + months(ftimemin) - idx_max <- sdates_full + months(ftimemax + 1) - days(1) - - day0229 <- rep(NA, length(sdates)) - for (i_sdate in 1:length(sdates_full)) { - days <- seq(idx_min[i_sdate],idx_max[i_sdate], by = 'days') - day0229[i_sdate] <- sum(format(days, "%m%d") == "0229") - } - - if (all(day0229 == 0)) { # no 0229 - # daily_time_ind - } else { - # get extra indices and remove 0229 & extra indices after retrieval - #TODO: Make Start() can handle time as a list or array for this case - daily_time_ind <- c(daily_time_ind, - (tail(daily_time_ind, 1) + 1):((tail(daily_time_ind, 1) + 1) + (max(day0229) - 1))) - } - - } +# if (calendar %in% c('365_day', 'noleap')) { +# # daily_time_ind +# } else if (calendar %in% c('standard', 'proleptic_gregorian', 'gregorian')) { +# +## leap <- leap_year(sdates) +# sdates_full <- ymd(paste0(sdates, sprintf('%02d', initial_month), '01')) +# idx_min <- sdates_full + months(ftimemin) +# idx_max <- sdates_full + months(ftimemax + 1) - days(1) +# +# day0229 <- rep(NA, length(sdates)) +# for (i_sdate in 1:length(sdates_full)) { +# days <- seq(idx_min[i_sdate],idx_max[i_sdate], by = 'days') +# day0229[i_sdate] <- sum(format(days, "%m%d") == "0229") +# } +# +# if (all(day0229 == 0)) { # no 0229 +# # daily_time_ind +# } else { +# # get extra indices and remove 0229 & extra indices after retrieval +# #TODO: Make Start() can handle time as a list or array for this case +# daily_time_ind <- c(daily_time_ind, +# (tail(daily_time_ind, 1) + 1):((tail(daily_time_ind, 1) + 1) + (max(day0229) - 1))) +# } +# +# } } return(daily_time_ind) } - +# This function is used to pick out 0229 after loading data. But it is not used in the +# script now. correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) { # data & time_attr: [syear, time] #NOTE: return_time = TRUE, only return time_attr; FALSE, only return data diff --git a/tests/testthat/test-decadal_daily.R b/tests/testthat/test-decadal_daily_1.R similarity index 88% rename from tests/testthat/test-decadal_daily.R rename to tests/testthat/test-decadal_daily_1.R index 27e29d6c..42da0401 100644 --- a/tests/testthat/test-decadal_daily.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -74,12 +74,18 @@ expect_equal( dim(data$hcst$Dates$start), c(sday = 1, sweek = 1, syear = 3, time = 90) ) +# hcst data expect_equal( as.vector(drop(data$hcst$data)[1, 2:3, 1:3, 1, 1]), c(298.5787, 293.6479, 298.5042, 293.7802, 297.8072, 293.0764), tolerance = 0.0001 ) expect_equal( +as.vector(drop(data$hcst$data)[2, , 89:90, 1, 1]), +c(301.6978, 308.9792, 308.4501, 302.1620, 307.6034, 307.6388), +tolerance = 0.0001 +) +expect_equal( mean(data$hcst$data), 301.2666, tolerance = 0.0001 @@ -89,6 +95,19 @@ range(data$hcst$data), c(285.9326, 314.9579), tolerance = 0.0001 ) + +# fcst data +as.vector(drop(data$fcst$data)[1, , 1:3, 1, 1]), +c(295.0745, 291.1006, 296.2279, 291.6309, 295.3123, 290.8995), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(data$fcst$data)[2, , 89:90, 1, 1]), +c(305.3428, 305.0657, 305.5445, 305.5681), +tolerance = 0.0001 +) + +# time value expect_equal( (data$hcst$Dates$start)[1], as.POSIXct("1991-01-01 12:00:00", tz = 'UTC') @@ -105,6 +124,10 @@ expect_equal( (data$hcst$Dates$start)[1, 1, 3, 90], as.POSIXct("1993-03-31 12:00:00", tz = 'UTC') ) +expect_equal( +(data$hcst$Dates$start)[1, 1, 2, 90], +as.POSIXct("1992-03-30 12:00:00", tz = 'UTC') +) }) diff --git a/tests/testthat/test-decadal_monthly.R b/tests/testthat/test-decadal_monthly_1.R similarity index 100% rename from tests/testthat/test-decadal_monthly.R rename to tests/testthat/test-decadal_monthly_1.R diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 24aeac6b..ac80fe77 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -97,7 +97,7 @@ tolerance = 0.0001 # fcst data expect_equal( as.vector(drop(data$fcst$data)[1, , 1:2, 2, 2]), -c(271.7708, 272.2929, 272.4980, 273.3616), +c(271.7708, 271.8424, 272.4980, 273.5842), tolerance = 0.0001 ) expect_equal( -- GitLab From c7cc2738180051dfdcc44483dac3a3fc130ce1f3 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 13 Jul 2022 12:10:24 +0200 Subject: [PATCH 7/7] Fix typo --- tests/testthat/test-decadal_daily_1.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index 42da0401..15020591 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -97,6 +97,7 @@ tolerance = 0.0001 ) # fcst data +expect_equal( as.vector(drop(data$fcst$data)[1, , 1:3, 1, 1]), c(295.0745, 291.1006, 296.2279, 291.6309, 295.3123, 290.8995), tolerance = 0.0001 -- GitLab