diff --git a/DESCRIPTION b/DESCRIPTION index 6b75f77f6ceeeba0d02b5bc1e04cd701ea4aef5e..fdd5843f05e6104ccfdbaacedf227edcc800f847 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.2.1 +Version: 2.2.2 Authors@R: c( person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), @@ -36,7 +36,7 @@ Suggests: stats, utils, testthat -License: Apache License 2.0 +License: GPL-3 URL: https://earth.bsc.es/gitlab/es/startR/ BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues SystemRequirements: cdo ecFlow diff --git a/NEWS.md b/NEWS.md index c1dc90bd1044fbd763970d6e8422bd007bfe3ee4..ce5eab1ab35a8ef394d2bd94c2297f86457e13f9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# startR v2.2.1 (Release date: 2023-03-24) +- Start(): Bugfix when the input parameters are assigned by a variable with NULL value and retrieve = F +- NcDataReader(): Bugfix for wrong time attributes return when the unit is "month" + # startR v2.2.1 (Release date: 2022-11-17) - Reduce warning messages from CDO. - Reduce repetitive warning messages from CDORemapper() when single core is used. When multiple cores diff --git a/R/CDORemapper.R b/R/CDORemapper.R index 67c6b9e93b0b0e1f5c1b73efeeaefca0f21f2038..60aa0e2d0be6be80ec025e0a403ef352c3c654ae 100644 --- a/R/CDORemapper.R +++ b/R/CDORemapper.R @@ -33,7 +33,7 @@ #' data_path <- system.file('extdata', package = 'startR') #' path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') #' sdates <- c('200011') -#' \donttest{ +#' \dontrun{ #' data <- Start(dat = list(list(path = path_obs)), #' var = 'tos', #' sdate = sdates, diff --git a/R/NcDataReader.R b/R/NcDataReader.R index 1c83d0af68c155c35bbe0dfb103342001fe3f594..25e33d412dca7545f1878b18eabe90d07e0432d4 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -240,58 +240,123 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, # define day in each month leap_month_day <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) no_leap_month_day <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - # Origin year and month - ori_year <- as.numeric(substr(parts[2], 1, 4)) - ori_month <- as.numeric(substr(parts[2], 6, 7)) - if (is.na(ori_month)) { - ori_month <- as.numeric(substr(parts[2], 6, 6)) - } - if (!is.numeric(ori_year) | !is.numeric(ori_month)) { - stop(paste0("The time unit attribute format is not 'YYYY-MM-DD' or 'YYYY-M-D'. ", - "Check the file or contact the maintainer.")) - } + # If calendar is gregorian, we get the result date directly instead of calculating how many seconds we have. + # The other calendar type can also do this but then we need to calculate each date in for loop. + #TODO: Try to use 'clock' to calculate the date (but dependency will be added) if (calendar == 'gregorian') { - # Find how many years + months - yr_num <- floor(result / 12) - month_left <- result - yr_num * 12 - # Find the leap years we care - if (ori_month <= 2) { - leap_num <- length(which(sapply(ori_year:(ori_year + yr_num - 1), s2dv::LeapYear))) - } else { - leap_num <- length(which(sapply((ori_year + 1):(ori_year + yr_num), s2dv::LeapYear))) + # Origin year and month and day + ori_year <- as.numeric(substr(parts[2], 1, 4)) + ori_month <- as.numeric(substr(parts[2], 6, 7)) + ori_day <- as.numeric(substr(parts[2], 9, 10)) + if (is.na(ori_month)) { + ori_month <- as.numeric(substr(parts[2], 6, 6)) + ori_day <- as.numeric(substr(parts[2], 8, 8)) } - total_days <- leap_num * 366 + (yr_num - leap_num) * 365 # not include month_left yet - - if (month_left != 0) { - if ((ori_month + month_left) <= 12) { # the last month is still in the same last yr - # Is the last year a leap year? - last_leap <- s2dv::LeapYear(ori_year + yr_num) - if (last_leap) { - total_days <- total_days + sum(leap_month_day[ori_month:(ori_month + month_left - 1)]) + if (!is.numeric(ori_year) | !is.numeric(ori_month) | !is.numeric(ori_day)) { + stop(paste0("The time unit attribute format is not 'YYYY-MM-DD' or 'YYYY-M-D'. ", + "Check the file or contact the maintainer.")) + } + result_vec <- rep(NA, length = length(result)) + for (result_i in 1:length(result)) { + yr_num <- floor(result[result_i] / 12) + month_left <- result[result_i] - yr_num * 12 + result_year <- ori_year + yr_num + result_month <- ori_month + floor(month_left) + result_day <- ori_day + #NOTE: Assumption that hour down is 00 + result_hour <- 0 + if (result_month > 12) { + result_year <- result_year + 1 + result_month <- result_month - 12 + } + if (month_left %% 1 != 0) { + if (result_month == 2) { + day_in_month <- ifelse(s2dv::LeapYear(result_year), 29, 28) } else { - total_days <- total_days + sum(no_leap_month_day[ori_month:(ori_month + month_left - 1)]) + day_in_month <- no_leap_month_day[result_month] } - } else { # the last month ends in the next yr - if (ori_month == 2) { # e.g., 2005-02-16 + 11mth = 2006-01-16 - last_leap <- s2dv::LeapYear(ori_year + yr_num) # still consider 2005 - if (last_leap) { - total_days <- total_days + sum(leap_month_day[2:12]) - } else { - total_days <- total_days + sum(no_leap_month_day[2:12]) - } - } else { # e.g., 2005-04-16 + 11mth = 2006-03-16 - last_leap <- s2dv::LeapYear(ori_year + yr_num + 1) - needed_month <- c(ori_month:12, 1:(ori_month + month_left - 12 - 1)) - if (last_leap) { - total_days <- total_days + sum(leap_month_day[needed_month]) - } else { - total_days <- total_days + sum(no_leap_month_day[needed_month]) - } + result_day <- ori_day + (month_left - floor(month_left)) * day_in_month + if (result_day > day_in_month) { + result_month <- result_month + 1 + result_day <- result_day - day_in_month + } + if (result_month > 12) { + result_year <- result_year + 1 + result_month <- result_month - 12 + } + # if there is hour left + if (result_day %% 1 != 0) { + result_hour <- (result_day - floor(result_day)) * 24 + result_day <- floor(result_day) + } + if (result_hour %% 1 != 0) { + warning("The time value is not correct below 'hour'.") + result_hour <- round(result_hour) } } + result_month <- sprintf("%02d", result_month) + result_day <- sprintf("%02d", result_day) + result_hour <- sprintf("%02d", result_hour) + # Combine all the parts into one string + tmp <- paste(result_year, result_month, result_day, sep = '-') + tmp <- paste0(tmp, ' ', result_hour, ':00:00') + result_vec[result_i] <- tmp } - result <- total_days * 24 * 60 * 60 # day to sec + # Transfer the strings to time class + new_array <- PCICt::as.PCICt(result_vec, cal = 'gregorian') + new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC")) + +# if (calendar == 'gregorian') { +# # Find how many years + months +# yr_num <- floor(result / 12) +# month_left <- result - yr_num * 12 +# # Find the leap years we care +# if (ori_month <= 2) { +# leap_num <- length(which(sapply(ori_year:(ori_year + yr_num - 1), s2dv::LeapYear))) +# } else { +# leap_num <- length(which(sapply((ori_year + 1):(ori_year + yr_num), s2dv::LeapYear))) +# } +# total_days <- leap_num * 366 + (yr_num - leap_num) * 365 # not include month_left yet +# +# if (month_left != 0) { +##TODO: This part until result <- total_days* 24 ... is not correct. It doesn't consider ori_day +# if ((ori_month + month_left - 1) <= 12) { # the last month is still in the same last yr +# # Is the last year a leap year? +# last_leap <- s2dv::LeapYear(ori_year + yr_num) +# if (last_leap) { +# month_day_vector <- leap_month_day +# } else { +# month_day_vector <- no_leap_month_day +# } +# if (month_left >= 1) { # Only a few days in Jan. only, directly go to the next "if" +# total_days <- total_days + sum(month_day_vector[ori_month:(ori_month + month_left - 1)]) +# } +# if ((month_left %% 1) != 0) { +# # month_left has decimal point like 11.5 +# total_days <- total_days + (month_left - floor(month_left)) * month_day_vector[ceiling(month_left)] +# } +# } else { # the last month ends in the next yr +# if (ori_month == 2) { # e.g., 2005-02-16 + 11mth = 2006-01-16 +# last_leap <- s2dv::LeapYear(ori_year + yr_num) # still consider 2005 +# if (last_leap) { +# total_days <- total_days + sum(leap_month_day[2:12]) +# } else { +# total_days <- total_days + sum(no_leap_month_day[2:12]) +# } +# } else { # e.g., 2005-04-16 + 11mth = 2006-03-16 +# last_leap <- s2dv::LeapYear(ori_year + yr_num + 1) +# needed_month <- c(ori_month:12, 1:(ori_month + month_left - 12 - 1)) +# if (last_leap) { +# total_days <- total_days + sum(leap_month_day[needed_month]) +# } else { +# total_days <- total_days + sum(no_leap_month_day[needed_month]) +# } +# } +# } +# } +# result <- total_days * 24 * 60 * 60 # day to sec + } else if (calendar %in% c('365_day',' 365', 'noleap')) { yr_num <- floor(result / 12) month_left <- result - yr_num * 12 @@ -308,9 +373,10 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, } } - new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[] - new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC")) - + if (!(units %in% c('month', 'months') & calendar == 'gregorian')) { + new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[] + new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC")) + } #new_array <- seq(as.POSIXct(parts[2]), # length = max(result, na.rm = TRUE) + 1, # by = units)[result[] + 1] diff --git a/R/Start.R b/R/Start.R index 1251634b83286f0cabc6af45dffe63c14ea05019..54ebdb8159a5a274f6132964f3ba1bdc00d20bd1 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1385,7 +1385,7 @@ Start <- function(..., # dim = indices/selectors, # names as depended dim. for (j in 1:length(dat_selectors[[file_dim]])) { sv <- selector_vector <- dat_selectors[[file_dim]][[j]] - if (!is(sv, first_class) || + if (!inherits(sv, first_class) || !identical(first_length, length(sv))) { stop("All provided selectors for depending dimensions must ", "be vectors of the same length and of the same class.") @@ -3967,7 +3967,7 @@ Start <- function(..., # dim = indices/selectors, if (!identical(inner_dim_has_split_dim, character(0))) { metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates - if (is(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { + if (inherits(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } picked_common_vars[[inner_dim_has_split_dim]] <- metadata_tmp @@ -4096,7 +4096,7 @@ Start <- function(..., # dim = indices/selectors, if (!identical(inner_dim_has_split_dim, character(0))) { metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates - if (is(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { + if (inherits(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') } picked_common_vars[[inner_dim_has_split_dim]] <- metadata_tmp @@ -4185,7 +4185,12 @@ Start <- function(..., # dim = indices/selectors, start_call <- match.call() for (i in 2:length(start_call)) { if (class(start_call[[i]]) %in% c('name', 'call')) { - start_call[[i]] <- eval.parent(start_call[[i]]) + tmp <- eval.parent(start_call[[i]]) + if (is.null(tmp)) { + start_call[i] <- list(NULL) + } else { + start_call[[i]] <- eval.parent(start_call[[i]]) + } } } start_call[['retrieve']] <- TRUE diff --git a/inst/doc/usecase/ex1_12_rotated_coordinates.R b/inst/doc/usecase/ex1_12_rotated_coordinates.R index f657a70418aead4f1bc7770e430f4a887f37b03a..5c24c502523a3908b152a54d9266f0feec2ba50a 100644 --- a/inst/doc/usecase/ex1_12_rotated_coordinates.R +++ b/inst/doc/usecase/ex1_12_rotated_coordinates.R @@ -11,31 +11,29 @@ date <- c('20131229', '20131230') # two temporal dimensions: one for days and another four hours data_split <- Start(dataset = path, var = 'od550du', - lev = 'all', date = date, time = 'all', rlat = 'all', rlon = 'all', - return_vars = list(lev = NULL, time = NULL, + return_vars = list(time = NULL, rlat = NULL, rlon = NULL, lat = NULL, lon = NULL), retrieve = TRUE, num_procs = 1) # one temporal dimension data <- Start(dataset = path, var = 'od550du', - lev = 'all', date = date, time = 'all', rlat = 'all', rlon = 'all', time_across = 'date', merge_across_dims = TRUE, - return_vars = list(lev = NULL, time = 'date', + return_vars = list(time = 'date', rlat = NULL, rlon = NULL, lat = NULL, lon = NULL), retrieve = TRUE, num_procs = 1) # check both are consistent: -all(data[1,1,1,1:8,,]==data_split[1,1,1,1,,,]) +all(data[1,1,1:8,,]==data_split[1,1,1,,,]) # Save important information from attributes lat <- attributes(data)$Variables$common$lat @@ -43,12 +41,11 @@ lon <- attributes(data)$Variables$common$lon rlat <- attributes(data)$Variables$common$rlat rlon <- attributes(data)$Variables$common$rlon # Or check information: -attributes(data)$Variables$common$lev attributes(data)$Variables$common$time # PLOT: -map <- data[1,1,1,1,,] # Time-step 1 +map <- data[1,1,1,,] # Time-step 1 library(sf) library(ggplot2) map <- data.frame(rlat = rep(rlat, length(rlon)), diff --git a/startR-manual.pdf b/startR-manual.pdf deleted file mode 100644 index 84ba5184bb5acc5bb7e84cf35887158bc6d1235a..0000000000000000000000000000000000000000 Binary files a/startR-manual.pdf and /dev/null differ diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index fde9c5e49074a03fade98c02a9c9669f1b84317e..da63e53a3ef0966bf17aab6ee7c9f8af46cf92f3 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -3,7 +3,7 @@ context("Start() different calendar") test_that("1. 360_day, daily, unit = 'days since 1850-01-01'", { path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast//HadGEM3-GC31-MM/', 'DCPP/MOHC/HadGEM3-GC31-MM/', - 'dcppA-hindcast/r1i1p1f2/day/$var$/gn/v20200101/', + 'dcppA-hindcast/r1i1p1f2/day/$var$/gn/v20200417/', '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$fyear$.nc') sdate <- c('2000', '2001') @@ -186,7 +186,7 @@ expect_equal( ) }) -test_that("6. standard, monthly, unit = 'months since 1870-01-16 12:00:00'", { +test_that("6. standard, monthly, unit = 'months since 1850-01-01 00:00:00'", { repos_obs <- '/esarchive/obs/ukmo/hadisst_v1.1/monthly_mean/$var$/$var$_$date$.nc' suppressWarnings( @@ -194,8 +194,8 @@ suppressWarnings( var = 'tos', date = '200505', #dates_file, time = 'all', - lat = indices(1:10), - lon = indices(1:10), + lat = indices(1), + lon = indices(1), time_across = 'date', #combine time and file_date dims merge_across_dims = TRUE, @@ -211,14 +211,37 @@ suppressWarnings( expect_equal( attr(obs, 'Variables')$common$time[1], - as.POSIXct('2005-05-01', tz = 'UTC') + as.POSIXct('2005-05-16 12:00:00', tz = 'UTC') ) expect_equal( dim(attr(obs, 'Variables')$common$time), c(time = 1) ) -}) +suppressWarnings( +data_obs <- Start(dat = repos_obs, + var = 'tos', + date = c("196011", "196012", "196101", "196102"), + latitude = indices(100), + longitude = indices(100), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(time = c('date'), + latitude = NULL, longitude = NULL), + retrieve = TRUE) +) + +expect_equal( +dim(attr(data_obs, 'Variables')$common$time), +c(date = 4, time = 1) +) +expect_equal( +as.character(attr(data_obs, 'Variables')$common$time), +c("1960-11-16 00:00:00", "1960-12-16 12:00:00", "1961-01-16 12:00:00", "1961-02-15 00:00:00") +) + +}) + test_that("7. proleptic_gregorian, monthly, unit = 'days since 1850-1-1 00:00:00'", { diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index 5e4d97252c149516d72eb7e0b6380c60d6bc01fb..ce30eec3100f661df7c99f3706af8a314597f366 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -433,14 +433,14 @@ path_list <- list( Had = list(name = 'HadGEM3', path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/', 'DCPP/MOHC/HadGEM3-GC31-MM/', - 'dcppA-hindcast/$member$/day/$var$/gn/v20200101/', + 'dcppA-hindcast/$member$/day/$var$/gn/v20200417/', '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-$member$_gn_$chunk$.nc'))) suppressWarnings( data <- Start(dataset = path_list, var = 'tasmin', member = list(c('r1i1p1f1', 'r2i1p1f2')), sdate = paste0(2018), - chunk = list(c('20181101-20281231', '20181101-20281230')), + chunk = list(c('20181101-20281231', '20181101-20181230')), time = indices(1), #'all', lat = values(list(0, 14)), lon = values(list(0, 28)), @@ -484,7 +484,7 @@ data <- Start(dataset = path_list, attr(data, 'Files'), array(c("/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/day/tasmin/gn/v20200101/tasmin_day_MPI-ESM1-2-HR_dcppA-hindcast_s2018-r1i1p1f1_gn_20181101-20281231.nc", NA, NA, NA, NA, NA, NA, - "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200101/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20281230.nc"), + "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200417/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20181230.nc"), dim = c(dataset = 2, var = 1, member = 2, sdate = 1, chunk = 2)) )