diff --git a/DESCRIPTION b/DESCRIPTION index bf0ed00992f66d66b2701e9199c1d41db7c593fc..c091bc3abc4aaa65c59522452d2b62a52c4d3f32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.1.0 +Version: 2.1.0-1 Authors@R: c( person("BSC-CNS", role = c("aut", "cph")), person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), @@ -28,7 +28,8 @@ Imports: parallel, easyNCDF, s2dverification, - ClimProjDiags + ClimProjDiags, + PCICt Suggests: stats, utils diff --git a/NAMESPACE b/NAMESPACE index a3ad75ff6f708fdbf186ab589ce3c267cb576c46..ccf783cbe12b94f8b703a403b9cf16f4be0ff870 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ export(Start) export(Step) export(indices) export(values) +import(PCICt) import(abind) import(bigmemory) import(easyNCDF) diff --git a/R/CDORemapper.R b/R/CDORemapper.R index 815b92a651d409a2ab1917125d486729ebeda52f..867ee2660250adfdca2b2a1e26e04678f5297989 100644 --- a/R/CDORemapper.R +++ b/R/CDORemapper.R @@ -54,8 +54,8 @@ #'@export CDORemapper <- function(data_array, variables, file_selectors = NULL, ...) 
{ file_dims <- names(file_selectors) - known_lon_names <- .KnownLonNames() - known_lat_names <- .KnownLatNames() + known_lon_names <- startR:::.KnownLonNames() + known_lat_names <- startR:::.KnownLatNames() if (!any(known_lon_names %in% names(variables)) || !any(known_lat_names %in% names(variables))) { stop("The longitude and latitude variables must be requested in ", diff --git a/R/NcDataReader.R b/R/NcDataReader.R index 0dfe0b2c0872a8043fbb399fde843c8d30d3c3de..222d11dcfae494bbd622ccd6094ad01001ae5f49 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -43,7 +43,7 @@ #' first_round_indices, synonims) #'@seealso \code{\link{NcOpener}} \code{\link{NcDimReader}} #' \code{\link{NcCloser}} \code{\link{NcVarReader}} -#'@import easyNCDF +#'@import easyNCDF PCICt #'@export NcDataReader <- function(file_path = NULL, file_object = NULL, file_selectors = NULL, inner_indices = NULL, @@ -182,32 +182,46 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, x } }) + if (length(names(attr(result, 'variables'))) == 1) { var_name <- names(attr(result, 'variables')) units <- attr(result, 'variables')[[var_name]][['units']] + if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) { if (units == 'seconds') { - units <- 'secs' +# units <- 'secs' } else if (units == 'minutes') { - units <- 'mins' +# units <- 'mins' + result <- result * 60 # min to sec } result[] <- paste(result[], units) + } else if (grepl(' since ', units)) { parts <- strsplit(units, ' since ')[[1]] units <- parts[1] + if (units %in% c('second', 'seconds')) { - units <- 'secs' +# units <- 'secs' } else if (units %in% c('minute', 'minutes')) { - units <- 'mins' - } else if (units == 'day') { - units <- 'days' +# units <- 'mins' + result <- result * 60 # min to sec + } else if (units %in% c('hour', 'hours')) { + result <- result * 60 * 60 # hour to sec + } else if (units %in% c('day', 'days')) { +# units <- 'days' + result <- result * 24 * 60 * 60 # day to sec } else if 
(units %in% c('month', 'months')) { result <- result * 30.5 - units <- 'days' + result <- result * 24 * 60 * 60 # day to sec +# units <- 'days' } - new_array <- rep(as.POSIXct(parts[2], tz = 'UTC'), length(result)) + - as.difftime(result[], units = units) + # Find the calendar + calendar <- attr(result, 'variables')$time$calendar + if (calendar == 'standard') calendar <- 'gregorian' + new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[] + new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC")) + #new_array <- seq(as.POSIXct(parts[2]), # length = max(result, na.rm = TRUE) + 1, # by = units)[result[] + 1] diff --git a/R/Start.R b/R/Start.R index 39e1e6cbd8588c37f555eaea8cb268ec45645e82..25b0e2dd1859bcdbbf9e09cb50a0371dd041dbd1 100644 --- a/R/Start.R +++ b/R/Start.R @@ -3179,7 +3179,7 @@ Start <- function(..., # dim = indices/selectors, # Before changing crop, first we need to find the name of longitude. # NOTE: The potential bug here (also the bug for CDORemapper): the lon name # is limited (only the ones listed in .KnownLonNames() are available. - known_lon_names <- .KnownLonNames() + known_lon_names <- startR:::.KnownLonNames() lon_name <- names(subset_vars_to_transform)[which(names(subset_vars_to_transform) %in% known_lon_names)[1]] # NOTE: The cases not considered: (1) if lon reorder(decreasing = T) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 947ad9f5c5591aa72b1db79d22d0dc0d884aea8e..24e7acb0dd0fb0cc2b0447819a040117c027b448 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -34,6 +34,7 @@ This document intends to be the first reference for any doubts that you may have 4. [My jobs work well in workstation and fatnodes but not on Power9 (or vice versa)](#4-my-jobs-work-well-in-workstation-and-fatnodes-but-not-on-power9-or-vice-versa) 5. [Errors related to wrong file formatting](#5-errors-related-to-wrong-file-formatting) 6. 
[Errors using a new cluster (setting Nord3)](#6-errors-using-a-new-cluster-setting-nord3) + 7. [Start() fails retrieving data](#7-start-fails-retrieving-data) ## 1. How to @@ -824,7 +825,6 @@ adopt the provided ones and use the first valid file to decide the rest of dimen By this means, the efficiency can be similar to `largest_dims_length = FALSE`. - # Something goes wrong... ### 1. No space left on device @@ -955,4 +955,16 @@ cannot read workspace version 3 written by R 3.6.2; need R 3.5.0 or newer change the R version used in your workstation to one newer. +### 7. Start() fails retrieving data + +If you get the following error message: + +``` +Exploring files... This will take a variable amount of time depending +* on the issued request and the performance of the file server... +Error in R_nc4_open: No such file or directory +Error in file_var_reader(NULL, file_object, NULL, var_to_read, synonims) : + Either 'file_path' or 'file_object' must be provided. +``` +check if your path contains the label $var$ in the path. If not, try to add it as part of the path or the file name, where $var$ is the variable to retrieve from files. diff --git a/inst/doc/figures/Rotated_Coordinates.png b/inst/doc/figures/Rotated_Coordinates.png new file mode 100644 index 0000000000000000000000000000000000000000..c5bc9cf55905972921a816e1bf10a1b8c1e9d8fb Binary files /dev/null and b/inst/doc/figures/Rotated_Coordinates.png differ diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 82e0bf73cc76924270c8ecd20c2377595ef956de..ac4e4adde9ec1a7188a785dd3ced3e2e13f28794 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -49,7 +49,10 @@ You can find more explanation in FAQ [How-to-20](inst/doc/faq.md#20-use-metadata 11. [Three methods to load experimental files with different member and version](inst/doc/usecase/ex1_11_expid_member_version.R) This script shows three ways to load the data with different expid - member - version combination.
It is useful for climate prediction of multiple experiments. - + + 12. [Load and plot data in rotated coordinates](inst/doc/usecase/ex1_12_rotated_coordinates.R) + This script shows how to load and plot data in rotated coordinates using **Monarch-dust** simulations. + 2. **Execute computation (use `Compute()`)** 1. [Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R) diff --git a/inst/doc/usecase/ex1_12_rotated_coordinates.R b/inst/doc/usecase/ex1_12_rotated_coordinates.R new file mode 100644 index 0000000000000000000000000000000000000000..3da4b794a376c71de0e6848e927b78e7824cf86e --- /dev/null +++ b/inst/doc/usecase/ex1_12_rotated_coordinates.R @@ -0,0 +1,65 @@ +# Núria Pérez-Zanón +# Loading and plotting data in rotated coordinates +# Monarch-dustclim +# Required modules in workstation: R 3.6.1 and GDAL/2.2.1-foss-2015a-GEOS-3.8.0 +#------------------------------- +library(startR) + +obs_path <- '/esarchive/exp/monarch-dustclim/3hourly/$var$/$var$_$date$03_av_an.nc' + +date <- c('20131229', '20131230') +# two temporal dimensions: one for days and another for hours +data_split <- Start(dataset = obs_path, + var = 'od550du', + lev = 'all', + date = date, + time = 'all', + rlat = 'all', + rlon = 'all', + return_vars = list(lev = NULL, time = NULL, + rlat = NULL, rlon = NULL, + lat = NULL, lon = NULL), + retrieve = TRUE, num_procs = 1) +# one temporal dimension +data <- Start(dataset = obs_path, + var = 'od550du', + lev = 'all', + date = date, + time = 'all', + rlat = 'all', + rlon = 'all', + time_across = 'date', + merge_across_dims = TRUE, + return_vars = list(lev = NULL, time = 'date', + rlat = NULL, rlon = NULL, + lat = NULL, lon = NULL), + retrieve = TRUE, num_procs = 1) +# check both are consistent: +all(data[1,1,1,1:8,,]==data_split[1,1,1,1,,,]) + +# Save important information from attributes +lat <- attributes(data)$Variables$common$lat +lon <- attributes(data)$Variables$common$lon +rlat <- attributes(data)$Variables$common$rlat +rlon <-
attributes(data)$Variables$common$rlon +# Or check information: +attributes(data)$Variables$common$lev +attributes(data)$Variables$common$time + + +# PLOT: +map <- data[1,1,1,1,1,,] # Time-step 1 +library(sf) +library(ggplot2) +map <- data.frame(rlat = rep(rlat, length(rlon)), + rlon = sort(rep(rlon, length(rlat))), datos = as.vector(map)) +dat_map <- st_as_sf(map, coords = c("rlon", "rlat"), crs = 4326) +x <- lwgeom::st_transform_proj(dat_map, c(st_crs(4326)$proj4string, "+proj=ob_tran +o_proj=longlat +o_lon_p=-160 +o_lat_p=55 +lon_0=180 +to_meter=0.01745329")) +plot(x) +bbox <- st_bbox(x) +borders <- st_as_sf(maps::map('world', plot = FALSE, fill = TRUE)) +st_crs(x) = NA # ?! (!) +st_crs(x) = 4326 +ggplot() + + geom_sf(data=x, aes(color=datos)) + + geom_sf(data = borders, fill=NA, colour="black") + coord_sf(xlim = bbox[c(1,3)],ylim = bbox[c(2,4)]) diff --git a/inst/doc/usecase/ex1_2_exp_obs_attr.R b/inst/doc/usecase/ex1_2_exp_obs_attr.R index acff7c9167d19b7bee40e96913859c0982c310b3..2e4b5da965ba642081bf0e362c7b873004f9d9e7 100644 --- a/inst/doc/usecase/ex1_2_exp_obs_attr.R +++ b/inst/doc/usecase/ex1_2_exp_obs_attr.R @@ -89,13 +89,12 @@ print(attr(obs, 'Dimensions')) ##-----time----- print(attr(exp, 'Variables')$common$time) -# [1] "2005-01-16 13:14:44 CET" "2006-01-16 13:14:44 CET" -# [3] "2007-01-16 13:14:44 CET" "2008-01-16 13:14:44 CET" -# [5] "2005-02-15 01:14:44 CET" "2006-02-15 01:14:44 CET" -# [7] "2007-02-15 01:14:44 CET" "2008-02-15 13:14:44 CET" -# [9] "2005-03-16 13:14:44 CET" "2006-03-16 13:14:44 CET" -#[11] "2007-03-16 13:14:44 CET" "2008-03-16 13:14:44 CET" - +# [1] "2005-01-16 12:00:00 UTC" "2006-01-16 12:00:00 UTC" +# [3] "2007-01-16 12:00:00 UTC" "2008-01-16 12:00:00 UTC" +# [5] "2005-02-15 00:00:00 UTC" "2006-02-15 00:00:00 UTC" +# [7] "2007-02-15 00:00:00 UTC" "2008-02-15 12:00:00 UTC" +# [9] "2005-03-16 12:00:00 UTC" "2006-03-16 12:00:00 UTC" +#[11] "2007-03-16 12:00:00 UTC" "2008-03-16 12:00:00 UTC" print(attr(obs, 'Variables')$common$time) 
# [1] "2005-01-31 18:00:00 UTC" "2006-01-31 18:00:00 UTC" # [3] "2007-01-31 18:00:00 UTC" "2008-01-31 18:00:00 UTC" diff --git a/inst/doc/usecase/ex1_3_attr_loadin.R b/inst/doc/usecase/ex1_3_attr_loadin.R index a918e167c6f23092d8d27690613061e98e7affd5..d514c3061745a7aa926c31e094727806b3219238 100644 --- a/inst/doc/usecase/ex1_3_attr_loadin.R +++ b/inst/doc/usecase/ex1_3_attr_loadin.R @@ -104,6 +104,30 @@ erai[1, 1, 2, 31, 1, 1] # 1st March also, since June only has 30 days # The experimental and observational data are comparable with same structure. +#---------Check time attributes-------- +dim(attr(erai, 'Variables')$common$time) +#file_date time +# 8 31 +attr(erai, 'Variables')$common$time[1, ] +# [1] "1994-05-01 UTC" "1994-05-02 UTC" "1994-05-03 UTC" "1994-05-04 UTC" +# [5] "1994-05-05 UTC" "1994-05-06 UTC" "1994-05-07 UTC" "1994-05-08 UTC" +# [9] "1994-05-09 UTC" "1994-05-10 UTC" "1994-05-11 UTC" "1994-05-12 UTC" +#[13] "1994-05-13 UTC" "1994-05-14 UTC" "1994-05-15 UTC" "1994-05-16 UTC" +#[17] "1994-05-17 UTC" "1994-05-18 UTC" "1994-05-19 UTC" "1994-05-20 UTC" +#[21] "1994-05-21 UTC" "1994-05-22 UTC" "1994-05-23 UTC" "1994-05-24 UTC" +#[25] "1994-05-25 UTC" "1994-05-26 UTC" "1994-05-27 UTC" "1994-05-28 UTC" +#[29] "1994-05-29 UTC" "1994-05-30 UTC" "1994-05-31 UTC" +attr(erai, 'Variables')$common$time[2, ] +# [1] "1994-06-01 UTC" "1994-06-02 UTC" "1994-06-03 UTC" "1994-06-04 UTC" +# [5] "1994-06-05 UTC" "1994-06-06 UTC" "1994-06-07 UTC" "1994-06-08 UTC" +# [9] "1994-06-09 UTC" "1994-06-10 UTC" "1994-06-11 UTC" "1994-06-12 UTC" +#[13] "1994-06-13 UTC" "1994-06-14 UTC" "1994-06-15 UTC" "1994-06-16 UTC" +#[17] "1994-06-17 UTC" "1994-06-18 UTC" "1994-06-19 UTC" "1994-06-20 UTC" +#[21] "1994-06-21 UTC" "1994-06-22 UTC" "1994-06-23 UTC" "1994-06-24 UTC" +#[25] "1994-06-25 UTC" "1994-06-26 UTC" "1994-06-27 UTC" "1994-06-28 UTC" +#[29] "1994-06-29 UTC" "1994-06-30 UTC" NA + + # //////////////////"BONUS"////////////////////// # Here is something more to show the usage of 
parameter 'merge_across_dims_narm'. diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R new file mode 100644 index 0000000000000000000000000000000000000000..19ac0dff59f48fe7c679c403c0440347315d6229 --- /dev/null +++ b/tests/testthat/test-Start-calendar.R @@ -0,0 +1,180 @@ +context("Start() different calendar") + +test_that("1. 360_day, daily", { + + path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/', + 'cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/', + 'dcppA-hindcast/r1i1p1f2/day/$var$/gn/v20200101/', + '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$fyear$.nc') + + sdate <- c('2000', '2001') + fyear_hadgem3 <- indices(1) + + suppressWarnings( + data <- Start(dat = path_hadgem3, + var = 'tasmax', + sdate = sdate, + fyear = fyear_hadgem3, + fyear_depends = 'sdate', + time = indices(1:3), + lat = indices(1), lon = indices(1), + return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), + retrieve = FALSE) + ) + +expect_equal( + dim(attr(data, 'Variables')$common$time), + c(sdate = 2, time = 3) +) + time <- c(as.POSIXct('2000-11-01 12:00:00', tz = 'UTC'), + as.POSIXct('2000-11-02 12:00:00', tz = 'UTC'), + as.POSIXct('2000-11-03 12:00:00', tz = 'UTC')) + attr(time, "tzone") <- "UTC" +expect_equal( + attr(data, 'Variables')$common$time[1, ], + time +) + time <- c( + as.POSIXct('2001-11-01 12:00:00', tz = 'UTC'), + as.POSIXct('2001-11-02 12:00:00', tz = 'UTC'), + as.POSIXct('2001-11-03 12:00:00', tz = 'UTC')) + attr(time, "tzone") <- "UTC" +expect_equal( + attr(data, 'Variables')$common$time[2, ], + time +) + + +}) + +test_that("2. 
365_day, daily", { +path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200408/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' + + data <- Start(dat = path_bcc_csm2, + var = 'tasmax', + sdate = '1980', + time = indices(1:3), + lat = indices(1), + lon = indices(1), + return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), + retrieve = FALSE) + +expect_equal( + dim(attr(data, 'Variables')$common$time), + c(sdate = 1, time = 3) +) + time <- c(as.POSIXct('1980-01-01 12:00:00', tz = 'UTC'), + as.POSIXct('1980-01-02 12:00:00', tz = 'UTC'), + as.POSIXct('1980-01-03 12:00:00', tz = 'UTC')) + attr(time, "tzone") <- "UTC" +expect_equal( + attr(data, 'Variables')$common$time[1, ], + time +) + + +}) + + +test_that("3. gregorian/standard, daily", { + + path_mpi_esm <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/mpi-esm1-2-hr/', + 'cmip6-dcppA-hindcast_i1p1/DCPP/MPI-M/MPI-ESM1-2-HR/', + 'dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200101/', + '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$fyear$.nc') + + var <- 'tasmax' + sdate <- '2000' + fyear_mpi_esm <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1231') + + data <- Start(dat = path_mpi_esm, + var = var, + sdate = sdate, + fyear = fyear_mpi_esm, + fyear_depends = 'sdate', + time = indices(1:3), + lat = indices(1), + lon = indices(1), + return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), + retrieve = FALSE) + +expect_equal( + dim(attr(data, 'Variables')$common$time), + c(sdate = 1, time = 3) +) + time <- c(as.POSIXct('2000-11-01 12:00:00', tz = 'UTC'), + as.POSIXct('2000-11-02 12:00:00', tz = 'UTC'), + as.POSIXct('2000-11-03 12:00:00', tz = 'UTC')) + attr(time, "tzone") <- "UTC" +expect_equal( + attr(data, 'Variables')$common$time[1, ], + time +) + + +}) + +test_that("4. 
gregorian/standard, monthly", { + + path_mpi_esm <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/mpi-esm1-2-hr/', + 'cmip6-dcppA-hindcast_i1p1/DCPP/MPI-M/MPI-ESM1-2-HR/', + 'dcppA-hindcast/r1i1p1f1/Amon/$var$/gn/v20200320/', + '$var$_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_200011-201012.nc') + sdate <- '2000' + fyear_mpi_esm <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1231') + + data <- Start(dat = path_mpi_esm, + var = 'tasmax', + sdate = '2000', + time = indices(1:3), + lat = indices(1), + lon = indices(1), + return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), + retrieve = FALSE) + +expect_equal( + dim(attr(data, 'Variables')$common$time), + c(sdate = 1, time = 3) +) + time <- c(as.POSIXct('2000-11-16 00:00:00', tz = 'UTC'), + as.POSIXct('2000-12-16 12:00:00', tz = 'UTC'), + as.POSIXct('2001-01-16 12:00:00', tz = 'UTC')) + attr(time, "tzone") <- "UTC" +expect_equal( + attr(data, 'Variables')$common$time[1, ], + time +) + +}) + +test_that("4. gregorian/standard, 6hrly", { + repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_199405.nc') + date <- paste0('1994-05-', sprintf('%02d', 1:31), ' 00:00:00') + date <- as.POSIXct(date, tz = 'UTC') +# attr(date, 'tzone') <- 'UTC' + data <- Start(dat = repos_obs, + var = 'tas', + time = date, + latitude = indices(1), + longitude = indices(1), + return_vars = list(latitude = NULL, + longitude = NULL, + time = NULL), + retrieve = TRUE) + +expect_equal( + as.vector(attr(data, 'Variables')$common$time[1:31]), + as.vector(date) +) +expect_equal( + as.vector(data)[1:5], + c(255.0120, 256.8095, 254.3654, 254.6059, 257.0551), + tolerance = 0.0001 +) +expect_equal( + as.vector(data)[length(data)], + c(268.2216), + tolerance = 0.0001 +) +})