Commit c5954c3c authored by erifarov's avatar erifarov
Browse files

Merge branch 'master' into develop-warning-test

parents f81a4802 40c3743f
Pipeline #7977 passed with stage
in 78 minutes and 57 seconds
......@@ -3,7 +3,7 @@ stages:
build:
stage: build
script:
- module load R/3.6.1-foss-2015a-bare
- module load R/4.1.2-foss-2015a-bare
- module load CDO/1.9.8-foss-2015a
- R CMD build --resave-data .
- R CMD check --as-cran --no-manual --run-donttest startR_*.tar.gz
......
Package: startR
Title: Automatically Retrieve Multidimensional Distributed Data Sets
Version: 2.2.0-1
Version: 2.2.0-2
Authors@R: c(
person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")),
person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")),
......@@ -39,4 +39,4 @@ License: Apache License 2.0
URL: https://earth.bsc.es/gitlab/es/startR/
BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues
SystemRequirements: cdo ecFlow
RoxygenNote: 7.0.1
RoxygenNote: 7.2.0
# startR v2.2.0-2 (Release date: 2022-08-25)
- Use the destination grid to decide which indices to take after interpolation.
- Bugfix when Start() parameter "return_vars" is not used.
- Allow netCDF files to not have calendar attributes (force it to be standard calendar)
# startR v2.2.0-1 (Release date: 2022-04-19)
- Bugfix for the case that the variable has units like time, e.g., "days".
- Development of metadata reshaping. The metadata should correspond to data if data are reshaped by parameter "merge_across_dims" and "split_multiselected_dims", as well as if data selectors are not continuous indices.
......
......@@ -210,6 +210,17 @@ NcDataReader <- function(file_path = NULL, file_object = NULL,
} else if (grepl(' since ', units)) {
# Find the calendar
calendar <- attr(result, 'variables')[[var_name]]$calendar
# Calendar types recognized by as.PCICt()
cal.list <- c("365_day", "365", "noleap", "360_day", "360", "gregorian", "standard", "proleptic_gregorian")
if (is.null(calendar)) {
warning("Calendar is missing. Use the standard calendar to calculate time values.")
calendar <- 'gregorian'
} else if (!calendar %in% cal.list) {
# if calendar is not recognized by as.PCICt(), forced it to be standard
warning("The calendar type '", calendar, "' is not recognized by NcDataReader(). It is forced to be standard type.")
calendar <- 'gregorian'
}
if (calendar == 'standard') calendar <- 'gregorian'
parts <- strsplit(units, ' since ')[[1]]
......@@ -291,6 +302,7 @@ NcDataReader <- function(file_path = NULL, file_object = NULL,
result <- result * 30 * 24 * 60 * 60 # day to sec
} else { #old code. The calendar is not in any of the above.
#NOTE: Should not have a chance to be used because the calendar types are forced to be standard above already.
result <- result * 30.5
result <- result * 24 * 60 * 60 # day to sec
}
......
......@@ -1413,7 +1413,7 @@ Start <- function(..., # dim = indices/selectors,
# Chunk it only if it is defined dim (i.e., list of character with names of depended dim)
if (!(length(dat_selectors[[depending_dim_name]]) == 1 &&
dat_selectors[[depending_dim_name]] %in% c('all', 'first', 'last'))) {
if (sapply(dat_selectors[[depending_dim_name]], is.character)) {
if (any(sapply(dat_selectors[[depending_dim_name]], is.character))) {
dat_selectors[[depending_dim_name]] <-
dat_selectors[[depending_dim_name]][desired_chunk_indices]
}
......@@ -3073,97 +3073,18 @@ Start <- function(..., # dim = indices/selectors,
sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]]
}
# Chunk sub_array_of_sri if this inner_dim needs to be chunked
#TODO: Potential problem: the transformed_subset_var value falls between
# the end of sub_sub_array_of_values of the 1st chunk and the beginning
# of sub_sub_array_of_values of the 2nd chunk. Then, one sub_array_of_sri
# will miss. 'previous_sri' is checked and will be included if this
# situation happens, but don't know if the transformed result is
# correct or not.
# NOTE: The chunking criteria may not be 100% correct. The current way
# is to pick the sri that larger than the minimal sub_sub_array_of_values
# and smaller than the maximal sub_sub_array_of_values; if it's
# the first chunk, make sure the 1st sri is included; if it's the
# last chunk, make sure the last sri is included.
if (chunks[[inner_dim]]["n_chunks"] > 1) {
sub_array_of_sri_complete <- sub_array_of_sri
if (is.list(sub_sub_array_of_values)) { # list
sub_array_of_sri <-
which(transformed_subset_var >= min(unlist(sub_sub_array_of_values)) &
transformed_subset_var <= max(unlist(sub_sub_array_of_values)))
# if it's 1st chunk & the first sri is not included, include it.
if (chunks[[inner_dim]]["chunk"] == 1 &
!(sub_array_of_sri_complete[1] %in% sub_array_of_sri)) {
sub_array_of_sri <- c(sub_array_of_sri_complete[1], sub_array_of_sri)
}
# if it's last chunk & the last sri is not included, include it.
if (chunks[[inner_dim]]["chunk"] == chunks[[inner_dim]]["n_chunks"] &
!(tail(sub_array_of_sri_complete, 1) %in% sub_array_of_sri)) {
sub_array_of_sri <- c(sub_array_of_sri, tail(sub_array_of_sri_complete, 1))
}
# Check if sub_array_of_sri perfectly connects to the previous sri.
# If not, inlclude the previous sri.
#NOTE 1: don't know if the transform for the previous sri is
# correct or not.
#NOTE 2: If crop = T, sub_array_of_sri always starts from 1.
# Don't know if the cropping will miss some sri or not.
if (sub_array_of_sri[1] != 1) {
if (!is.null(previous_sub_sub_array_of_values)) {
# if decreasing = F
if (transformed_subset_var[1] < transformed_subset_var[2]) {
previous_sri <- max(which(transformed_subset_var <= previous_sub_sub_array_of_values))
} else {
# if decreasing = T
previous_sri <- max(which(transformed_subset_var >= previous_sub_sub_array_of_values))
}
if (previous_sri + 1 != sub_array_of_sri[1]) {
sub_array_of_sri <- (previous_sri + 1):sub_array_of_sri[length(sub_array_of_sri)]
}
}
}
#========================================================
} else { # is vector
tmp <- which(transformed_subset_var >= min(sub_sub_array_of_values) &
transformed_subset_var <= max(sub_sub_array_of_values))
# Ensure tmp and sub_array_of_sri are both ascending or descending
if (is.unsorted(tmp) != is.unsorted(sub_array_of_sri)) {
tmp <- rev(tmp)
}
# Include first or last sri if tmp doesn't have. It's only for
# ""vectors"" because vectors look for the closest value.
#NOTE: The condition here is not correct. The criteria should be
# 'vector' instead of indices.
if (chunks[[inner_dim]]["chunk"] == 1) {
sub_array_of_sri <- unique(c(sub_array_of_sri[1], tmp))
} else if (chunks[[inner_dim]]["chunk"] ==
chunks[[inner_dim]]["n_chunks"]) { # last chunk
sub_array_of_sri <- unique(c(tmp, sub_array_of_sri[length(sub_array_of_sri)]))
} else {
sub_array_of_sri <- tmp
}
# Check if sub_array_of_sri perfectly connects to the previous sri.
# If not, inlclude the previous sri.
#NOTE 1: don't know if the transform for the previous sri is
# correct or not.
#NOTE 2: If crop = T, sub_array_of_sri always starts from 1.
# Don't know if the cropping will miss some sri or not.
if (sub_array_of_sri[1] != 1) {
if (!is.null(previous_sub_sub_array_of_values)) {
# if decreasing = F
if (transformed_subset_var[1] < transformed_subset_var[2]) {
previous_sri <- max(which(transformed_subset_var <= previous_sub_sub_array_of_values))
} else {
# if decreasing = T
previous_sri <- max(which(transformed_subset_var >= previous_sub_sub_array_of_values))
}
if (previous_sri + 1 != which(sub_array_of_sri[1] == sub_array_of_sri_complete)) {
sub_array_of_sri <- (previous_sri + 1):sub_array_of_sri[length(sub_array_of_sri)]
}
}
}
}
# Instead of using values to find sri, directly use the destination grid to count.
#NOTE: sub_array_of_sri seems to start at 1 always (because crop = c(lonmin, lonmax, latmin, latmax) already?)
if (chunks[[inner_dim]]["n_chunks"] > 1) {
sub_array_of_sri <- sub_array_of_sri[get_chunk_indices(
length(sub_array_of_sri),
chunks[[inner_dim]]["chunk"],
chunks[[inner_dim]]["n_chunks"],
inner_dim)]
}
#========================================================
ordered_sri <- sub_array_of_sri
sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri]
......@@ -3360,6 +3281,11 @@ Start <- function(..., # dim = indices/selectors,
selector_store_position <- chunk
}
sub_array_of_indices <- transformed_indices[which(indices_chunk == chunk)]
#NOTE: This 'with_transform' part is probably not tested because
# here is for the inner dim that goes across a file dim, which
# is normally not lat and lon dimension. If in the future, we
# can interpolate time, this part needs to be examined.
if (with_transform) {
# If the provided selectors are expressed in the world
# before transformation
......
......@@ -566,7 +566,7 @@ generate_vars_to_transform <- function(vars_to_transform, picked_vars, transform
# Turn indices to values for transform_crop_domain
generate_transform_crop_domain_values <- function(transform_crop_domain, picked_vars) {
if (transform_crop_domain == 'all') {
if (any(transform_crop_domain == 'all')) {
transform_crop_domain <- c(picked_vars[1], tail(picked_vars, 1))
} else { # indices()
if (is.list(transform_crop_domain)) {
......
......@@ -10,8 +10,8 @@
library(startR)
library(s2dv)
path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/cmcc-cm2-sr5/cmip6-dcppA-hindcast_i1p1/',
'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20210312/',
path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/',
'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20200101/',
'$var$_*_s$sdate$-$member$_gn_$aux$.nc')
data <- Start(dataset = path,
......@@ -19,38 +19,35 @@ data <- Start(dataset = path,
sdate = c('1960', '1961'),
aux = 'all',
aux_depends = 'sdate',
j = indices(2:361), # remove two indices to avoid white strips
i = indices(2:291), # remove two indices to avoid white strips
x = indices(2:361), # remove two indices to avoid white strips
y = indices(2:291), # remove two indices to avoid white strips
time = indices(1:12),
member = 'r1i1p1f1',
return_vars = list(j = NULL, i = NULL,
latitude = NULL, longitude = NULL),
return_vars = list(nav_lat = NULL, nav_lon = NULL),
retrieve = F)
attr(data, 'Dimensions')
#dataset var sdate aux j i time member
#dataset var sdate aux x y time member
# 1 1 2 1 360 290 12 1
dim(attr(data, 'Variables')$common$longitude)
# j i
#360 290
dim(attr(data, 'Variables')$common$latitude)
# j i
#360 290
dim(attr(data, 'Variables')$common$nav_lon)
# x y
#362 292
dim(attr(data, 'Variables')$common$nav_lat)
# x y
#362 292
func_regrid <- function(data) {
lons <- attr(data, 'Variables')$common$longitude
lats <- attr(data, 'Variables')$common$latitude
data <- s2dv::CDORemap(data, lons, lats, grid = 'r360x180',
method = 'bil', crop = FALSE)
lons <- attr(data, 'Variables')$common$nav_lon
lats <- attr(data, 'Variables')$common$nav_lat
data <- s2dv::CDORemap(data, lons[2:361, 2:291], lats[2:361, 2:291],
grid = 'r360x180', method = 'bil', crop = FALSE)
lons_reg <- data[['lons']]
lats_reg <- data[['lats']]
return(list(data = data[[1]], lats = lats_reg, lons = lons_reg))
}
#NOTE: The data transposes if target_dims are only 'j' and 'i'.
# If only 'j' and 'i', output_dims will be 'lat', 'lon'.
step <- Step(fun = func_regrid,
target_dims = list(data = c('j', 'i')),
target_dims = list(data = c('x', 'y')),
output_dims = list(data = c('lon', 'lat'),
lats = 'lat', lons = 'lon'),
use_attributes = list(data = "Variables"))
......
......@@ -36,7 +36,7 @@ to use for the computation. The default value is 1.}
\item{cluster}{A list of components that define the configuration of the
machine to be run on. The comoponents vary from the different machines.
Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more
Check \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{Practical guide on GitLab} for more
details and examples. Only needed when the computation is not run locally.
The default value is NULL.}
......
......@@ -5,7 +5,7 @@ context("Irregular regriding in the workflow")
test_that("1. ex2_13", {
path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/',
'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20210312/',
'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20200101/',
'$var$_*_s$sdate$-$member$_gn_$aux$.nc')
suppressWarnings(
data <- Start(dataset = path,
......@@ -13,28 +13,27 @@ data <- Start(dataset = path,
sdate = '1960',
aux = 'all',
aux_depends = 'sdate',
j = indices(2:361), # remove two indices to avoid white strips
i = indices(2:291), # remove two indices to avoid white strips
x = indices(2:361), # remove two indices to avoid white strips
y = indices(2:291), # remove two indices to avoid white strips
time = indices(1),
member = 'r1i1p1f1',
return_vars = list(j = NULL, i = NULL,
latitude = NULL, longitude = NULL),
# synonims = list(j = c(j, x), i = c(i, y)),
return_vars = list(#x = NULL, y = NULL,
nav_lat = NULL, nav_lon = NULL),
retrieve = F)
)
func_regrid <- function(data) {
lons <- attr(data, 'Variables')$common$longitude
lats <- attr(data, 'Variables')$common$latitude
data <- s2dv::CDORemap(data, lons, lats, grid = 'r360x180',
lons <- attr(data, 'Variables')$common$nav_lon
lats <- attr(data, 'Variables')$common$nav_lat
data <- s2dv::CDORemap(data, lons[2:361, 2:291], lats[2:361, 2:291], grid = 'r360x180',
method = 'bil', crop = FALSE)
lons_reg <- data[['lons']]
lats_reg <- data[['lats']]
return(list(data = data[[1]], lats = lats_reg, lons = lons_reg))
}
#NOTE: The data transposes if target_dims are only 'j' and 'i'.
# If only 'j' and 'i', output_dims will be 'lat', 'lon'.
step <- Step(fun = func_regrid,
target_dims = list(data = c('j', 'i')),
target_dims = list(data = c('x', 'y')),
output_dims = list(data = c('lon', 'lat'),
lats = 'lat', lons = 'lon'),
use_attributes = list(data = "Variables"))
......@@ -55,11 +54,11 @@ c(lon = 360, dataset = 1, var = 1, sdate = 1, aux = 1, time = 1, member = 1)
)
expect_equal(
attr(data, 'Dimensions'),
c(dataset = 1, var = 1, sdate = 1, aux = 1, j = 360, i = 290, time = 1, member = 1)
c(dataset = 1, var = 1, sdate = 1, aux = 1, x = 360, y = 290, time = 1, member = 1)
)
expect_equal(
mean(res$data, na.rm = T),
13.20951,
8.782398,
tolerance = 0.0001
)
expect_equal(
......@@ -67,5 +66,10 @@ drop(res$data)[120,105:110],
c(28.32521, 28.07044, 27.59033, 27.02514, 26.55184, 26.67090),
tolerance = 0.0001
)
expect_equal(
range(res$data),
c(-1.799982, 31.130471),
tolerance = 0.0001
)
})
......@@ -15,33 +15,36 @@ data <- Start(dat = path,
member = indices(1:2),
transform = CDORemapper,
transform_extra_cells = 2,
transform_params = list(grid = 'r100x50', method = 'conservative', crop = FALSE),
transform_vars = c('lat','lon'),
transform_params = list(grid = 'r100x50', method = 'conservative'),
transform_vars = c('lat', 'lon'),
synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')),
return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'),
retrieve = FALSE)
)
func <- function(x) {
a <- mean(x, na.rm = TRUE)
return(a)
}
step <- Step(func, target_dims = c('time'),
output_dims = NULL)
wf <- AddStep(data, step)
func <- function(x) {
return(x)
}
step <- Step(func, target_dims = c('time'),
output_dims = 'time')
wf <- AddStep(data, step)
#---
suppressWarnings(
res <- Compute(wf,
chunks = list(member = 2))
resT <- eval(data)
)
suppressWarnings(
res1 <- Compute(wf, chunks = list(member = 2))
)
expect_equal(
dim(res$output1),
c(dat = 1, var = 1, sdate = 1, lat = 50, lon = 100, fyear = 1, member = 2)
dim(res1$output1),
c(time = 2, dat = 1, var = 1, sdate = 1, lat = 50, lon = 100, fyear = 1, member = 2)
)
expect_equal(
res$output1[1, 1, 1, 10:12, 20, 1, 1],
c(274.2808, 275.8509, 277.7623),
tolerance = 0.0001
as.vector(resT),
as.vector(res1$output1)
)
})
......@@ -49,8 +52,7 @@ tolerance = 0.0001
test_that("2. chunk along lon", {
#skip_on_cran()
#!!!!!!!!!!!!!!!!!!!NOTE: the results are not identical when exp has extra cells = 2!!!!!!!!!!!!!!!!!!
# But exp2 (retrieve = T) has the same results with extra_cells = 2 and 8.
#NOTE: the results are not identical when exp has extra cells = 2
suppressWarnings(
exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
......@@ -64,8 +66,8 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$
longitude_reorder = CircularSort(0, 360),
transform = CDORemapper,
transform_params = list(grid = 'r100x50',
method = 'con',
crop = FALSE),
method = 'con'),
# crop = FALSE),
transform_vars = c('latitude', 'longitude'),
transform_extra_cells = 8,
synonims = list(latitude = c('lat', 'latitude'),
......@@ -81,46 +83,35 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$
}
step <- Step(func, target_dims = 'time', output_dims = 'time')
wf <- AddStep(exp, step)
#---
suppressWarnings(
res <- Compute(wf, chunks = list(longitude = 2))
resT <- eval(exp)
)
suppressWarnings(
res2 <- Compute(wf, chunks = list(ensemble = 1))
res1 <- Compute(wf, chunks = list(longitude = 2))$output1
)
expect_equal(
res$output1,
res2$output1
#suppressWarnings(
# res2 <- Compute(wf, chunks = list(ensemble = 1))
#)
suppressWarnings(
res3 <- Compute(wf, chunks = list(longitude = 2, latitude = 2))$output1
)
# Check with retrieve = TRUE
suppressWarnings(
exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
var = 'tas',
sdate = '20000101',
ensemble = indices(1),
time = indices(1),
latitude = 'all',
latitude_reorder = Sort(),
longitude = 'all',
longitude_reorder = CircularSort(0, 360),
transform = CDORemapper,
transform_params = list(grid = 'r100x50',
method = 'con',
crop = FALSE),
transform_vars = c('latitude', 'longitude'),
transform_extra_cells = 2,
synonims = list(latitude = c('lat', 'latitude'),
longitude = c('longitude', 'lon')),
return_vars = list(latitude = 'dat',#NULL,
longitude = 'dat',#NULL,
time = 'sdate'),
retrieve = T)
res4 <- Compute(wf, chunks = list(longitude = 3))$output1
)
expect_equal(
as.vector(res$output1),
as.vector(exp2)
as.vector(resT),
as.vector(res1)
)
expect_equal(
res1,
res3
)
expect_equal(
res1,
res4
)
})
......@@ -40,8 +40,8 @@ exp <- Start(dat = path,
lon_reorder = CircularSort(0, 360),
transform = CDORemapper,
transform_extra_cells = 8,
transform_params = list(grid = 'r100x50', method = 'conservative',
crop = c(0, 360, -90, 90)),
transform_params = list(grid = 'r100x50', method = 'conservative'),
# crop = c(0, 360, -90, 90)),
transform_vars = c('lat','lon'),
synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')),
return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'),
......@@ -53,20 +53,24 @@ func <- function(x) {
step <- Step(func, target_dims = 'time', output_dims = 'time')
wf <- AddStep(exp, step)
#---
suppressWarnings(
res1 <- Compute(wf, chunks = list(lon = 2))
resT <- eval(exp)
)
suppressWarnings(
res2 <- Compute(wf, chunks = list(ensemble = 1))
res1 <- Compute(wf, chunks = list(lon = 2))
)
#suppressWarnings(
#res2 <- Compute(wf, chunks = list(ensemble = 1))
#)
suppressWarnings(
res3 <- Compute(wf, chunks = list(lon = 3))
)
expect_equal(
res1$output1,
res2$output1
as.vector(res1$output1),
as.vector(resT)
)
expect_equal(
res1$output1,
......@@ -88,8 +92,8 @@ exp <- Start(dat = path,
lon_reorder = CircularSort(0, 360),
transform = CDORemapper,
transform_extra_cells = 8,
transform_params = list(grid = 'r100x50', method = 'conservative',
crop = c(0, 360, -90, 90)),
transform_params = list(grid = 'r100x50', method = 'conservative'),
# crop = c(0, 360, -90, 90)),
transform_vars = c('lat','lon'),
synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')),
return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'),
......@@ -101,6 +105,7 @@ func <- function(x) {
step <- Step(func, target_dims = 'time', output_dims = 'time')
wf <- AddStep(exp, step)
#---
suppressWarnings(
res1_list <- Compute(wf, chunks = list(lon = 2))
)
......@@ -109,116 +114,6 @@ res1$output1,
res1_list$output1
)
#-----------------------------------
# crop = FALSE
suppressWarnings(
exp <- Start(dat = path,
var = 'tas',
sdate = '20000101',
ensemble = indices(1),
time = indices(1),
lat = indices(1:640),
lon = indices(1:1296),
lat_reorder = Sort(),
lon_reorder = CircularSort(0, 360),
transform = CDORemapper,
transform_extra_cells = 8,
transform_params = list(grid = 'r100x50', method = 'conservative',
crop = FALSE),
transform_vars = c('lat','lon'),
synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')),
return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'),
retrieve = F)
)
func <- function(x) {
return(x)
}
step <- Step(func, tar