Loading 3 datasets in the same Start call
Hi @aho,
I open this issue because I am trying to load 3 datasets at once with Start and I get different results when I load them separately. Also, when comparing the results with CST_Load, they are a bit different. Here below I give more details about the code I use. The code is from the CSTools vignette MultiModelSkill_vignette. The substitution of CST_Load by CST_Start can be found here.
Step 0: Source functions and define input params
# Step (0a): Source functions
source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_Load.R")
source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_Start.R")
source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/zzz.R")
source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/as.s2dv_cube.R")
library(s2dv)
library(startR)
library(zeallot)
# Step (0b): Input params
# shared
mth = '11'
clim_var = 'tas'
ini <- 1993
fin <- 2012
start <- as.Date(paste(ini, mth, "01", sep = ""), "%Y%m%d")
end <- as.Date(paste(fin, mth, "01", sep = ""), "%Y%m%d")
# Load
dateseq <- format(seq(start, end, by = "year"), "%Y%m%d")
grid <- "256x128"
# Start
dateseq2 <- format(seq(start, end, by = "year"), "%Y%m")
lonmin = 50 #-20
lonmax = 70
latmin = 25
latmax = 45 # 75
Part 1: Load datasets within the same call
Load Call
glosea5 <- '/esarchive/exp/glosea5/glosea5c3s/$STORE_FREQ$_mean/$VAR_NAME$_f6h/$VAR_NAME$_$YEAR$$MONTH$.nc'
c(expl, obsl) %<-%
CST_Load(var = clim_var, exp = list(list(name = 'glosea5', path = glosea5),
list(name = 'ecmwf/system4_m1'),
list(name = 'meteofrance/system5_m1')),
obs = "erainterim", sdates = dateseq, leadtimemin = 2, leadtimemax = 4,
lonmin = 50, lonmax = 70, latmin = 25, latmax = 45,
storefreq = "monthly", sampleperiod = 1, nmember = 4,
output = "lonlat", method = "bilinear",
grid = paste("r", grid, sep = ""))
Start call
repos1 <- "/esarchive/exp/glosea5/glosea5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$01.nc"
repos3 <- "/esarchive/exp/meteofrance/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$01.nc"
exp <- CST_Start(dataset = list(list(name = 'glosea5c3s', path = repos1),
list(name = 'ecmwf/system4_m1', path = repos2),
list(name = 'meteofrance/system5_m1', path = repos3)),
var = clim_var,
member = indices(1:4),
sdate = dateseq2,
ftime = indices(2:4),
lat = values(list(latmin, latmax)),
lat_reorder = Sort(decreasing = TRUE),
lon = values(list(lonmin, lonmax)),
lon_reorder = CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
member = c('member', 'ensemble'),
ftime = c('ftime', 'time')),
transform = CDORemapper,
transform_extra_cells = 2,
transform_params = list(grid = 'r256x128',
method = 'bilinear'),
transform_vars = c('lat', 'lon'),
return_vars = list(lat = NULL,
lon = NULL, ftime = 'sdate'),
retrieve = TRUE)
dates_exp <- exp$attrs$Dates
repos_obs <- "/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$/$var$_$date$.nc"
obs <- CST_Start(dataset = list(list(name = 'erainterim', path = repos_obs)),
var = clim_var,
date = unique(format(dates_exp, '%Y%m')),
ftime = values(dates_exp),
ftime_across = 'date',
ftime_var = 'ftime',
merge_across_dims = TRUE,
split_multiselected_dims = TRUE,
lat = values(list(latmin, latmax)),
lat_reorder = Sort(decreasing = TRUE),
lon = values(list(lonmin, lonmax)),
lon_reorder = CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
ftime = c('ftime', 'time')),
transform = CDORemapper,
transform_extra_cells = 2,
transform_params = list(grid = 'r256x128',
method = 'bilinear'),
transform_vars = c('lat', 'lon'),
return_vars = list(lon = NULL,
lat = NULL,
ftime = 'date'),
retrieve = TRUE)
Part 2: Load exp datasets within separated calls
exp1 <- CST_Start(dataset = list(list(name = 'glosea5c3s', path = repos1)),
var = clim_var,
member = indices(1:4),
sdate = dateseq2,
ftime = indices(2:4),
lat = values(list(latmin, latmax)),
lat_reorder = Sort(decreasing = TRUE),
lon = values(list(lonmin, lonmax)),
lon_reorder = CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
member = c('member', 'ensemble'),
ftime = c('ftime', 'time')),
transform = CDORemapper,
transform_extra_cells = 2,
transform_params = list(grid = 'r256x128',
method = 'bilinear'),
transform_vars = c('lat', 'lon'),
return_vars = list(lat = NULL,
lon = NULL, ftime = 'sdate'),
retrieve = TRUE)
exp2 <- CST_Start(dataset = list(list(name = 'ecmwf/system4_m1', path = repos2)),
var = clim_var,
member = indices(1:4),
sdate = dateseq2,
ftime = indices(2:4),
lat = values(list(latmin, latmax)),
lat_reorder = Sort(decreasing = TRUE),
lon = values(list(lonmin, lonmax)),
lon_reorder = CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
member = c('member', 'ensemble'),
ftime = c('ftime', 'time')),
transform = CDORemapper,
transform_extra_cells = 2,
transform_params = list(grid = 'r256x128',
method = 'bilinear'),
transform_vars = c('lat', 'lon'),
return_vars = list(lat = NULL,
lon = NULL, ftime = 'sdate'),
retrieve = TRUE)
exp3 <- CST_Start(dataset = list(list(name = 'meteofrance/system5_m1', path = repos3)),
var = clim_var,
member = indices(1:4),
sdate = dateseq2,
ftime = indices(2:4),
lat = values(list(latmin, latmax)),
lat_reorder = Sort(decreasing = TRUE),
lon = values(list(lonmin, lonmax)),
lon_reorder = CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
member = c('member', 'ensemble'),
ftime = c('ftime', 'time')),
transform = CDORemapper,
transform_extra_cells = 2,
transform_params = list(grid = 'r256x128',
method = 'bilinear'),
transform_vars = c('lat', 'lon'),
return_vars = list(lat = NULL,
lon = NULL, ftime = 'sdate'),
retrieve = TRUE)
Part 3: Compare results
# First dataset
> summary(exp1$data) # Start separated
Min. 1st Qu. Median Mean 3rd Qu. Max.
254.3 274.1 277.5 278.1 281.8 297.4
> summary(expl$data[1,,,,,]) # Load
Min. 1st Qu. Median Mean 3rd Qu. Max.
254.3 274.1 277.5 278.1 281.8 297.4
> summary(exp$data[1,,,,,,]) # Start unique call
Min. 1st Qu. Median Mean 3rd Qu. Max.
254.3 274.1 277.5 278.1 281.8 297.4
# OK
# Second dataset
> summary(exp2$data)
Min. 1st Qu. Median Mean 3rd Qu. Max.
252.9 272.7 276.9 277.6 281.8 298.7
> summary(expl$data[2,,,,,])
Min. 1st Qu. Median Mean 3rd Qu. Max.
252.9 272.7 276.9 277.6 281.8 298.7
> summary(exp$data[2,,,,,,]) # Start unique call
Min. 1st Qu. Median Mean 3rd Qu. Max.
246.3 263.0 266.6 266.7 270.4 282.7
# Last NO OK
# Third dataset
> summary(exp3$data)
Min. 1st Qu. Median Mean 3rd Qu. Max.
247.8 273.0 276.4 277.5 281.7 300.5
> summary(expl$data[3,,,,,])
Min. 1st Qu. Median Mean 3rd Qu. Max.
247.8 273.0 276.4 277.5 281.7 300.5
> summary(exp$data[3,,,,,,]) # Start unique call
Min. 1st Qu. Median Mean 3rd Qu. Max.
253.8 267.1 270.7 270.4 273.8 283.5
# Last NO OK
The dates of the 3 datasets separated are different. However, the separated results of the data are equal to the CST_Load results.
> exp1$attrs$Dates[[1]]
[1] "1993-12-16 UTC"
> exp2$attrs$Dates[[1]]
[1] "1994-01-01 UTC"
> exp3$attrs$Dates[[1]]
[1] "1994-01-16 09:00:00 UTC"
Is there a way to load correclty the 3 datasets in a unique Start call correctly? If not, do you think it is a good idea that for now I load the datasets separated in the vignette and I keep the development for next releases of CSTools?
Thank you in advance,
Eva