......@@ -27,8 +27,7 @@ class(data1) <- 's2dv_cube'
test_that("1. Input checks", {
expect_error(
CST_WeatherRegimes(data = 1),
paste0("Parameter 'data' must be of the class 's2dv_cube', as output by ",
"CSTools::CST_Load.")
paste0("Parameter 'data' must be of the class 's2dv_cube'.")
)
# Check 'exp' object structure
expect_error(
......@@ -84,9 +83,11 @@ test_that("1. Input checks", {
##############################################
test_that("2. Output checks", {
expect_equal(
names(dim(CST_WeatherRegimes(data = data1, ncenters = 3, EOFs = FALSE)$data)),
c('lat', 'lon', 'cluster')
suppressWarnings(
expect_equal(
names(dim(CST_WeatherRegimes(data = data1, ncenters = 3, EOFs = FALSE)$data)),
c('lat', 'lon', 'cluster')
)
)
data1 <- 1 : 400
dim(data1) <- c(sdate = 2, ftime = 10, lat = 5, lon = 4)
......@@ -137,13 +138,17 @@ test_that("2. Output checks", {
data1[, 2, 3] <- NA
data1 <- list(data = data1, coords = list(lat = 1:5, lon = 1:4))
class(data1) <- 's2dv_cube'
expect_equal(
any(is.na(CST_WeatherRegimes(data = data1, ncenters = 3, EOFs = FALSE)$data)),
TRUE
suppressWarnings(
expect_equal(
any(is.na(CST_WeatherRegimes(data = data1, ncenters = 3, EOFs = FALSE)$data)),
TRUE
)
)
expect_equal(
names(dim(CST_WeatherRegimes(data = data1, ncenters = 3, EOFs = FALSE)$data)),
c('lat', 'lon', 'cluster')
suppressWarnings(
expect_equal(
names(dim(CST_WeatherRegimes(data = data1, ncenters = 3, EOFs = FALSE)$data)),
c('lat', 'lon', 'cluster')
)
)
})
......
......@@ -17,145 +17,145 @@ test_that("1. Input checks", {
##############################################
test_that("2. Tests from Load()", {
# Load a 2-member, 2-start-date, 3-leadtime 'tas' experiment and convert it
# to an 's2dv_cube' with as.s2dv_cube(), then check the object structure.
# NOTE(review): Load() reads from a local data archive, so this test can
# only run where that archive is mounted -- confirm it is gated/skipped
# elsewhere.
startDates <- c('20001101', '20011101')
suppressWarnings(
ob1 <- Load(var = 'tas', exp = 'system5c3s',
nmember = 2, sdates = startDates,
leadtimemax = 3, latmin = 30, latmax = 35,
lonmin = 10, lonmax = 20, output = 'lonlat')
)
res1 <- as.s2dv_cube(ob1)
# test_that("2. Tests from Load()", {
# startDates <- c('20001101', '20011101')
# suppressWarnings(
# ob1 <- Load(var = 'tas', exp = 'system5c3s',
# nmember = 2, sdates = startDates,
# leadtimemax = 3, latmin = 30, latmax = 35,
# lonmin = 10, lonmax = 20, output = 'lonlat')
# )
# res1 <- as.s2dv_cube(ob1)
# dimensions: the data array keeps the Load() dimension names and sizes
expect_equal(
dim(res1$data),
c(dataset = 1, member = 2, sdate = 2, ftime = 3, lat = 6, lon = 11)
)
# elements: top-level structure of the 's2dv_cube'
expect_equal(
names(res1),
c("data", "dims", "coords", "attrs")
)
expect_equal(
names(res1$attrs),
c("Variable", "Datasets", "Dates", "when", "source_files",
"not_found_files", "load_parameters")
)
# coordinates: the 'indices' attribute marks whether a coordinate stores
# plain indices (TRUE) or real values (FALSE)
expect_equal(
attributes(res1$coords$sdate),
list(indices = FALSE)
)
expect_equal(
attributes(res1$coords$ftime),
list(indices = TRUE)
)
# coordinate vectors are dimensionless (plain vectors, dim() is NULL)
expect_equal(
dim(res1$coords$lat),
NULL
)
expect_equal(
dim(res1$coords$lon),
NULL
)
expect_equal(
length(res1$coords$lat),
6
)
# Dates: stored as an array with named dimensions (ftime, sdate)
expect_equal(
dim(res1$attrs$Dates),
c(ftime = 3, sdate = 2)
)
})
# # dimensions
# expect_equal(
# dim(res1$data),
# c(dataset = 1, member = 2, sdate = 2, ftime = 3, lat = 6, lon = 11)
# )
# # elements
# expect_equal(
# names(res1),
# c("data", "dims", "coords", "attrs")
# )
# expect_equal(
# names(res1$attrs),
# c("Variable", "Datasets", "Dates", "when", "source_files",
# "not_found_files", "load_parameters")
# )
# # coordinates
# expect_equal(
# attributes(res1$coords$sdate),
# list(indices = FALSE)
# )
# expect_equal(
# attributes(res1$coords$ftime),
# list(indices = TRUE)
# )
# expect_equal(
# dim(res1$coords$lat),
# NULL
# )
# expect_equal(
# dim(res1$coords$lon),
# NULL
# )
# expect_equal(
# length(res1$coords$lat),
# 6
# )
# # Dates
# expect_equal(
# dim(res1$attrs$Dates),
# c(ftime = 3, sdate = 2)
# )
# })
##############################################
test_that("3. Tests from Load()", {
# Load one month of ERA5 observations ('windagl100') through an explicit
# archive path pattern and check the resulting 's2dv_cube' structure.
# NOTE(review): depends on the /esarchive data store being available.
obs_path <- list(name = "ERA5",
path = "/esarchive/recon/ecmwf/era5/$STORE_FREQ$_mean/$VAR_NAME$_f1h/$VAR_NAME$_$YEAR$$MONTH$.nc")
ob2 <- Load(var = 'windagl100', obs = list(obs_path),
sdates = '20180301', nmember = 1,
leadtimemin = 1, leadtimemax = 1,
storefreq = "monthly", sampleperiod = 1,
latmin = 36, latmax = 38, lonmin = 0, lonmax = 4,
output = 'lonlat', nprocs = 1, grid = 'r360x181')
# test_that("3. Tests from Load()", {
# obs_path <- list(name = "ERA5",
# path = "/esarchive/recon/ecmwf/era5/$STORE_FREQ$_mean/$VAR_NAME$_f1h/$VAR_NAME$_$YEAR$$MONTH$.nc")
# ob2 <- Load(var = 'windagl100', obs = list(obs_path),
# sdates = '20180301', nmember = 1,
# leadtimemin = 1, leadtimemax = 1,
# storefreq = "monthly", sampleperiod = 1,
# latmin = 36, latmax = 38, lonmin = 0, lonmax = 4,
# output = 'lonlat', nprocs = 1, grid = 'r360x181')
res2 <- as.s2dv_cube(ob2)
# res2 <- as.s2dv_cube(ob2)
# dimensions: single dataset/member/sdate/ftime over a small lat-lon box
expect_equal(
dim(res2$data),
c(dataset = 1, member = 1, sdate = 1, ftime = 1, lat = 3, lon = 5)
)
# elements: attributes list of the cube
expect_equal(
names(res2$attrs),
c("Variable", "Datasets", "Dates", "when", "source_files",
"not_found_files", "load_parameters")
)
# coordinates: sdate stores real values; the first coordinate entries are
# the dataset/member/sdate/ftime labels
expect_equal(
attributes(res2$coords$sdate),
list(indices = FALSE)
)
expect_equal(
unlist(res2$coords)[1:4],
c(dataset = "ERA5", member = "1", sdate = "20180301", ftime = "1")
)
expect_equal(
dim(res2$coords$ftime),
NULL
)
expect_equal(
length(res2$coords$lat),
3
)
# Dates: array with named dimensions (ftime, sdate)
expect_equal(
dim(res2$attrs$Dates),
c(ftime = 1, sdate = 1)
)
})
# # dimensions
# expect_equal(
# dim(res2$data),
# c(dataset = 1, member = 1, sdate = 1, ftime = 1, lat = 3, lon = 5)
# )
# # elements
# expect_equal(
# names(res2$attrs),
# c("Variable", "Datasets", "Dates", "when", "source_files",
# "not_found_files", "load_parameters")
# )
# # coordinates
# expect_equal(
# attributes(res2$coords$sdate),
# list(indices = FALSE)
# )
# expect_equal(
# unlist(res2$coords)[1:4],
# c(dataset = "ERA5", member = "1", sdate = "20180301", ftime = "1")
# )
# expect_equal(
# dim(res2$coords$ftime),
# NULL
# )
# expect_equal(
# length(res2$coords$lat),
# 3
# )
# # Dates
# expect_equal(
# dim(res2$attrs$Dates),
# c(ftime = 1, sdate = 1)
# )
# })
##############################################
test_that("4. Tests from Load()", {
# Load 'prlr' for an experiment AND an observation dataset at once; in this
# case as.s2dv_cube() warns and returns a list of two 's2dv_cube' objects
# (one for 'exp', one for 'obs').
# NOTE(review): depends on the /esarchive data store being available.
exp <- list(name = 'ecmwfS5',
path = "/esarchive/exp/ecmwf/system5c3s/$STORE_FREQ$_mean/$VAR_NAME$_s0-24h/$VAR_NAME$_$START_DATE$.nc")
obs <- list(name = 'era5',
path = '/esarchive/recon/ecmwf/era5/$STORE_FREQ$_mean/$VAR_NAME$_f1h-r1440x721cds/$VAR_NAME$_$YEAR$$MONTH$.nc')
suppressWarnings(
ob3 <- Load(var = 'prlr', exp = list(exp), obs = list(obs),
sdates = paste0(1993:1995, '1101'), nmember = 1,
storefreq = "monthly", sampleperiod = 1,
latmin = 42, latmax = 45, lonmin = 4, lonmax = 6,
output = 'lonlat', nprocs = 1)
)
# The conversion must warn that the output is one cube per input source
expect_warning(
as.s2dv_cube(ob3),
"The output is a list of two 's2dv_cube' objects corresponding to 'exp' and 'obs'."
)
suppressWarnings(
res3 <- as.s2dv_cube(ob3)
)
# test_that("4. Tests from Load()", {
# exp <- list(name = 'ecmwfS5',
# path = "/esarchive/exp/ecmwf/system5c3s/$STORE_FREQ$_mean/$VAR_NAME$_s0-24h/$VAR_NAME$_$START_DATE$.nc")
# obs <- list(name = 'era5',
# path = '/esarchive/recon/ecmwf/era5/$STORE_FREQ$_mean/$VAR_NAME$_f1h-r1440x721cds/$VAR_NAME$_$YEAR$$MONTH$.nc')
# suppressWarnings(
# ob3 <- Load(var = 'prlr', exp = list(exp), obs = list(obs),
# sdates = paste0(1993:1995, '1101'), nmember = 1,
# storefreq = "monthly", sampleperiod = 1,
# latmin = 42, latmax = 45, lonmin = 4, lonmax = 6,
# output = 'lonlat', nprocs = 1)
# )
# expect_warning(
# as.s2dv_cube(ob3),
# "The output is a list of two 's2dv_cube' objects corresponding to 'exp' and 'obs'."
# )
# suppressWarnings(
# res3 <- as.s2dv_cube(ob3)
# )
# dimensions: the first list element is the experiment cube
expect_equal(
dim(res3[[1]]$data),
c(dataset = 1, member = 1, sdate = 3, ftime = 8, lat = 4, lon = 3)
)
expect_equal(
unlist(res3[[1]]$coords)[1:4],
c(dataset = "ecmwfS5", member = "1", sdate1 = "19931101", sdate2 = "19941101")
)
# Dates: the 'exp' and 'obs' cubes must share the same Dates dimensions
expect_equal(
dim(res3[[1]]$attrs$Dates),
dim(res3[[2]]$attrs$Dates)
)
})
# # dimensions
# expect_equal(
# dim(res3[[1]]$data),
# c(dataset = 1, member = 1, sdate = 3, ftime = 8, lat = 4, lon = 3)
# )
# expect_equal(
# unlist(res3[[1]]$coords)[1:4],
# c(dataset = "ecmwfS5", member = "1", sdate1 = "19931101", sdate2 = "19941101")
# )
# # Dates
# expect_equal(
# dim(res3[[1]]$attrs$Dates),
# dim(res3[[2]]$attrs$Dates)
# )
# })
##############################################
......@@ -166,9 +166,9 @@ test_that("5. Tests from Start()", {
var = 'tas',
sdate = c('20170101', '20180101'),
ensemble = indices(1:3),
time = 'all',
latitude = indices(1:10),
longitude = indices(1:10),
time = indices(1:3),
latitude = indices(1:2),
longitude = indices(1:2),
return_vars = list(latitude = 'dat', longitude = 'dat', time = 'sdate'),
retrieve = TRUE)
)
......@@ -178,7 +178,7 @@ test_that("5. Tests from Start()", {
# dimensions
expect_equal(
dim(res4$data),
c(dat = 1, var = 1, sdate = 2, ensemble = 3, time = 7, latitude = 10, longitude = 10)
c(dat = 1, var = 1, sdate = 2, ensemble = 3, time = 3, latitude = 2, longitude = 2)
)
# elements
expect_equal(
......@@ -204,12 +204,12 @@ test_that("5. Tests from Start()", {
)
expect_equal(
length(res4$coords$latitude),
10
2
)
# Dates
expect_equal(
dim(res4$attrs$Dates),
c(sdate = 2, time = 7)
c(sdate = 2, time = 3)
)
})
......@@ -228,10 +228,10 @@ test_that("6. Tests from Start()", {
suppressWarnings(
hcst <- Start(dat = anlgs,
var = vari,
latitude = indices(1:4), #'all',
longitude= indices(1:4), #'all',
member= indices(1), #'all',
time = 'all',
latitude = indices(1:2), #'all',
longitude = indices(1:2), #'all',
member = indices(1), #'all',
time = indices(1:3),
syear = indices(1:4),
file_date = file_date_array,
split_multiselected_dims = TRUE,
......@@ -248,7 +248,7 @@ test_that("6. Tests from Start()", {
# dimensions
expect_equal(
dim(res5$data),
c(dat = 1, var = 1, latitude = 4, longitude = 4, member = 1, time = 4,
c(dat = 1, var = 1, latitude = 2, longitude = 2, member = 1, time = 3,
syear = 4, sweek = 2, sday = 3)
)
# elements
......
......@@ -3,7 +3,7 @@ title: "Analogs based on large scale for downscaling"
author: "M. Carmen Alvarez-Castro and M. del Mar Chaves-Montero (CMCC, Italy)"
date: "November 2020"
revisor: "Eva Rifà"
revision date: "January 2023"
revision date: "October 2023"
output: rmarkdown::html_vignette
vignette: >
%\VignetteEngine{knitr::knitr}
......@@ -21,8 +21,7 @@ In this example, the seasonal temperature forecasts, initialized in october, wil
## 1. Introduction of the function
For instance if we want to perform a temperature donwscaling in Balearic Island for October we will get a daily series of temperature with 1 analog per day,
the best analog. How we define the best analog for a certain day? This function offers three options for that:
For instance, if we want to perform a temperature downscaling in the Balearic Islands for October, we will get a daily series of temperature with 1 analog per day, the best analog. How do we define the best analog for a certain day? This function offers three options for that:
(1) The day with the minimum Euclidean distance in a large scale field: using i.e. pressure or geopotencial height as variables and North Atlantic region as large scale region. The Atmospheric circulation pattern in the North Atlantic (LargeScale) has an important role in the climate in Spain (LocalScale). The function will find the day with the most similar pattern in atmospheric circulation in the database (obs, slp in ERA5) to the day of interest (exp, slp in model). Once the date of the best analog is found, the function takes the associated temperature to that day (obsVar, tas in ERA5), with a subset of the region of interest (Balearic Island).
......@@ -39,7 +38,7 @@ Two datasets are used to illustrate how to use the function. The first one could
### Example 1: using data from CSTools
After loading **CSTools** package on the R session, the user will have access to the sample data `lonlat_temp` and `lonlat_prec`.
After loading **CSTools** package on the R session, the user will have access to the sample data created using `CST_Start`: `lonlat_temp_st` and `lonlat_prec_st`.
*Note: If it is the first time using CSTools, install the package by running `install.packages("CSTools")`.
......@@ -50,32 +49,24 @@ library(CSTools)
After exploring the data, the user can directly run the Analogs downscaling method using the 'Large_dis' metric:
```
class(lonlat_temp$exp)
names(lonlat_temp$obs)
dim(lonlat_temp$obs$data)
dim(lonlat_temp$exp$data)
head(lonlat_temp$exp$attrs$Dates)
class(lonlat_temp_st$exp)
names(lonlat_temp_st$obs)
dim(lonlat_temp_st$obs$data)
dim(lonlat_temp_st$exp$data)
lonlat_temp_st$exp$attrs$Dates
lonlat_temp_st$obs$attrs$Dates
```
There are 15 ensemble members available in the `exp` data set, 6 starting dates and 3 forecast times, which refer to monthly values during 3 months following starting dates on November 1st in the years 2000, 2001, 2002, 2003, 2004 and 2005.
There are 15 ensemble members available in the `exp` data set, 6 starting dates and 3 forecast times, which refer to monthly values during 3 months following starting dates on November in the years 2000, 2001, 2002, 2003, 2004 and 2005.
```
exp1 <- lonlat_temp$exp
exp1$data <- exp1$data[, , 1, 1, , , drop = FALSE]
exp1$attrs$Dates <- exp1$attrs$Dates[1]
exp1 <- CST_Subset(x = lonlat_temp_st$exp, along = c('sdate', 'ftime'), indices = list(1, 1))
down_1 <- CST_Analogs(expL = exp1, obsL = lonlat_temp_st$obs)
down_1 <- CST_Analogs(expL = exp1, obsL = lonlat_temp$obs)
exp2 <- CST_Subset(x = lonlat_temp_st$exp, along = c('sdate', 'ftime'), indices = list(1, 2))
down_2 <- CST_Analogs(expL = exp2, obsL = lonlat_temp_st$obs)
exp2 <- lonlat_temp$exp
exp2$data <- exp2$data[, , 1, 2, , , drop = FALSE]
exp2$attrs$Dates <- exp2$attrs$Dates[2]
down_2 <- CST_Analogs(expL = exp2, obsL = lonlat_temp$obs)
exp3 <- lonlat_temp$exp
exp3$data <- exp3$data[, , 1, 3, , , drop = FALSE]
exp3$attrs$Dates <- exp3$attrs$Dates[3]
down_3 <- CST_Analogs(expL = exp3, obsL = lonlat_temp$obs)
exp3 <- CST_Subset(x = lonlat_temp_st$exp, along = c('sdate', 'ftime'), indices = list(1, 3))
down_3 <- CST_Analogs(expL = exp3, obsL = lonlat_temp_st$obs)
```
The visualization of the first three time steps for the ensemble mean of the forecast initialized on the 1st of November 2000 can be done using the package **s2dv**:
......@@ -97,13 +88,12 @@ PlotLayout(PlotEquiMap, c('lat', 'lon'),
width = 10, height = 4)
```
![](./Figures/Analogs1.png)
The user can also request extra Analogs and the information:
```
down <- CST_Analogs(expL = exp1, obsL = lonlat_temp$obs,
down <- CST_Analogs(expL = exp1, obsL = lonlat_temp_st$obs,
nAnalogs = 2, AnalogsInfo = TRUE)
```
......@@ -121,7 +111,7 @@ The last command run concludes that the best analog of the ensemble 15 correspon
```
PlotLayout(PlotEquiMap, c('lat', 'lon'), var = list(down$data$fields[1, , , 15],
lonlat_temp$obs$data[1, 1, 5, 1, , ]), nrow = 1, ncol = 2,
lonlat_temp_st$obs$data[1, 1, 5, 1, , ]), nrow = 1, ncol = 2,
lon = down$coords$lon, lat = down$coords$lat, filled.continents = FALSE,
titles = c("Downscaled 2000-11-01", "Observed 2004-11-01"), units = 'T(K)',
width = 7, height = 4)
......@@ -131,71 +121,118 @@ PlotLayout(PlotEquiMap, c('lat', 'lon'), var = list(down$data$fields[1, , , 15],
As expected, they are exactly the same.
### Exemple 2: Load data using CST_Load
### Example 2: Load data using CST_Start
In this case, the spatial field of a single forecast day will be downscale using Analogs in this example. This will allow illustrating how to use CST_Load to retrieve observations separated from simulations. To explore other options, see other CSTools vignettes as well as `CST_Load` documentation.
In this case, the spatial field of a single forecast day will be downscaled using Analogs in this example. This will allow illustrating how to use `CST_Start` to retrieve observations separately from simulations. To explore other options, see other CSTools vignettes as well as `CST_Start` documentation and the [startR](https://CRAN.R-project.org/package=startR) package.
The simulations available for the desired model cover the period 1993-2016. Here, the 15th of October 2000 (for the simulation initialized on the 1st of October 2000) will be downscaled. ERA5 is available from 1979 to the present day. For this example we will just use October days from 2000 to 2006, so the starting dates can be defined by running the following lines:
```
start <- as.Date(paste(2000, 10, "01", sep = ""), "%Y%m%d")
end <- as.Date(paste(2006, 10, "01", sep = ""), "%Y%m%d")
dateseq <- format(seq(start, end, by = "year"), "%Y%m%d")
dates <- as.POSIXct(seq(start, end, by = "year"), format = '%Y%m%d', 'UTC')
```
Using the `CST_Load` function from **CSTool package**, the data available in our data store can be loaded. The following lines show how this function can be used. The experimental datasets are interpolated to the ERA5 grid by specifying the 'grid' parameter while ERA5 doesn't need to be interpolated. While parameter leadtimemax is set to 1 for the experimental dataset, it is set to 31 for the observations, returning the daily observations for October for the years requested in 'sdate' (2000-2006). Download the data to run the recipe under the HTTPS: downloads.cmcc.bo.it/d_chaves/ANALOGS/data_for_Analogs.Rdat or ask carmen.alvarez-castro at cmcc.it or nuria.perez at bsc.es.
```
exp <- list(name = 'ECMWF_system4_m1',
path = file.path("/esarchive/exp/ecmwf/system4_m1/",
"$STORE_FREQ$_mean/$VAR_NAME$_*/$VAR_NAME$_$START_DATE$.nc"))
obs <- list(name = 'ERA5',
path = file.path("/esarchive/recon/ecmwf/era5/$STORE_FREQ$_mean/",
"$VAR_NAME$_f1h-r1440x721cds/$VAR_NAME$_$YEAR$$MONTH$.nc"))
expTAS <- CST_Load(var = 'tas', exp = list(exp), obs = NULL,
sdates = '20001001', latmin = 22, latmax = 70,
lonmin = -80, lonmax = 50, output = 'lonlat',
storefreq = 'daily', nmember = 15, leadtimemin = 15,
leadtimemax = 15, method = "bilinear", grid = 'r1440x721',
nprocs = 1)
obsTAS <- CST_Load(var = 'tas', exp = NULL, obs = list(obs),
sdates = dateseq, leadtimemax = 31,
latmin = 22, latmax = 70,
lonmin = -80, lonmax = 50, output = 'lonlat',
nprocs = 1, storefreq = "daily", nmember = 1)
expPSL <- CST_Load(var = 'psl', exp = list(exp), obs = NULL,
sdates = '20001001', latmin = 22, latmax = 70,
lonmin = -80, lonmax = 50, output = 'lonlat',
storefreq = 'daily', nmember = 15, leadtimemin = 15,
leadtimemax = 15, method = "bilinear", grid = 'r1440x721',
nprocs = 1)
obsPSL <- CST_Load(var = 'psl', exp = NULL, obs = list(obs),
sdates = dateseq, leadtimemax = 31,
latmin = 22, latmax = 70,
lonmin = -80, lonmax = 50, output = 'lonlat',
nprocs = 1, storefreq = "daily", nmember = 1)
```
*Note: `CST_Load` allows to load the data simultaneously for 'exp' and 'obs' already formatted to have the same dimensions as in this example. However, it is possible to request separated 'obs' and 'exp'. In this second case, the observations could be return in a continous time series instead of being split in start dates and forecast time.*
The s2dv_cube objects `expTAS`, `obsTAS`, `expPSL` and `obsPSL` are now loaded in the R environment. The first two elements correspond to the experimental and observed data for temperature and the other two are the equivalent for the SLP data.
Loading the data using `CST_Load` allows to obtain two lists, one for the experimental data and another for the observe data, with the same elements and compatible dimensions of the data element:
Using the `CST_Start` function from **CSTools package**, the data available in our data store can be loaded. The following lines show how this function can be used. The experimental datasets are interpolated to the ERA5 grid by specifying the 'grid' parameter while ERA5 doesn't need to be interpolated. While dimension 'time' is set to 1 for the experimental dataset, it is set to 31 for the observations, returning the daily observations for October for the years requested in 'sdate' (2000-2006). Download the data to run the recipe under the HTTPS: downloads.cmcc.bo.it/d_chaves/ANALOGS/data_for_Analogs.Rdat or ask carmen.alvarez-castro at cmcc.it or nuria.perez at bsc.es.
```r
exp_path <- paste0('/esarchive/exp/ecmwf/system4_m1/daily_mean/',
'$var$_f6h/$var$_$sdate$.nc')
obs_path <- paste0('/esarchive/recon/ecmwf/era5/daily_mean/',
'$var$_f1h-r1440x721cds/$var$_$sdate$.nc')
date_exp <- '20001001'
lonmax <- 50
lonmin <- -80
latmax <- 70
latmin <- 22
expTAS <- CST_Start(dataset = exp_path,
var = 'tas',
member = startR::indices(1:15),
sdate = '20001001',
ftime = startR::indices(15),
lat = startR::values(list(latmin, latmax)),
lat_reorder = startR::Sort(decreasing = TRUE),
lon = startR::values(list(lonmin, lonmax)),
lon_reorder = startR::CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
member = c('member', 'ensemble'),
ftime = c('ftime', 'time')),
transform = startR::CDORemapper,
transform_params = list(grid = 'r1440x721',
method = 'bilinear'),
transform_vars = c('lat', 'lon'),
return_vars = list(lat = NULL,
lon = NULL, ftime = 'sdate'),
retrieve = TRUE)
expPSL <- CST_Start(dataset = exp_path,
var = 'psl',
member = startR::indices(1:15),
sdate = '20001001',
ftime = startR::indices(15),
lat = startR::values(list(latmin, latmax)),
lat_reorder = startR::Sort(decreasing = TRUE),
lon = startR::values(list(lonmin, lonmax)),
lon_reorder = startR::CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
member = c('member', 'ensemble'),
ftime = c('ftime', 'time')),
transform = startR::CDORemapper,
transform_params = list(grid = 'r1440x721',
method = 'bilinear'),
transform_vars = c('lat', 'lon'),
return_vars = list(lat = NULL,
lon = NULL, ftime = 'sdate'),
retrieve = TRUE)
obsTAS <- CST_Start(dataset = obs_path,
var = 'tas',
sdate = unique(format(dates, '%Y%m')),
ftime = startR::indices(1:31),
lat = startR::values(list(latmin, latmax)),
lat_reorder = startR::Sort(decreasing = TRUE),
lon = startR::values(list(lonmin, lonmax)),
lon_reorder = startR::CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
ftime = c('ftime', 'time')),
return_vars = list(lat = NULL,
lon = NULL, ftime = 'sdate'),
retrieve = TRUE)
obsPSL <- CST_Start(dataset = obs_path,
var = 'psl',
sdate = unique(format(dates, '%Y%m')),
ftime = startR::indices(1:31),
lat = startR::values(list(latmin, latmax)),
lat_reorder = startR::Sort(decreasing = TRUE),
lon = startR::values(list(lonmin, lonmax)),
lon_reorder = startR::CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
ftime = c('ftime', 'time')),
return_vars = list(lat = NULL,
lon = NULL, ftime = 'sdate'),
retrieve = TRUE)
```
The 's2dv_cube' objects `expTAS`,`obsTAS`, `expPSL` and `obsPSL` are now loaded in the R environment. The first two elements correspond to the experimental and observed data for temperature and the other two are the equivalent for the SLP data.
Loading the data using `CST_Start` allows to obtain two lists, one for the experimental data and another for the observed data, with the same elements and compatible dimensions of the data element:
```
dim(expTAS$data)
# dataset member sdate ftime lat lon
# 1 15 1 1 193 521
# dataset var member sdate ftime lat lon
# 1 1 15 1 1 193 521
dim(obsTAS$data)
# dataset member sdate ftime lat lon
# 1 1 7 31 193 521
# dataset var sdate ftime lat lon
# 1 1 7 31 193 521
```
#### Two variables and criteria Large [scale] Distance:
The aim is to downscale the temperature field of the simulation for the 15th of October 2000 but looking at the pressure pattern:
......@@ -210,11 +247,11 @@ Some warnings could appear indicating information about undefining parameters. I
```
names(down1$data)
dim(down1$data$field)
# nAnalogs lat lon member time
# 3 193 521 15 1
# nAnalogs lat lon member
# 3 193 521 15
dim(down1$data$dates)
# nAnalogs member time
# 3 15 1
# nAnalogs member
# 3 15
down1$data$dates[1,1]
# "07-10-2005"
```
......@@ -222,14 +259,14 @@ Now, we can visualize the output:
```
PlotLayout(PlotEquiMap, c('lat', 'lon'),
var = list(expPSL$data[1, 1, 1, 1, , ], obsPSL$data[1, 1, 1, 15, , ],
var = list(expPSL$data[1, 1, 1, 1, 1, , ], obsPSL$data[1, 1, 1, 15, , ],
obsPSL$data[1, 1, 6, 7, , ]), lon = obsPSL$coords$lon,
lat = obsPSL$coords$lat, filled.continents = FALSE,
titles = c('Exp PSL 15-10-2000','Obs PSL 15-10-2000',
'Obs PSL 7-10-2005'),
toptitle = 'First member', ncol = 3, nrow = 1, width = 10, height = 4)
PlotLayout(PlotEquiMap, c('lat', 'lon'), var = list(
expTAS$data[1, 1, 1, 1, , ], obsTAS$data[1, 1, 1, 15, , ],
expTAS$data[1, 1, 1, 1, 1, , ], obsTAS$data[1, 1, 1, 15, , ],
down1$data$field[1, , , 1], obsTAS$data[1, 1, 6, 7, , ]),
lon = obsTAS$coords$lon, lat = obsTAS$coords$lat, filled.continents = FALSE,
titles = c('Exp TAS 15-10-2000', 'Obs TAS 15-10-2000',
......@@ -252,8 +289,8 @@ The aim is to downscale the temperature simulation of the 15th of October 2000,
```
region <- c(lonmin = 0, lonmax = 5, latmin = 38.5, latmax = 40.5)
expPSL$data <- expPSL$data[1, 1, 1, 1, , ]
expTAS$data <- expTAS$data[1, 1, 1, 1, , ]
expPSL$data <- expPSL$data[1, 1, 1, 1, 1, , ]
expTAS$data <- expTAS$data[1, 1, 1, 1, 1, , ]
down2 <- CST_Analogs(expL = expPSL, obsL = obsPSL, AnalogsInfo = TRUE,
criteria = "Local_dist", # nAnalogs = 50,
obsVar = obsTAS, expVar = expTAS,
......@@ -262,11 +299,11 @@ down2 <- CST_Analogs(expL = expPSL, obsL = obsPSL, AnalogsInfo = TRUE,
The parameter 'nAnalogs' doesn't correspond to the number of Analogs returned, but to the number of the best observations to use in the comparison between large and local scale.
In this case, when looking to a large scale pattern and also to local scale pattern the best analog for the first member is the 13th of October 2001:
In this case, when looking at a large-scale pattern and also at a local-scale pattern, the best analog for the first member is the 13th of October 2001:
```
down2$data$dates[2]
# [1] "13-10-2001"
# "13-10-2001"
```
```
......@@ -276,7 +313,7 @@ var = list(expTAS$data, obsTAS$data[1, 1, 1, 15, , ],
down2$data$field[1, , ], SelBox(obsTAS$data[1, 1, 2, 13, , ],
lon = as.vector(obsTAS$coords$lon),
lat = as.vector(obsTAS$coords$lat),
region)$data)
region, londim = 'lon', latdim = 'lat')$data)
PlotLayout(PlotEquiMap, c('lat', 'lon'), var = var,
special_args = list(list(lon = expTAS$coords$lon, lat = expTAS$coords$lat),
......@@ -311,9 +348,11 @@ down3$data$dates[3]
```
```
var = list(down3$data$field[1, , ], SelBox(obsTAS$data[1, 1, 2, 10, , ],
lon = as.vector(obsTAS$coords$lon), lat = as.vector(obsTAS$coords$lat),
region)$data)
var = list(down3$data$field[1, , ],
SelBox(obsTAS$data[1, 1, 2, 10, , ],
lon = as.vector(obsTAS$coords$lon),
lat = as.vector(obsTAS$coords$lat),
region, londim = 'lon', latdim = 'lat')$data)
PlotLayout(PlotEquiMap, c('lat', 'lon'), var = var,
lon = down3$coords$lon, lat = down3$coords$lat,
......
---
author: "Nuria Perez"
date: "`r Sys.Date()`"
revisor: "Eva Rifà"
revision date: "October 2023"
output: rmarkdown::html_vignette
vignette: >
%\VignetteEngine{knitr::knitr}
......@@ -49,6 +51,7 @@ All CSTools functions have been developed following the same guidelines. The mai
A reasonably important doubt that a new user may have at this point is: what is an 's2dv_cube' object?
's2dv_cube' is a class of an object storing the data and metadata in several elements:
+ $data element is an N-dimensional array with named dimensions containing the data (e.g.: temperature values),
+ $dims vector with the dimensions of $data
+ $coords is a named list with elements of the coordinates vectors corresponding to the dimensions of the $data,
+ $attrs is a named list with elements corresponding to attributes of the object. It has the following elements:
+ $Variable is a list with the variable name in element $varName and with the metadata of all the variables in the $metadata element,
......@@ -59,9 +62,9 @@ It is possible to visualize an example of the structure of 's2dv_cube' object by
```
library(CSTools)
class(lonlat_temp$exp) # check the class of the object lonlat_temp$exp
names(lonlat_temp$exp) # shows the names of the elements in the object lonlat_temp$exp
str(lonlat_temp$exp) # shows the full structure of the object lonlat_temp$exp
class(lonlat_temp_st$exp) # check the class of the object lonlat_temp$exp
names(lonlat_temp_st$exp) # shows the names of the elements in the object lonlat_temp$exp
str(lonlat_temp_st$exp) # shows the full structure of the object lonlat_temp$exp
```
### 3. Data storage recommendations
......@@ -75,9 +78,11 @@ CSTools main objective is to share state-of-the-arts post-processing methods wit
- CST_Load can perform spatial averages over a defined region or return the lat-lon grid and
- CST_Load can read from files using multiple parallel processes among other possibilities.
If you plan to use CST_Load, we have developed guidelines to download and formatting the data. See [CDS_Seasonal_Downloader](https://earth.bsc.es/gitlab/es/cds-seasonal-downloader).
CSTools also has the function `CST_Start`, based on [startR](https://CRAN.R-project.org/package=startR), which is more flexible than `CST_Load`. We recommend using `CST_Start` since it is more efficient and flexible.
There are alternatives to CST_Load function, for instance, the user can:
If you plan to use `CST_Load` or `CST_Start`, we have developed guidelines to download and formatting the data. See [CDS_Seasonal_Downloader](https://earth.bsc.es/gitlab/es/cds-seasonal-downloader).
There are alternatives to these functions, for instance, the user can:
1) use another tool to read the data from files (e.g.: ncdf4, easyNDCF, startR packages) and then convert it to the class 's2dv_cube' with `s2dv.cube()` function or
2) If they keep facing problems to convert the data to that class, they can just skip it and work with the functions without the prefix 'CST_'. In this case, they will be able to work with the basic class 'array'.
......@@ -108,6 +113,55 @@ c(exp, obs) %<-% CST_Load(var = 'sfcWind',
grid = "r360x180")
```
### 5. CST_Start example
```r
path_exp <- paste0('/esarchive/exp/meteofrance/system6c3s/monthly_mean/',
'$var$_f6h/$var$_$sdate$.nc')
sdates <- sapply(1993:2012, function(x) paste0(x, '0501'))
lonmax <- 60.5
lonmin <- -19
latmax <- 79.5
latmin <- 0
exp <- CST_Start(dataset = path_exp,
var = 'sfcWind',
ensemble = startR::indices(1:9),
sdate = sdates,
time = startR::indices(1:3),
latitude = startR::values(list(latmin, latmax)),
latitude_reorder = startR::Sort(decreasing = TRUE),
longitude = startR::values(list(lonmin, lonmax)),
longitude_reorder = startR::CircularSort(0, 360),
synonims = list(longitude = c('lon', 'longitude'),
latitude = c('lat', 'latitude')),
return_vars = list(latitude = NULL,
longitude = NULL, time = 'sdate'),
retrieve = TRUE)
path_obs <- paste0('/esarchive/recon/ecmwf/erainterim/daily_mean/',
'$var$_f6h/$var$_$sdate$.nc')
dates <- as.POSIXct(sdates, format = '%Y%m%d', 'UTC')
obs <- CST_Start(dataset = path_obs,
var = 'sfcWind',
sdate = unique(format(dates, '%Y%m')),
time = startR::indices(2:4),
latitude = startR::values(list(latmin, latmax)),
latitude_reorder = startR::Sort(decreasing = TRUE),
longitude = startR::values(list(lonmin, lonmax)),
longitude_reorder = startR::CircularSort(0, 360),
synonims = list(longitude = c('lon', 'longitude'),
latitude = c('lat', 'latitude')),
transform = startR::CDORemapper,
transform_extra_cells = 2,
transform_params = list(grid = 'r360x181',
method = 'conservative'),
transform_vars = c('latitude', 'longitude'),
return_vars = list(longitude = NULL,
latitude = NULL,
time = 'sdate'),
retrieve = TRUE)
```
Extra lines to see the size of the objects and visualize the data:
......@@ -118,11 +172,10 @@ object_size(exp)
object_size(obs)
# 3.09 MB
library(s2dv)
PlotEquiMap(exp$data[1,1,1,1,,], lon = exp$coords$lon, lat= exp$coords$lat,
PlotEquiMap(exp$data[1,1,1,1,1,,], lon = exp$coords$longitude, lat= exp$coords$latitude,
filled.continents = FALSE, fileout = "Meteofrance_r360x180.png")
```
![Meteofrance](../vignettes/Figures/Meteofrance_r360x180.png)
### Managing big datasets and memory issues
......
---
author: "Ignazio Giuntoli and Federico Fabiano - CNR-ISAC"
date: "`r Sys.Date()`"
revisor: "Eva Rifà"
revision date: "October 2023"
output: rmarkdown::html_vignette
vignette: >
%\VignetteEngine{knitr::knitr}
......@@ -42,15 +44,15 @@ For our example we will use the sample seasonal temperature data provided within
Data can be loaded as follows:
```r
datalist <- lonlat_temp$exp
datalist <- lonlat_temp_st$exp
```
The data has the following dimensions:
```r
dim(datalist$data)
dataset member sdate ftime lat lon
1 15 6 3 22 53
dataset var member sdate ftime lat lon
1 1 15 6 3 22 53
```
Therefore the number of members is 15, the number of start dates is 6, while the forecast time steps are 3. The lat and lon dimensions refer to a 22x53 grid.
......@@ -101,11 +103,11 @@ results$freq
[4,] 15.55556
```
Further, the cluster number to which each 'member - start-date' pair is assigned can be displayed by querying '$cluster' in 'results' as shown below (members (15) are in row and the start-dates (6) are in column (i.e. 15*6 pairs).
Further, the cluster number to which each 'member - start-date' pair is assigned can be displayed by querying '$cluster' in 'results' as shown below (members (15) are in row and the start-dates (6) are in column (i.e. 15*6 pairs)).
```r
results$cluster
, , 1, 1
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 3 2 1 1 4 2
[2,] 2 4 4 2 3 1
......@@ -131,16 +133,16 @@ To achieve this we can get an idea of how the clustering has performed by lookin
```r
dim(results$composites)
cluster lat lon dataset
4 22 53 1
cluster lat lon dataset var
4 22 53 1 1
```
while the 'repr_field' argument of 'results' provides the spatial pattern of the member lying closest to the centroid (it has the same dimensions as those of 'composites'):
```r
dim(results$repr_field)
cluster lat lon dataset
4 22 53 1
cluster lat lon dataset var
4 22 53 1 1
```
Finally, the pairs 'member - start-dates' to be picked as representative for each cluster are found in the 'closest_member' argument.
......@@ -169,12 +171,12 @@ The following lines produce a multiplot of the representative temperature anomal
These are actually the closest realizations (member - start-date pair) to the cluster centroids noted as 'repr_field' above.
```r
EnsMean <- MeanDims(datalist $data, c('member', 'sdate', 'ftime'))
EnsMean <- InsertDim(Reorder(EnsMean, c("lat", "lon", "dataset")),
EnsMean <- MeanDims(datalist$data, c('member', 'sdate', 'ftime'))
EnsMean <- InsertDim(Reorder(EnsMean, c("lat", "lon", "dataset", "var")),
posdim = 1, lendim = 4, name = 'cluster')
PlotLayout(PlotEquiMap, plot_dims = c("lat", "lon"),
var = results$repr_field[,,,1] - EnsMean[,,,1],
var = results$repr_field[,,,1,1] - EnsMean[,,,1,1],
lon = results$lon, lat = results$lat, filled.continents = FALSE,
titles = c("1","2","3","4"), brks = seq(-2, 2, 0.5),
fileout = "EnsClus_4clus_both_mem_std_Fig1.png")
......@@ -188,16 +190,18 @@ The lines below produce a multiplot of the temperature anomaly patterns for each
```r
ExpMean <- MeanDims(datalist$data, 'ftime')
EnsMean <- InsertDim(InsertDim(InsertDim(EnsMean[1,,,],
EnsMean <- InsertDim(InsertDim(InsertDim(InsertDim(EnsMean[1,,,,],
posdim = 1, lendim = 6, name = 'sdate'),
posdim = 1, lendim = 15, name = 'member'),
posdim = 1, lendim = 1, name = 'dataset')
ExpMeanSd <- Reorder(ExpMean - EnsMean, c('dataset', 'sdate', 'member' , 'lat', 'lon'))
posdim = 1, lendim = 1, name = 'var'),
posdim = 1, lendim = 1, name = 'dataset')
ExpMeanSd <- Reorder(ExpMean - EnsMean, c('dataset', 'var', 'sdate', 'member' , 'lat', 'lon'))
PlotLayout(PlotEquiMap, plot_dims = c("lat", "lon"),
var = ExpMeanSd[, , 1:4, , ], title_scale = 0.7,
var = ExpMeanSd[, , , 1:4, , ], title_scale = 0.7,
ncol = 6, nrow = 4, row_titles = paste('member' , 1:4), col_titles = paste('sdate', 1:6),
lon = results$lon, lat = results$lat, filled.continents = FALSE,
titles = as.character(t(results$cluster[1:4, 1:6,])), brks = seq(-2, 2, 0.5),
titles = as.character(t(results$cluster[1:4, 1:6,,])), brks = seq(-2, 2, 0.5),
width = 24, height = 20, size_units = 'cm',
fileout = "EnsClus_4clus_both_mem_std_Fig2.png")
```
......
vignettes/Figures/Analogs1.png

90.7 KB | W: | H:

vignettes/Figures/Analogs1.png

10.5 KB | W: | H:

vignettes/Figures/Analogs1.png
vignettes/Figures/Analogs1.png
vignettes/Figures/Analogs1.png
vignettes/Figures/Analogs1.png
  • 2-up
  • Swipe
  • Onion skin
vignettes/Figures/Analogs2.png

42.2 KB | W: | H:

vignettes/Figures/Analogs2.png

5.34 KB | W: | H:

vignettes/Figures/Analogs2.png
vignettes/Figures/Analogs2.png
vignettes/Figures/Analogs2.png
vignettes/Figures/Analogs2.png
  • 2-up
  • Swipe
  • Onion skin
vignettes/Figures/Analogs3.png

124 KB | W: | H:

vignettes/Figures/Analogs3.png

15.8 KB | W: | H:

vignettes/Figures/Analogs3.png
vignettes/Figures/Analogs3.png
vignettes/Figures/Analogs3.png
vignettes/Figures/Analogs3.png
  • 2-up
  • Swipe
  • Onion skin
vignettes/Figures/Analogs4.png

182 KB | W: | H:

vignettes/Figures/Analogs4.png

26.1 KB | W: | H:

vignettes/Figures/Analogs4.png
vignettes/Figures/Analogs4.png
vignettes/Figures/Analogs4.png
vignettes/Figures/Analogs4.png
  • 2-up
  • Swipe
  • Onion skin
vignettes/Figures/Analogs5.png

132 KB | W: | H:

vignettes/Figures/Analogs5.png

19.7 KB | W: | H:

vignettes/Figures/Analogs5.png
vignettes/Figures/Analogs5.png
vignettes/Figures/Analogs5.png
vignettes/Figures/Analogs5.png
  • 2-up
  • Swipe
  • Onion skin
vignettes/Figures/Analogs6.png

28.6 KB | W: | H:

vignettes/Figures/Analogs6.png

3.46 KB | W: | H:

vignettes/Figures/Analogs6.png
vignettes/Figures/Analogs6.png
vignettes/Figures/Analogs6.png
vignettes/Figures/Analogs6.png
  • 2-up
  • Swipe
  • Onion skin