# ------------------------------------------------------
# Loading tas and tos for EC-Earth decadal predictions:
# Authors: Carlos Delgado and Núria Pérez-Zanón
# Revised by An-Chi Ho on 4th Aug. 2021
# ------------------------------------------------------
# Three ways to load the same data are provided:
# 1) a single Start call providing two paths and two variable names
# 2) a single Start call providing one path and two variable names
# 3) two Start calls (one for each path and variable)

# Case 1:
# Returns dimensions 'dataset' and 'var' with length 2, but only the positions of the diagonal
# are filled:
# = Amon/tas is stored in {dataset = 1, var = 1}
# = Omon/tos is stored in {dataset = 2, var = 2}
# We choose an ocean region (lon = 150:170; lat = 10:20) so 'tos' will have values.
## NOTE!!! [dataset = 1, var = 2] has values because of an issue in ESMValTool: https://earth.bsc.es/gitlab/es/auto-ecearth3/issues/1258.
## However, the file seems incorrect: tos shouldn't have values on land. But it is a file issue
## rather than a problem in Start().
library(startR)

# File patterns for the two sources: the 'Amon' tree (used for 'tas') and the
# 'Omon' tree (used for 'tos'). $member$, $var$, $sdate$ and $fyear$ are the
# tags that Start() fills in.
path_tas <- paste0(
  '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/',
  'dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/',
  '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc'
)
path_tos <- paste0(
  '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/',
  'dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/',
  '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc'
)

# One Start() call with two datasets (one per path) and two variables.
# NOTE: the order of the dimension arguments is kept as is, since the
# dimensions of the returned array (see dim(data1) below) follow it.
data1 <- Start(
  dataset = list(list(path = path_tas), list(path = path_tos)),
  var = c('tas', 'tos'),
  sdate = paste0(1960:1962),
  fmonth = 1,
  lat = values(list(10, 20)),
  lat_reorder = Sort(),
  lon = values(list(150, 170)),
  lon_reorder = CircularSort(0, 360),
  fyear = 'all',
  member = indices(1),
  fyear_depends = 'sdate',
  fmonth_across = 'fyear',
  merge_across_dims = TRUE,
  synonims = list(
    fmonth = c('fmonth', 'time'),
    lon = c('lon', 'longitude'),
    lat = c('lat', 'latitude')
  ),
  metadata_dims = c('dataset', 'var'),
  return_vars = list(lat = 'dataset', lon = 'dataset'),
  num_procs = 1,
  retrieve = TRUE
)

dim(data1)
# dataset     var   sdate  fmonth     lat     lon  member
#       2       2       3       1      15      28       1

# Count the missing values in every {dataset, var} slot to see which are filled.
# {dataset = 1, var = 1}: Amon/tas
sum(is.na(data1[1, 1, , , , , ]))
# [1] 0
# {dataset = 1, var = 2}: Amon/tos. It should be 1260 if Amon/tos doesn't exist
sum(is.na(data1[1, 2, , , , , ]))
# [1] 0
# {dataset = 2, var = 1}: Omon/tas
sum(is.na(data1[2, 1, , , , , ]))
# [1] 1260
# {dataset = 2, var = 2}: Omon/tos
sum(is.na(data1[2, 2, , , , , ]))
# [1] 0

# Coordinates returned for the first dataset, as plain vectors.
lat1 <- as.vector(attr(data1, 'Variables')$dat1$lat)
lon1 <- as.vector(attr(data1, 'Variables')$dat1$lon)

# Inspect the metadata: 'dat1' holds 'tas' and 'tos'; 'dat2' holds only 'tos'.
names(attr(data1, 'Variables')$common)
# [1] "fmonth"
names(attr(data1, 'Variables')$dat1)
# [1] "lat" "lon" "tas" "tos"
names(attr(data1, 'Variables')$dat2)
# [1] "lat" "lon" "tos"

# ---------------------------------------------------------------
# Case 2:
# Use a single path, {dataset = 1, var = 2, type = 1}.
# The 'type' dimension is necessary to distinguish between 'Amon' and 'Omon',
# and its dependency on 'var' needs to be specified.
library(startR)

# A single file pattern: the $type$ tag takes the place of the hard-coded
# 'Amon'/'Omon' parts of the path.
path <- paste0(
  '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/',
  'dcppA-hindcast/$member$/$type$/$var$/gr/v20190713/',
  '$var$_$type$_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc'
)

# 'type' is given per variable ('tas' -> 'Amon', 'tos' -> 'Omon') and declared
# as depending on 'var'.
# NOTE: the order of the dimension arguments is kept as is, since the
# dimensions of the returned array (see dim(data2) below) follow it.
data2 <- Start(
  dataset = path,
  var = c('tas', 'tos'),
  type = list('tas' = 'Amon', 'tos' = 'Omon'),
  type_depends = 'var',
  sdate = paste0(1960:1962),
  fmonth = 1,
  lat = values(list(10, 20)),
  lat_reorder = Sort(),
  lon = values(list(150, 170)),
  lon_reorder = CircularSort(0, 360),
  fyear = indices(1),
  member = indices(1),
  fyear_depends = 'sdate',
  fmonth_across = 'fyear',
  merge_across_dims = TRUE,
  synonims = list(
    fmonth = c('fmonth', 'time'),
    lon = c('lon', 'longitude'),
    lat = c('lat', 'latitude')
  ),
  metadata_dims = 'var',
  return_vars = list(lat = 'dataset', lon = 'dataset'),
  num_procs = 1,
  retrieve = TRUE
)

dim(data2)
# dataset     var    type   sdate  fmonth     lat     lon  member
#       1       2       1       3       1      15      28       1

# Compare data1 and data2
# tas: data1 {dataset = 1, var = 1} against data2 {var = 1}
identical(
  as.vector(data1[1, 1, , 1, , , 1]),
  as.vector(data2[1, 1, 1, , 1, , , 1])
)
# [1] TRUE
# tos: data1 {dataset = 2, var = 2} against data2 {var = 2}
identical(
  as.vector(data1[2, 2, , 1, , , 1]),
  as.vector(data2[1, 2, 1, , 1, , , 1])
)
# [1] TRUE

# ---------------------------------------------------------------
# Case 3:
# Two different Start calls can save data_tas and data_tos, both with
# {dataset = 1 and var = 1} dimensions, and avoid extra dimensions like 'type'.
# First call: 'tas' from the 'Amon' files.
path <- paste0(
  '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/',
  'dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/',
  '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc'
)

# NOTE: the order of the dimension arguments is kept as is, since the
# dimensions of the returned array (see dim(data_tas) below) follow it.
data_tas <- Start(
  dataset = path,
  var = 'tas',
  sdate = paste0(1960:1962),
  fmonth = 1,
  lat = values(list(10, 20)),
  lat_reorder = Sort(),
  lon = values(list(150, 170)),
  lon_reorder = CircularSort(0, 360),
  fyear = indices(1),
  member = indices(1),
  fyear_depends = 'sdate',
  fmonth_across = 'fyear',
  merge_across_dims = TRUE,
  synonims = list(
    fmonth = c('fmonth', 'time'),
    lon = c('lon', 'longitude'),
    lat = c('lat', 'latitude')
  ),
  return_vars = list(lat = 'dataset', lon = 'dataset'),
  num_procs = 1,
  retrieve = TRUE
)

dim(data_tas)
# dataset     var   sdate  fmonth     lat     lon  member
#       1       1       3       1      15      28       1

# Second call: 'tos' from the 'Omon' files. 'path' is deliberately reused and
# overwritten here.
path <- paste0(
  '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/',
  'dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/',
  '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc'
)

data_tos <- Start(
  dataset = path,
  var = 'tos',
  sdate = paste0(1960:1962),
  fmonth = 1,
  lat = values(list(10, 20)),
  lat_reorder = Sort(),
  lon = values(list(150, 170)),
  lon_reorder = CircularSort(0, 360),
  fyear = indices(1),
  member = indices(1),
  fyear_depends = 'sdate',
  fmonth_across = 'fyear',
  merge_across_dims = TRUE,
  synonims = list(
    fmonth = c('fmonth', 'time'),
    lon = c('lon', 'longitude'),
    lat = c('lat', 'latitude')
  ),
  return_vars = list(lat = 'dataset', lon = 'dataset'),
  num_procs = 1,
  retrieve = TRUE
)

dim(data_tos)
# dataset     var   sdate  fmonth     lat     lon  member
#       1       1       3       1      15      28       1

# Compare with previous results
# tas: data2 {var = 1} against data_tas
identical(as.vector(data2[1, 1, 1, , 1, , , 1]), as.vector(data_tas))
# [1] TRUE
# tos: data2 {var = 2} against data_tos
identical(as.vector(data2[1, 2, 1, , 1, , , 1]), as.vector(data_tos))
# [1] TRUE