From c9030c334c4d0a9517d702b841720b679aa3d824 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 15 Apr 2020 19:12:06 +0200 Subject: [PATCH 01/17] Carlos code runs in WS R 3.4.2 --- Nord3/Nord3_Carlos.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Nord3/Nord3_Carlos.R b/Nord3/Nord3_Carlos.R index 17afb80..098cfeb 100644 --- a/Nord3/Nord3_Carlos.R +++ b/Nord3/Nord3_Carlos.R @@ -70,8 +70,10 @@ res <- Compute(workflow = wf, polling_period = 10), ecflow_suite_dir = '/home/Earth/nperez/startR_local/', wait = TRUE) -res <- Compute(workflow = wf, chunks = list(member = 10)) -range(res) +res_WS <- Compute(workflow = wf, chunks = list(member = 10)) +range(res_WS) +#[1] 299.8463 301.7826 + res <- Compute(workflow = wf, chunks = list(member = 10), -- GitLab From b1ab09dfeba0ff9b348a58f892da0d91d100651a Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 15 Apr 2020 19:14:17 +0200 Subject: [PATCH 02/17] Run in Power 9 with startR 0.1.4 fails --- Nord3/Nord3_Carlos.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Nord3/Nord3_Carlos.R b/Nord3/Nord3_Carlos.R index 098cfeb..92c3c3c 100644 --- a/Nord3/Nord3_Carlos.R +++ b/Nord3/Nord3_Carlos.R @@ -85,7 +85,7 @@ res <- Compute(workflow = wf, cores_per_job = 2, job_wallclock = '01:00:00', max_jobs = 10, - r_module = 'R/3.6.1-foss-2018b', + #r_module = 'R/3.6.1-foss-2018b', bidirectional = FALSE, polling_period = 10), ecflow_suite_dir = '/home/Earth/nperez/startR_local/', -- GitLab From 0f3fb291f25a3d47399f7609d1a703fcc36c3ddf Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 15 Apr 2020 19:21:58 +0200 Subject: [PATCH 03/17] Carlos code fails for startR 1.0.0 in Power 9 --- Nord3/Nord3_Carlos.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Nord3/Nord3_Carlos.R b/Nord3/Nord3_Carlos.R index 92c3c3c..53aa6b9 100644 --- a/Nord3/Nord3_Carlos.R +++ b/Nord3/Nord3_Carlos.R @@ -85,7 +85,7 @@ res <- Compute(workflow = wf, cores_per_job = 2, job_wallclock = '01:00:00', max_jobs = 10, - 
#r_module = 'R/3.6.1-foss-2018b', + r_module = 'R/3.6.1-foss-2018b', #startR 1.0.0 bidirectional = FALSE, polling_period = 10), ecflow_suite_dir = '/home/Earth/nperez/startR_local/', -- GitLab From 17f17723625b5775fbc307ea79e1e45450097630 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 16 Apr 2020 11:18:03 +0200 Subject: [PATCH 04/17] Carlos code to run in Fatnodes. The job is currently queue. --- Nord3/Nord3_Carlos.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Nord3/Nord3_Carlos.R b/Nord3/Nord3_Carlos.R index 53aa6b9..ebdd55a 100644 --- a/Nord3/Nord3_Carlos.R +++ b/Nord3/Nord3_Carlos.R @@ -92,4 +92,17 @@ res <- Compute(workflow = wf, wait = TRUE) +res <- Compute(wf, + chunks = list(member = 10), + threads_load = 2, + threads_compute = 4, + cluster = list(queue_host = 'bsceslogin01.bsc.es', + queue_type = 'slurm', + temp_dir = '/home/Earth/nperez/startR_hpc/', + cores_per_job = 2, + job_wallclock = '00:10:00', + max_jobs = 4, + bidirectional = TRUE + ), + ecflow_suite_dir = '/home/Earth/nperez/startR_local/') -- GitLab From abbf9a996456e31bb09e13f75a0f885120b89f0e Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 16 Apr 2020 16:58:05 +0200 Subject: [PATCH 05/17] Carlos code in Fatnodes is active but never ends in ecFlow --- Nord3/Nord3_Carlos.R | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/Nord3/Nord3_Carlos.R b/Nord3/Nord3_Carlos.R index ebdd55a..a212772 100644 --- a/Nord3/Nord3_Carlos.R +++ b/Nord3/Nord3_Carlos.R @@ -1,3 +1,31 @@ +# -------------------------- NEW VERSION CARLOS CODE +# Try to simplify Start() call to see if the error lies in there: + +library(startR) +#Path for one member +path = '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i1p1f1/Omon/$var$/gr/v20190713/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-r1i1p1f1_gr_$fyear$.nc' + + ## Regions for AMV (Doblas-Reyes et al., 2013) + +regions = NULL +regions$reg1 = c(0, 60, 0, 40) +data 
<- Start(dataset = path, + var = 'tos', + sdate = c('1990', '1991', '1992'), + fyear = 'all', + time = c(1:5), + lat = values(list(0, 20)), + lon = values(list(0, 40)), + fyear_depends = 'sdate', + synonims = list(time = c('fmonth', 'time'), + lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), + return_vars = list(lat = 'dataset', lon = 'dataset'), + num_procs = 4, retrieve = FALSE) + +data_lats <- as.vector(attributes(data)$Variables$dat1$lat) +data_lons <- as.vector(attributes(data)$Variables$dat1$lon) + # -------------------------- CARLOS CODE @@ -11,7 +39,7 @@ path = '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3 data <- Start(dataset = path, var = 'tos', - sdate = paste0(1990:1992), + sdate = c('1990', '1991', '1992'), time = c(1:5), lat = values(list(0, 20)), lon = values(list(0, 40)), @@ -101,7 +129,7 @@ res <- Compute(wf, temp_dir = '/home/Earth/nperez/startR_hpc/', cores_per_job = 2, job_wallclock = '00:10:00', - max_jobs = 4, + max_jobs = 10, bidirectional = TRUE ), ecflow_suite_dir = '/home/Earth/nperez/startR_local/') -- GitLab From ac1c30f284a433536f982362e33d8d514ef6f93a Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 16 Apr 2020 16:59:03 +0200 Subject: [PATCH 06/17] Composite fix --- CompositeVero.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/CompositeVero.R b/CompositeVero.R index 39ef34a..788e40a 100644 --- a/CompositeVero.R +++ b/CompositeVero.R @@ -1,18 +1,20 @@ -#data <- 1 : (4*5*6) +#dadata <- 1 : (4*5*6)ta <- 1 : (4*5*6) #dim(data) <- c(lon = 4, lat = 5, case = 6) #occ <- c(1,1,2,2,4,4) #res <- Composite(data, occ) -Composite <- function(var, occ, lag = 0, eno = FALSE, fileout = NULL) { +Composite <- function(var, occ, lag = 0, eno = FALSE, K = NULL, fileout = NULL) { if ( dim(var)[3] != length(occ) ) { stop("Temporal dimension of var is not equal to length of occ.") } - K <- max(occ) - composite <- array(dim = c(dim(var)[1:2], K)) + if (is.null(K)) { + K <- max(occ) + } + 
composite <- array(dim = c(dim(var)[1:2], cluster = K)) tvalue <- array(dim = dim(var)[1:2]) dof <- array(dim = dim(var)[1:2]) - pvalue <- array(dim = c(dim(var)[1:2], K)) + pvalue <- array(dim = c(dim(var)[1:2], cluster = K)) if (eno == TRUE) { n_tot <- Eno(var, posdim = 3) -- GitLab From e77ae3780ca6d6488a9c704db6e96c5f3177ba3a Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 16 Apr 2020 17:16:04 +0200 Subject: [PATCH 07/17] Simplified version works on Nord3 --- Nord3/Nord3_Carlos.R | 45 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/Nord3/Nord3_Carlos.R b/Nord3/Nord3_Carlos.R index a212772..72c8f14 100644 --- a/Nord3/Nord3_Carlos.R +++ b/Nord3/Nord3_Carlos.R @@ -27,6 +27,49 @@ data_lats <- as.vector(attributes(data)$Variables$dat1$lat) data_lons <- as.vector(attributes(data)$Variables$dat1$lon) +fun_amv_ipo <- function(data, data_lats, data_lons, index, regions) { + + data = s2dv::Season(data = data, time_dim = 'time', + monini = 11, moninf = 1, monsup = 3, + method = mean, na.rm = TRUE) + #names(dim(data))[which(names(dim(data))=='time')] = 'fyear' + mean_1 = ClimProjDiags::WeightedMean(data = data, lon = data_lons, + lat = data_lats, region = regions$reg1, + londim = which(names(dim(data))=='lon'), + latdim = which(names(dim(data))=='lat')) +return(mean_1) +} + +step <- Step(fun = fun_amv_ipo, + #use_libraries = c('startR','s2dv','multiApply','ClimProjDiags'), + target_dims = c('time','lat','lon'), + output_dims = NULL) + +wf <- AddStep(inputs = data, + data_lats = data_lats, + data_lons = data_lons, + index = 'AMV', + regions = regions, + step = step) + + +res <- Compute(workflow = wf, + chunks = list(sdate = 3), + threads_load = 2, + threads_compute = 4, + cluster = list(queue_host = 'nord3', + queue_type = 'lsf', + temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/', + cores_per_job = 2, + job_wallclock = '01:00', + max_jobs = 3, + extra_queue_params = list('#BSUB -q bsc_es'), + bidirectional = 
FALSE, + polling_period = 10), + ecflow_suite_dir = '/home/Earth/nperez/startR_local/', + wait = TRUE) +res_WS <- Compute(workflow = wf, chunks = list(sdate = 3)) +# -------------------------------------- # -------------------------- CARLOS CODE library(startR) @@ -74,7 +117,7 @@ return(mean_1) step <- Step(fun = fun_amv_ipo, #use_libraries = c('startR','s2dv','multiApply','ClimProjDiags'), target_dims = c('dataset','var','sdate','time','lat','lon'), - output_dims = c('dataset','var','sdate','fyear')) + output_dims = c('dataset','var','sdate','other')) wf <- AddStep(inputs = data, data_lats = data_lats, -- GitLab From 5a0cabf892d2765f9a680aa5d755a1a49491db79 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 16 Apr 2020 18:08:18 +0200 Subject: [PATCH 08/17] Simplied code works with all members --- Nord3/Nord3_Carlos.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Nord3/Nord3_Carlos.R b/Nord3/Nord3_Carlos.R index 72c8f14..deb48e5 100644 --- a/Nord3/Nord3_Carlos.R +++ b/Nord3/Nord3_Carlos.R @@ -2,8 +2,8 @@ # Try to simplify Start() call to see if the error lies in there: library(startR) -#Path for one member -path = '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i1p1f1/Omon/$var$/gr/v20190713/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-r1i1p1f1_gr_$fyear$.nc' +#Path for all member +path = '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc' ## Regions for AMV (Doblas-Reyes et al., 2013) @@ -11,6 +11,7 @@ regions = NULL regions$reg1 = c(0, 60, 0, 40) data <- Start(dataset = path, var = 'tos', + member = 'all', sdate = c('1990', '1991', '1992'), fyear = 'all', time = c(1:5), @@ -32,7 +33,6 @@ fun_amv_ipo <- function(data, data_lats, data_lons, index, regions) { data = s2dv::Season(data = data, time_dim = 'time', monini = 11, moninf = 1, monsup = 3, method = mean, 
na.rm = TRUE) - #names(dim(data))[which(names(dim(data))=='time')] = 'fyear' mean_1 = ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats, region = regions$reg1, londim = which(names(dim(data))=='lon'), -- GitLab From 3b58c2d79523139a6944e8d4e4abaebc40c8488f Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 16 Apr 2020 18:12:06 +0200 Subject: [PATCH 09/17] Code using merge_across_dims works in Nord3 --- Nord3/Nord3_Carlos.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Nord3/Nord3_Carlos.R b/Nord3/Nord3_Carlos.R index deb48e5..5bef95b 100644 --- a/Nord3/Nord3_Carlos.R +++ b/Nord3/Nord3_Carlos.R @@ -18,6 +18,8 @@ data <- Start(dataset = path, lat = values(list(0, 20)), lon = values(list(0, 40)), fyear_depends = 'sdate', + time_across = 'fyear', + merge_across_dims = TRUE, synonims = list(time = c('fmonth', 'time'), lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), @@ -68,6 +70,12 @@ res <- Compute(workflow = wf, polling_period = 10), ecflow_suite_dir = '/home/Earth/nperez/startR_local/', wait = TRUE) + range(res) +#[1] 299.8463 301.7826 + dim(res$output1) +#dataset var member sdate +# 1 1 10 3 + res_WS <- Compute(workflow = wf, chunks = list(sdate = 3)) # -------------------------------------- # -------------------------- CARLOS CODE -- GitLab From ca1f95b9e544a33cb597006ea5815e95984b5962 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 16 Apr 2020 18:16:05 +0200 Subject: [PATCH 10/17] Now chunks in member dimensioon and works in Nord3 --- Nord3/Nord3_Carlos.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Nord3/Nord3_Carlos.R b/Nord3/Nord3_Carlos.R index 5bef95b..a3a17e7 100644 --- a/Nord3/Nord3_Carlos.R +++ b/Nord3/Nord3_Carlos.R @@ -56,7 +56,7 @@ wf <- AddStep(inputs = data, res <- Compute(workflow = wf, - chunks = list(sdate = 3), + chunks = list(member = 10), threads_load = 2, threads_compute = 4, cluster = list(queue_host = 'nord3', @@ -64,7 +64,7 @@ res <- Compute(workflow = wf, temp_dir = 
'/gpfs/scratch/bsc32/bsc32339/startR_hpc/', cores_per_job = 2, job_wallclock = '01:00', - max_jobs = 3, + max_jobs = 10, extra_queue_params = list('#BSUB -q bsc_es'), bidirectional = FALSE, polling_period = 10), -- GitLab From 5d501dc86e3095810b5515d04a3b1383d9e0b93c Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 22 Apr 2020 12:54:28 +0200 Subject: [PATCH 11/17] Simplified Bala's code, same data MeanDims function --- Nord3/Bala_nord3.R | 96 ++++++++++++++++++++++++++++++++++++++++++++++ Nord3/Nord3_Bala.R | 92 -------------------------------------------- 2 files changed, 96 insertions(+), 92 deletions(-) create mode 100644 Nord3/Bala_nord3.R delete mode 100644 Nord3/Nord3_Bala.R diff --git a/Nord3/Bala_nord3.R b/Nord3/Bala_nord3.R new file mode 100644 index 0000000..1ed65e0 --- /dev/null +++ b/Nord3/Bala_nord3.R @@ -0,0 +1,96 @@ +library(startR) +t <- 4 +model_version <- "INIT" # INIT or NoINIT -> Initialized or uninitialized +path_exp <- paste0('/esarchive/exp/ncar/cesm-dple/daily/', + '$var$/$var$_$sdate$.nc') + +spatial_target <- 'all' # NH or SH -> Northern or Southern hemishphere + lats.min=42 + lats.max=65 + lons.min=0 + lons.max=18 + +fyears <- c(1961,1962,1963,1964,1965,1966,1967,1968,1969,1970) +fy_start <- c(62,427,792,1157,1522,1887,2252,2617,2982,3347) +fy_end <- c(426,791,1156,1521,1886,2251,2616,2981,3346,3711) + +ldtmin = fy_start[t] - 61 +ldtmax = fy_end[t] + +variable <- 'tasmax' +y1 <- 1960 +y2 <- 2014 + +monini <- '11' +sdates_exp <- paste0(y1:y2, monini, '01') + +nmemb <- 40 + +data_exp <- Start(dat = list(list(path=path_exp)), + var = variable, + member = indices(1:nmemb), + time = indices(ldtmin:ldtmax), + sdate = sdates_exp, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude=c('lat','latitude'), + longitude=c('lon','longitude'), + member=c('ensemble','realization')), + return_vars = list(latitude = 
'dat', + longitude = 'dat', + time = 'dat'), + retrieve = F) + +path_mask <- "/esarchive/scratch/bsolaraj/gitlab/multi-annual-heatwave/HMD_study/Scripts/mask_harvest_month/mask_harvestmonth.nc" +mask <- Start(dat = path_mask, + var = 'mask', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort(0, 360), + return_vars = list(latitude = 'dat', + longitude = 'dat'), + synonims = list(latitude=c('lat','latitude'), + longitude=c('lon','longitude')), + retrieve = F) + +atomic_x <- function(x, mask, t) { + object <- s2dv::MeanDims(x, dims = c('dat', 'time')) + return(object) +} +step <- Step(fun = atomic_x, + target_dims = list(x = c('dat','var','member','time','sdate'), + mask = c('dat')), + output_dims = c('member','sdate', 'var')) + +wf <- AddStep(list(data_exp, mask), step, t = t) + +#-----------modify according to your personal info--------- +queue_host = 'nord3' #your own host name for nord3 +temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' +ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory +#------------------------------------------------------------ + +res <- Compute(wf, + chunks = list(latitude = 4, + longitude = 3), + threads_load = 48, + threads_compute = 48, + + cluster = list(queue_host = queue_host, + queue_type = 'lsf', + cores_per_job = 48, + job_wallclock = '02:00', + temp_dir = temp_dir, + #r_module = 'R/3.6.2-foss-2019b', + polling_period = 10, + #extra_queue_params = list('#BSUB -M 7000'), + max_jobs = 48, + bidirectional = FALSE), + ecflow_suite_dir = ecflow_suite_dir, + wait = TRUE) + + + diff --git a/Nord3/Nord3_Bala.R b/Nord3/Nord3_Bala.R deleted file mode 100644 index 6045a34..0000000 --- a/Nord3/Nord3_Bala.R +++ /dev/null @@ -1,92 +0,0 @@ -library(startR) - -path_exp <- paste0('/esarchive/exp/ncar/cesm-dple/daily/', - '$var$/$var$_$sdate$.nc') -data_exp <- Start(dat = list(list(path=path_exp)), - var = 
'tasmax', - member = indices(1), - time = indices(1), - sdate = c('19921101'), - latitude = "all", - longitude = "all", - synonims = list(latitude=c('lat','latitude'), - longitude=c('lon','longitude'), - member=c('ensemble','realization')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'dat'), - retrieve = F) - -path_mask <- "/esarchive/scratch/bsolaraj/gitlab/multi-annual-heatwave/HMD_study/Scripts/mask_harvest_month/mask_harvestmonth.nc" -mask <- Start(dat = path_mask, - var = 'mask', - latitude = "all", - longitude = "all", - return_vars = list(latitude = 'dat', - longitude = 'dat'), - synonims = list(latitude=c('lat','latitude'), - longitude=c('lon','longitude')), - retrieve = F) - -atomic_x <- function(x, mask, t){ - source("/esarchive/scratch/bsolaraj/HMD_study/eur/source_files_CC/hmd_atomic_fun.R") - source("/esarchive/scratch/bsolaraj/HMD_study/eur/source_files_CC/hmd_CC_mod.R") - ldtmin_ref <- (6936 - (365 * (t - 1))) # 1980 location - ldtmax_ref <- (18615 - (365 * (t - 1))) - - hmd_final <- hmd_fun(x = x, ldtmin_ref, ldtmax_ref, har_mon = mask) - return(hmd_final) - } - - -step <- Step(fun = atomic_x, - target_dims = list(x = c('dat','var','member','time','sdate'), - mask = c('dat')), - output_dims = c('member','sdate', 'var')) - -wf <- AddStep(list(data_exp,mask), step, t = 1) - -res <- Compute(wf, chunks = list(latitude = 4, longitude = 6)) - -queue_host = 'nord3' #your own host name for nord3 -temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' -ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory -#------------------------------------------------------------ - -res <- Compute(wf, - chunks = list(latitude = 10, - longitude = 10), - threads_load = 1, - threads_compute = 8, - - cluster = list(queue_host = queue_host, - queue_type = 'lsf', - cores_per_job = 8, - job_wallclock = '12:00', - temp_dir = temp_dir, - polling_period = 10, - extra_queue_params = list('#BSUB -M 3000'), - max_jobs = 100, - 
bidirectional = FALSE), - ecflow_suite_dir = ecflow_suite_dir, - wait = TRUE) - - - -res <- Compute(wf, - chunks = list(latitude = 8, - longitude = 8), - threads_load = 32, - threads_compute = 32, - cluster = list(queue_host = 'cte-power', - queue_type = 'slurm', - temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/', - job_wallclock = '23:00:00', - cores_per_job = 32, - max_jobs = 64, - extra_queue_params = list('#SBATCH --mem 64000'), - bidirectional = FALSE, - polling_period = 10 - ), - ecflow_suite_dir = '/home/Earth/nperez/startR_local/', - wait=TRUE) -- GitLab From 2961cdec3a6d112e0eccb7f286e4c06c0459d8e8 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 22 Apr 2020 13:50:33 +0200 Subject: [PATCH 12/17] Balas code works in Power 9 --- Nord3/Bala_nord3.R | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/Nord3/Bala_nord3.R b/Nord3/Bala_nord3.R index 1ed65e0..fead978 100644 --- a/Nord3/Bala_nord3.R +++ b/Nord3/Bala_nord3.R @@ -93,4 +93,21 @@ res <- Compute(wf, wait = TRUE) - +queue_host = 'cte-power' +res <- Compute(wf, + chunks = list(latitude = 4, + longitude = 3), + threads_load = 2, + threads_compute = 12, + cluster = list(queue_host = queue_host, + queue_type = 'slurm', + cores_per_job = 12, + job_wallclock = '02:00:00', + temp_dir = temp_dir, + #r_module = 'R/3.6.2-foss-2019b', + polling_period = 10, + #extra_queue_params = list('#BSUB -M 7000'), + max_jobs = 12, + bidirectional = FALSE), + ecflow_suite_dir = ecflow_suite_dir, + wait = TRUE) -- GitLab From eb9d0544630ed44df9546ad6c46dab6922292c44 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 22 Apr 2020 14:00:46 +0200 Subject: [PATCH 13/17] same configuration Bala's code fails in Nord3 --- Nord3/Bala_nord3.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Nord3/Bala_nord3.R b/Nord3/Bala_nord3.R index fead978..a0998a8 100644 --- a/Nord3/Bala_nord3.R +++ b/Nord3/Bala_nord3.R @@ -76,18 +76,17 @@ ecflow_suite_dir = 
'/home/Earth/nperez/startR_local/' #your own local directory res <- Compute(wf, chunks = list(latitude = 4, longitude = 3), - threads_load = 48, - threads_compute = 48, - + threads_load = 2, + threads_compute = 12, cluster = list(queue_host = queue_host, queue_type = 'lsf', - cores_per_job = 48, + cores_per_job = 12, job_wallclock = '02:00', temp_dir = temp_dir, #r_module = 'R/3.6.2-foss-2019b', polling_period = 10, #extra_queue_params = list('#BSUB -M 7000'), - max_jobs = 48, + max_jobs = 12, bidirectional = FALSE), ecflow_suite_dir = ecflow_suite_dir, wait = TRUE) -- GitLab From f6ecf80f9d1c638d4d76a2de2f4235f0db70eaba Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 22 Apr 2020 14:59:56 +0200 Subject: [PATCH 14/17] Bala's code keeps failing with 2 startdates --- Nord3/Bala_nord3.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Nord3/Bala_nord3.R b/Nord3/Bala_nord3.R index a0998a8..61f7860 100644 --- a/Nord3/Bala_nord3.R +++ b/Nord3/Bala_nord3.R @@ -18,19 +18,14 @@ ldtmin = fy_start[t] - 61 ldtmax = fy_end[t] variable <- 'tasmax' -y1 <- 1960 -y2 <- 2014 -monini <- '11' -sdates_exp <- paste0(y1:y2, monini, '01') -nmemb <- 40 data_exp <- Start(dat = list(list(path=path_exp)), var = variable, - member = indices(1:nmemb), + member = indices(1:40), time = indices(ldtmin:ldtmax), - sdate = sdates_exp, + sdate = c('19601101', '19611101'), latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), -- GitLab From f044d726a9dad3ade42c880ee2f973689c792f97 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 22 Apr 2020 15:11:55 +0200 Subject: [PATCH 15/17] Original error again modifying Start --- Nord3/Bala_nord3.R | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/Nord3/Bala_nord3.R b/Nord3/Bala_nord3.R index 61f7860..6aebb39 100644 --- a/Nord3/Bala_nord3.R +++ b/Nord3/Bala_nord3.R @@ -4,31 +4,15 @@ model_version <- "INIT" # INIT or NoINIT -> 
Initialized or uninitialized path_exp <- paste0('/esarchive/exp/ncar/cesm-dple/daily/', '$var$/$var$_$sdate$.nc') -spatial_target <- 'all' # NH or SH -> Northern or Southern hemishphere - lats.min=42 - lats.max=65 - lons.min=0 - lons.max=18 - -fyears <- c(1961,1962,1963,1964,1965,1966,1967,1968,1969,1970) -fy_start <- c(62,427,792,1157,1522,1887,2252,2617,2982,3347) -fy_end <- c(426,791,1156,1521,1886,2251,2616,2981,3346,3711) - -ldtmin = fy_start[t] - 61 -ldtmax = fy_end[t] - -variable <- 'tasmax' - - data_exp <- Start(dat = list(list(path=path_exp)), - var = variable, + var = 'tasmax', member = indices(1:40), - time = indices(ldtmin:ldtmax), + time = indices(1096:1521), sdate = c('19601101', '19611101'), - latitude = values(list(lats.min, lats.max)), + latitude = values(list(42, 65)), latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), + longitude = values(list(0, 18)), longitude_reorder = CircularSort(0, 360), synonims = list(latitude=c('lat','latitude'), longitude=c('lon','longitude'), -- GitLab From aef1252f13569106ca348824b16e24f55310c4db Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 22 Apr 2020 16:02:00 +0200 Subject: [PATCH 16/17] Works Bala's code changing threads to load and compute --- Nord3/Bala_nord3.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Nord3/Bala_nord3.R b/Nord3/Bala_nord3.R index 6aebb39..fdfdf1e 100644 --- a/Nord3/Bala_nord3.R +++ b/Nord3/Bala_nord3.R @@ -55,8 +55,8 @@ ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory res <- Compute(wf, chunks = list(latitude = 4, longitude = 3), - threads_load = 2, - threads_compute = 12, + threads_load = 1, + threads_compute = 4, cluster = list(queue_host = queue_host, queue_type = 'lsf', cores_per_job = 12, -- GitLab From 99fe42e9f9e2518c7aa896ec81dc26ca7518616f Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 27 Apr 2020 15:04:25 +0200 Subject: [PATCH 17/17] functions --- Nord3/Bala_nord3.R | 35 +- Nord3/Config1.jpg | Bin 
0 -> 23603 bytes Nord3/Config2.jpg | Bin 0 -> 23393 bytes Nord3/Start.R | 3364 ++++++++++++++++++++ R/AddStep.R | 105 + R/ByChunks.R | 906 ++++++ R/CDORemapper.R | 46 + R/Collect.R | 275 ++ R/Compute.R | 75 + R/NcCloser.R | 3 + R/NcDataReader.R | 182 ++ R/NcDimReader.R | 77 + R/NcOpener.R | 3 + R/NcVarReader.R | 27 + R/SelectorChecker.R | 269 ++ R/Sort.R | 36 + R/Start.R | 3286 +++++++++++++++++++ R/Step.R | 83 + R/Subset.R | 97 + R/Utils.R | 841 +++++ Rissues/CST_LoadIssue_class_19.R | 50 + {R => Rissues}/CST_SaveNC.R | 0 {R => Rissues}/CST_SaveNC_v2.R | 0 {R => Rissues}/CST_SaveNC_v3.R | 0 {R => Rissues}/CST_SaveNC_v4.R | 0 {R => Rissues}/CSTools_CRANDownloads.R | 0 {R => Rissues}/Image_test.R | 0 {R => Rissues}/JostRainFarm.R | 0 {R => Rissues}/Medscope_splitingfunction.R | 0 {R => Rissues}/MonthsToString.R | 0 {R => Rissues}/SelectSector.R | 0 {R => Rissues}/StartExampleClimateProj.R | 0 {R => Rissues}/script_demo_Prakash.R | 0 RowColIndices.R | 8 + esarchive_search.R | 46 + source_dir.R | 6 + source_lines.R | 7 + 37 files changed, 9812 insertions(+), 15 deletions(-) create mode 100644 Nord3/Config1.jpg create mode 100644 Nord3/Config2.jpg create mode 100644 Nord3/Start.R create mode 100644 R/AddStep.R create mode 100644 R/ByChunks.R create mode 100644 R/CDORemapper.R create mode 100644 R/Collect.R create mode 100644 R/Compute.R create mode 100644 R/NcCloser.R create mode 100644 R/NcDataReader.R create mode 100644 R/NcDimReader.R create mode 100644 R/NcOpener.R create mode 100644 R/NcVarReader.R create mode 100644 R/SelectorChecker.R create mode 100644 R/Sort.R create mode 100644 R/Start.R create mode 100644 R/Step.R create mode 100644 R/Subset.R create mode 100644 R/Utils.R create mode 100644 Rissues/CST_LoadIssue_class_19.R rename {R => Rissues}/CST_SaveNC.R (100%) rename {R => Rissues}/CST_SaveNC_v2.R (100%) rename {R => Rissues}/CST_SaveNC_v3.R (100%) rename {R => Rissues}/CST_SaveNC_v4.R (100%) rename {R => Rissues}/CSTools_CRANDownloads.R (100%) rename {R 
=> Rissues}/Image_test.R (100%) rename {R => Rissues}/JostRainFarm.R (100%) rename {R => Rissues}/Medscope_splitingfunction.R (100%) rename {R => Rissues}/MonthsToString.R (100%) rename {R => Rissues}/SelectSector.R (100%) rename {R => Rissues}/StartExampleClimateProj.R (100%) rename {R => Rissues}/script_demo_Prakash.R (100%) create mode 100644 RowColIndices.R create mode 100644 esarchive_search.R create mode 100644 source_dir.R create mode 100644 source_lines.R diff --git a/Nord3/Bala_nord3.R b/Nord3/Bala_nord3.R index fdfdf1e..9f179dd 100644 --- a/Nord3/Bala_nord3.R +++ b/Nord3/Bala_nord3.R @@ -1,33 +1,33 @@ -library(startR) -t <- 4 + model_version <- "INIT" # INIT or NoINIT -> Initialized or uninitialized -path_exp <- paste0('/esarchive/exp/ncar/cesm-dple/daily/', - '$var$/$var$_$sdate$.nc') +path_exp <- '/esarchive/exp/ncar/cesm-dple/daily/$var$/$var$_$sdate$.nc' -data_exp <- Start(dat = list(list(path=path_exp)), +data_exp <- Start(dat = path_exp, var = 'tasmax', member = indices(1:40), time = indices(1096:1521), sdate = c('19601101', '19611101'), latitude = values(list(42, 65)), - latitude_reorder = Sort(), + #latitude_reorder = Sort(decreasing = FALSE), longitude = values(list(0, 18)), - longitude_reorder = CircularSort(0, 360), + #longitude_reorder = CircularSort(0, 360), synonims = list(latitude=c('lat','latitude'), longitude=c('lon','longitude'), member=c('ensemble','realization')), return_vars = list(latitude = 'dat', longitude = 'dat', time = 'dat'), - retrieve = F) +retrieve = F) + retrieve = T,debug = F, + num_procs = 2) path_mask <- "/esarchive/scratch/bsolaraj/gitlab/multi-annual-heatwave/HMD_study/Scripts/mask_harvest_month/mask_harvestmonth.nc" mask <- Start(dat = path_mask, var = 'mask', - latitude = values(list(lats.min, lats.max)), + latitude = values(list(42, 65)), latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), + longitude = values(list(0, 18)), longitude_reorder = CircularSort(0, 360), return_vars = list(latitude 
= 'dat', longitude = 'dat'), @@ -35,16 +35,15 @@ mask <- Start(dat = path_mask, longitude=c('lon','longitude')), retrieve = F) -atomic_x <- function(x, mask, t) { +atomic_x <- function(x, t) { object <- s2dv::MeanDims(x, dims = c('dat', 'time')) return(object) } step <- Step(fun = atomic_x, - target_dims = list(x = c('dat','var','member','time','sdate'), - mask = c('dat')), + target_dims = list(x = c('dat','var','member','time','sdate')),, output_dims = c('member','sdate', 'var')) -wf <- AddStep(list(data_exp, mask), step, t = t) +wf <- AddStep(list(data_exp), step, t = 1) #-----------modify according to your personal info--------- queue_host = 'nord3' #your own host name for nord3 @@ -56,7 +55,7 @@ res <- Compute(wf, chunks = list(latitude = 4, longitude = 3), threads_load = 1, - threads_compute = 4, + threads_compute = 2, cluster = list(queue_host = queue_host, queue_type = 'lsf', cores_per_job = 12, @@ -64,12 +63,18 @@ res <- Compute(wf, temp_dir = temp_dir, #r_module = 'R/3.6.2-foss-2019b', polling_period = 10, + #extra_queue_params = list('#BSUB -q debug'), #extra_queue_params = list('#BSUB -M 7000'), max_jobs = 12, + lib_dir = "/home/bsc32/bsc32339/R/x-86_64-pc-linux-gnu-library/", bidirectional = FALSE), ecflow_suite_dir = ecflow_suite_dir, wait = TRUE) +source('https://earth.bsc.es/gitlab/es/startR/raw/master/inst/PlotProfiling.R') +jpeg("/esarchive/scratch/nperez/git/Flor/Nord3/Config2.jpg") +PlotProfiling(attr(res, 'startR_compute_profiling')) +dev.off() queue_host = 'cte-power' res <- Compute(wf, diff --git a/Nord3/Config1.jpg b/Nord3/Config1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5bdf87fb5cfc5b5b6481efd6b5cb72ce9d64a319 GIT binary patch literal 23603 zcmb?@1z1$w*6^VP2|+*_q#J3F7!YZZ25BXvq#J}m6cnUWkVZfnq@_brq`Ra`x@+b? 
z1HSitzkBa{zj*%PIXq|1*?aA^*4}&7UTffE@Sngf1sQo600{{Ikib6xJ_$$xH_*{B z(9v#SU|?Wk-oU~p#KFFK6PuLaHXb1*88sCp83hFmJtq?lEjt|r1+yRv`#o+xK0azD zVKE^dQBGby9t031OiWDdo7f~cI3zrGDem(8uRric0RINkPo$qHNVEVlJ`xH(61)|l z2Ju8gy7~aWKS;LY`LMC+6cWBK^-x+yR!&|) zT|-k#`-zUOiK*Fha|=r=CubK|H+K)u*Khm--o6VAij0bmiH-XZpOBvMDKjhkb58D; z(z5c3%Bt$G%`L6p+S)rhy9Ne_hDS!n#wX_H7Z#V6S60{7clY)W4v&scPR|f@Apt1Y zWP$&$3Hv9y@IkteQBhG)F%WbiA-jPW3O*{@U2b#&NmYzz4!3D}Ufm#k5Rq2Wgh|J% zwnJp>*pEd_&o{@giy-Zavi})je*Y)R{z2HEbWH%*C`jPsq2L1$;P{L=-52XWU4BP< zvyY3b>c2*HS5yRM#q2yDQjr&;NJd&yuEMs-#G)vX@_mE;L*#ZN|03D**K8+H!@;M0 zj1lJj3{w2x-oy^nHB!^c-xSrLCVE3T@I>web} zvDF0Ot&%38*!zyLV5m7?z+FzR<=4_Okfua=TgfEr(XTJhevGwgawI*TD#dB1>(e-U z8<0BeynV~+_$y(hwsOu`H{Jw${L&t(Yrund-N}9ujo3+@Z`e@LGmbY_ z(x8PC_-y?`=JBx|PsK?xF{teCsa3}LHat8?R9RjjF(CgcmX00(*5v_Hr;ZGnQJcfe zY7M{nk>wF(*7=bov(kSf2?hkqe_ zO>I5-M)B(h{anxLY|u@I7$ebx4Tvk7Irm%hjjUNve5?S5nH^>@zA5=UcGY~z0TF$ZVH{n~Sp?Dn-+@O27(n+dpn z^5uDqrrl8YR%x)Xs8Ifv-ot%4iB7E4sP;bXgtdCgs)kXHPu`ps&L`4yUxaV6Q9LQ8&!g)<^>>=7;t>b2b z5s|2Zkx?1AX;KojY^^sI9HsZb*u7Xa+vVYDY4Oiuj`C292BK_p51a#ZYsV#&TJx7L z-a3}3w*U^|9;=?gLHnl%dhZykgGL|43Zc!sGm%TZ`=I@PTq6pJYNWTwz$%PNFkvP+ zAuEnP)`I`bngb?x6vG*x*DDf{IQG;-6uK-6l>XHs!Z>B_ljo-{n`|mytHPu;22J;p zLNZu8Zg9W>c0G~D%WWD+QNh|p7G2s>gTnzgDuVIh01_dRYY=lP^8eQ>6nb(TciD20 zmz0W|5VWK=&%zi>_@a|oFqn#mhgAVs`&_6+{;m+^!$*5rQ`;LYD1O%upN8k^yaTW! 
zy;RBbg9?L5pCFS^_=#DdWM_4fb>D7VU|!dJ`RFh31V@XxPf1(o0J-!}=H^r>KIN;~nBPd$voZ|n)A$5yrXKp}-0 z``*uGi!TtPgXuvVp#^|t{}a!*7f|^V=v_?SvSqIYpbB7^llNUkKQHqT4;)94t3uOxS-m&RS| z^mwDqG_=iuQ%R&HS%>6AZHJ>Lb ziIGjOmb7<9Vi~{owMBQet#m&5aLPk(=1bZ_M1zaV8}Z9iTg`@c4A(RAeuDCsZyO+| zURCTQ{rsw39XTFlX%vNGJlEDJS`s4?V6WlmV;F-vLoxJoAfxiu1fK&o>%;GE#1?zp zWzS>=d$WazRvWI6;V|wsT9LTJ4>>u21Fx{Y6>PNX&`eD8ut9#6c*24Ez!JYV#<{Xi z=~s(_WM*HTm%X(TPaAP@WeqWj9?Z7ff!anYMp*R8vpdQK`J8Gw%9EozHB>W&X4Mp3 zq4=9+n6(UU%o-3A(>>HjrXp2X5&&UGQ~Rz-SWU8(A_P;3Ji@V0kvcK-XGEZ1DMj%O zaa&on>C?j90$Nn3F{-Sn-^gTWmBE1%SlBL1=F=7Atn$hbPSxl=-i{jjff$+epst*yVUWM@u~WB=U4bb7htjURq=@H2;CiXDu{PLtlTTt+Jj z6%oWJV#+zGxI`~SK7Uf3%$qosV0q3tj%H$tg+7y5B>YQ;rS#9c3zfvKv|{ z2M1zjcc?d1uZGrUrI zU|QH_ae~}SV2CDnaRmh{gIa>k>zdEv-FJ%iQ?dBVv!LxC;DGxu1hGOk9`q24*yjy0 z`m(D^69RvGJxTaBJzai4Aj_NiNrB8cFk@;m5vhfR-eoQHVudIPH}A6|N3G1JZ|&GO zKTeyyErGB&ip`t*=HV77pHpCBOK`v*4zR9RZPsTxFou}X$Hv{^vad0X1bn${R_N_L zcT+OC#?s+Ha!L^#(Dh{=Wq|~6>SN1Gy zbnOfd9DY70G}<3LgaaEY7%AY^UZ|1@d|@ItA(4rT+eUy*9R+E_!=Nz1C~z6+|6ubh z@E!ZRT!%p#t-L3#Bw_bpOmLt;=}!=7WWOb+Rmz{}l)YGZk~)!1$CLmhU+G1^<*k{) zuKgv<<&F~BlzFE9`eKA$o=Gjgn{OE2+|A0E7c;gN>9A4btF)B#C!q*X zoerZ&I8cNvT(GtI5JNk9o#23>4ID_z^x{w1g%PdJeOh|_>tt%n|2|12mO@uY(8{Hk z4|P|1jxBVwat`Fc)E;sQwH6$39%R`)v^!P;co5T6qzeafsC6^63!p<`1%Q$?p$gL9 zT&TP{llqE;rFv5I5fp7(zR}{X2dgY^mA|m@d=zmkr5p%jak%@#amuN;KquXoj=5wq zS{f^;N>kMr{XW*RsA_I+`ibj?N`>5vg{zGY6075Swh&!OVV3ft%>jSwg#^itOhyMN zKcHvaR;&W46Th5)rjgEDHY==k`k3SWGt*=*IK-S_-p+8~{OC8=V?eG$Bw(FQ(B3jP zoLzPO=kyb6Q*dBV5Dv_*!U6y6LTGPc3LNm6g#+&kse;7n9?xAy`GHemab$;bl6BtN zV0ZpPYxgpxdwekuduuvzF=Z=gn*+fb57}nbl+92HH~?BwrK5S14tNWOu~d&W2F+Wa zA3T#KzwbO8S-BYaIX!4>|51{jjQ4}(nPrh`V|PPQrtV4$`|loVj`ku)L=VJ}k637; z`D^%-hhMpc)2sAEuGn?iw`6$`4fIQpa$6MR@cdvaBj~mg%wxNSa!7Ad;caEae+gA+ zxZWkH7^g)WXR6B$MQe}JeHnNl(b1wU`Opqr z&+c#l_Ao>iHVy|?zZHgZ=yzu6^l+4IGjxp2JTx07PoqE5Kj!a!De=giT2&+(4&<&v z*V0E^pl`WkTtHQ@?cDH6@<(!dj3CMO%6My)H_jAE3lW7 za6n4o$m`L|6Y>9kEzwoACyz5#+F89v;28N^SQbXL+(CSQ>E$?;^3_Yj`Q)tfJj<6H 
z!3z?QS~XRPn}~aZ3Q!ER7pUfqG_1>x_%i-B-Vy5RpD$$L)X+IiGlM1ree8&R7F6o* zSlO3>*{^7B74MU9(`uv18Mj#Wc6TxbyA)tCBNO_GSaYK?6EF{2ef|X$^0vc)J$ft1 zQ7;^j;c(r9=GmncGR-4L{0x!Jz9`*>14_iDc^AT-yhU7az$+8R471n)<5smM2Rk4# zCIBGmdq}W&9 zH-(-1I0IcQuoo$_c0a-dltDL20tpVNHEdI_J<53$w!|_sKIL`EQh(LJmsHjh`jP2) zazCRd>U{IG4=wL%z2=RwYq9%D_FCmEcOo1*q-gbvaR}(nQC)Y#ydQQfyO*U=MCW&J z^H|o??g~aKx<@?{NU79d0qv*BFEHG8b}%odHt-&$Y(yzanz2 zlP+@H^*p-{kd}sAapU^=DpHImLYxdMUv<;w2(0iAsFF6rMFQnDg!D3*AMdtpf0&bf3Id8JI#oDO0cw2fjQ2VrW!=JsYN{N4W2pCPgQ(LwRa^_bDlqDtW^duYeRW9S z0ET25-a&h%^Vi`3u3_*-HyoH*g9bqoMDGVN7m-|wBEG^Za<_i&^%~b{=@)Cus4o5Lc~g7O8-OLO5j4)wJ#SgSz_)^E^#q! zQ_U(oM@U#8+AR|yPUDt9Q3L3G1!c|2Fl(Jl7*mRFCPMrG41`snG*Z2~zyCPnvi~9{ zhW?|YFvQvQDC>F{wMl4^QTwTeJJ?X8;{4@dli0bJ3Tj+R)5&P?1!%)WBu~O>M?snC zdE^C(V5SDE-LN1~YrRRHq~8Az>KhceFd46uvwgcu9JcX%Fs!swM0%nVb|Sw8+N9#w zHmO*a_{-aq<(?mu=TcBwA=^d$w?IRz$gbs7LzBO4R_u$i?aVMSNUY|7yYaQcwndPB za;&ztWB0T1aDvJl^zfh_7Q1-;k(VCFcc92RS#d*bGJJpy_ z-D4}Q_nV)ewJ8{{h^B0+EnfGIS5`fsIS=ajGH_^7h`+^ka1aQPzW}!+Ly}~N+V^e$ z#T`I&#{%WS#-p;@B*tU`a}^8%o&aDG9hmbK{kfbDYaMJ1zwyK)m?VPD?-Td4vX!O9 zsJ8nf{)9+al9c5i{0K1#Ez31r0d3L+IB-h9Pkn9<2Qr9jJRrlBeWSN`k>o(*co3#@ z3kPvv>PSc9;6U8~93X`9xqE}w75AAt=$Rs|8DTaobHjlb&?C>o_eO_w8-}Vo z(dOsBl>lfX+=B6MJZpH+eK00{cyQU%NWah*OKF3lg&?B;q*j@7iSJ zDnGpsBWgWCcKxV^|EaH?g7_HgY3SjWXx}k;e>Lz7g=l)Xa=%Y&CDN0DNakE4v{^&e z&x6p{P;SU3m>3JQld@Mbs$C(!p0>jAfu#Kpg?Acg-+z{x6$8~XQ~kM&c!KMg|Y$)EUuMl7oR6)-tMJhP#?0vc45(7;W>M8 zuMwt+zNHwVL+lj({T(mReFq}-Ur~3-h!vWCc zmC~0Dg9D3lL(t6>Sg`JzOu5Ai4#uQl4!1qpTY4+2+BBZK2`l0>GTlGDA4arqH5?fK zuMKgv*+~-8E9bDBZG_|1lVZ>1f3?GdXK#0txtptnEcsJe>twuBn3JlEZKsGxMuJFq zvtcxS9k3GIL|&gLAJ=ni6B+&T^qz+KF^cP!SE8fW{8tVyg^y1LsRm|MKPtz(niS>E zDDH}Z@r&0LyZ;P&tJp_BSQIjpP`)(2m%vR?(nN+GLq9Jx%CtoC!y#%zFIip;mgL~! 
zZNoFwO!IbmkKs)fN8;98J6(ljyKYu8rTc`SUQ_+7ML8q;yCGhRd(-=-0wT^tb<%+O zw@lq{*?LWUs#E-5mgpnz$@ydkfY#VIK`*E~%q8s)x75yVoBBt{=H8DB2_NZJme<(zFq=Yr{L-&6zv*!c?H zM`=Kwu0r;^&+7K3(p6XXKOBll1dJSx@!<8Fw?`YwrGBeC71YreGT#aA9~68flK*AsM9y=ehH$V<)PimiWa3`gpT~$ z>34_DM=AeCVLfrDQ>CXqHJ^kfaF}bKI3AcA8o`bgFH|r3=MGe*-P_mZ5W#@VmyB@W zL;Gb392nmi`6JSzqI}eb1X)L@Ft4W%MT4FGov8-e0Ja`?K_Dom;4Yt)(wXH-`-e}< z)xBzQ>W#xdh=j1MjKJ_A@6rL^!XJz9=H3~ZQC#1k#kRzq!ji@P4UKvuDQ(W{Na7u^ z<;(hj$RHq6shZLU3@G-3O+T$erm4H)B@kU>XYG=!dZ9}yL%;n2B>oxkM=N#i?cYS9 z&B``mr~3#AKVAfRy$c#g*$9bVdFfFE3png~4h5zNiy7 z76wm1S*-=^vL}@tL zj*FK8^?Go?p@H?Pg+{!1ndw;{N*5$L!`gdca!qM(_B`ZNE%IgN$_Z%4ES~itpW0ov zU3|WNZhF=qwIOlGR+9TE@?$Se2zX*xH5;RU?LDxk=mJL9PpGcPc$A{c<86Vo6)JbC zw-q`xd;OCA&7#u<-Kn}G`0C{qo*ZBACCRH{h{={qMQy0*MXeNxH+1}fo;lmrk7aym+cO^Y|xhdH$H|s<^}Ih_qUgaXLsXnw7K4u zr*J*`T#LX_484onN=I8pI^^ZIh=y|fU+Fia5F_~*rf^fN1dXQq zvNc&##wN%1o0~dpqJ_#YPt{u)Wzx!zkLo>Y&t1s)&Yalbida zz%dwvAuxo3=~2A9%AkX4cXlm;w|4hTsV+C##90<#1YR$oy;m0bUt=(!Hu?`S81uR3 zkU6I6-iy;Kd;I9b=?UcNjXT6@&NXyEGDqd- z0+qw^pkFN!92s1gjQx2zIO9u~SCO(Fc0R0(;6Sx?81usU5K;&9nIRmoGgP{a`M72t zKQ~f)XvE3*E|(E$W>OyI8M)AO;wv538G^cSy|x7EBkwOC8ESlcNbhh_q zL3D7)CcSp|U9p~p+3=72f%27vkFX!lY(8s?Kh^9yG)P3oWFlJk_2ap()`#&i3}^i; z;Yo{k21!9$#o%UEs7{6*Z&#diE!!=1yCQm1{E5t(o#BgmuFy?qx!7^rquP>p6`fX3 zxowQU>-)+v-pfrz@og_k>zAO?%RW(5DGyT=Or|{745^DL6C|^6$BSo`rHz{~GI{n< zG~#%F$hc6?9ipx${HeN3$1d_FQH2w;34W;7bF9`yBDK^?@{*Z4vLImoB~!YQWink| zvJ%^`k$v_o#Xv)WNLe0R{rdu@`ozl*wG7c8E4dE+bAE@W;(1$HC>QHp_CW>O5Yh_2 z$I46?0H7>2;L8q1$YB9hVbq`qivW*8CLxz3aJ~ zYIE%(Aq*V`IXIyG|H0A`qN5ls2!Ij5zNRFtH0<5W0gt-}RQU;2^^)aOr8OFAt>xzT z(g}L_(wX}|TF@`<+1mrC2V%^~->;qGR|VrALH)|0=T>t{7t+gchCou?mikvQ-+(L| z>jEuvg3LxJktEX&Ar9ko_&S@&xz!}h`;hB`cnZ2p2({6J_SR>@0l&#G7;Sd4SdUMU z-l@@4kJN9wNdvdiX6ehgXu7;DyiX^tLsD^ZHE6uo^ABkyJQ0 z84OQ357L+s!U0f`e6`q10fX@W2+5L1imM+>3wA+4U)JomAW%Xy`Q#_46CFe21iH1y z|2E8(`Fy<0yugF2HHloKewV1WBEe><)wo}y_8zaVTLURd5p8ve{8vw^5lQBDlEkDk z^CVT>l%3#>*#p#}h8+UsoeUU$5)|f}5C=n+hY>A1DwZ4@J;}J}-j*>S0*_#B{HTKi 
z5=x3-h8XC0VnOCeM;lf#bD%%zQ|4spYcNKb;Q-Hh|G-OjMEGq5R(tKvTO&?&JL}0U zCNjg`6*ANe+Houh1g9;(3cp<^)Jf^izoC`e-C*CYaIlk;>zQ&hrHiLc@Qyb=&TorP zy_dY~`-4a!m?m{J9GK=GaL>4Q`>C$@7%7PMB^%^bP~ikL7qYEsgy5;wbZ*Q$Kkgpj zA(#|EzM=V|=-Cj5bSru7m#vh0O;7af?MG82tRLe>)q@slPQAdQCKSW!Yw64ynfGF8 zYx9VtSlrkEbNY>vHxmy+?WaDA-VWvjQt=+7m$>9Hrs(WkcCr7m+19UUw+YIcQH@ox z_n1JU^K;+O)A*M(4Nb3ZzPdowqy09}>1x*VRx#x78c!k3Hni1l`~cz3(q>k9@rT_I#;MNxz~us<%MtIt)o< zxYxsFtYPjxP9Ya58`9NM)j>DpC^8J@bk!1wNJ%up6wn(mjVWq{ z!E|5Vd)};y_Ad_?Mo%@*n2`%sc6|8tFb2w2Q?b8OD`;SN?#`5bPQ@OghO+xj^Jc6l?0~()6o3fA1zYaX?lV_FN zRk9&}E`@I|$Asor$h*1L1DHLTwVH4)Zr;+1eCw{DQS^-95+7 zOpvzNo2%FpcLlHydL07owr#lzT->)eot>2+Do+L!%Xy_K3^nYQ?}<3V0bvqU7Zj%Y z6j6T6+mVV*YR4~M?p0nf@<=O{N1i@*wM4h{ z1%7TUP*xi$NslZ+V}Fx_-}|vQ`iQXYbH6Pm)nA$M1H9}Cl)H&2q{c+rH2ATFzN=)s z`pWjs?@{>X-VBhNPSe=ck}NQZ?T=)U87A|A=I!n?EB1z!*-j&_LZ2<}(UbzfN!M&S zCr2?|?rT9~qy^dhGCswFI}cBtQrN3Fzh@UojIAjTmD?Eq*mxqFjQZqP0TofG=w;HU z$cGW{sT)F4{Uk_GLaWyXeoXL2SNcO;X-`$~!p(0Ukcv7lC$O`(+K`m_(mkXx+iSx% z5qiLVETc>*EMA6We3dDM$RrN^$z2i6?D!2A`4+j*FJ)jfY;A&J<$~@jEa_Rg{&tE~ zEUyE9bVZcBz`c?V(jcX~-PHWv+%vvnkwN>Eo+2p(>)TG~Q8@+C@3S82TIe5Hv#$tQ zFL?M;ZsPtlz$|oPZZkL%Kn;D1Q;C|0d}9$e(LtZMBukmGAa`q^==m;c<*s<#=TGdN zgmm3)4AksaRo)$P)4ezeYZtdaefuPV53r3GII>lTNHi&vbDe8FV*EzG4U>;~r?qqy!LZcd?xNmQ@$sPWdZVzm`NhY+l@ z4ZoLqW$n9#Q%5M++E0Yd{7X0Q({6r(;8TNrVm?zI>CzEIn<7Y#Q2*a`x3s1_+ z&=B?nM}3KrFM8YIhIid=NI=-VDuqZyDH2Tq&qkWy*Adbl{@ZWTFm&!&Y?B)JG}%15 z7c*}2Wtg7(r~Sv5eXS0@PJ?fT$}%hxVw1n#7LT0RiMahstru1Gquh^eT2azlT~AE5 z0_c;$Y!iQ?rDUR=i#XlbkCu6ByyY?Z_tQPJwepMoz21>AOgPrK5@|>n0=M2?HQiTt4M8Wp@=35NW0YYm7gnG;<%ZK9fc8f^BO(&~4*XSiDPzrITxDr&&2Jp^iMs52zig-%Poy`-ql1FnJuxPw z5vu0VO)avwDHQin)#B5*sxSG2w*EY1r^IE=cFM2?*V?__ zmRQfMqSSV$v}wi&qJUK5iBrWl)1p+mRifCRpKI7ks7|}LmAus$bmmYP@n+IYn1=%` z#S?mH6>i_(+1@d!5PyIgcw7=O$cMBMqbNU2Gs2Fx^tGcT-t0XAfNK(wA%?lY>ac0( z4x@@*^W0omi%OE_XJaZ2Z=Esr4;bQ=ZIiv*L{~U~r(lF@dB5UguC&`{-Dv~8sk#G! 
zpyC*B;qQA@&ys-{<+GMSEZ%@rJmvJ-*}3W0YiD`isG=MQ89OkGn7+ggsM9=q6U|GQ zAQj7~dC=~8@b`&Wr=aH`g0yg{Oz-?24oDX`s((2pc-gX*3>zN4kP8wxd6g3@&4lkL zd&#T2m=$mr^^J`0CZt)6t-HYPMQ?p~lJc+BaW)-Tz3kv_nd01`vRrj4eUng{=W4DzX_m1(f<4rv#;HJF22ZQD7~sO zQafCcoMVoqE|GJ|U{cZ?puknm`D}X-hNCl$;z6eO>0ZD-LoEvlMQT^v`Iw_1{w(RH zMp^I8*w}$(4gX`-6LAE9g%q7=#}Q$c6G@YbxrDR_CCj;!c#Vj&T%i_o9cqBVfpu^PR@itw#OC z&8CImJ!@;p{;e_#LsGlQWhUd3y(uCQO$qED?bIwkqelGzWM6JOyD}vyIj_0Q_0r65 zi&6I*4KX!0_esN-`cjd(Uxh1EnDY-m$!9YGDm|zOd@~e&{gPZX`w$BWaTr}Q=Zq7p ztFN|UD6h!jnUOjY<|NiJbE0|k$(FR@^fiqsg`*H{4f)N_Mfo(`+FP`k#OROq!>i(pb#*=SjwW^u z&llLC2^AW$&qW9Fu@IFb#h~N1u*G%kZ!ml&bpo2}K&i6co!6PMU z$MktR$C0@P&(9Qvc+>R0(0kTS;CWRTcsL7^bXS6ke3u#~d#Sv32liffaRq$i0hxw$ z)#0`K5Rbaxk@}NwY|heNrb$ zv^<hj}j$0<^JpdcBGxcnevelE24=8gY!WpG4P@F;~ShL^SJ zQTR;3FP?U!a08yZ`zBL20a~-Wud%na&NJ5UF9){2ouK^Gd8Ae&%Ov6uwDf7%1#Raj zo;W$7pd?_>^m~G7faJa47fN4bkL9N$F-TD8_}8d!$)}%4X$(eg|EPb{?Pf{OD7Oy_ zUcAVnk&zH)cqOJOES747ZbaHY(k)l6--iDzrgvQ73vsMyYInsH{@&$qr?9ZikD~d^ zp={~OI=Sx;?n%~d7JYAEAb)hQ%N2aXd67LVI?gX>(>!T9_)w3U z2X$4Fv^lIL0SG@KSTY|>V5-o32kLO(Q?l(o?QK$qca1;OP9-`iv{K&Iinx3-Ql>vT z!lQ`9kk$GgktqIznw{YB5=G?Lbet`k=HP9kfaR_>>+XI_s*y;eFR2U zZmNkRMS&dTsym#SwgFp~^u^vE-DLSfiX*~nc zL?`NxC381~9K+<<-dhZe*9l_-n?Bz%C-pq=o0e#dIE5d*g8qDgWS2wAZ|O6C$#PWy z@Yn5(EDUbI8KYyoy+YTo!ZOe7y>jwXN@c-VDv+sPQJ$thz3%4{fJv+qbvrTst`U2V z3U^5L>DNQ1(104dt9If&)M;Q@lHb>zAK9KCv#cfDu$$?ps_@_7#0Fn9`pKefyAbh@ zzI7pO?fbNIql88NX5^O9vliq}NPFR)i&RN@Rf% z?4D1&A<|F$`AoxP70p6(_AkRBlS59<-(SvYyZD5|!1Qc8WRB)rxRQr?g1h;CV$-D( zB;}nIfiB)2{9WxN>h_al-d#KM5|1K+9py+i`y3?;SAO-MQU3_LrgWjqhE)kyS-^c> zdapL)NZJFtr>f!@$(_dj{-Njr5pG`U^!n%*g9%Yp`kb<|)VDOq0&`I`w?`A7rbH)O zSDy3vyxq?{XV(*ax+1h8K|yawT2qnSwNCPDLeH_?TD*_v6f4t&goJ)4BYA2$K66#D zZ=KaFX!;SsgC575=prVkF+xj^?L8>ZM!k+KF7Z}m$1@PIf3*Swrc3<>RZSRz#p{P} ztjWY@mVJ3_p$qFq$G!WX^B!2cPk3NrFob!U|L#(##)V@fEow_P=o#K^uk*eQxp=R! zomIYb(M8m`aN)01?+@?9oK(c#32l~}^%*nRdDlahwOesd5cyLmBzQj}YE19pv$6n2 znlH3DblE6GSw|&o;YbTJQCBd0SyZ)su4evd;YFsOZ=%a-QV`? 
z=Q%OfY8cv5W8)Y_s*P+(tPmtVwZ{a4_eaw@i71Qwx727D&jn68@5Ko3?F+`Y5lJ*I z_(HHpo&GxA9=~RAYd^>blu1>6xp(@JUck}SSr}MxXmiwirR(`T@G?%+&5b!^NoM^$ zSvc5HV>=1$ zsIctO0v8|rl7yB=Rr0O0@|?*JjN8a*oQ@@XM;+jR#n3Eb3%dAxBNigzQX0QR1HfQY zzFmQ_yEaUM4GtLHft-nLfZ^#8!I7M`Bkr{kzX`_Y5?iOX)#mI_Ej}dAxs@=b_)QhD z=GHa_G18L8i;Z`$4a!RtWd{#)%#-JZM$YusbS*fZq%_dRQmIG9fIlvgEHPjgK2ctX*)(Y3025wkoj9t2Z20x91qSE3P(&b3w73` zuG~)|B`<7*C;J>_#iZT%aq98~f;Ny}5XcQw(l32jO!n>p9CVjU3iRCT-8r}lV%fY* z?(22m#t-8NoGx#A@ny%$X|#ShB+t-_SDvBuNTu0Um-*?9m+e^b5N=y#pF8O(hJvf9 zxY{jlU(0&~`QHp}ZWX`*7u1Lr`iL(NZ^{7%`5t#AnW`~T8w+#)*;KmDShY*b-<ZC-T42@U;$H4 zfIMdT<@4~TGvr4`cIA|E8hA!J4Zf8Gr=2y$cFeA~T@gf6ipQ*_UFNfU~TaUZS zS4UL52-8O;A0W|?OQ4i<{C#Z~I%8#SubGQEHVbP;5A{dpS4j$SH;ZrGKwV5}nbLB; zUzo;Tu}fZoyl+B$qq?%YYC>G!sf?>9S+X!^K^`EWEk?bE&FhCNdG=a`QoJL->&0f* zcQ*ym%+|NU-44BAr0cYB*I9d{Z!rq*Fim8SHSG68>0m_8 z>Fu_vgsiM{U_m;`Dx0`3bqRDO9VUCh{JRBV7c=scVYjdumJ1v~FsYzd`XcN^_4iUd zzXS8+T7))-fR3Wroju4LSRKU}sG(40@3Hn;-2iYk^W7N9llpUE;{4H2<{Ma4`Tmr8 z2T>~Y$55MjOVE6yx>YMy=tT+jwhGd};)3+Ykogn7M8%E!6FI{*kgv14K!CJ5+r_%D zSjeA9a-D&>*6!er`J>NY(PW6w{%$wJywW%5%5`yI1P09g39dvL&%b5CK`QwRQ(O#w z^*>mkS^az6Vf->WU2TW}51$170K^jn#1Prf)*98UTGsq#(VJB`u=O{%u`UUXb4^K>5<$q90gD?dDDW(1@MbMWZ$N-Ub*ngEm2RIvT zU@Sb!p?kiM@!jh<>)$qkY(#CS&p0c$rTKsO9?w)%pzJC)?kB4Yc&~B>!Q6+dM98Zw%09H;;v{b%lEV>C zh)D6@v_Xl=fCDv;a_~Rm?NirmJo=3|;T6sqR~?Wziq7T^vJJp0Uh1s@J#Jp?NcYUI z#tbC%4iZGGnSbVErlS59FVj~^oZ}yjU$&=hZ~rsJ*R`T*SN#ks=vz8_`H;Xf>c9U9 z1)^T}peZF6i+nUSsMQ&*FYpQY@v`UQj^KBEPX9r3@MK2wp;z6R%h`KOFo}X zG+9~nlj|ipNX7keRZ#a)W&B&txAe8$*1JldQM>I)iXS(cocJyzl%6*neFp1y;D8%! 
zn7RfISoz*)7{P0cS8FeIZ(Fwm^~*#mx~lT!?74=|_GK<`7ZkH8y2<I8ZKo-3tV|^bX7s3PU`5d@5Qr-?k0}1wj#SGmfE~Dk!Ye zDbscZ+8}Vv=cBeGlgV|f|CoK#@#T6qbHGWCiM1idzA_LPl`{=(`K)0;?pXi+2V(J4!3lNZn!WA!+>juyIP&NL_Qqt>5 z*FX?zJ4vNY+UE85RTzZ-Uy5jaMY_ODbn@%$;sO>#CB%s9$N?X?M>g%GWDs|&*Ap#M zu#Ai6`TU>WK}VTJvTIjULZq-Fcg{RYUn#jR?*dB#tTu~VMZu{BQ)mCBbOw=;r}clUe6ia4 z@AUI=<5VaJvg4I6I< z{$C?k9uL*}#?KIQD@#O{EZx$zq)@aFqlC&w!(<4BvLus{D7&$w#S$6RC0o|HHB^}D zMs$T1YoxM=TwM(fG0nvIy=S(x{qFr^-ZOLNyvy^x=REKCdCv2FG6Pt^sRPt7V6LfC z>B~n6nH#hEeT~2h3c_qQF84`~y@WqsIJdy81MRi?DL$HNQw~iEJMESzR#LVO$A4JN z6nuBUNVJn^UF|7j`=aD&cbZOL6%i4Yhq}A+j$h9Zv0u6k*JaaHe`O|(8Q&FpL+tgN z1jVy9r8LqsHM(JujlNUlZ<(R^{1>GgT2k81zrFZNs~bsGqW&IAt==yl#Fbh)^@YTJ zTI0A_<&L|Of150Qne4LcO(G)k$q1v=3^8J{>H6}*7w5}f6_}LZwZ{X*LhzzfuF1F? z*CKSN<*jz^*|Bm0W_P=BYhs)y``IUElAa%gUd6;wLL z()CfE0_1fXKx>lqHFp_O;e;Ve?$x{4n|}^+f^$h8Ahc!SnDbBqfI(@$31b;jelN3W zr1A4BH|9)_VRQL6hb}^xHV>1=K@|O#L%zCkp{CchvJZ>AcC95A75y8lNxf1X+2^7w+|E1H#ZZ0Z5m(WJQ z)x{T!bF2MOtSa?7$_a77SW+dS1IFnHc|5=Gja%t=(*FEa=Blic3*%g{2mEti+?)%LAG)jDr%aZG3&0IfSrznTO02^0RdciP81{>% z&_y`Ht>(c6Fl}YvW6?m6b70s+@WpIELpA^00riMah~cj=th= z^q@k~@WEi1_*9PY03k5E+yyRx{SN=AINg0d_}?iiusU9}+^O5Kr@Px?qY7QqR`veEJ!LaTNS-%=^=(n%YZNNq8}^f^@3)y1E;SsK)skTvw|)KI_X zx)ndomm!6CKy2{yP>rdrTaC9>d?;;}u_6v?*BrdRLz9vheESxJ!k8Hx?JR(^K)?`? 
zpA&b0@>RLYgmp>hRrNk6lDdPk$Ht{^4}a694_P zyu1=Q!o2;1okb(y3d`1S)&?QHuptCz8@UeWr@&^|YLbb4UtmPq9@)J$N+TI?HVYWq z!AEnT@(LiX@mG*+E~$;IVrTY?JFv1>8OZl?0j%;uTNv4x!#0 z22ir~-k|fWOd?AoWp)=u14Og7NP<1U3ViCUkj|#Q^U#NzRKn;%nigwm!#pIt|8?L< z*9>#1b#7qPnoVaUEnO>QRIe-cEP?*vu!sH=DH}$XQr1|l3Hj$+D~r#RS*peAdUP$` zw*9ICgfJ8bFHt!dzu*x5(Yu+lgFO#D>8@_pO&X-5qt<@(i-CigZ}-4N7etD!*uBAL zhlCm48;9no%eJ~X9_M$i|LRw*y0PKTq~p-zG-jN-_Xf0 z{7DI!?DMPX$r${vIuv7MWRIV*JjmHqf+nMb+EDVbYCU`N^AOp4)2a3pZ%i;ix0qO4 zbF8N8NvK&M8J7V3Iv0+C5MgD&92h_XdJKqbVtbeDWWd@wicHduvV-snu9`3=Aj7XxnS(47|%wT&&Wp#R|yS~ zu<-9WW~r^jW3$jZfTUpQP+^PpuDKWz3z4~wC=L5He(RFr&Cp&xsjBpqldyiA%!jEl zL2Nno+s5&mqIL{K1V#E~L&E?Hkq&}docFtdZ6xtw-g*Gkw!JU0*7 z+6h90e~%vZk~r+SL3FhI%%SY0mbZ!8rG?{n0re9E^|rP}kK*>lLX8hU>Up&TGxr9F zIes2JQr*%1UFZH0jD+dJ0j4}B(pZ!;f?wP@8qR{#Tk;XxZ8}k5yYrqTvVyecrW`t~ zgdI$jl<7D+;lAZ0SidX_@T$MFLJC0~2AXC2Wfr4Is7qTInIlayg)kIkv+`cYN(S(J k-($x`%WLx|;cCr$;PO6|_wD%-pwK|-vh<$^56!>+AN?wUC;$Ke literal 0 HcmV?d00001 diff --git a/Nord3/Config2.jpg b/Nord3/Config2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..351f2a9e93a676977e0a975ca5b312f687527f43 GIT binary patch literal 23393 zcmc$`1z1#Hw+DQNPLY&G1f)f}84Ow&X(W{pL^`CzK~NB+Q)#4Iqy?m;m4=b-?(UiI z0RH2B-~YY$yZ8B?=fmfW%sKn4z1QAruk~ANuLbHCY7)4rD61d~Kp+4B0sjH05#Szx zgN22Sg^7cWjg5(1{_a z?*IchPE5$f5Agd3f`*QPiG_`Wi-!+3D7^}xLD13BFwikEF)+Z^p5Su;gBX*9k@pT3 zsp=E#8}?*;uftMsnC=$UldJV?GV>cdc;n(xT%)9-X1U4A#?B!iC?qT*Dt7O_jI5lz zf}*;Hrk1wOBV7|yv!~`3&nz9CoL{)Oy1DzjdF$)<&OabLA~GsE=EKL>PpN6?8JStx zIbVuPO3TVCD!(=~es5}SX>Duo?du;HMEn{Wo|>MSots}+Tw31R-r3#TKR7%(KIaPp zpkJ~DeqJ*6Z+sDhe4$}rpkrX4^94b30WWl73`|B|ERs8_*iY<9Z}7dwA-fxvQdE!2 z#ILqVZtT#5N5L#G#j~)o(mKIe>Zd-Ohj7Eb z$K5x>%zIew2{yfn?yad~xFMgX2)pI*vVZ;Cw3p;tI#U^EQ7yn{?V*t+#aFY>71c2v z?^?sw>!JJ=GL*F2Kd@&EHK+4=OKG&c8ykC5l<7#7O)?&=etFXQt67sfL1nat8NJv+)4*=F~RqOUx9t=BY z`t5n%SyjZHE4^xk87hMQGN;Aom)5t49NiUN<$`q6AA<;9|EpitL?qv>%019#w4GYJ zd(uSD96O;kP~!1Scl6%Ag|S8`4w~GadoA{5>3Ndghx$ zb>-JFLrGq(ucGn2vKdMyOfWGh@A+8`BWxNN=p!#LEtBq5cpc5e3;@dtfT?3^sw~2K 
zFTGO3dwOtw5XLb*IEU6odX>@~!yD(iH!k*IWsAQ+zS3QU(;EHG$xRye)$u`2EmA#f z*D<5^=4pN^3`pXgX8s3*HqOd$x#~!Z1N)`tZq^?jDAWBM`}&#R>j z>eK9>j)t-Ouw-DbPuYD69WL0BqIzuibOk7v6uZQvxU8m2Kag)kTwbZ6(t#P#F7*%k?e^$=B#Mi!A zM}C_GE~rM1AvF6Kcfq%{Af(0n*YdY(CuZ@8=p>t%PH(or^qAFc=Q|xNo4V@%7D{ zGM2UK&~{g&5Aw@?p+FstYgfb_VKIEYUx)5Unx-BblYyU2xhNnVHb>~gZ0sh{Q~Ain z1UoGLA$DrH|C&ir!2EN)Ux5*Nca2>?t7baiKPvt_`I);k7`v7{)7*`47wfsh9D23+ zODjKzBK1bVKGbc|JuqPBXjkvu&B_48gJ==V@pmTj$&7bf#A52uDOJNg#d{Z#^un>@ ziLn_m%+VHtUzY4~c_Ubk1w39;ipOvz@1ZkgSfKYT7Lvulcn_Z*Ij?a({8|wrqtR!& zoe-4D(Tc;30=V?VRpy&DAQ6Gug%<7F_xc8WamoUTQ2>Mt@*;pO8SVemD;R!w5Odae znwyY(B{pDAZJPaNG?`T!g>WD}A0LMzu#{DxMf0uz{X?9coT&{?Bf9tH&BviRkKO?U z;U20qc>x81R61yswBC{y=$RR9)E%VFGi=M6ep!Or>L?%tc6c>U7g*$4yZ*VTytov0 zYkXWVfVoXvQx<5O-4yR1==jJ);%0_;H8$edDQUl?0*)kCUq{ctKv(}K#Zo?9BG#yE zvl)j9Hujvc=w88#>1j@-XSZ%z7s;l7n2d+4iV9x89=N~KMF#2X(f_I2hXQ7fp)E($ z!04x9LHLmg3P3|feU!eo$AiCkTr)9Tm`@O1V$3eozW61X`E8hn*D|ieZKjA7-Mv;_ zqY)!)q27(5pb2)$E|pR)1o54@qx|w8cJJ#&O}ZaqYsxqZ(v04ZA+G!B&rG0d=Y~#& zH1-n9;ry5{$%NE{*TM6FXFWRZey4DSLpY9)eGd0QXMVt|Ezsu3RHZTh zyNjImRId#=h1Bdi3OU<9T|`g&R9VZ50=`Wo2;P!tsJ}T@vSuY@=SfrpIQ47avc0NT zT9@Rch@^~@yDv4qgSs`GR}1pGZI=3cC2fDeH_8|E#GY|m;4&w)|VxFSUs zd0Q;^++?5Fd1U7$70WHc#eiwjhA$N1MmO8Xwc#;sT=(TQ>ou8)Hk>FKR4}m{U1H3J z_pCZqqf`yz(dRI*qv)F5?G0*&Qg@*V3{OM4kxr0$r75L(w@2yTGo8jizSNF3np(Ht zUSR(G*iG|PFx|hPgg2e9sB&BDrqly}s>fJ%P4ehIfwLa;QT;SJWmxrX%+AMZp0Ut- zN8Q+%)>Gp3PKAyv#W#CvZ(=ny26?%eaJUdmX3d_~aNt&pX4j|7*)Wz^OjBz3e^}@0 znKiL!2+B-SvE%HK`pJ-Tx=7Kp0DhvaXQBZ5q)kSh(i->IYnx+yoZ_oJ#|)ibq6S8H zkbQ$z>#>~X0oN&v_&2Un-bVKcLwL(PEZ=OJ7)cAC)fFI7q7A>1^i4B9T(&-bV_jh; zyI|qs{gBH!n|!(%HJbA9EGVLtm^XBNk{tllnXnm}f~D5ovbj!K1Z-qF?r^Lp6G zu_ldG2x=Ahq)~6hnu}a@zimEixzM3kE;?f)8ZB@A@R)Q9CJd4O;$SN;(QthT83p^z zpfyL&t}0t4%jsCv%MarZ9y`YK&{@VEI&bj2>C+o(L)WKDd?f7lh4K0uGbT!X6Kn}c zU;KXU#Zq$TnR(UE9(QA3`9{~T#(HBf~TNUm2eI~M@Q&u}ge7EcV zOprIy3E?6dl=i`z0WZf|PjkNt_4qPj7v}$#iyszZ%FQ2HhCWD>sHNXGx9`+rPRLD6 zEr974&CuC|p#bjMN$6n8iNGtro!n^RyjM@h50NF1dk;bFvjioY<;Z_FRu|&1dZ(=g 
zZt3@%JH0NV{PoYb<^#s%PX~-?O(TiLW-<>xsz}!4-k7LA=%Dp?v?s{h!g%JgjRG7w zCT`jZ#hZoLV7e1sh%CqO_n}q^H1}@o1lZI`Zjj3K`1r60pityoQm|+ImI-=G$O|vf zrj8E#Uf*IFQN~h-?al6yp|TWAZB`LVP0C}4-sMzX3M?kaZ@pj{b3xu>=(>{KwG3rI z=l2#p-=_b0Wf?3N=~_|@%Ca*@+sO~NrpQV!IHI}lQb|x3e7jNRaQBW~r{X&e%pa3Q z54Go;rPL9$53%A=z}5!OiQkd7xZdZ{CeKXn?|RjhCZl|}W##T|Jen{I8K@NlGWFAt zl$F+6^V1@$`^vY&n3?_Ftg9JF8&%uMsdhW|YuoPYoaybPt`;4aFG$m(9jYP4-&jO9 z3k0R%J)B&9a=reD|Mc;uee-R1gn>1c?;zy|@791!IzG&2l4+K+W`_q1QwkR(lJT5N zGvF;BP=M(a+e?Pox-C4Ek ztvR7*Wj2x>(UhgBt!0b?8U_&X1%wI;sD+_`rfwLojIz zNxnFhHm9Rm!D(VQwcYZ@`om61)PCb^9p+B!JPGbYg}(C#=L z9+#hFc+pg#fG}Bbrdh~(zgouqeIB^{$_v_V(D0sgSr6d5sKU7DUpjquXeY zr!Se=MH|k;cpOddCoyOgAQ+c_t+8D9ZCOh!U7VJ*V*JBQNXDI!)ECo3OG9W46c$b zxJn3o88^f6OvhcK)%a_tCvq=daXp`1SUm>Ct?c}HFT4SPKmiNmjmWdw^JU~F16Q%} zgcTD7NcF7IV{g4koi-{zE4?1FiZtwE(J_Cqy1)!hq_(q_l1#XRqakcF4r#)$-+}_n z3w26SfSOe4(v#UaWucd{wz6dQ#hJ>FIA6)f2C(Ia2s9wpZKo=~P;7SXKar4CxZy1wlrhPV)e>msQySds%%W&#+X`=7&aodvxaNo!>A$Q;t&@*>4G6?6pZN8rea<<4C z;XlVg;(W1Xld-4&pqzajci|Skd1C5ta<+9XqHyLk3+;J+?agU9L+@(y>5Pd(g|)J9 zxnD6^D8PDU$QJ6eRY-LRZYUx_}u@{>&HvtEN|Fp@dgS|rl`WXm0Hu+JI}QmEm?QFp4LffuLYEU zS4ni4*3hOUJmy9|q-Bhx&e4>rPa;RdkIorxn#jU)IH+quMdF`rYgx)_zX z3ET;(naX-xeoXx{8S~e~7&MB8<|K1F?UNTX$GF;Q<_7(Q;?rj2(rI?fnIa0PhNpu% z{roMBDm}1WMh_^4253(v&12JxYQMrcDxHy;TR9p))j5^ai-w?lhjW^~%v=v1@BbNe z)_0_ywhSF(X#XgUfALE6n2t)C&qR6YJ+%>dA!sKaVjvzTe8AW%q&&>P*^(@ig?5AL z;GX;b^RCI$=gqI!Q9x@KsBsOSgIc%75wrrn9`TaM;~J{Q9vSkvq?hOG=fZ#wkz%c^ z>pG@^FFrHuvcQ=Ll~ddbd7HG&KMHQ3xbwR|%}?QP-9NyllF+|HHwt@*M>(l}d3n6p z&ngo>fX%7>P0EZ{ZP%09lc>w|=AcgdSxZ}cwg<|9@CAof=Z`=GXmHM*=E&pw5`m5y zmV1TAVSCBAS>SA+&-g!GKl@AcJY`)o&7*Dxr80(V1348|k8jh?6!!IIXZjfm1r2W6 zmJf3)yuTqx&_W>ma)>Z^4AWLsn#$yU(Ftaru+WwPsJY3)n{l<5b=+xp^cTM4(p3{> zd4CTXb3p+a?+rld1MU2~El>~pQ-_=tqkx|Gj|PymHB^Wg6j0NP0?6P3u5Zb}O?d2j zzD<|lxO6cmCeMkHnVDxLg*s&{pW2w?4+?J{Dx(12kQ2}oEJoz%qJV4$-M!Bf4Mqs{ zo$3RmEV-1?vEt270i2F)c$|q_%OClr3+Rhq}y3yG-V53Q+ z&;*)@6Th8g{7VI?T_8{Fz{jt7`SQP#!bp%iNj*~M&}I+YhU07~U%Uh@@**!xwD-bq9Xrn=uqHqPXvIH}lms 
zI&nny8zj}H#69Hk88Knk@%||^N=Cg?79KTdUk+aaBR_L_4AVVNE;Xz!aFm@&^-yR8N*t%~C-S0vHvfSp{hYq{u zhDOK(rBl_@o@sD5yS6M%$&fw7PCsQ~hFd)jLxSeUyimK0xv{NFa2B*G@OfSoUTN+Bzi`N3`uVCe&Ko@2~fFIK5P*$7*7n)*cLd*78c@ z{??Kj9$-ftdd@eTmnLdU0R^{Hh9K9jCg>5>TJ6eD_^xoFc@@=AU#z^R{ ze4s()l-oGD^R#|5{UaY!Q2NV{I=l>Pyz-@z^tK zZIeB_%P#JKBXMk=c&g1a)e&@K=R2qElk_H2iOg8>IL?Oq%|bGvd|v3c4mc z5^$UB#tc$dicoWv8zyFy)8^=2H#4*Hm2Dj!lXXM7W&L;9`f0&?)32VOiryTPOdg|+ zxKEk8$5nZfDY2!G_>S#3>3N^R80rk#?>?D=LD-({uHg6omHiLwI4dfBhXT~QL1U)4 z=V7c%8?Mmhz72mfu|o+0Y3PZhd(*iED%2x9WjS+(ig&?hpQz8FfV7mDZhd{dwbCk~ zUY`o#XgXLi`?Ay<2Kz^4x`z4W4U$dB#IWn=*1<`$#aW_wj}kVk69Mc*LW4mqx$aU- z-!-c_p3D62%c&zGOf4Qq%rCbsJk1>kKZ{r9NjPnf~|Tc;ba`jTC2ppTcT91;m4noEQ_Kz*UQ zEzm6gi}NkE6;7r_UGK&hWe#!A(DWCSN_wpvYLial4qMC1M%<&d(;H>?7DrU< z+#9u(orSz1(00+9=(=NI>pvGH!JjR=-4G&}*YlmlE@h;{Bv`%+NJ^H9@|hfaCP=gY z+SyunHNVn?Ze{2;B?te>>naz?PEpA%X}WxJ*%OW61`RoHH!?3n`F`s_3Ow94x~@AB zEsFnZBk`*+x$<-H6fK9!N9hIZ#9w;_axIgfzfYV6Vi|DZcZ~;~;eI@_&Y(nZIMp^W z{G?|DosGa(iRb9g-KarLZoe6qVfox9co;vBh~ELd!btNI6XdUR%)CNT3wlcb0buy6 zVO-Cq5)_4_$=vdv1~YQLy)v#x>mcijaR%AVW58Nb$eJj@&$UFDS$9xw7`A!Ut0s*S zj#RGOw`~M0macJFn>_GP@|3lf(!#ddYz}VZ&_3)x-mjnW=Ah$|j!^j~tY(RbF>kl{ z?$Cl^!KV@NZbm;T(nPcwmT%+69Gj2;M)_)EXo;Ry@TER5F@Nm9|3*|5K}99$ns>O1 z|0~V=;o$0^hZg3zbl5#1rG#4c#w#CZwUuN5)27tbKCY?gY3J}?wo>==0@nJyu{FC) z)Q&EQ{Evu&_*78 zQb|4QIn9m){-hd?{iFSMYT=!=hZXN$?_{AVG(0LSWWG z>1FN#%rYQ}0aQ#*-H=OezSCxV|I zig7x9^t}_Raj>Y@BNDu+pWCC5f>Je~T_*$ll)9Pw3WNG%OXr5RV|i(d>Zu8$n5RV$ ztaFr|_7N+30uQmQkqP#0p4NP$4cGnVw^`m)aL2DtwJ??4Y0>>}qd(?1!=BM6Sz~NA zISi&cIVe#wL)983ueeQDCcV4TQcwMkOa2T2Kei|xZbRmO^Q{PB0Krrks60oYXOyK0 zMpGR<32yL{F`g~C9l3X^GUPK1nV=&fO&_xv_7ProTmI5G)fyIBDflsmJC6Tf98~m^ zh7!}?MxX?!U>GNR40lyil_t3=k$zMQ+CRmo*avS#{xAd&(I&yi;H7S{A$_f&wtufh zPzj__U;xiPwTuF8fsH9aBIiCSBy0zcgVgvzw1MYziTT+ecF<}LJv)iL1;oEZD z!KE_{WQU+PdIkP}^F~FtBs*20-Czvl9*F0qu}*1sK?nEIdEBJy%&i>h3yLbdtLy98zb zC)q>0kjY$oIC=h9dU0O*dVHGM!zc`PlZsth_G6^8!b=&e**D%(@ljW^@ITfYCl~$< ze3@Awl1%srI#Aw?Al-uaZ_1{I!{?Hafx0WCs-=m|G$Dc}dT-Bug}!A5mG*uCY#J?W 
zGDz+^3h4L;vGD^P_P@j?GVH9#TkbgNEV0J$*F3%F^L8l`Z%~qOK}kAyJ99?}?set6 z|FWIpS;;;c_Wts64RM?@YH9TlpVH%j5;tQazIKar{ zMo`?hqzb0AIr>6F_Z4salNbMV?^;SWA{2V6< zU3eaGx@z!T+myVGnC@;gRi*7?YAK_8(pP@NfGEb1IUXZ0^2a~7D*!~gq#t<_@^ma( zpp9LCYt5BBH;Ll`o_~G!50I^Jb2PXXMBC=z3lI)H+r1SK5Oajg?Fw>#n~<~z{quG& z=6@2#nb}ScMBq#HmQ&_^5V0i*o*w}H?|j!E@CKM;($Qs9d5w2xMl$yw<*-2LXf1vp zsd=qq@p;I0rM7?%?YCP+ucHf?>Q_7X zhO3tbPnBPfZWOYUBVU}?!0>kWP;+ZrIev(pVC2fT^>eDcH)Xm{%@={iTl_QnroVPl zB=P)AG8I$c4otNyV5j*;y>&{Awpg(uBGJ7?kIn&4hxmDjNWL}-ah;$UfOd*&#Y>A zyZOclOSV^2>3d!W$n_5mIbK)L>A(5O0B$x(Q`~n4CAr33UDmXO_O-f4pQTJnnP$@l z7(Me0^0nHQo9I#vsI;CuQ?{B9pYJjICeNic^dluuR`k&aPSQ|KMek9Y7WZ9CHgx~Z zM1Ri=*Wi6Ofx=?FA74J+VsRZ-plz$PTO10r)B8yL$dyV|a0HaibNT%1(&X8o?iH!d zdQ+^-79j9+7*|vxo)&;e5-9@3tjA@?)YB5LV5f}KA<&(RD3UWT7*TmPxYNP>V$H5X zXVwna=B-?I7kS62v=%*1zk(WcVE;Tq`k%p2&%Xsjjc`?mqyR;>J-nZBtEsr;CILl!dsvYs@E!?4-UwOCwLLoMuiezMM*-h8c3LV62Ug&mCm*xm{e>qj$Lbuf2iK}UOLb+N zvm&3A(@(6b!^`q(7bZXxRBzpa0*K`#Jp3+SNtJW&L=KmMd(Be`1z?sloXR}APpuO! zBZs$4iKUi_-A{jKU!V_W%%Wm<7$I|c0Gx}sUZR7}DyZHLUyn5f4bvR-K$#x2)*v+g z9lna1agb|&wU}`PJL{7s_f2iyVUwGOT@JLu{=MYK_qJH=`{|For3m$>k z8g@D5jR*zq6|x*gq*}&|-zbmnQ?9NE)a8ir#j?9{Fmv5`R$GW&7kU zCx=rVNu1z;^oor^qA4KB;0^qRpynV2`K?ocHdJ>iypUR1A%lPPTh*e`b@W^GIa>+1}To9%{<#;5k(>%TLi4hD67*6k^OT}m9Po#heJCK$Hs9D)8!63+0*tOfk0rrOr;sz5`Em1%l4dt8R;oN!rm8Ewdj>>KWqaB7P`H-6;ZC3EwP_mW@k`w{yM? 
zr+ab6HT^AoT_@4QZC2gNi}BG2=Yg0to{HNfZ9w7Vnzok57!#~I+Vz8OE9;jNY8kRS z6u0B#RXb^-u)Xcy){#LLHMDM(n)a=SB~0yX|FXUHV|Tj9s4(1v<=l&80B!FE1 zwiuR_>v!|T*wFd0djo0De?z0W@Pq%l%b?f?TK_a`rjRLy_Bfv1UvK&7ruXWj*zj<#tc$wg_H+we=kBZs98>YhaWNQLDP(mxt~WQAtk}Mx)V6e zot(kgoL+Z^=dU~LN^cYAim-A@zS>#F_=p0E-W+{Tls%pKHi}@eE?m~t*ELUaJx+^# z>#c8t$k8sGP@P|CLwGg#>{M%|CA`??qH>b@|R2D5Xgl7ue%iu7SyXpw~8 zVftpTD<*u(^fgxbnaR|M>)LeELGHdQTB=taKO3`8JtdP}Yb!AC#WYyD(o$MR5z&#^ zYMoILEmvMQ8s>{5TtMaEAitW4q0aSE)5USxp73YsTusuigzwNNk;U@)^!h^Ma-DUM z#El ziL}D4nvt|5fJBxE_OqpCH!_G1_k6Zc;fu22wt#*t1?KQE3QjF+Z$Y&>ikNVDW-ob+ zD-g4c!tAc(;TG;6-3Dzrl99z?vS$AGzVUmRaNW(BMdKUjrbCx~?!SH|?D5n1+p+ho z;?f8Yd}vJc}+z&3{RXp2d!NeN5TPTO`6*2 z4=keL0tqTfxfji5chzgC#n=_A8m`jA1D&En?5+@!EWA9XEsFEcOD~C4D0&+#{X^~9 z6`iBpB`_ep8mawZrDyaoM%&|qK!UPWngQR9KHWA_y7KzjBY6U}>^4c>i0-FJJicm! z3+fdmy@sOKw^-gkqMTyt@QSk)XnNg1*Jm@*GTx;K?0)6^u_nP4{xenCKCRU><~y~2 zmAU^H>kb1xY!K7Pyt$9Q9WGZWkkOll?Cf(IslijQnyToJCKY^xct&}mjuUz}{qDR3 zc^Fa?J-7t2!u&cgu|zv!(eP@-cM1oY{9mK#qp#>oN_;&kK2X|_7ifM|SYFvqXk;b8 z5wUTH1Io^M!v?N#54{?lv#j2lK#kM^ z*kr;@?wgHj_xpN(bmSO(8+apEBQgKWogk>9ez*BN6}Z|@w$vj|=}y`+ZTsgPxuaE< z$~NOVUa(t}W+&;2r{4@6tKwbq7Iz7C4&e_qzI-*fZxN$vn5X-oNb<)|3bg8cPTbL2 z#xU3`X^~_iBP8O@3JUnOMAjuzevL#v<%Mi|Ko)a67*hZwMVlXgLVNA%cjDQ|Hl7a{wL3jAHVUG13W9fc?g#9pzrWYLvFh~9v@d)bt2BJ#rS}$o z#a+me$yjAw{p9S-b@V|9hAC|T=MSEjEI49qK60+@EBpACPu>)CSVw0whM>P0nfGN1 zL{E&rnVwE_zrSR!`{Qmj;?27x<7n3#fy1&cZp4$VzG{sOPQ1+9Z0P-P7Cb7M`Rkkvr{H4^{~R}XwD z9lNXs8^-^K`9javC75CM~WxtSk@k9m!#(ejK?JA@)yo3v@ zVP=`j{Ew8#8uh`uw=xBaXYZ0OFyn||E*L9tCLhl~_10Bj=j28VQ%QRWQ_MH}5 zz@{y`o}%13?MvIjs9uLdATUfJ6r@8Mzra@@vI?guU5I`~TRb4>i6x$JWl7*&o*+<$ zXxG2FG`T%cKWTQ4FVz7pc%@k|P%I-Z#Zs$yNh}X>4N2$Q`-{8yo9R8fD!QWWtIV*P zz~GO`MEW_pD%n^?k9oKUWdb|6bV30~+90Y$7?pa6zr2n8A zF9^7Ml7ks$S5_Hd1BPt7aqm)uq?O$yBg?`+q7yKud`Ro1E#fQ85J#k49HpHfr~Uhd z4SHR~zla!?-Iy*AM0i3>Ys5T*)4_g0;q-mg^6!KXzOakD4~H(rB3J}?ahPuiU_V0= zFBuv<_ju~LufGg8mu=JS+S`^&3i@P3l&g}SLQ&~Vr?@uzO0!dMIq}-_8hlmxo8^hf z);xXzU+7Iruh6uXS6DbLqHP%T{07WdJyY)}*^OXC*YgL?a0W*;FCi(4`v+G}_zgy_ 
z3W6$|D(RKdY?xJa=nc&s>Vk2AP3oF^lPbRAP%s2fpHALX9bWCNGh~aaygKE=u|AA1 z0!%u1D&KQSbG7VTq-pIPV|*k0k$XzMZVpSMcDymHBKWY&GCjjpzoaaN*K$c9)_JUv zQ7G?~8yG5R8ZEw(OuJXlQlwok=52FHVuz<8WVdVMEII}*xR3ck*$+@(nm#K>L_2fc|7WNtU0XA#G|v3Xx;vi zvUw&-opC0adnKUVr!Jr51GFU7F9T5A(-Cv`xiA+YdkdZoLJF z0|~rcXG9Bfnx5!UVpg?oMq*NHn;R1`aeXg3eI8t^Bcy90Y#rfdb#dxwm)-{G5@IEAgBE+ zrCOR!ko-r_AD@^5B`(olMwDwiq7=(xm}GxR1*To~tWPU0F7I1SE-rcdr2I98eRJyD zhqo~wtDe90+8hMT_V8+s^}=3;mg9$*aWweWLj8KWGII@dqqnACnH&FbaGU#()SQRl zf1p62JaUgPPh(xJ`ouiz0L(#BCT|}Zm2tL-Xc$Qiwb81lVhB<=r`vJlKYyf$_`&pI zbSFgLT|MifCS{-(KG3m821%81L_oLpcup3M`ZXYZs`@|WY$dcGa&?z2j-=F6fyE~| zpDNv$bw0V#P7y2d?xMY$ePr_J>YNT1c0V<+%vSI;0IV>{ae;5+L)Qy;$lw1l0pMix zl8`I$0+4W+C9t$u@B}P2bb8$0$oRUIX)xML%#!mOn4J|;zbmk&jg*Qxt3Uk-SgGoN zRiJ(AEmov8{7bZcS88_|q+Xz=_aP;ka@LssDLW;eiaf^tQk_KX1&OF@#5B?6geL>i zb9S$mn9+}X^kTJ7-OE9C@PN$;@T_GUx(t`E-PJ{~LQg%~kV{C-#aH5+WmQ?OoyXf9 zO0m=9cj$+RBd~EG%X0Smd4?og?=P0KiiLiy}p^V2-Pq2F$3 zgit%gs&v7skEtiRkl#aA8~C9^Es@O3x7Mmn1LL1oOihZ9=&_dg?%6(x79sFn3L}c6 zpVWbnvJ+66yO)nKs3HVZB&536ZoCh<9QsvdHB_XIimFil&|CRh19e7x%q!>Jn|f6WjC^uQyXLHOyl8xoHZI)d?6x(u zL%ze#;4)pAocY`D2?}1dYlfB-&0!goO@_p*7bvuK*f_(5WI*DT8#GkWW85MS*v zOFbK>T-%YSnP3K_MaV|pd#YA5y39#s&$ne)*-(t!jJ@&eD3tXsPA3_NG%@_@VmPJZ zdnCW)-7guD$7J4d`FQn18<#9LCQ&<<{B+5wd05oH0w8kZ-?)&Qt1;cP_9W68$Hwgnvrw-eAvH`$K)f`ESnlHfyFOz8lp%M z5eeGC=9uUE+_#2DT7!w$(T?ofP4Q6x`eS@g`LWMHp=1TMN<~KA*^KQ$3`SBrswhiydO!0M~b=OZswuF=5$VC}P z&t)|ZEjhy1S-gLYUl+aNDe=v@y1?sJCFb)OBq9tRS=j~E;#r+H-?Dni_$$im$2go^ zp(MXf6zPMwzPo17p!@v?1bZj#qLw9@fSaOzcTCiJc9_OPsQ-G->;gJoDuXoV-GYl@ z{qF86=wi2w8g1ZJKrf1RE-O*6=~ADmSva^Aw~+8e%`tr8Y<+6oR9=B!kNz5w zy^x&cY=E_)RTIA6Gwsi!oSu#$3J{kQd;}7!XE09Fc77Lh@eCOcRbh*^=mJwWHh+$l z>}x-4dD=S?E&gc#bxf-!CuLnd>s#9W1%aPDcfR?ls&i>AKMtpO9;(AT!tf)zg~QRO zbz`t<;-W_CvK$L7%5n+1Ja-&WcrnM?c}zMz`v(G$2uDy|f)TQB@B|rMQoZKBR(Vk_ zkG?-%atL#^JZ7J<8;tAs)${&gc$?fW8cS471;&&;lkURscfJz~-xM~Le6D^$B73sm zu<=ILVn8^k=ye4-v;C=gd|+b3FcV=KFE3^oxdNRCWxjB!zrGg{{ch4_?V4gh)y2T% zF3NWRgDMmd`sh4Q;X|J8cmnz(a>?7!bo0w8#=25u!g5xkKz)xbSe5CH=nYY63~Ljg 
zQJ?PZPoLEr$~_*@J}S`kkq@3r!zdvB!e9Dg(!u>oJi_hoJ=k4Kq>jdaf32*fH0%|E zUx@-oOUq%Y3eQ!ub;(ObOk>N{=8P$LY9*s{%B}s|JfS_S-8z)=O~?U- zt{2=a(2v=9Q}-(fk{7UR#EzN6&A8*&T9z%)U&l0(lp-@7!XDXF5Y9%^&J|UZcYwaN zd1B2K`K+~Jp+e{-uu|KW}q7oC%fhyZceQOjDC_?7hje&T*e`(PhrcEbFx= zCE7ZqkHcl7Dp|Q&n0MAK&;t;HL6##;JN3^zt%~$UsCvhBJ_Nd1sjL;?o5lEyB-R`V zSnKYn#wy%m{o+gN&~p#CH!q^mcJ$uCW3z_rcnggG*&%WIZx!r$%9vNV4Wtr%9u>&A zxnnvlBXRfSMF_X+k+RDl=;&|%|E}V?NIL&pZp6Q4+=u15`OHb4s0*Fvv8%j4dwlvV z^52s3LvwK>uJ7$;uh)y;#$C|_#aBN`H*x3FD*=e!f2Oso@0Uj%z{Yp8mOvcv9bO|% z_%GE07c~KzHr-eAL*)cj@`ug_!CDHETq>hENtk5|LZz0jMD9*`akM9R^nK0D3qSbR zMl2%c>^b=LStO+SXo3W~$dC-5a6kb;_rTP3e2(c(3pIqy+XOuS8*ah{9v~X10uMdA z1e9R@U$v6=6uRJP^$N9-hm&yEK}61JF-uaB|M4tNd`&v}lUkNeorvG2jJ zDY(uUl=Lx`{d!TFD(X2zxe^xmw0!@3W)x%q!${KSUZQn5dgm1jxyA67>|?yt-=zr` zvj?`35BLR-Jo}>(qOAu7jAcATyQl~VI?wqB6H-V=*hvYzMbf{jnNebFDTCRq`cjcA zwR@>&!+_5}Oy`iE@ypDVwGGw2k#$xuhf@r&IWM!wnajGt6c!Sjt?u(Bkn^f!5MX#a zn>l_*E;MmZ=^~$CpyPL{ClE@Q3>l98r;^6YdysG?+B*+|v#7j-TBumPq)Ku7t?oW( z-X7(`wn}TS2wb4Faa|ZJcCO*uKVI(iaQnoL(ev4Vx)aCl(N2W}nMvk@<3T4el@zkF z-Z-rrUB*#YDk~Z*M@82vgntGrBwtDX+Gb-w$;UVMNaYUYGN)8(S?|2Tjb`>+nPnc% zqgxFfrB$m|KA~dv0ZFdN>GM2@4Te!&;gEH1fEb>~5X6@rRByk{E&T5fucCkgSU$|H z2TC2xils?I<~W@3)ynGV&dQ({dk<_JT2W#P5(3PiNS%;_tn=?{Gwmnu53$k*3aRp-(2(i!hxttG%FW}YISySryFd!FF z!|{j~mu6Pi2E3b$rTSNg<-(`#Ny#~a?&|NYCh4)s)&G6uxEGS4ku^iO%`=v9&Mja@ zb_R|{_M8;0b@8)PKvw`CN&{TfxHI z;krP;U&BP_xF{~ki36D}(4%dNhjdv2pd|cbhS_D74_h)8n8t&SdMSwv>wE%sdgOEAdYbPz+$rzbnx(_jOPxzdK`sO!nqv_pboW+0X zOR?PiKyO9Uu67(6rB~8CSl=Z8(KXsDL~dnhgukAzmF%*7dP(e4%sCm=%K?Mt5ya)e}1{D)r9^o;fzmsCLfp$i>LD?{(3l&oR5DVh&V9FK$;{wt^Dqp%dD)95+# zu*X%Ti7n+dfSk;&^P#6+&YfXZsby?ltkwR^ z>hhM-4OxTqVHbW(i0qY_{=|5^ti&7PviFr(c5Vs9Jv6HeQ@Y(jS9=R>ffn>YD=#a# z(hY3BFYOoWkJeipn3$^m==Ln|ht=rxi?ZQH$YY6pp$ zKdQCR_)*E8W`Dc9NhJeqk2ncBQoVc!@H=zxwDjooKkQePoz`TbBskVUSpF=wj6ftO z?oZ?e@emH!y71hed-S3o%{Wantx=qXiex-+ zk@MjPpn?b&=Bvc{Zn~dwqz1RI}Z{UH!+H%9;xJCU5kf96mn1i8kSEL`t=ag^4~bjyp#{Oi}5(ho{-MkBqr`c z$p4KItV;f4be8Cyo#$Qu#m*L}J& 
z+%}pVkhB%OJQ8=y_Nn!??zOw6R(`2weqdty2%Qb!+i*w;cCj*P#sr)Z!@5RaKi;}U z0gZXd)@b^{y79VCuty+6Z!V8terR~aC}*}hK3e4)EH1Ru8YyGZF7iSdK+Ct6Qyio0 z9ZA^D0q%=KQnivlGWAO?ij)s81Pb;Wcbu0kAO5L0?hqZ%9`_$4kZvX8m7JfefVIKK z(D4i~Az%&+GPrwDo$n2;Rpg4NWx8JRN&a3k-uAgbWM`b0FM#>98`NeOnITdatABn| zXF~V#mf;^ueW|JiE=IGvhyvF8*q1Kt_W3r$7_43K>rHS$mcXn2)m9gtcMh|yNV<2r z6uE!mzYFcW9FTTCygwvgHFvOfS#Hn^O!4@KKP3SE6_C>1Yu6}FtZjalB(D?B`KhnA zyN9G;k7`**gU2FvA@=ahG#-xxEEW*7`R!_Gw>Xx!Doaz8HU~yLE+Abs~6Ws^La$b-G zW^k>BtOxY`PAx&3g*&N+o%MkDMS+U`@sXVSQ@H~fF`hl2EfBo??IA9`gPmCafjNx` zcE0wXySy9I3M9q{{mRakjM<7@%$#*Ia`IdbP6GcBhTKU%F#0b~b6@yzjGDw|UM!mh z8m|0~(HxyJKhw#l1fFBCj>5H%4dMX($_5fTuPqG?;r*sM_z@hPN(|@VT{Za z?gS#KYhZFqh+)E8HL&epcnoYR3l5)!4040m??`Se18l4w0Ydz$2N%Oj1-k-EFcW{x zKdGZt8;e&I!49x4Jf7OkGyr9XVZ9rJ>dzkVL1HjtA=Y*=hK0Z3FOd9so5+jF(Hybe zlNA3St1b8>{(mbN0JIS{#>y(4qn3oWc88|w_H#FaX1HWuIoNwuA1n=inXdWp71!4M z>i^Tona4G8ZE<`eOHhLk*Cys8y^WLf8z*QV@c;V}zoB zY$jBVVHK=l5fv)hP(dn$9kd|oV~JE2*&)0;GXz0=?fZ)%cXH+~nS0Oo+~2wPET_^0 zZfka2V?_85_(2*jtdK@ebLdF(D>~M^w%NNx=t|bPViic>9TOL|j%_o(Bq!~rbh98Vsr0zW}oHkNC>c&N=BV|VL*I?QI1%u=wuhenc z2w5%3MAVYCLwui@Z`QeU(tXjgxu#IV)I7Fho7mvrQIa2)H{}DT;xc?AbzK^^7yd81 zC1Pcs{|hb^@#*-OrKh$vR*sluJfKc=4%|SU1{)DL*UJW{AspA3&O9lm=~{wm zKZ%S(nvaJcf7_O}r{%ytQx7eR~_EQKLJR}7>v+t8H4&@+bBuumk$qJu1P%**#5JCe=5qJ_$lbr`e zXo7d4yPU)CTA$_ydsglMo0tDDgo?VE4W$zTNiMj=xRqvMVw;|*dp81^TabDFGvHdq zm2PQQIIf?7VAK1V!J7|dD>Mt#-;siZNl4B8PB2FnQ1EcY&LUQm zgvLU`r42hYarC-EL+eh=tPS{uos!v`r@6+$6mY^nA1PpXYp1JYfL$j&l2O#2iaUfM zoh1k0zq-2lVWB*xRgZ8-%T$18n+~NL${XwO`lc*lYR~esblu`-apCt`WMGHd_Ul z-D|zm8Lf0Mr_IGx^;9j326Gcw=tux*Y92Bf?hvwe$8YeFkBRko@Ok0vDpAKUA*46m zWY-(kx*V&(9pYp5&cPIrsSn2rVEW5KTK zBIhZkmj`tb@42fePhFW7l0?Raq9Ru6&wlE5HsRl==EH-UNw+g?6r$%6a0a^GW`?2 z642Kc?kJDvZ;qV2R12aVv`>{>jXDc7fTsC=P5;@@t`KS?jrcke9myB|&evkUH^@No zY@AcULau8NX1*2Un`jCGJKvP>0A~R->H*jTUt?#dS-LW#Q&%^aO9_g(s@o8u4+w6E z6tC<|1vDpdvQprWNbd%UIgh%@%WLM`VgGYtRsMlq+sm*W<1a#b1 z)rn~kfUYnKAU^mSyvFdbSQS3^p*RDi`m7ui=sP+@`i@5bSlTZdUBZQ}Lpq8@piZ{P zwdoep1ivEI==B) 
z^K*qc9rLx@XizOsHvDRorXWDat8oMuV-lo=lXxoj|=|2Vq;t zKV;3@{=IA&Id;P{fpnOk+|z+~d^n~V?Xf)}JY|*H-rx?7gw-4r?srrklpXWli zQaWoLcH-q=ucO9jJg-vP&**A)79nzbixL#u)XB+#0^=OjK;Ni?UBom8kTnuvIqCUx z^-%E8#MQ}A#}mndPG5JEp?`%X8>S!1b!*PI5>?yCY|-1N zwmebr7LzKYEw&c`<=#_1u!_6D?2`g{v1GN&NZ2Op5;%V8qQ^8uRtNE(cA#52bRE<{ z=|yGh)aee@PEtPzNH*JGoy{3-eRnsKx}-YPCqXu#NSOO{y~SZt-brv0*<`SKf0{xW z5^k*jLG_H(yV9uvdVS_ESw7PKFKuKz(8a~>89wRzGe-!a|AXlyQ6DXTt*?Caqj-hE&rO1wp?ZE3Y@eQ*hDU!^PU{I znf+_LhD#B**un#2uNz$!vEG(4a7+9qoQBdUbkwY*BMdi)k}WmaxQU~6;^Rn~T>5-` zCd}lKWgz{M`6_R{5}a-G_c09n4;zHTIu=2fW1n8s4hBivtVlCUtw&BbBGy0lREv*1 zV17NeobURUy@eB!F)NN$CdTuClr 1)) { + stop("All '*_depends' parameters must be single character strings.") + } else if (!any(grepl(paste0('^', strsplit(names(depends_params)[i], + '_depends$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_depends' parameters must be associated to a dimension parameter. Found parameter '", + names(depends_params)[i], "' but no parameter '", + strsplit(names(depends_params)[i], '_depends$')[[1]][1], "'.")) + } + i <- i + 1 + } + # Make the keys of 'depends_params' to be the name of + # the corresponding dimension. + if (length(depends_params) < 1) { + depends_params <- NULL + } else { + names(depends_params) <- gsub('_depends$', '', names(depends_params)) + } + # Change name to depending_file_dims + depending_file_dims <- depends_params + + # Take *_across parameters apart + across_params_ind <- grep('_across$', names(dim_params)) + across_params <- dim_params[across_params_ind] + # Check all *_across are NULL or vectors of character strings, and + # that they all have a matching dimension param. 
+ i <- 1 + for (across_param in across_params) { + if (!is.character(across_param) || (length(across_param) > 1)) { + stop("All '*_across' parameters must be single character strings.") + } else if (!any(grepl(paste0('^', strsplit(names(across_params)[i], + '_across$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_across' parameters must be associated to a dimension parameter. Found parameter '", + names(across_params)[i], "' but no parameter '", + strsplit(names(across_params)[i], '_across$')[[1]][1], "'.")) + } + i <- i + 1 + } + # Make the keys of 'across_params' to be the name of + # the corresponding dimension. + if (length(across_params) < 1) { + across_params <- NULL + } else { + names(across_params) <- gsub('_across$', '', names(across_params)) + } + # Change name to inner_dims_across_files + inner_dims_across_files <- across_params + + # Check merge_across_dims + if (!is.logical(merge_across_dims)) { + stop("Parameter 'merge_across_dims' must be TRUE or FALSE.") + } + + # Leave alone the dimension parameters in the variable dim_params + if (length(c(var_params_ind, dim_reorder_params_ind, tolerance_params_ind, + depends_params_ind, across_params_ind)) > 0) { + dim_params <- dim_params[-c(var_params_ind, dim_reorder_params_ind, + tolerance_params_ind, depends_params_ind, + across_params_ind)] + # Reallocating pairs of across file and inner dimensions if they have + # to be merged. They are put one next to the other to ease merge later. 
+ if (merge_across_dims) { + for (inner_dim_across in names(inner_dims_across_files)) { + inner_dim_pos <- which(names(dim_params) == inner_dim_across) + file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) + new_pos <- inner_dim_pos + if (file_dim_pos < inner_dim_pos) { + new_pos <- new_pos - 1 + } + dim_params_to_move <- dim_params[c(inner_dim_pos, file_dim_pos)] + dim_params <- dim_params[-c(inner_dim_pos, file_dim_pos)] + new_dim_params <- list() + if (new_pos > 1) { + new_dim_params <- c(new_dim_params, dim_params[1:(new_pos - 1)]) + } + new_dim_params <- c(new_dim_params, dim_params_to_move) + if (length(dim_params) >= new_pos) { + new_dim_params <- c(new_dim_params, dim_params[new_pos:length(dim_params)]) + } + dim_params <- new_dim_params + } + } + } + dim_names <- names(dim_params) + if (is.null(dim_names)) { + stop("At least one pattern dim must be specified.") + } + + # Look for chunked dims + chunks <- vector('list', length(dim_names)) + names(chunks) <- dim_names + for (dim_name in dim_names) { + if (!is.null(attr(dim_params[[dim_name]], 'chunk'))) { + chunks[[dim_name]] <- attr(dim_params[[dim_name]], 'chunk') + attributes(dim_params[[dim_name]]) <- attributes(dim_params[[dim_name]])[-which(names(attributes(dim_params[[dim_name]])) == 'chunk')] + } else { + chunks[[dim_name]] <- c(chunk = 1, n_chunks = 1) + } + } + # This is a helper function to compute the chunk indices to take once the total + # number of indices for a dimension has been discovered. 
+ chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) { + if (n_chunks > n_indices) { + stop("Requested to divide dimension '", dim_name, "' of length ", + n_indices, " in ", n_chunks, " chunks, which is not possible.") + } + chunk_sizes <- rep(floor(n_indices / n_chunks), n_chunks) + chunks_to_extend <- n_indices - chunk_sizes[1] * n_chunks + if (chunks_to_extend > 0) { + chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1 + } + chunk_size <- chunk_sizes[chunk] + offset <- 0 + if (chunk > 1) { + offset <- sum(chunk_sizes[1:(chunk - 1)]) + } + indices <- 1:chunk_sizes[chunk] + offset + array(indices, dim = setNames(length(indices), dim_name)) + } + + # Check pattern_dims + if (is.null(pattern_dims)) { + .warning(paste0("Parameter 'pattern_dims' not specified. Taking the first dimension, '", + dim_names[1], "' as 'pattern_dims'.")) + pattern_dims <- dim_names[1] + } else if (is.character(pattern_dims) && (length(pattern_dims) > 0)) { + pattern_dims <- unique(pattern_dims) + } else { + stop("Parameter 'pattern_dims' must be a vector of character strings.") + } + if (any(names(var_params) %in% pattern_dims)) { + stop("'*_var' parameters specified for pattern dimensions. 
Remove or fix them.") + } + # Find the pattern dimension with the pattern specifications + found_pattern_dim <- NULL + for (pattern_dim in pattern_dims) { + # Check all specifications in pattern_dim are valid + dat <- datasets <- dim_params[[pattern_dim]] + if (is.null(dat) || !(is.character(dat) && all(nchar(dat) > 0)) && !is.list(dat)) { + stop(paste0("Parameter '", pattern_dim, + "' must be a list of lists with pattern specifications or a vector of character strings.")) + } + if (!is.null(dim_reorder_params[[pattern_dim]])) { + .warning(paste0("A reorder for the selectors of '", pattern_dim, + "' has been specified, but it is a pattern dimension and the reorder will be ignored.")) + } + if (is.list(dat) || any(sapply(dat, is.list))) { + if (is.null(found_pattern_dim)) { + found_pattern_dim <- pattern_dim + } else { + stop("Found more than one pattern dim with pattern specifications (list of lists). One and only one pattern dim must contain pattern specifications.") + } + } + } + if (is.null(found_pattern_dim)) { + .warning(paste0("Could not find any pattern dim with explicit data set descriptions (in the form of list of lists). Taking the first pattern dim, '", pattern_dims[1], "', as dimension with pattern specifications.")) + found_pattern_dim <- pattern_dims[1] + } + + # Check all *_reorder are NULL or functions, and that they all have + # a matching dimension param. + i <- 1 + for (dim_reorder_param in dim_reorder_params) { + if (!is.function(dim_reorder_param)) { + stop("All '*_reorder' parameters must be functions.") + } else if (!any(grepl(paste0('^', strsplit(names(dim_reorder_params)[i], + '_reorder$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_reorder' parameters must be associated to a dimension parameter. 
Found parameter '", + names(dim_reorder_params)[i], "' but no parameter '", + strsplit(names(dim_reorder_params)[i], '_reorder$')[[1]][1], "'.")) + #} else if (!any(grepl(paste0('^', strsplit(names(dim_reorder_params)[i], + # '_reorder$')[[1]][1], '$'), + # names(var_params)))) { + # stop(paste0("All '*_reorder' parameters must be associated to a dimension parameter associated to a ", + # "variable. Found parameter '", names(dim_reorder_params)[i], "' and dimension parameter '", + # strsplit(names(dim_reorder_params)[i], '_reorder$')[[1]][1], "' but did not find variable ", + # "parameter '", strsplit(names(dim_reorder_params)[i], '_reorder$')[[1]][1], "_var'.")) + } + i <- i + 1 + } + + # Check all *_tolerance are NULL or vectors of character strings, and + # that they all have a matching dimension param. + i <- 1 + for (tolerance_param in tolerance_params) { + if (!any(grepl(paste0('^', strsplit(names(tolerance_params)[i], + '_tolerance$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_tolerance' parameters must be associated to a dimension parameter. Found parameter '", + names(tolerance_params)[i], "' but no parameter '", + strsplit(names(tolerance_params)[i], '_tolerance$')[[1]][1], "'.")) + #} else if (!any(grepl(paste0('^', strsplit(names(tolerance_params)[i], + # '_tolerance$')[[1]][1], '$'), + # names(var_params)))) { + # stop(paste0("All '*_tolerance' parameters must be associated to a dimension parameter associated to a ", + # "variable. Found parameter '", names(tolerance_params)[i], "' and dimension parameter '", + # strsplit(names(tolerance_params)[i], '_tolerance$')[[1]][1], "' but did not find variable ", + # "parameter '", strsplit(names(tolerance_params)[i], '_tolerance$')[[1]][1], "_var'.")) + } + i <- i + 1 + } + # Make the keys of 'tolerance_params' to be the name of + # the corresponding dimension. 
+ if (length(tolerance_params) < 1) { + tolerance_params <- NULL + } else { + names(tolerance_params) <- gsub('_tolerance$', '', names(tolerance_params)) + } + + # Check metadata_dims + if (!is.null(metadata_dims)) { + if (is.na(metadata_dims)) { + metadata_dims <- NULL + } else if (!is.character(metadata_dims) || (length(metadata_dims) < 1)) { + stop("Parameter 'metadata' dims must be a vector of at least one character string.") + } + } else { + metadata_dims <- pattern_dims + } + + # Once the pattern dimension with dataset specifications is found, + # the variable 'dat' is mounted with the information of each + # dataset. + # Take only the datasets for the requested chunk + dats_to_take <- chunk_indices(length(dim_params[[found_pattern_dim]]), + chunks[[found_pattern_dim]]['chunk'], + chunks[[found_pattern_dim]]['n_chunks'], + found_pattern_dim) + dim_params[[found_pattern_dim]] <- dim_params[[found_pattern_dim]][dats_to_take] + dat <- datasets <- dim_params[[found_pattern_dim]] + dat_info_names <- c('name', 'path')#, 'nc_var_name', 'suffix', 'var_min', 'var_max', 'dimnames') + dat_to_fetch <- c() + dat_names <- c() + if (!is.list(dat)) { + dat <- as.list(dat) + } else { + if (!any(sapply(dat, is.list))) { + dat <- list(dat) + } + } + for (i in 1:length(dat)) { + if (is.character(dat[[i]]) && length(dat[[i]]) == 1 && nchar(dat[[i]]) > 0) { + if (grepl('^(\\./|\\.\\./|/.*/|~/)', dat[[i]])) { + dat[[i]] <- list(path = dat[[i]]) + } else { + dat[[i]] <- list(name = dat[[i]]) + } + } else if (!is.list(dat[[i]])) { + stop(paste0("Parameter '", pattern_dim, + "' is incorrect. It must be a list of lists or character strings.")) + } + #if (!(all(names(dat[[i]]) %in% dat_info_names))) { + # stop("Error: parameter 'dat' is incorrect. There are unrecognized components in the information of some of the datasets. 
Check 'dat' in ?Load for details.") + #} + if (!('name' %in% names(dat[[i]]))) { + dat[[i]][['name']] <- paste0('dat', i) + if (!('path' %in% names(dat[[i]]))) { + stop(paste0("Parameter '", found_pattern_dim, + "' is incorrect. A 'path' should be provided for each dataset if no 'name' is provided.")) + } + } else if (!('path' %in% names(dat[[i]]))) { + dat_to_fetch <- c(dat_to_fetch, i) + } + #if ('path' %in% names(dat[[i]])) { + # if (!('nc_var_name' %in% names(dat[[i]]))) { + # dat[[i]][['nc_var_name']] <- '$var_name$' + # } + # if (!('suffix' %in% names(dat[[i]]))) { + # dat[[i]][['suffix']] <- '' + # } + # if (!('var_min' %in% names(dat[[i]]))) { + # dat[[i]][['var_min']] <- '' + # } + # if (!('var_max' %in% names(dat[[i]]))) { + # dat[[i]][['var_max']] <- '' + # } + #} + dat_names <- c(dat_names, dat[[i]][['name']]) + } + if ((length(dat_to_fetch) > 0) && (length(dat_to_fetch) < length(dat))) { + .warning("'path' has been provided for some datasets. Any information in the configuration file related to these will be ignored.") + } + if (length(dat_to_fetch) > 0) { + stop("Specified only the name for some data sets, but not the path ", + "pattern. This option has not been yet implemented.") + } + + # Reorder inner_dims_across_files (to make the keys be the file dimensions, + # and the values to be the inner dimensions that go across it). 
+ if (!is.null(inner_dims_across_files)) { + # Reorder: example, convert list(ftime = 'chunk', ensemble = 'member', xx = 'chunk') + # to list(chunk = c('ftime', 'xx'), member = 'ensemble') + new_idaf <- list() + for (i in names(inner_dims_across_files)) { + if (!(inner_dims_across_files[[i]] %in% names(new_idaf))) { + new_idaf[[inner_dims_across_files[[i]]]] <- i + } else { + new_idaf[[inner_dims_across_files[[i]]]] <- c(new_idaf[[inner_dims_across_files[[i]]]], i) + } + } + inner_dims_across_files <- new_idaf + } + + # Check return_vars + if (is.null(return_vars)) { + return_vars <- list() +# if (length(var_params) > 0) { +# return_vars <- as.list(paste0(names(var_params), '_var')) +# } else { +# return_vars <- list() +# } + } + if (!is.list(return_vars)) { + stop("Parameter 'return_vars' must be a list or NULL.") + } + if (length(return_vars) > 0 && is.null(names(return_vars))) { +# names(return_vars) <- rep('', length(return_vars)) + stop("Parameter 'return_vars' must be a named list.") + } + i <- 1 + while (i <= length(return_vars)) { +# if (names(return_vars)[i] == '') { +# if (!(is.character(return_vars[[i]]) && (length(return_vars[[i]]) == 1))) { +# stop("The ", i, "th specification in 'return_vars' is malformed.") +# } +# if (!grepl('_var$', return_vars[[i]])) { +# stop("The ", i, "th specification in 'return_vars' is malformed.") +# } +# dim_name <- strsplit(return_vars[[i]], '_var$')[[1]][1] +# if (!(dim_name %in% names(var_params))) { +# stop("'", dim_name, "_var' requested in 'return_vars' but ", +# "no '", dim_name, "_var' specified in the .Load call.") +# } +# names(return_vars)[i] <- var_params[[dim_name]] +# return_vars[[i]] <- found_pattern_dim +# } else + if (length(return_vars[[i]]) > 0) { + if (!is.character(return_vars[[i]])) { + stop("The ", i, "th specification in 'return_vars' is malformed. 
It ", + "must be a vector of character strings of valid file dimension ", + "names.") + } + } + i <- i + 1 + } + + # Check synonims + if (!is.null(synonims)) { + error <- FALSE + if (!is.list(synonims)) { + error <- TRUE + } + for (synonim_entry in names(synonims)) { + if (!(synonim_entry %in% names(dim_params)) && + !(synonim_entry %in% names(return_vars))) { + error <- TRUE + } + if (!is.character(synonims[[synonim_entry]]) || + length(synonims[[synonim_entry]]) < 1) { + error <- TRUE + } + } + if (error) { + stop("Parameter 'synonims' must be a named list, where the names are ", + "a name of a requested dimension or variable and the values are ", + "vectors of character strings with at least one alternative name ", + " for each dimension or variable in 'synonims'.") + } + } + if (length(unique(names(synonims))) < length(names(synonims))) { + stop("There must not be repeated entries in 'synonims'.") + } + if (length(unique(unlist(synonims))) < length(unlist(synonims))) { + stop("There must not be repeated values in 'synonims'.") + } + # Make that all dims and vars have an entry in synonims, even if only dim_name = dim_name + dim_entries_to_add <- which(!(names(dim_params) %in% names(synonims))) + if (length(dim_entries_to_add) > 0) { + synonims[names(dim_params)[dim_entries_to_add]] <- as.list(names(dim_params)[dim_entries_to_add]) + } + var_entries_to_add <- which(!(names(var_params) %in% names(synonims))) + if (length(var_entries_to_add) > 0) { + synonims[names(var_params)[var_entries_to_add]] <- as.list(names(var_params)[var_entries_to_add]) + } + + # Check selector_checker + if (is.null(selector_checker) || !is.function(selector_checker)) { + stop("Parameter 'selector_checker' must be a function.") + } + + # Check file_opener + if (is.null(file_opener) || !is.function(file_opener)) { + stop("Parameter 'file_opener' must be a function.") + } + + # Check file_var_reader + if (!is.null(file_var_reader) && !is.function(file_var_reader)) { + stop("Parameter 
'file_var_reader' must be a function.") + } + + # Check file_dim_reader + if (!is.null(file_dim_reader) && !is.function(file_dim_reader)) { + stop("Parameter 'file_dim_reader' must be a function.") + } + + # Check file_data_reader + if (is.null(file_data_reader) || !is.function(file_data_reader)) { + stop("Parameter 'file_data_reader' must be a function.") + } + + # Check file_closer + if (is.null(file_closer) || !is.function(file_closer)) { + stop("Parameter 'file_closer' must be a function.") + } + + # Check transform + if (!is.null(transform)) { + if (!is.function(transform)) { + stop("Parameter 'transform' must be a function.") + } + } + + # Check transform_params + if (!is.null(transform_params)) { + if (!is.list(transform_params)) { + stop("Parameter 'transform_params' must be a list.") + } + if (is.null(names(transform_params))) { + stop("Parameter 'transform_params' must be a named list.") + } + } + + # Check transform_vars + if (!is.null(transform_vars)) { + if (!is.character(transform_vars)) { + stop("Parameter 'transform_vars' must be a vector of character strings.") + } + } + if (any(!(transform_vars %in% names(return_vars)))) { + stop("All the variables specified in 'transform_vars' must also be specified in 'return_vars'.") + } + + # Check apply_indices_after_transform + if (!is.logical(apply_indices_after_transform)) { + stop("Parameter 'apply_indices_after_transform' must be either TRUE or FALSE.") + } + aiat <- apply_indices_after_transform + + # Check transform_extra_cells + if (!is.numeric(transform_extra_cells)) { + stop("Parameter 'transform_extra_cells' must be numeric.") + } + transform_extra_cells <- round(transform_extra_cells) + + # Check split_multiselected_dims + if (!is.logical(split_multiselected_dims)) { + stop("Parameter 'split_multiselected_dims' must be TRUE or FALSE.") + } + + # Check path_glob_permissive + if (!is.numeric(path_glob_permissive) && !is.logical(path_glob_permissive)) { + stop("Parameter 'path_glob_permissive' must 
be TRUE, FALSE or an integer.") + } + if (length(path_glob_permissive) != 1) { + stop("Parameter 'path_glob_permissive' must be of length 1.") + } + + # Check retrieve + if (!is.logical(retrieve)) { + stop("Parameter 'retrieve' must be TRUE or FALSE.") + } + + # Check num_procs + if (!is.null(num_procs)) { + if (!is.numeric(num_procs)) { + stop("Parameter 'num_procs' must be numeric.") + } else { + num_procs <- round(num_procs) + } + } + + # Check silent + if (!is.logical(silent)) { + stop("Parameter 'silent' must be logical.") + } + + dim_params[[found_pattern_dim]] <- dat_names + + if (!silent) { + .message(paste0("Exploring files... This will take a variable amount ", + "of time depending on the issued request and the ", + "performance of the file server...")) + } + +if (!is.character(debug)) { +dims_to_check <- c('time') +} else { +dims_to_check <- debug +debug <- TRUE +} + + ############################## READING FILE DIMS ############################ + # Check that no unrecognized variables are present in the path patterns + # and also that no file dimensions are requested to THREDDs catalogs. + # And in the mean time, build all the work pieces and look for the + # first available file of each dataset. 
+ array_of_files_to_load <- NULL + array_of_not_found_files <- NULL + indices_of_first_files_with_data <- vector('list', length(dat)) + selectors_of_first_files_with_data <- vector('list', length(dat)) + dataset_has_files <- rep(FALSE, length(dat)) + found_file_dims <- vector('list', length(dat)) + expected_inner_dims <- vector('list', length(dat)) + +#print("A") + for (i in 1:length(dat)) { +#print("B") + dat_selectors <- dim_params + dat_selectors[[found_pattern_dim]] <- dat_selectors[[found_pattern_dim]][i] + dim_vars <- paste0('$', dim_names, '$') + file_dims <- which(sapply(dim_vars, grepl, dat[[i]][['path']], fixed = TRUE)) + if (length(file_dims) > 0) { + file_dims <- dim_names[file_dims] + } + file_dims <- unique(c(pattern_dims, file_dims)) + found_file_dims[[i]] <- file_dims + expected_inner_dims[[i]] <- dim_names[which(!(dim_names %in% file_dims))] + # (Check the depending_file_dims). + if (any(c(names(depending_file_dims), unlist(depending_file_dims)) %in% + expected_inner_dims[[i]])) { + stop(paste0("The dimension dependancies specified in ", + "'depending_file_dims' can only be between file ", + "dimensions, but some inner dimensions found in ", + "dependancies for '", dat[[i]][['name']], "', which ", + "has the following file dimensions: ", + paste(paste0("'", file_dims, "'"), collapse = ', '), ".")) + } else { + a <- names(depending_file_dims) %in% file_dims + b <- unlist(depending_file_dims) %in% file_dims + ab <- a & b + if (any(!ab)) { + .warning(paste0("Detected some dependancies in 'depending_file_dims' with ", + "non-existing dimension names. These will be disregarded.")) + depending_file_dims <- depending_file_dims[-which(!ab)] + } + if (any(names(depending_file_dims) == unlist(depending_file_dims))) { + depending_file_dims <- depending_file_dims[-which(names(depending_file_dims) == unlist(depending_file_dims))] + } + } + # (Check the inner_dims_across_files). 
+ if (any(!(names(inner_dims_across_files) %in% file_dims)) || + any(!(unlist(inner_dims_across_files) %in% expected_inner_dims[[i]]))) { + stop(paste0("All relationships specified in ", + "'_across' parameters must be between a inner ", + "dimension and a file dimension. Found wrong ", + "specification for '", dat[[i]][['name']], "', which ", + "has the following file dimensions: ", + paste(paste0("'", file_dims, "'"), collapse = ', '), + ", and the following inner dimensions: ", + paste(paste0("'", expected_inner_dims[[i]], "'"), + collapse = ', '), ".")) + } + # (Check the return_vars). + j <- 1 + while (j <= length(return_vars)) { + if (any(!(return_vars[[j]] %in% file_dims))) { + if (any(return_vars[[j]] %in% expected_inner_dims[[i]])) { + stop("Found variables in 'return_vars' requested ", + "for some inner dimensions (for dataset '", + dat[[i]][['name']], "'), but variables can only be ", + "requested for file dimensions.") + } else { + stop("Found variables in 'return_vars' requested ", + "for non-existing dimensions.") + } + } + j <- j + 1 + } + # (Check the metadata_dims). + if (!is.null(metadata_dims)) { + if (any(!(metadata_dims %in% file_dims))) { + stop("All dimensions in 'metadata_dims' must be file dimensions.") + } + } + ## Look for _var params that should be requested automatically. + for (dim_name in dim_names) { + if (!(dim_name %in% pattern_dims)) { + if (is.null(attr(dat_selectors[[dim_name]], 'values')) || + is.null(attr(dat_selectors[[dim_name]], 'indices'))) { + flag <- ((dat_selectors[[dim_name]] %in% c('all', 'first', 'last')) || + (is.numeric(unlist(dat_selectors[[dim_name]])))) + attr(dat_selectors[[dim_name]], 'values') <- !flag + attr(dat_selectors[[dim_name]], 'indices') <- flag + } + ## The following code 'rewrites' var_params for all datasets. If providing different + ## path pattern repositories with different file/inner dimensions, var_params might + ## have to be handled for each dataset separately. 
+ if ((attr(dat_selectors[[dim_name]], 'values') || (dim_name %in% c('var', 'variable'))) && + !(dim_name %in% names(var_params)) && !(dim_name %in% file_dims)) { + if (dim_name %in% c('var', 'variable')) { + var_params <- c(var_params, setNames(list('var_names'), dim_name)) + .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", + dim_name, "_var' requested. ", '"', dim_name, "_var = '", + 'var_names', "'", '"', " has been automatically added to ", + "the Start call.")) + } else { + var_params <- c(var_params, setNames(list(dim_name), dim_name)) + .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", + dim_name, "_var' requested. ", '"', dim_name, "_var = '", + dim_name, "'", '"', " has been automatically added to ", + "the Start call.")) + } + } + } + } + ## (Check the *_var parameters). + if (any(!(unlist(var_params) %in% names(return_vars)))) { + vars_to_add <- which(!(unlist(var_params) %in% names(return_vars))) + new_return_vars <- vector('list', length(vars_to_add)) + names(new_return_vars) <- unlist(var_params)[vars_to_add] + return_vars <- c(return_vars, new_return_vars) + .warning(paste0("All '*_var' params must associate a dimension to one of the ", + "requested variables in 'return_vars'. 
The following variables", + " have been added to 'return_vars': ", + paste(paste0("'", unlist(var_params), "'"), collapse = ', '))) + } + + replace_values <- vector('list', length = length(file_dims)) + names(replace_values) <- file_dims + # Take the first selector for all possible file dimensions + for (file_dim in file_dims) { + if (file_dim %in% names(var_params)) { + .warning(paste0("The '", file_dim, "_var' param will be ignored since '", + file_dim, "' is a file dimension (for the dataset with pattern ", + dat[[i]][['path']], ").")) + } + if (!is.list(dat_selectors[[file_dim]]) || + (is.list(dat_selectors[[file_dim]]) && + length(dat_selectors[[file_dim]]) == 2 && + is.null(names(dat_selectors[[file_dim]])))) { + dat_selectors[[file_dim]] <- list(dat_selectors[[file_dim]]) + } + first_class <- class(dat_selectors[[file_dim]][[1]]) + first_length <- length(dat_selectors[[file_dim]][[1]]) + for (j in 1:length(dat_selectors[[file_dim]])) { + sv <- selector_vector <- dat_selectors[[file_dim]][[j]] + if (!identical(first_class, class(sv)) || + !identical(first_length, length(sv))) { + stop("All provided selectors for depending dimensions must ", + "be vectors of the same length and of the same class.") + } + if (is.character(sv) && !((length(sv) == 1) && (sv[1] %in% c('all', 'first', 'last')))) { + dat_selectors[[file_dim]][[j]] <- selector_checker(selectors = sv, + return_indices = FALSE) + # Take chunk if needed + dat_selectors[[file_dim]][[j]] <- dat_selectors[[file_dim]][[j]][chunk_indices(length(dat_selectors[[file_dim]][[j]]), + chunks[[file_dim]]['chunk'], + chunks[[file_dim]]['n_chunks'], + file_dim)] + } else if (!(is.numeric(sv) || + (is.character(sv) && (length(sv) == 1) && (sv %in% c('all', 'first', 'last'))) || + (is.list(sv) && (length(sv) == 2) && (all(sapply(sv, is.character)) || + all(sapply(sv, is.numeric)))))) { + stop("All explicitly provided selectors for file dimensions must be character strings.") + } + } + sv <- 
dat_selectors[[file_dim]][[1]] + if (is.character(sv) && !((length(sv) == 1) && (sv[1] %in% c('all', 'first', 'last')))) { + replace_values[[file_dim]] <- dat_selectors[[file_dim]][[1]][1] + } + } +#print("C") + # Now we know which dimensions whose selectors are provided non-explicitly. + undefined_file_dims <- file_dims[which(sapply(replace_values, is.null))] + defined_file_dims <- file_dims[which(!(file_dims %in% undefined_file_dims))] + # Quickly check if the depending dimensions are provided properly. + for (file_dim in file_dims) { + if (file_dim %in% names(depending_file_dims)) { + ## TODO: Detect multi-dependancies and forbid. + if (all(c(file_dim, depending_file_dims[[file_dim]]) %in% defined_file_dims)) { + if (length(dat_selectors[[file_dim]]) != length(dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { + stop(paste0("If providing selectors for the depending ", + "dimension '", file_dim, "', a ", + "vector of selectors must be provided for ", + "each selector of the dimension it depends on, '", + depending_file_dims[[file_dim]], "'.")) + } else if (!all(names(dat_selectors[[file_dim]]) == dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { + stop(paste0("If providing selectors for the depending ", + "dimension '", file_dim, "', the name of the ", + "provided vectors of selectors must match ", + "exactly the selectors of the dimension it ", + "depends on, '", depending_file_dims[[file_dim]], "'.")) + } + } + } + } + # Find the possible values for the selectors that are provided as + # indices. If the requested file is on server, impossible operation. + if (length(grep("^http", dat[[i]][['path']])) > 0) { + if (length(undefined_file_dims) > 0) { + stop(paste0("All selectors for the file dimensions must be ", + "character strings if requesting data to a remote ", + "server. 
Found invalid selectors for the file dimensions ", + paste(paste0("'", undefined_file_dims, "'"), collapse = ', '), ".")) + } + dataset_has_files[i] <- TRUE + } else { + dat[[i]][['path']] <- path.expand(dat[[i]][['path']]) + # Iterate over the known dimensions to find the first existing file. + # The path to the first existing file will be used to find the + # values for the non explicitly defined selectors. + first_file <- NULL + first_file_selectors <- NULL + if (length(undefined_file_dims) > 0) { + replace_values[undefined_file_dims] <- '*' + } + ## TODO: What if length of defined_file_dims is 0? code might crash (in practice it worked for an example case) + files_to_check <- sapply(dat_selectors[defined_file_dims], function(x) length(x[[1]])) + sub_array_of_files_to_check <- array(1:prod(files_to_check), dim = files_to_check) + j <- 1 +#print("D") + while (j <= prod(files_to_check) && is.null(first_file)) { + selector_indices <- which(sub_array_of_files_to_check == j, arr.ind = TRUE)[1, ] + selectors <- sapply(1:length(defined_file_dims), + function (x) { + vector_to_pick <- 1 + if (defined_file_dims[x] %in% names(depending_file_dims)) { + vector_to_pick <- selector_indices[which(defined_file_dims == depending_file_dims[[defined_file_dims[x]]])] + } + dat_selectors[defined_file_dims][[x]][[vector_to_pick]][selector_indices[x]] + }) + replace_values[defined_file_dims] <- selectors + file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values) + file_path <- Sys.glob(file_path) + if (length(file_path) > 0) { + first_file <- file_path[1] + first_file_selectors <- selectors + } + j <- j + 1 + } +#print("E") + # Start looking for values for the non-explicitly defined selectors. + if (is.null(first_file)) { + .warning(paste0("No found files for the datset '", dat[[i]][['name']], + "'. 
Provide existing selectors for the file dimensions ", + " or check and correct its path pattern: ", dat[[i]][['path']])) + } else { + dataset_has_files[i] <- TRUE + ## TODO: Improve message here if no variable found: + if (length(undefined_file_dims) > 0) { + # Looking for the first values, parsed from first_file. + first_values <- vector('list', length = length(undefined_file_dims)) + names(first_values) <- undefined_file_dims + found_values <- 0 + stop <- FALSE + try_dim <- 1 + last_success <- 1 + while ((found_values < length(undefined_file_dims)) && !stop) { + u_file_dim <- undefined_file_dims[try_dim] + if (is.null(first_values[[u_file_dim]])) { + path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']], + replace_values[-which(file_dims == u_file_dim)], + allow_undefined_key_vars = TRUE) + found_value <- .FindTagValue(path_with_globs_and_tag, + first_file, u_file_dim) + if (!is.null(found_value)) { + found_values <- found_values + 1 + last_success <- try_dim + first_values[[u_file_dim]] <- found_value + replace_values[[u_file_dim]] <- found_value + } + } + try_dim <- (try_dim %% length(undefined_file_dims)) + 1 + if (try_dim == last_success) { + stop <- TRUE + } + } + if (found_values < length(undefined_file_dims)) { + stop(paste0("Path pattern of dataset '", dat[[i]][['name']], + "' is too complex. Could not automatically ", + "detect values for all non-explicitly defined ", + "indices. Check its pattern: ", dat[[i]][['path']])) + } + ## TODO: Replace ReplaceGlobExpressions by looped call to FindTagValue? As done above + ## Maybe it can solve more cases actually. I got warnings in ReplGlobExp with a typical + ## cmor case, requesting all members and chunks for fixed var and sdate. Not fixing + ## sdate raised 'too complex' error. 
+ # Replace shell globs in path pattern and keep the file_dims as tags + dat[[i]][['path']] <- .ReplaceGlobExpressions(dat[[i]][['path']], first_file, replace_values, + file_dims, dat[[i]][['name']], path_glob_permissive) + # Now time to look for the available values for the non + # explicitly defined selectors for the file dimensions. +#print("H") + # Check first the ones that do not depend on others. + ufd <- c(undefined_file_dims[which(!(undefined_file_dims %in% names(depending_file_dims)))], + undefined_file_dims[which(undefined_file_dims %in% names(depending_file_dims))]) + + for (u_file_dim in ufd) { + replace_values[undefined_file_dims] <- first_values + replace_values[[u_file_dim]] <- '*' + depended_dim <- NULL + depended_dim_values <- NA + selectors <- dat_selectors[[u_file_dim]][[1]] + if (u_file_dim %in% names(depending_file_dims)) { + depended_dim <- depending_file_dims[[u_file_dim]] + depended_dim_values <- dat_selectors[[depended_dim]][[1]] + dat_selectors[[u_file_dim]] <- vector('list', length = length(depended_dim_values)) + names(dat_selectors[[u_file_dim]]) <- depended_dim_values + } else { + dat_selectors[[u_file_dim]] <- list() + } + if (u_file_dim %in% unlist(depending_file_dims)) { + depending_dims <- names(depending_file_dims)[which(sapply(depending_file_dims, function(x) u_file_dim %in% x))] + replace_values[depending_dims] <- rep('*', length(depending_dims)) + } + for (j in 1:length(depended_dim_values)) { + parsed_values <- c() + if (!is.null(depended_dim)) { + replace_values[[depended_dim]] <- depended_dim_values[j] + } + path_with_globs <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values) + found_files <- Sys.glob(path_with_globs) + ## TODO: Enhance this error message, or change by warning. + ## Raises if a wrong sdate is specified, for example. 
+ if (length(found_files) == 0) { + .warning(paste0("Could not find files for any '", u_file_dim, + "' for '", depended_dim, "' = '", + depended_dim_values[j], "'.")) + dat_selectors[[u_file_dim]][[j]] <- NA + } else { + for (found_file in found_files) { + path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']], + replace_values[-which(file_dims == u_file_dim)], + allow_undefined_key_vars = TRUE) + parsed_values <- c(parsed_values, + .FindTagValue(path_with_globs_and_tag, found_file, + u_file_dim)) + } + dat_selectors[[u_file_dim]][[j]] <- selector_checker(selectors = selectors, + var = unique(parsed_values), + return_indices = FALSE) + # Take chunk if needed + dat_selectors[[u_file_dim]][[j]] <- dat_selectors[[u_file_dim]][[j]][chunk_indices(length(dat_selectors[[u_file_dim]][[j]]), + chunks[[u_file_dim]]['chunk'], + chunks[[u_file_dim]]['n_chunks'], + u_file_dim)] + } + } + } +#print("I") + } else { + dat[[i]][['path']] <- .ReplaceGlobExpressions(dat[[i]][['path']], first_file, replace_values, + defined_file_dims, dat[[i]][['name']], path_glob_permissive) + } + } + } + # Now fetch for the first available file + if (dataset_has_files[i]) { + known_dims <- file_dims + } else { + known_dims <- defined_file_dims + } + replace_values <- vector('list', length = length(known_dims)) + names(replace_values) <- known_dims + files_to_load <- sapply(dat_selectors[known_dims], function(x) length(x[[1]])) + files_to_load[found_pattern_dim] <- 1 + sub_array_of_files_to_load <- array(1:prod(files_to_load), + dim = files_to_load) + names(dim(sub_array_of_files_to_load)) <- known_dims + sub_array_of_not_found_files <- array(!dataset_has_files[i], + dim = files_to_load) + names(dim(sub_array_of_not_found_files)) <- known_dims + j <- 1 + selector_indices_save <- vector('list', prod(files_to_load)) + while (j <= prod(files_to_load)) { + selector_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ] + names(selector_indices) <- known_dims + 
selector_indices_save[[j]] <- selector_indices + selectors <- sapply(1:length(known_dims), + function (x) { + vector_to_pick <- 1 + if (known_dims[x] %in% names(depending_file_dims)) { + vector_to_pick <- selector_indices[which(known_dims == depending_file_dims[[known_dims[x]]])] + } + dat_selectors[known_dims][[x]][[vector_to_pick]][selector_indices[x]] + }) + names(selectors) <- known_dims + replace_values[known_dims] <- selectors + if (!dataset_has_files[i]) { + if (any(is.na(selectors))) { + replace_values <- replace_values[-which(names(replace_values) %in% names(selectors[which(is.na(selectors))]))] + } + file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values, TRUE) + sub_array_of_files_to_load[j] <- file_path + #sub_array_of_not_found_files[j] <- TRUE??? + } else { + if (any(is.na(selectors))) { + replace_values <- replace_values[-which(names(replace_values) %in% names(selectors[which(is.na(selectors))]))] + file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values, TRUE) + sub_array_of_files_to_load[j] <- file_path + sub_array_of_not_found_files[j] <- TRUE + } else { + file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values) + if (!(length(grep("^http", file_path)) > 0)) { + if (grepl(file_path, '*', fixed = TRUE)) { + file_path_full <- Sys.glob(file_path)[1] + if (nchar(file_path_full) > 0) { + file_path <- file_path_full + } + } + } + sub_array_of_files_to_load[j] <- file_path + if (is.null(indices_of_first_files_with_data[[i]])) { + if (!(length(grep("^http", file_path)) > 0)) { + if (!file.exists(file_path)) { + file_path <- NULL + } + } + if (!is.null(file_path)) { + test_file <- NULL + ## TODO: suppress error messages + test_file <- file_opener(file_path) + if (!is.null(test_file)) { + selector_indices[which(known_dims == found_pattern_dim)] <- i + indices_of_first_files_with_data[[i]] <- selector_indices + selectors_of_first_files_with_data[[i]] <- selectors + file_closer(test_file) + } + } + } + } 
+ } + j <- j + 1 + } + # Extend array as needed progressively + if (is.null(array_of_files_to_load)) { + array_of_files_to_load <- sub_array_of_files_to_load + array_of_not_found_files <- sub_array_of_not_found_files + } else { + array_of_files_to_load <- .MergeArrays(array_of_files_to_load, sub_array_of_files_to_load, + along = found_pattern_dim) + ## TODO: file_dims, and variables like that.. are still ok now? I don't think so + array_of_not_found_files <- .MergeArrays(array_of_not_found_files, sub_array_of_not_found_files, + along = found_pattern_dim) + } + dat[[i]][['selectors']] <- dat_selectors + } + if (all(sapply(indices_of_first_files_with_data, is.null))) { + stop("No data files found for any of the specified datasets.") + } + + ########################### READING INNER DIMS. ############################# +#print("J") + ## TODO: To be run in parallel (local multi-core) + # Now time to work out the inner file dimensions. + # First pick the requested variables. + dims_to_iterate <- NULL + for (return_var in names(return_vars)) { + dims_to_iterate <- unique(c(dims_to_iterate, return_vars[[return_var]])) + } + if (found_pattern_dim %in% dims_to_iterate) { + dims_to_iterate <- dims_to_iterate[-which(dims_to_iterate == found_pattern_dim)] + } + common_return_vars <- NULL + common_first_found_file <- NULL + common_return_vars_pos <- NULL + if (length(return_vars) > 0) { + common_return_vars_pos <- which(sapply(return_vars, function(x) !(found_pattern_dim %in% x))) + } + if (length(common_return_vars_pos) > 0) { + common_return_vars <- return_vars[common_return_vars_pos] + return_vars <- return_vars[-common_return_vars_pos] + common_first_found_file <- rep(FALSE, length(which(sapply(common_return_vars, length) == 0))) + names(common_first_found_file) <- names(common_return_vars[which(sapply(common_return_vars, length) == 0)]) + } + return_vars <- lapply(return_vars, + function(x) { + if (found_pattern_dim %in% x) { + x[-which(x == found_pattern_dim)] + } else { + 
x + } + }) + if (length(common_return_vars) > 0) { + picked_common_vars <- vector('list', length = length(common_return_vars)) + names(picked_common_vars) <- names(common_return_vars) + } else { + picked_common_vars <- NULL + } + picked_common_vars_ordered <- picked_common_vars + picked_common_vars_unorder_indices <- picked_common_vars + picked_vars <- vector('list', length = length(dat)) + names(picked_vars) <- dat_names + picked_vars_ordered <- picked_vars + picked_vars_unorder_indices <- picked_vars + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + # Put all selectors in a list of a single list/vector of selectors. + # The dimensions that go across files will later be extended to have + # lists of lists/vectors of selectors. + for (inner_dim in expected_inner_dims[[i]]) { + if (!is.list(dat[[i]][['selectors']][[inner_dim]]) || + (is.list(dat[[i]][['selectors']][[inner_dim]]) && + length(dat[[i]][['selectors']][[inner_dim]]) == 2 && + is.null(names(dat[[i]][['selectors']][[inner_dim]])))) { + dat[[i]][['selectors']][[inner_dim]] <- list(dat[[i]][['selectors']][[inner_dim]]) + } + } + if (length(return_vars) > 0) { + picked_vars[[i]] <- vector('list', length = length(return_vars)) + names(picked_vars[[i]]) <- names(return_vars) + picked_vars_ordered[[i]] <- picked_vars[[i]] + picked_vars_unorder_indices[[i]] <- picked_vars[[i]] + } + indices_of_first_file <- as.list(indices_of_first_files_with_data[[i]]) + array_file_dims <- sapply(dat[[i]][['selectors']][found_file_dims[[i]]], function(x) length(x[[1]])) + names(array_file_dims) <- found_file_dims[[i]] + if (length(dims_to_iterate) > 0) { + indices_of_first_file[dims_to_iterate] <- lapply(array_file_dims[dims_to_iterate], function(x) 1:x) + } + array_of_var_files <- do.call('[', c(list(x = array_of_files_to_load), indices_of_first_file, list(drop = FALSE))) + array_of_var_indices <- array(1:length(array_of_var_files), dim = dim(array_of_var_files)) + array_of_not_found_var_files <- do.call('[', 
c(list(x = array_of_not_found_files), indices_of_first_file, list(drop = FALSE))) + previous_indices <- rep(-1, length(indices_of_first_file)) + names(previous_indices) <- names(indices_of_first_file) + first_found_file <- NULL + if (length(return_vars) > 0) { + first_found_file <- rep(FALSE, length(which(sapply(return_vars, length) == 0))) + names(first_found_file) <- names(return_vars[which(sapply(return_vars, length) == 0)]) + } + for (j in 1:length(array_of_var_files)) { + current_indices <- which(array_of_var_indices == j, arr.ind = TRUE)[1, ] + names(current_indices) <- names(indices_of_first_file) + if (!is.na(array_of_var_files[j]) && !array_of_not_found_var_files[j]) { + changed_dims <- which(current_indices != previous_indices) + vars_to_read <- NULL + if (length(return_vars) > 0) { + vars_to_read <- names(return_vars)[sapply(return_vars, function(x) any(names(changed_dims) %in% x))] + } + if (!is.null(first_found_file)) { + if (any(!first_found_file)) { + vars_to_read <- c(vars_to_read, names(first_found_file[which(!first_found_file)])) + } + } + if ((i == 1) && (length(common_return_vars) > 0)) { + vars_to_read <- c(vars_to_read, names(common_return_vars)[sapply(common_return_vars, function(x) any(names(changed_dims) %in% x))]) + } + if (!is.null(common_first_found_file)) { + if (any(!common_first_found_file)) { + vars_to_read <- c(vars_to_read, names(common_first_found_file[which(!common_first_found_file)])) + } + } + file_object <- file_opener(array_of_var_files[j]) + if (!is.null(file_object)) { + for (var_to_read in vars_to_read) { + if (var_to_read %in% unlist(var_params)) { + associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)] + } + var_name_to_reader <- var_to_read + names(var_name_to_reader) <- 'var' + var_dims <- file_dim_reader(NULL, file_object, var_name_to_reader, NULL, + synonims) + # file_dim_reader returns dimension names as found in the file. 
+ # Need to translate accoridng to synonims: + names(var_dims) <- sapply(names(var_dims), + function(x) { + which_entry <- which(sapply(synonims, function(y) x %in% y)) + if (length(which_entry) > 0) { + names(synonims)[which_entry] + } else { + x + } + }) + if (!is.null(var_dims)) { + var_file_dims <- NULL + if (var_to_read %in% names(common_return_vars)) { + var_to_check <- common_return_vars[[var_to_read]] + } else { + var_to_check <- return_vars[[var_to_read]] + } + if (any(names(dim(array_of_files_to_load)) %in% var_to_check)) { + var_file_dims <- dim(array_of_files_to_load)[which(names(dim(array_of_files_to_load)) %in% + var_to_check)] + } + if (((var_to_read %in% names(common_return_vars)) && + is.null(picked_common_vars[[var_to_read]])) || + ((var_to_read %in% names(return_vars)) && + is.null(picked_vars[[i]][[var_to_read]]))) { + if (any(names(var_file_dims) %in% names(var_dims))) { + stop("Found a requested var in 'return_var' requested for a ", + "file dimension which also appears in the dimensions of ", + "the variable inside the file.\n", array_of_var_files[j]) + } + special_types <- list('POSIXct' = as.POSIXct, 'POSIXlt' = as.POSIXlt, + 'Date' = as.Date) + first_sample <- file_var_reader(NULL, file_object, NULL, + var_to_read, synonims) + if (any(class(first_sample) %in% names(special_types))) { + array_size <- prod(c(var_file_dims, var_dims)) + new_array <- rep(special_types[[class(first_sample)[1]]](NA), array_size) + dim(new_array) <- c(var_file_dims, var_dims) + } else { + new_array <- array(dim = c(var_file_dims, var_dims)) + } + attr(new_array, 'variables') <- attr(first_sample, 'variables') + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars[[var_to_read]] <- new_array + pick_ordered <- FALSE + if (var_to_read %in% unlist(var_params)) { + if (associated_dim_name %in% names(dim_reorder_param) && !aiat) { + picked_common_vars_ordered[[var_to_read]] <- new_array + pick_ordered <- TRUE + } + } + if (!pick_ordered) { + 
picked_common_vars_ordered[[var_to_read]] <- NULL + } + } else { + picked_vars[[i]][[var_to_read]] <- new_array + pick_ordered <- FALSE + if (var_to_read %in% unlist(var_params)) { + if (associated_dim_name %in% names(dim_reorder_params) && !aiat) { + picked_vars_ordered[[i]][[var_to_read]] <- new_array + pick_ordered <- TRUE + } + } + if (!pick_ordered) { + picked_vars_ordered[[i]][[var_to_read]] <- NULL + } + } + } else { + if (var_to_read %in% names(common_return_vars)) { + array_var_dims <- dim(picked_common_vars[[var_to_read]]) + } else { + array_var_dims <- dim(picked_vars[[i]][[var_to_read]]) + } + full_array_var_dims <- array_var_dims + if (any(names(array_var_dims) %in% names(var_file_dims))) { + array_var_dims <- array_var_dims[-which(names(array_var_dims) %in% names(var_file_dims))] + } + if (names(array_var_dims) != names(var_dims)) { + stop("Error while reading the variable '", var_to_read, "' from ", + "the file. Dimensions do not match.\nExpected ", + paste(paste0("'", names(array_var_dims), "'"), + collapse = ', '), " but found ", + paste(paste0("'", names(var_dims), "'"), + collapse = ', '), ".\n", array_of_var_files[j]) + } + if (any(var_dims > array_var_dims)) { + longer_dims <- which(var_dims > array_var_dims) + if (length(longer_dims) == 1) { + longer_dims_in_full_array <- longer_dims + if (any(names(full_array_var_dims) %in% names(var_file_dims))) { + candidates <- (1:length(full_array_var_dims))[-which(names(full_array_var_dims) %in% names(var_file_dims))] + longer_dims_in_full_array <- candidates[longer_dims] + } + padding_dims <- full_array_var_dims + padding_dims[longer_dims_in_full_array] <- var_dims[longer_dims] - + array_var_dims[longer_dims] + special_types <- list('POSIXct' = as.POSIXct, 'POSIXlt' = as.POSIXlt, + 'Date' = as.Date) + if (var_to_read %in% names(common_return_vars)) { + var_class <- class(picked_common_vars[[var_to_read]]) + } else { + var_class <- class(picked_vars[[i]][[var_to_read]]) + } + if (any(var_class %in% 
names(special_types))) { + padding_size <- prod(padding_dims) + padding <- rep(special_types[[var_class[1]]](NA), padding_size) + dim(padding) <- padding_dims + } else { + padding <- array(dim = padding_dims) + } + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars[[var_to_read]] <- .abind2( + picked_common_vars[[var_to_read]], + padding, + names(full_array_var_dims)[longer_dims_in_full_array] + ) + } else { + picked_vars[[i]][[var_to_read]] <- .abind2( + picked_vars[[i]][[var_to_read]], + padding, + names(full_array_var_dims)[longer_dims_in_full_array] + ) + } + } else { + stop("Error while reading the variable '", var_to_read, "' from ", + "the file. Found size (", paste(var_dims, collapse = ' x '), + ") is greater than expected maximum size (", + array_var_dims, ").") + } + } + } + var_store_indices <- c(as.list(current_indices[names(var_file_dims)]), lapply(var_dims, function(x) 1:x)) + var_values <- file_var_reader(NULL, file_object, NULL, var_to_read, synonims) + if (var_to_read %in% unlist(var_params)) { + if ((associated_dim_name %in% names(dim_reorder_params)) && !aiat) { + ## Is this check really needed? + if (length(dim(var_values)) > 1) { + stop("Requested a '", associated_dim_name, "_reorder' for a dimension ", + "whose coordinate variable that has more than 1 dimension. This is ", + "not supported.") + } + ordered_var_values <- dim_reorder_params[[associated_dim_name]](var_values) + attr(ordered_var_values$x, 'variables') <- attr(var_values, 'variables') + if (!all(c('x', 'ix') %in% names(ordered_var_values))) { + stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.") + } + # Save the indices to reorder back the ordered variable values. + # This will be used to define the first round indices. 
+ unorder <- sort(ordered_var_values$ix, index.return = TRUE)$ix + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars_ordered[[var_to_read]] <- do.call('[<-', + c(list(x = picked_common_vars_ordered[[var_to_read]]), + var_store_indices, + list(value = ordered_var_values$x))) + picked_common_vars_unorder_indices[[var_to_read]] <- do.call('[<-', + c(list(x = picked_common_vars_unorder_indices[[var_to_read]]), + var_store_indices, + list(value = unorder))) + } else { + picked_vars_ordered[[i]][[var_to_read]] <- do.call('[<-', + c(list(x = picked_vars_ordered[[i]][[var_to_read]]), + var_store_indices, + list(value = ordered_var_values$x))) + picked_vars_unorder_indices[[i]][[var_to_read]] <- do.call('[<-', + c(list(x = picked_vars_unorder_indices[[i]][[var_to_read]]), + var_store_indices, + list(value = unorder))) + } + } + } + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars[[var_to_read]] <- do.call('[<-', + c(list(x = picked_common_vars[[var_to_read]]), + var_store_indices, + list(value = var_values))) + } else { + picked_vars[[i]][[var_to_read]] <- do.call('[<-', + c(list(x = picked_vars[[i]][[var_to_read]]), + var_store_indices, + list(value = var_values))) + } + if (var_to_read %in% names(first_found_file)) { + first_found_file[var_to_read] <- TRUE + } + if (var_to_read %in% names(common_first_found_file)) { + common_first_found_file[var_to_read] <- TRUE + } + } else { + stop("Could not find variable '", var_to_read, + "' in the file ", array_of_var_files[j]) + } + } + file_closer(file_object) + } + } + previous_indices <- current_indices + } + } + } + # Once we have the variable values, we can work out the indices + # for the implicitly defined selectors. + # + # Trnasforms a vector of indices v expressed in a world of + # length N from 1 to N, into a world of length M, from + # 1 to M. Repeated adjacent indices are collapsed. + transform_indices <- function(v, n, m) { + #unique2 turns e.g. 
1 1 2 2 2 3 3 1 1 1 into 1 2 3 1 + unique2 <- function(v) { + if (length(v) < 2) { + v + } else { + v[c(1, v[2:length(v)] - v[1:(length(v) - 1)]) != 0] + } + } + unique2(round(((v - 1) / (n - 1)) * (m - 1))) + 1 # this rounding may generate 0s. what then? + } + beta <- transform_extra_cells + dims_to_crop <- vector('list') + transformed_vars <- vector('list', length = length(dat)) + names(transformed_vars) <- dat_names + transformed_vars_ordered <- transformed_vars + transformed_vars_unorder_indices <- transformed_vars + transformed_common_vars <- NULL + transformed_common_vars_ordered <- NULL + transformed_common_vars_unorder_indices <- NULL + + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + indices <- indices_of_first_files_with_data[[i]] + if (!is.null(indices)) { + file_path <- do.call("[", c(list(array_of_files_to_load), as.list(indices_of_first_files_with_data[[i]]))) + # The following 5 lines should go several lines below, but were moved + # here for better performance. + # If any of the dimensions comes without defining variable, then we read + # the data dimensions. + data_dims <- NULL + if (length(unlist(var_params[expected_inner_dims[[i]]])) < length(expected_inner_dims[[i]])) { + file_to_open <- file_path + data_dims <- file_dim_reader(file_to_open, NULL, selectors_of_first_files_with_data[[i]], + lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1), + synonims) + # file_dim_reader returns dimension names as found in the file. + # Need to translate accoridng to synonims: + names(data_dims) <- sapply(names(data_dims), + function(x) { + which_entry <- which(sapply(synonims, function(y) x %in% y)) + if (length(which_entry) > 0) { + names(synonims)[which_entry] + } else { + x + } + }) + } + # Transform the variables if needed and keep them apart. 
+ if (!is.null(transform) && (length(transform_vars) > 0)) { + if (!all(transform_vars %in% c(names(picked_vars[[i]]), names(picked_common_vars)))) { + stop("Could not find all the required variables in 'transform_vars' ", + "for the dataset '", dat[[i]][['name']], "'.") + } + vars_to_transform <- NULL + picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars) + if (length(picked_vars_to_transform) > 0) { + picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform] + new_vars_to_transform <- picked_vars[[i]][picked_vars_to_transform] + which_are_ordered <- which(!sapply(picked_vars_ordered[[i]][picked_vars_to_transform], is.null)) + +##NOTE: The following 'if' replaces the original with reordering vector + if (length(which_are_ordered) > 0) { + tmp <- which(!is.na(match(names(picked_vars_ordered[[i]]), names(which_are_ordered)))) + new_vars_to_transform[which_are_ordered] <- picked_vars_ordered[[i]][tmp] + + } + vars_to_transform <- c(vars_to_transform, new_vars_to_transform) + } + +##NOTE: Above is non-common vars, here is common vars (ie, return_vars = NULL). 
+ picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars) + if (length(picked_common_vars_to_transform) > 0) { + picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform] + + new_vars_to_transform <- picked_common_vars[picked_common_vars_to_transform] + which_are_ordered <- which(!sapply(picked_common_vars_ordered[picked_common_vars_to_transform], is.null)) + + if (length(which_are_ordered) > 0) { + + tmp <- which(!is.na(match(names(picked_common_vars_ordered), names(which_are_ordered)))) + new_vars_to_transform[which_are_ordered] <- picked_common_vars_ordered[tmp] + } + vars_to_transform <- c(vars_to_transform, new_vars_to_transform) + } + + # Transform the variables + transformed_data <- do.call(transform, c(list(data_array = NULL, + variables = vars_to_transform, + file_selectors = selectors_of_first_files_with_data[[i]]), + transform_params)) + # Discard the common transformed variables if already transformed before + if (!is.null(transformed_common_vars)) { + common_ones <- which(names(picked_common_vars) %in% names(transformed_data$variables)) + if (length(common_ones) > 0) { + transformed_data$variables <- transformed_data$variables[-common_ones] + } + } + transformed_vars[[i]] <- list() + transformed_vars_ordered[[i]] <- list() + transformed_vars_unorder_indices[[i]] <- list() + # Order the transformed variables if needed + # 'var_to_read' should be 'transformed_var', but is kept to reuse the same code as above. + for (var_to_read in names(transformed_data$variables)) { + if (var_to_read %in% unlist(var_params)) { + associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)] + if ((associated_dim_name %in% names(dim_reorder_params)) && aiat) { + ## Is this check really needed? 
+ if (length(dim(transformed_data$variables[[associated_dim_name]])) > 1) { + stop("Requested a '", associated_dim_name, "_reorder' for a dimension ", + "whose coordinate variable that has more than 1 dimension (after ", + "transform). This is not supported.") + } + ordered_var_values <- dim_reorder_params[[associated_dim_name]](transformed_data$variables[[associated_dim_name]]) + attr(ordered_var_values, 'variables') <- attr(transformed_data$variables[[associated_dim_name]], 'variables') + if (!all(c('x', 'ix') %in% names(ordered_var_values))) { + stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.") + } + # Save the indices to reorder back the ordered variable values. + # This will be used to define the first round indices. + unorder <- sort(ordered_var_values$ix, index.return = TRUE)$ix + if (var_to_read %in% names(picked_common_vars)) { + transformed_common_vars_ordered[[var_to_read]] <- ordered_var_values$x + transformed_common_vars_unorder_indices[[var_to_read]] <- unorder + } else { + transformed_vars_ordered[[i]][[var_to_read]] <- ordered_var_values$x + transformed_vars_unorder_indices[[i]][[var_to_read]] <- unorder + } + } + } + } + transformed_picked_vars <- which(names(picked_vars[[i]]) %in% names(transformed_data$variables)) + if (length(transformed_picked_vars) > 0) { + transformed_picked_vars <- names(picked_vars[[i]])[transformed_picked_vars] + transformed_vars[[i]][transformed_picked_vars] <- transformed_data$variables[transformed_picked_vars] + } + if (is.null(transformed_common_vars)) { + transformed_picked_common_vars <- which(names(picked_common_vars) %in% names(transformed_data$variables)) + if (length(transformed_picked_common_vars) > 0) { + transformed_picked_common_vars <- names(picked_common_vars)[transformed_picked_common_vars] + transformed_common_vars <- transformed_data$variables[transformed_picked_common_vars] + } + } + } + # Once the variables are transformed, we compute the indices to be + # 
taken for each inner dimension. + # In all cases, indices will have to be computed to know which data + # values to take from the original data for each dimension (if a + # variable is specified for that dimension, it will be used to + # convert the provided selectors into indices). These indices are + # referred to as 'first round of indices'. + # The taken data will then be transformed if needed, together with + # the dimension variable if specified, and, in that case, indices + # will have to be computed again to know which values to take from the + # transformed data. These are the 'second round of indices'. In the + # case there is no transformation, the second round of indices will + # be all the available indices, i.e. from 1 to the number of taken + # values with the first round of indices. + for (inner_dim in expected_inner_dims[[i]]) { +if (debug) { +print("-> DEFINING INDICES FOR INNER DIMENSION:") +print(inner_dim) +} + file_dim <- NULL + if (inner_dim %in% unlist(inner_dims_across_files)) { + file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]] + chunk_amount <- length(dat[[i]][['selectors']][[file_dim]][[1]]) + names(chunk_amount) <- file_dim + } else { + chunk_amount <- 1 + } + # In the special case that the selectors for a dimension are 'all', 'first', ... + # and chunking (dividing in more than 1 chunk) is requested, the selectors are + # replaced for equivalent indices. 
+ if ((dat[[i]][['selectors']][[inner_dim]][[1]] %in% c('all', 'first', 'last')) && + (chunks[[inner_dim]]['n_chunks'] != 1)) { + selectors <- dat[[i]][['selectors']][[inner_dim]][[1]] + if (selectors == 'all') { + selectors <- indices(1:(data_dims[[inner_dim]] * chunk_amount)) + } else if (selectors == 'first') { + selectors <- indices(1) + } else { + selectors <- indices(data_dims[[inner_dim]] * chunk_amount) + } + dat[[i]][['selectors']][[inner_dim]][[1]] <- selectors + } + # The selectors for the inner dimension are taken. + selector_array <- dat[[i]][['selectors']][[inner_dim]][[1]] +if (debug) { +if (inner_dim %in% dims_to_check) { +print(paste0("-> DEBUG MESSAGES FOR THE DATASET", i, " AND INNER DIMENSION '", inner_dim, "':")) +print("-> STRUCTURE OF SELECTOR ARRAY:") +print(str(selector_array)) +print("-> PICKED VARS:") +print(picked_vars) +print("-> TRANSFORMED VARS:") +print(transformed_vars) +} +} + if (is.null(dim(selector_array))) { + dim(selector_array) <- length(selector_array) + } + if (is.null(names(dim(selector_array)))) { + if (length(dim(selector_array)) == 1) { + names(dim(selector_array)) <- inner_dim + } else { + stop("Provided selector arrays must be provided with dimension ", + "names. 
Found an array of selectors without dimension names ", + "for the dimension '", inner_dim, "'.") + } + } + selectors_are_indices <- FALSE + if (!is.null(attr(selector_array, 'indices'))) { + if (!is.logical(attr(selector_array, 'indices'))) { + stop("The atribute 'indices' for the selectors for the dimension '", + inner_dim, "' must be TRUE or FALSE.") + } + selectors_are_indices <- attr(selector_array, 'indices') + } + taken_chunks <- rep(FALSE, chunk_amount) + selector_file_dims <- 1 + if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) { + selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])] + } + selector_inner_dims <- dim(selector_array)[which(!(names(dim(selector_array)) %in% found_file_dims[[i]]))] + var_with_selectors <- NULL + var_with_selectors_name <- var_params[[inner_dim]] + var_ordered <- NULL + var_unorder_indices <- NULL + with_transform <- FALSE + # If the selectors come with an associated variable + if (!is.null(var_with_selectors_name)) { + if ((var_with_selectors_name %in% transform_vars) && (!is.null(transform))) { + with_transform <- TRUE + if (!is.null(file_dim)) { + stop("Requested a transformation over the dimension '", + inner_dim, "', wich goes across files. This feature ", + "is not supported. 
Either do the request without the ", + "transformation or request it over dimensions that do ", + "not go across files.") + } + } +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> NAME OF THE VARIABLE WITH THE SELECTOR VALUES FOR THE CURRENT INNER DIMENSION:") +print(var_with_selectors_name) +print("-> NAMES OF THE VARIABLES TO BE TRANSFORMED:") +print(transform_vars) +print("-> STRUCTURE OF THE TRANSFORMATION FUNCTION:") +print(str(transform)) +} +} + if (var_with_selectors_name %in% names(picked_vars[[i]])) { + var_with_selectors <- picked_vars[[i]][[var_with_selectors_name]] + var_ordered <- picked_vars_ordered[[i]][[var_with_selectors_name]] + var_unorder_indices <- picked_vars_unorder_indices[[i]][[var_with_selectors_name]] + } else if (var_with_selectors_name %in% names(picked_common_vars)) { + var_with_selectors <- picked_common_vars[[var_with_selectors_name]] + var_ordered <- picked_common_vars_ordered[[var_with_selectors_name]] + var_unorder_indices <- picked_common_vars_unorder_indices[[var_with_selectors_name]] + } + n <- prod(dim(var_with_selectors)) + if (is.null(var_unorder_indices)) { + var_unorder_indices <- 1:n + } + if (with_transform) { + if (var_with_selectors_name %in% names(transformed_vars[[i]])) { + m <- prod(dim(transformed_vars[[i]][[var_with_selectors_name]])) + if (aiat) { + var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]] + var_ordered <- transformed_vars_ordered[[i]][[var_with_selectors_name]] + var_unorder_indices <- transformed_vars_unorder_indices[[i]][[var_with_selectors_name]] + } + } else if (var_with_selectors_name %in% names(transformed_common_vars)) { + m <- prod(dim(transformed_common_vars[[var_with_selectors_name]])) + if (aiat) { + var_with_selectors <- transformed_common_vars[[var_with_selectors_name]] + var_ordered <- transformed_common_vars_ordered[[var_with_selectors_name]] + var_unorder_indices <- transformed_common_vars_unorder_indices[[var_with_selectors_name]] + } + } + if 
(is.null(var_unorder_indices)) { + var_unorder_indices <- 1:m + } + } +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> SIZE OF ORIGINAL VARIABLE:") +print(n) +print("-> SIZE OF TRANSFORMED VARIABLE:") +if (with_transform) print(m) +print("-> STRUCTURE OF ORDERED VAR:") +print(str(var_ordered)) +print("-> UNORDER INDICES:") +print(var_unorder_indices) +} +} + var_dims <- dim(var_with_selectors) + var_file_dims <- 1 + if (any(names(var_dims) %in% found_file_dims[[i]])) { + if (with_transform) { + stop("Requested transformation for inner dimension '", + inner_dim, "' but provided selectors for such dimension ", + "over one or more file dimensions. This is not ", + "supported. Either request no transformation for the ", + "dimension '", inner_dim, "' or specify the ", + "selectors for this dimension without the file dimensions.") + } + var_file_dims <- var_dims[which(names(var_dims) %in% found_file_dims[[i]])] + var_dims <- var_dims[-which(names(var_dims) %in% found_file_dims[[i]])] + } +## # Keep the selectors if they correspond to a variable that will be transformed. 
+## if (with_transform) { +## if (var_with_selectors_name %in% names(picked_vars[[i]])) { +## transformed_var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]] +## } else if (var_with_selectors_name %in% names(picked_common_vars)) { +## transformed_var_with_selectors <- transformed_common_vars[[var_with_selectors_name]] +## } +## transformed_var_dims <- dim(transformed_var_with_selectors) +## transformed_var_file_dims <- 1 +## if (any(names(transformed_var_dims) %in% found_file_dims[[i]])) { +## transformed_var_file_dims <- transformed_var_dims[which(names(transformed_var_dims) %in% found_file_dims[[i]])] +## transformed_var_dims <- tranasformed_var_dims[-which(names(transformed_var_dims) %in% found_file_dims[[i]])] +## } +##if (inner_dim %in% dims_to_check) { +##print("111m") +##print(str(transformed_var_dims)) +##} +## +## m <- prod(transformed_var_dims) +## } + # Work out var file dims and inner dims. + if (inner_dim %in% unlist(inner_dims_across_files)) { + #TODO: if (chunk_amount != number of chunks in selector_file_dims), crash + if (length(var_dims) > 1) { + stop("Specified a '", inner_dim, "_var' for the dimension '", + inner_dim, "', which goes across files (across '", file_dim, + "'). The specified variable, '", var_with_selectors_name, "', has more ", + "than one dimension and can not be used as selector variable. ", + "Select another variable or fix it in the files.") + } + } + ## TODO HERE:: + #- indices_of_first_files_with_data may change, because array is now extended + var_full_dims <- dim(var_with_selectors) + if (!(inner_dim %in% names(var_full_dims))) { + stop("Could not find the dimension '", inner_dim, "' in ", + "the file. 
Either change the dimension name in ", + "your request, adjust the parameter ", + "'dim_names_in_files' or fix the dimension name in ", + "the file.\n", file_path) + } + } else if (((is.numeric(selector_array) || is.list(selector_array)) && selectors_are_indices) || + (is.character(selector_array) && (length(selector_array) == 1) && + (selector_array %in% c('all', 'first', 'last')) && + !is.null(file_dim_reader))) { + #### TODO HERE:: + ###- indices_of_first_files_with_data may change, because array is now extended + # Lines moved above for better performance. + ##data_dims <- file_dim_reader(file_path, NULL, selectors_of_first_files_with_data[[i]], + ## lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1)) + if (!(inner_dim %in% names(data_dims))) { + stop("Could not find the dimension '", inner_dim, "' in ", + "the file. Either change the dimension name in ", + "your request, adjust the parameter ", + "'dim_names_in_files' or fix the dimension name in ", + "the file.\n", file_path) + } + } else { + stop(paste0("Can not translate the provided selectors for '", inner_dim, + "' to numeric indices. Provide numeric indices and a ", + "'file_dim_reader' function, or a '", inner_dim, + "_var' in order to calculate the indices.")) + } + # At this point, if no selector variable was provided, the variable + # data_dims has been populated. If a selector variable was provided, + # the variables var_dims, var_file_dims and var_full_dims have been + # populated instead. + fri <- first_round_indices <- NULL + sri <- second_round_indices <- NULL + # This variable will keep the indices needed to crop the transformed + # variable (the one that has been transformed without being subset + # with the first round indices). 
+ tvi <- tranaformed_variable_indices <- NULL + ordered_fri <- NULL + ordered_sri <- NULL + if ((length(selector_array) == 1) && is.character(selector_array) && + (selector_array %in% c('all', 'first', 'last')) && + (chunks[[inner_dim]]['n_chunks'] == 1)) { + if (is.null(var_with_selectors_name)) { + fri <- vector('list', length = chunk_amount) + dim(fri) <- c(chunk_amount) + sri <- vector('list', length = chunk_amount) + dim(sri) <- c(chunk_amount) + if (selector_array == 'all') { + fri[] <- replicate(chunk_amount, list(1:(data_dims[inner_dim]))) + taken_chunks <- rep(TRUE, chunk_amount) + #sri <- NULL + } else if (selector_array == 'first') { + fri[[1]] <- 1 + taken_chunks[1] <- TRUE + #sri <- NULL + } else if (selector_array == 'last') { + fri[[chunk_amount]] <- data_dims[inner_dim] + taken_chunks[length(taken_chunks)] <- TRUE + #sri <- NULL + } + } else { + if ((!is.null(file_dim)) && !(file_dim %in% names(var_file_dims))) { + stop("The variable '", var_with_selectors_name, "' must also be ", + "requested for the file dimension '", file_dim, "' in ", + "this configuration.") + } + fri <- vector('list', length = prod(var_file_dims)) + dim(fri) <- var_file_dims + ordered_fri <- fri + sri <- vector('list', length = prod(var_file_dims)) + dim(sri) <- var_file_dims + ordered_sri <- sri + if (selector_array == 'all') { +# TODO: Populate ordered_fri + ordered_fri[] <- replicate(prod(var_file_dims), list(1:n)) + fri[] <- replicate(prod(var_file_dims), list(var_unorder_indices[1:n])) + taken_chunks <- rep(TRUE, chunk_amount) + if (!with_transform) { + #fri[] <- replicate(prod(var_file_dims), list(1:n)) + #taken_chunks <- rep(TRUE, chunk_amount) + #sri <- NULL + } else { + ordered_sri[] <- replicate(prod(var_file_dims), list(1:m)) + sri[] <- replicate(prod(var_file_dims), list(1:m)) + ## var_file_dims instead?? 
+ #if (!aiat) { + #fri[] <- replicate(prod(var_file_dims), list(1:n)) + #taken_chunks <- rep(TRUE, chunk_amount) + #sri[] <- replicate(prod(transformed_var_file_dims), list(1:m)) + #} else { + #fri[] <- replicate(prod(var_file_dims), list(1:n)) + #taken_chunks <- rep(TRUE, chunk_amount) + #sri[] <- replicate(prod(transformed_var_file_dims), list(1:m)) + #} + tvi <- 1:m + } + } else if (selector_array == 'first') { + taken_chunks[1] <- TRUE + if (!with_transform) { + ordered_fri[[1]] <- 1 + fri[[1]] <- var_unorder_indices[1] + #taken_chunks[1] <- TRUE + #sri <- NULL + } else { + if (!aiat) { + ordered_fri[[1]] <- 1 + fri[[1]] <- var_unorder_indices[1] +# TODO: TO BE IMPROVED + #taken_chunks[1] <- TRUE + ordered_sri[[1]] <- 1:ceiling(m / n) + sri[[1]] <- 1:ceiling(m / n) + tvi <- 1:ceiling(m / n) + } else { + ordered_fri[[1]] <- 1:ceiling(m / n) + fri[[1]] <- var_unorder_indices[1:ceiling(m / n)] + #taken_chunks[1] <- TRUE + ordered_sri[[1]] <- 1 + sri[[1]] <- 1 + tvi <- 1 + } + } + } else if (selector_array == 'last') { + taken_chunks[length(taken_chunks)] <- TRUE + if (!with_transform) { + ordered_fri[[prod(var_file_dims)]] <- n + fri[[prod(var_file_dims)]] <- var_unorder_indices[n] + #taken_chunks[length(taken_chunks)] <- TRUE + #sri <- NULL + } else { + if (!aiat) { + ordered_fri[[prod(var_file_dims)]] <- prod(var_dims) + fri[[prod(var_file_dims)]] <- var_unorder_indices[prod(var_dims)] + #taken_chunks[length(taken_chunks)] <- TRUE + ordered_sri[[prod(var_file_dims)]] <- 1:ceiling(m / n) + sri[[prod(var_file_dims)]] <- 1:ceiling(m / n) +# TODO: TO BE IMPROVED. THE TVI MAY BE WRONG IF THERE'S BEEN A REORDERING. 
+ tvi <- 1:ceiling(m / n) + } else { + ordered_fri[[prod(var_file_dims)]] <- (n - ceiling(m / n) + 1):n + fri[[prod(var_file_dims)]] <- var_unorder_indices[(n - ceiling(m / n) + 1):n] + #taken_chunks[length(taken_chunks)] <- TRUE + ordered_sri[[prod(var_file_dims)]] <- 1 + sri[[prod(var_file_dims)]] <- 1 + tvi <- 1 + } + } + } + } + # If the selectors are not 'all', 'first', 'last', ... + } else { + if (!is.null(var_with_selectors_name)) { + unmatching_file_dims <- which(!(names(var_file_dims) %in% names(selector_file_dims))) + if ((length(unmatching_file_dims) > 0)) { + raise_error <- FALSE + if (is.null(file_dim)) { + raise_error <- TRUE + } else { + if (!((length(unmatching_file_dims) == 1) && + (names(var_file_dims)[unmatching_file_dims] == file_dim) && + (inner_dim %in% names(selector_inner_dims)))) { + raise_error <- TRUE + } + } + if (raise_error) { + stop("Provided selectors for the dimension '", inner_dim, "' must have as many ", + "file dimensions as the variable the dimension is defined along, '", + var_with_selectors_name, "', with the exceptions of the file pattern dimension ('", + found_pattern_dim, "') and any depended file dimension (if specified as ", + "depended dimension in parameter 'inner_dims_across_files' and the ", + "depending file dimension is present in the provided selector array).") + } + } + if (any(names(selector_file_dims) %in% names(dim(var_with_selectors)))) { + if (any(dim(var_with_selectors)[names(selector_file_dims)] != selector_file_dims)) { + stop("Size of selector file dimensions must mach size of requested ", + "variable dimensions.") + } + } + } + ## TODO: If var dimensions are not in the same order as selector dimensions, reorder + if (is.null(names(selector_file_dims))) { + if (is.null(file_dim)) { + fri_dims <- 1 + } else { + fri_dims <- chunk_amount + names(fri_dims) <- file_dim + } + } else { + fri_dim_names <- names(selector_file_dims) + if (!is.null(file_dim)) { + fri_dim_names <- c(fri_dim_names, file_dim) + } + 
fri_dim_names <- found_file_dims[[i]][which(found_file_dims[[i]] %in% fri_dim_names)] + fri_dims <- rep(NA, length(fri_dim_names)) + names(fri_dims) <- fri_dim_names + fri_dims[names(selector_file_dims)] <- selector_file_dims + if (!is.null(file_dim)) { + fri_dims[file_dim] <- chunk_amount + } + } + fri <- vector('list', length = prod(fri_dims)) + dim(fri) <- fri_dims + sri <- vector('list', length = prod(fri_dims)) + dim(sri) <- fri_dims + selector_file_dim_array <- array(1:prod(selector_file_dims), dim = selector_file_dims) + selector_store_position <- fri_dims + for (j in 1:prod(dim(selector_file_dim_array))) { + selector_indices_to_take <- which(selector_file_dim_array == j, arr.ind = TRUE)[1, ] + names(selector_indices_to_take) <- names(selector_file_dims) + selector_store_position[names(selector_indices_to_take)] <- selector_indices_to_take + sub_array_of_selectors <- Subset(selector_array, names(selector_indices_to_take), + as.list(selector_indices_to_take), drop = 'selected') +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> ITERATING OVER FILE DIMENSIONS OF THE SELECTORS.") +print("-> STRUCTURE OF A SUB ARRAY:") +print(str(sub_array_of_selectors)) +print("-> STRUCTURE OF THE VARIABLE WITH SELECTORS:") +print(str(var_with_selectors)) +print(dim(var_with_selectors)) +} +} + if (selectors_are_indices) { + sub_array_of_values <- NULL + #} else if (!is.null(var_ordered)) { + # sub_array_of_values <- var_ordered + } else { + if (length(var_file_dims) > 0) { + var_indices_to_take <- selector_indices_to_take[which(names(selector_indices_to_take) %in% names(var_file_dims))] + sub_array_of_values <- Subset(var_with_selectors, names(var_indices_to_take), + as.list(var_indices_to_take), drop = 'selected') + } else { + sub_array_of_values <- var_with_selectors + } + } +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> STRUCTURE OF THE SUB ARRAY FROM THE VARIABLE CORRESPONDING TO THE SUB ARRAY OF SELECTORS") +print(str(sub_array_of_values)) 
+print(dim(sub_array_of_values)) +print("-> NAME OF THE FILE DIMENSION THE CURRENT INNER DIMENSION EXTENDS ALONG:") +print(file_dim) +} +} + if ((!is.null(file_dim) && (file_dim %in% names(selector_file_dims))) || is.null(file_dim)) { + if (length(sub_array_of_selectors) > 0) { +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> THE INNER DIMENSION DOES NOT GO ACROSS ANY FILE DIMENSION OR IT DOES BUT IS IN THE PROVIDED SELECTOR ARRAY.") +} +} + if (selectors_are_indices) { + if (!is.null(var_with_selectors_name)) { + max_allowed <- ifelse(aiat, m, n) + } else { + max_allowed <- data_dims[inner_dim] + } + if (any(na.omit(unlist(sub_array_of_selectors)) > max_allowed) || + any(na.omit(unlist(sub_array_of_selectors)) < 1)) { + stop("Provided indices out of range for dimension '", inner_dim, "' ", + "for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ", + max_allowed, ").") + } + } + + # The selector_checker will return either a vector of indices or a list + # with the first and last desired indices. + goes_across_prime_meridian <- FALSE + if (!is.null(var_ordered) && !selectors_are_indices) { + if (!is.null(dim_reorder_params[[inner_dim]])) { + if (is.list(sub_array_of_selectors)) { + +## NOTE: The check of 'goes_across_prime_meridian' is moved forward to here. + is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular") + if (!is.null(is_circular_dim)) { + if (is_circular_dim) { + +# NOTE: Use CircularSort() to put the values in the assigned range, and get the order. +# For example, [-10, 20] in CircularSort(0, 360) is [350, 20]. The $ix list is [2, 1]. +# 'goes_across_prime_meridian' means the selector range across the border. For example, +# CircularSort(-180, 180) with selector [170, 190] -> goes_across_prime_meridian = TRUE. + tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix + goes_across_prime_meridian <- tmp[1] > tmp[2] + } + } + + # HERE change to the same code as below (under 'else'). 
Not sure why originally + #it uses additional lines, which make reorder not work. + sub_array_of_selectors <- as.list(dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$x) + #sub_array_reordered <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors)) + #sub_array_unorder <- sort(sub_array_reordered$ix, index.return = TRUE)$ix + #sub_array_of_selectors <- as.list(sub_array_reordered$x[sub_array_unorder]) + +# Add warning if the boundary is out of range + if (sub_array_of_selectors[1] < range(var_ordered)[1] | sub_array_of_selectors[1] > range(var_ordered)[2]) { + .warning(paste0("The lower boundary of selector of ", + inner_dim, + " is out of range [", + min(var_ordered), ", ", max(var_ordered), "]. ", + "Check if the desired range is all included.")) + } + if (sub_array_of_selectors[2] < range(var_ordered)[1] | sub_array_of_selectors[2] > range(var_ordered)[2]) { + .warning(paste0("The upper boundary of selector of ", + inner_dim, + " is out of range [", + min(var_ordered), ", ", max(var_ordered), "]. ", + "Check if the desired range is all included.")) + } + + + } else { + sub_array_of_selectors <- dim_reorder_params[[inner_dim]](sub_array_of_selectors)$x + } + } + +# NOTE: The ideal solution for selecting indices in goes_across_prime_meridian case +# is modified SelectorCheckor.R. But now SelectorCheckor doesn't know the info of +#goes_across_prime_meridian, so I do the adjustion after calling SelectorCheckor(). + sub_array_of_indices <- selector_checker(sub_array_of_selectors, var_ordered, + tolerance = if (aiat) { + NULL + } else { + tolerance_params[[inner_dim]] + }) + + if (goes_across_prime_meridian & sub_array_of_indices[[1]] < sub_array_of_indices[[2]]) { + if (!(sub_array_of_selectors[[1]] %in% var_ordered)){ + sub_array_of_indices[[1]] <- sub_array_of_indices[[1]] - 1 + } + + if (!(sub_array_of_selectors[[2]] %in% var_ordered)){ + sub_array_of_indices[[2]] <- sub_array_of_indices[[2]] + 1 + } + } + +#NOTE: the possible case? 
+ if (goes_across_prime_meridian & sub_array_of_indices[[1]] > sub_array_of_indices[[2]]) { + .stop("The case is goes_across_prime_meridian but no adjustion for the indices!") + } + + if (any(is.na(sub_array_of_indices))) { + + stop(paste0("The selectors of ", inner_dim, + " are out of range [", min(var_ordered), + ", ", max(var_ordered), "].")) + } + + } else { + +# Add warning if the boundary is out of range + if (is.list(sub_array_of_selectors)) { + if (sub_array_of_selectors[1] < + min(sub_array_of_values) | sub_array_of_selectors[1] > + max(sub_array_of_values)) { + .warning(paste0("The lower boundary of selector of ", + inner_dim, " is out of range [", + min(sub_array_of_values), ", ", + max(sub_array_of_values), "]. ", + "Check if the desired range is all included.")) + } + if (sub_array_of_selectors[2] < + min(sub_array_of_values) | sub_array_of_selectors[2] > + max(sub_array_of_values)) { + .warning(paste0("The upper boundary of selector of ", + inner_dim, " is out of range [", + min(sub_array_of_values), ", ", + max(sub_array_of_values), "]. ", + "Check if the desired range is all included.")) + } + } + + sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values, + tolerance = if (aiat) { + NULL + } else { + tolerance_params[[inner_dim]] + }) + + if (any(is.na(sub_array_of_indices))) { + + stop(paste0("The selectors of ", inner_dim, + " are out of range [", min(sub_array_of_values), + ", ", max(sub_array_of_values), "].")) + } + + } + ## This 'if' runs in both Start() and Compute(). In Start(), it doesn't have any effect (no chunk). + ## In Compute(), it creates the indices for each chunk. For example, if 'sub_array_of_indices' + ## is c(5:10) and chunked into 2, 'sub_array_of_indices' becomes c(5:7) for chunk = 1, c(8:10) + ## for chunk = 2. If 'sub_array_of_indices' is list(55, 62) and chunked into 2, it becomes + ## list(55, 58) for chunk = 1 and list(59, 62) for chunk = 2. + ## TODO: The list can be turned into vector here? 
So afterward no need to judge if it is list + ## or vector. + if (!is.list(sub_array_of_indices)) { + sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices), + chunks[[inner_dim]]["chunk"], + chunks[[inner_dim]]["n_chunks"], + inner_dim)] + } else { + tmp <- chunk_indices(length(sub_array_of_indices[[1]]:sub_array_of_indices[[2]]), + chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"], + inner_dim) + vect <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + sub_array_of_indices[[1]] <- vect[tmp[1]] + sub_array_of_indices[[2]] <- vect[tmp[length(tmp)]] + } + # The sub_array_of_indices now contains numeric indices of the values to be taken by each chunk. + +# Check if all the files have the selectors assigned (e.g., region = 'Grnland') _20191015 +if (is.character(sub_array_of_selectors)) { + array_of_var_files_check <- vector('list', length(selector_indices)) + for (k in 1:length(selector_indices)) { + asdasd <- selector_indices[[k]] + array_of_var_files_check <- do.call('[', c(list(x = array_of_files_to_load), asdasd, list(drop = FALSE)))[j] + file_object <- file_opener(array_of_var_files_check) + var_values_check <- file_var_reader(NULL, file_object, NULL, var_to_read, synonims) + if (any(as.vector(var_values_check)[sub_array_of_indices] != sub_array_of_selectors)) { + .warning('Not all the files has correponding selectors. Check the selector attributes') + } + } +} + +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> TRANSFORMATION REQUESTED?") +print(with_transform) +print("-> BETA:") +print(beta) +} +} + if (with_transform) { + # If there is a transformation and selector values are provided, these + # selectors will be processed in the same way either if aiat = TRUE or + # aiat = FALSE. +## TODO: If sub_array_of_selectors was integer and aiat then... do what's commented 50 lines below. +## otherwise, do what's coded. 
+if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> SELECTORS REQUESTED BEFORE TRANSFORM.") +} +} + +###NOTE: Here, the transform, is different from the below part of non-transform. +# search 'if (goes_across_prime_meridian' to find the lines below. + if (goes_across_prime_meridian) { +# NOTE: before changing, the return is already correct. + +#NOTE: The fix below has the same explanation as no with_transform part below. +# Search the next next 'if (goes_across_prime_meridian) {'. + if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) { + # global longitude + sub_array_of_fri <- 1:n + # Warning if transform_extra_cell != 0 + if (beta != 0) { + .warning(paste0("Adding parameter transform_extra_cells = ", + transform_extra_cells, " to the transformed index excesses ", + "the border. The border index is used for transformation.")) + } + + } else { + # normal case, i.e., not global + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + gap_width <- last_index - first_index - 1 + sub_array_of_fri <- c(1:(min(unlist(sub_array_of_indices)) + min(gap_width, beta)), + (max(unlist(sub_array_of_indices)) - min(gap_width, beta)):n) + + if (min(gap_width, beta) != beta) { + .warning(paste0("Adding parameter transform_extra_cells = ", + transform_extra_cells, " to the transformed index excesses ", + "the border. The border index is used for transformation.")) + } + } + + } else { + #NOTE: This if seems redundant. 
+ if (is.list(sub_array_of_indices)) { + sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + } + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + + start_padding <- min(beta, first_index - 1) + end_padding <- min(beta, n - last_index) + sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding) + if (start_padding != beta | end_padding != beta) { + .warning(paste0("Adding parameter transform_extra_cells = ", + transform_extra_cells, " to the transformed index excesses ", + "the border. The border index is used for transformation.")) + } + + } + subset_vars_to_transform <- vars_to_transform + if (!is.null(var_ordered)) { + +##NOTE: if var_ordered is common_vars, it doesn't have attributes and it is a vector. +## Turn it into array and add dimension name. + if (!is.array(var_ordered)) { + var_ordered <- as.array(var_ordered) + names(dim(var_ordered)) <- inner_dim + } + + subset_vars_to_transform[[var_with_selectors_name]] <- Subset(var_ordered, inner_dim, sub_array_of_fri) + } else { +##NOTE: It should be redundant because without reordering the var should remain array +## But just stay same with above... + if (!is.array(sub_array_of_values)) { + sub_array_of_values <- as.array(sub_array_of_values) + names(dim(sub_array_of_values)) <- inner_dim + } + + subset_vars_to_transform[[var_with_selectors_name]] <- Subset(sub_array_of_values, inner_dim, sub_array_of_fri) + } + +# Change the order of longitude crop if no reorder + from big to small. +# cdo -sellonlatbox, the lon is west, east (while lat can be north +# to south or opposite) + +# Before changing crop, first we need to find the name of longitude. +# NOTE: The potential bug here (also the bug for CDORemapper): the lon name +# is limited (only the ones listed in .KnownLonNames() are available. 
+ known_lon_names <- s2dverification:::.KnownLonNames() + lon_name <- names(subset_vars_to_transform)[which(names(subset_vars_to_transform) %in% known_lon_names)[1]] + +# NOTE: The cases not considered: (1) if lon reorder(decreasing = T) +# It doesn't make sense, but if someone uses it, here should +# occur error. (2) crop = TRUE/FALSE + if ('crop' %in% names(transform_params) & var_with_selectors_name == lon_name & is.null(dim_reorder_params[[inner_dim]])) { + if (is.numeric(class(transform_params$crop))) { + if (transform_params$crop[1] > transform_params$crop[2]) { + tmp <- transform_params$crop[1] + transform_params$crop[1] <- transform_params$crop[2] + transform_params$crop[2] <- tmp + } + } + } + + transformed_subset_var <- do.call(transform, c(list(data_array = NULL, + variables = subset_vars_to_transform, + file_selectors = selectors_of_first_files_with_data[[i]]), + transform_params))$variables[[var_with_selectors_name]] + # Sorting the transformed variable and working out the indices again after transform. + if (!is.null(dim_reorder_params[[inner_dim]])) { + transformed_subset_var_reorder <- dim_reorder_params[[inner_dim]](transformed_subset_var) + transformed_subset_var <- transformed_subset_var_reorder$x +#NOTE: The fix here solves the mis-ordered lon when across_meridian. + transformed_subset_var_unorder <- transformed_subset_var_reorder$ix +# transformed_subset_var_unorder <- sort(transformed_subset_var_reorder$ix, index.return = TRUE)$ix + } else { + transformed_subset_var_unorder <- 1:length(transformed_subset_var) + } + sub_array_of_sri <- selector_checker(sub_array_of_selectors, transformed_subset_var, + tolerance = if (aiat) { + tolerance_params[[inner_dim]] + } else { + NULL + }) + +# Check if selectors fall out of the range of the transform grid +# It may happen when original lon is [-180, 180] while want to regrid to +# [0, 360], and lon selector = [-20, -10]. 
+ if (any(is.na(sub_array_of_sri))) { + stop(paste0("The selectors of ", + inner_dim, " are out of range of transform grid '", + transform_params$grid, "'. Use parameter '", + inner_dim, "_reorder' or change ", inner_dim, + " selectors.")) + } + + if (goes_across_prime_meridian) { + + if (sub_array_of_sri[[1]] == sub_array_of_sri[[2]]) { + # global longitude + sub_array_of_sri <- c(1:length(transformed_subset_var)) + } else { + # the common case, i.e., non-global + # NOTE: Because sub_array_of_sri order is exchanged due to + # previous development, here [[1]] and [[2]] should exchange + sub_array_of_sri <- c(1:sub_array_of_sri[[1]], + sub_array_of_sri[[2]]:length(transformed_subset_var)) + } + + } else if (is.list(sub_array_of_sri)) { + sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]] + } + ordered_sri <- sub_array_of_sri + sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri] + # In this case, the tvi are not defined and the 'transformed_subset_var' + # will be taken instead of the var transformed before in the code. 
+if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> FIRST INDEX:") +print(first_index) +print("-> LAST INDEX:") +print(last_index) +print("-> STRUCTURE OF FIRST ROUND INDICES:") +print(str(sub_array_of_fri)) +print("-> STRUCTURE OF SECOND ROUND INDICES:") +print(str(sub_array_of_sri)) +print("-> STRUCTURE OF TRANSFORMED VARIABLE INDICES:") +print(str(tvi)) +} +} +### # If the selectors are expressed after transformation +### } else { +###if (debug) { +###if (inner_dim %in% dims_to_check) { +###print("-> SELECTORS REQUESTED AFTER TRANSFORM.") +###} +###} +### if (goes_across_prime_meridian) { +### sub_array_of_indices <- c(sub_array_of_indices[[1]]:m, +### 1:sub_array_of_indices[[2]]) +### } +### first_index <- min(unlist(sub_array_of_indices)) +### last_index <- max(unlist(sub_array_of_indices)) +### first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1) +### last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n) +### sub_array_of_fri <- first_index_before_transform:last_index_before_transform +### n_of_extra_cells <- round(beta / n * m) +### if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) { +### sub_array_of_sri <- 1:(last_index - first_index + 1) +### if (is.null(tvi)) { +### tvi <- sub_array_of_sri + first_index - 1 +### } +### } else { +### sub_array_of_sri <- sub_array_of_indices - first_index + 1 +### if (is.null(tvi)) { +### tvi <- sub_array_of_indices +### } +### } +### sub_array_of_sri <- sub_array_of_sri + n_of_extra_cells + sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position), + list(value = sub_array_of_sri))) + } else { + if (goes_across_prime_meridian) { +#NOTE: The potential problem here is, if it is global longitude, +# and the indices overlap (e.g., lon = [0, 359.723] and +# CircularSort(-180, 180), then sub_array_of_indices = list(649, 649)). +# Therefore, sub_array_of_fri will be c(1:649, 649:1296). We'll +# get two 649. 
+# The fix below may not be the best solution, but it works for +# the example above. + + if (sub_array_of_indices[[1]] == sub_array_of_indices[[2]]) { + # global longitude + sub_array_of_fri <- c(1:n) + } else { + # the common case, i.e., non-global + sub_array_of_fri <- c(1:min(unlist(sub_array_of_indices)), + max(unlist(sub_array_of_indices)):n) + } + + } else if (is.list(sub_array_of_indices)) { + sub_array_of_fri <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + } else { + sub_array_of_fri <- sub_array_of_indices + } + } + if (!is.null(var_unorder_indices)) { + if (is.null(ordered_fri)) { + ordered_fri <- sub_array_of_fri + } + sub_array_of_fri <- var_unorder_indices[sub_array_of_fri] + } + fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position), + list(value = sub_array_of_fri))) + if (!is.null(file_dim)) { + taken_chunks[selector_store_position[[file_dim]]] <- TRUE + } else { + taken_chunks <- TRUE + } + } + } else { +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.") +} +} + if (inner_dim %in% names(dim(sub_array_of_selectors))) { + if (is.null(var_with_selectors_name)) { + if (any(na.omit(unlist(sub_array_of_selectors)) < 1) || + any(na.omit(unlist(sub_array_of_selectors)) > data_dims[inner_dim] * chunk_amount)) { + stop("Provided indices out of range for dimension '", inner_dim, "' ", + "for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ", + data_dims[inner_dim] * chunk_amount, ").") + } + } else { + if (inner_dim %in% names(dim(sub_array_of_values))) { + inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_values)) == inner_dim) + if (inner_dim_pos_in_sub_array != 1) { + new_sub_array_order <- (1:length(dim(sub_array_of_values)))[-inner_dim_pos_in_sub_array] + new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order) + sub_array_of_values <- .aperm2(sub_array_of_values, new_sub_array_order) + } + } + } + inner_dim_pos_in_sub_array <- 
which(names(dim(sub_array_of_selectors)) == inner_dim) + if (inner_dim_pos_in_sub_array != 1) { + new_sub_array_order <- (1:length(dim(sub_array_of_selectors)))[-inner_dim_pos_in_sub_array] + new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order) + sub_array_of_selectors <- .aperm2(sub_array_of_selectors, new_sub_array_order) + } + sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values, + tolerance = tolerance_params[[inner_dim]]) + # It is needed to expand the indices here, otherwise for + # values(list(date1, date2)) only 2 values are picked. + if (is.list(sub_array_of_indices)) { + sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + } + sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices), + chunks[[inner_dim]]['chunk'], + chunks[[inner_dim]]['n_chunks'], + inner_dim)] + sub_array_is_list <- FALSE + if (is.list(sub_array_of_indices)) { + sub_array_is_list <- TRUE + sub_array_of_indices <- unlist(sub_array_of_indices) + } + if (is.null(var_with_selectors_name)) { + indices_chunk <- floor((sub_array_of_indices - 1) / data_dims[inner_dim]) + 1 + transformed_indices <- ((sub_array_of_indices - 1) %% data_dims[inner_dim]) + 1 + } else { + indices_chunk <- floor((sub_array_of_indices - 1) / var_full_dims[inner_dim]) + 1 + transformed_indices <- ((sub_array_of_indices - 1) %% var_full_dims[inner_dim]) + 1 + } + if (sub_array_is_list) { + sub_array_of_indices <- as.list(sub_array_of_indices) + } +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> GOING TO ITERATE ALONG CHUNKS.") +} +} + for (chunk in 1:chunk_amount) { + if (!is.null(names(selector_store_position))) { + selector_store_position[file_dim] <- chunk + } else { + selector_store_position <- chunk + } + chunk_selectors <- transformed_indices[which(indices_chunk == chunk)] + sub_array_of_indices <- chunk_selectors + if (with_transform) { + # If the provided selectors are expressed in the world 
+ # before transformation + if (!aiat) { + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + sub_array_of_fri <- max(c(first_index - beta, 1)):min(c(last_index + beta, n)) + sub_array_of_sri <- transform_indices(unlist(sub_array_of_indices) - first_index + 1, n, m) + if (is.list(sub_array_of_indices)) { + if (length(sub_array_of_sri) > 1) { + sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]] + } + } +##TODO: TRANSFORM SUBSET VARIABLE AS ABOVE, TO COMPUTE SRI + # If the selectors are expressed after transformation + } else { + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1) + last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n) + sub_array_of_fri <- first_index_before_transform:last_index_before_transform + if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) { + sub_array_of_sri <- 1:(last_index - first_index + 1) + + round(beta / n * m) + } else { + sub_array_of_sri <- sub_array_of_indices - first_index + 1 + + round(beta / n * m) + } +##TODO: FILL IN TVI + } + sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position), + list(value = sub_array_of_sri))) + if (length(sub_array_of_sri) > 0) { + taken_chunks[chunk] <- TRUE + } + } else { + sub_array_of_fri <- sub_array_of_indices + if (length(sub_array_of_fri) > 0) { + taken_chunks[chunk] <- TRUE + } + } + if (!is.null(var_unorder_indices)) { + ordered_fri <- sub_array_of_fri + sub_array_of_fri <- var_unorder_indices[sub_array_of_fri] + } + fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position), + list(value = sub_array_of_fri))) + } +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> FINISHED ITERATING ALONG CHUNKS") +} +} + } else { + stop("Provided array of indices for dimension '", inner_dim, "', ", + "which goes across the 
file dimension '", file_dim, "', but ", + "the provided array does not have the dimension '", inner_dim, + "', which is mandatory.") + } + } + } + } +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> PROCEEDING TO CROP VARIABLES") +} +} + #if ((length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last'))) { + #if (!is.null(var_with_selectors_name) || (is.null(var_with_selectors_name) && is.character(selector_array) && + # (length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last')))) { + empty_chunks <- which(!taken_chunks) + if (length(empty_chunks) >= length(taken_chunks)) { + stop("Selectors do not match any of the possible values for the dimension '", inner_dim, "'.") + } + if (length(empty_chunks) > 0) { +# # Get the first group of chunks to remove, and remove them. +# # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 1 and 2 +# dist <- abs(rev(empty_chunks) - c(rev(empty_chunks)[1] - 1, head(rev(empty_chunks), length(rev(empty_chunks)) - 1))) +# if (all(dist == 1)) { +# start_chunks_to_remove <- NULL +# } else { +# first_chunk_to_remove <- tail(which(dist > 1), 1) +# start_chunks_to_remove <- rev(rev(empty_chunks)[first_chunk_to_remove:length(empty_chunks)]) +# } +# # Get the last group of chunks to remove, and remove them. 
+# # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 8 and 9 +# dist <- abs(empty_chunks - c(empty_chunks[1] - 1, head(empty_chunks, length(empty_chunks) - 1))) +# if (all(dist == 1)) { +# first_chunk_to_remove <- 1 +# } else { +# first_chunk_to_remove <- tail(which(dist > 1), 1) +# } +# end_chunks_to_remove <- empty_chunks[first_chunk_to_remove:length(empty_chunks)] +# chunks_to_keep <- which(!((1:length(taken_chunks)) %in% c(start_chunks_to_remove, end_chunks_to_remove))) + chunks_to_keep <- which(taken_chunks) + dims_to_crop[[file_dim]] <- c(dims_to_crop[[file_dim]], list(chunks_to_keep)) +# found_indices <- Subset(found_indices, file_dim, chunks_to_keep) +# # Crop dataset variables file dims. +# for (picked_var in names(picked_vars[[i]])) { +# if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) { +# picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, chunks_to_keep) +# } +# } + } + #} + dat[[i]][['selectors']][[inner_dim]] <- list(fri = fri, sri = sri) + # Crop dataset variables inner dims. + # Crop common variables inner dims. 
+ types_of_var_to_crop <- 'picked' + if (with_transform) { + types_of_var_to_crop <- c(types_of_var_to_crop, 'transformed') + } + if (!is.null(dim_reorder_params[[inner_dim]])) { + types_of_var_to_crop <- c(types_of_var_to_crop, 'reordered') + } + for (type_of_var_to_crop in types_of_var_to_crop) { + if (type_of_var_to_crop == 'transformed') { + if (is.null(tvi)) { + if (!is.null(dim_reorder_params[[inner_dim]])) { + crop_indices <- unique(unlist(ordered_sri)) + } else { + crop_indices <- unique(unlist(sri)) + } + } else { + crop_indices <- unique(unlist(tvi)) + } + vars_to_crop <- transformed_vars[[i]] + common_vars_to_crop <- transformed_common_vars + } else if (type_of_var_to_crop == 'reordered') { + crop_indices <- unique(unlist(ordered_fri)) + vars_to_crop <- picked_vars_ordered[[i]] + common_vars_to_crop <- picked_common_vars_ordered + } else { + crop_indices <- unique(unlist(fri)) + vars_to_crop <- picked_vars[[i]] + common_vars_to_crop <- picked_common_vars + } + for (var_to_crop in names(vars_to_crop)) { + if (inner_dim %in% names(dim(vars_to_crop[[var_to_crop]]))) { + if (!is.null(crop_indices)) { + if (type_of_var_to_crop == 'transformed') { + if (!aiat) { + vars_to_crop[[var_to_crop]] <- Subset(transformed_subset_var, inner_dim, crop_indices) + } else { + vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices) + } + } else { + vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices) + } + } + } + } + if (i == length(dat)) { + for (common_var_to_crop in names(common_vars_to_crop)) { + if (inner_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { + common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices) + } + } + } + if (type_of_var_to_crop == 'transformed') { + if (!is.null(vars_to_crop)) { + transformed_vars[[i]] <- vars_to_crop + } + if (i == length(dat)) { + transformed_common_vars <- common_vars_to_crop 
+ } + } else if (type_of_var_to_crop == 'reordered') { + if (!is.null(vars_to_crop)) { + picked_vars_ordered[[i]] <- vars_to_crop + } + if (i == length(dat)) { + picked_common_vars_ordered <- common_vars_to_crop + } + } else { + if (!is.null(vars_to_crop)) { + picked_vars[[i]] <- vars_to_crop + } + if (i == length(dat)) { + picked_common_vars <- common_vars_to_crop + } + } + } + #} + } + # After the selectors have been picked (using the original variables), + # the variables are transformed. At that point, the original selectors + # for the transformed variables are also kept in the variable original_selectors. +#print("L") + } + } + } +# if (!is.null(transformed_common_vars)) { +# picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars +# } + # Remove the trailing chunks, if any. + for (file_dim in names(dims_to_crop)) { +# indices_to_keep <- min(sapply(dims_to_crop[[file_dim]], min)):max(sapply(dims_to_crop[[file_dim]], max)) + ## TODO: Merge indices in dims_to_crop with some advanced mechanism? 
+ indices_to_keep <- unique(unlist(dims_to_crop[[file_dim]])) + array_of_files_to_load <- Subset(array_of_files_to_load, file_dim, indices_to_keep) + array_of_not_found_files <- Subset(array_of_not_found_files, file_dim, indices_to_keep) + for (i in 1:length(dat)) { + # Crop selectors + for (selector_dim in names(dat[[i]][['selectors']])) { + if (selector_dim == file_dim) { + for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['fri']])) { + dat[[i]][['selectors']][[selector_dim]][['fri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['fri']][[j]][indices_to_keep] + } + for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['sri']])) { + dat[[i]][['selectors']][[selector_dim]][['sri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['sri']][[j]][indices_to_keep] + } + } + if (file_dim %in% names(dim(dat[[i]][['selectors']][[selector_dim]][['fri']]))) { + dat[[i]][['selectors']][[selector_dim]][['fri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['fri']], file_dim, indices_to_keep) + dat[[i]][['selectors']][[selector_dim]][['sri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['sri']], file_dim, indices_to_keep) + } + } + # Crop dataset variables file dims. + for (picked_var in names(picked_vars[[i]])) { + if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) { + picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, indices_to_keep) + } + } + for (transformed_var in names(transformed_vars[[i]])) { + if (file_dim %in% names(dim(transformed_vars[[i]][[transformed_var]]))) { + transformed_vars[[i]][[transformed_var]] <- Subset(transformed_vars[[i]][[transformed_var]], file_dim, indices_to_keep) + } + } + } + # Crop common variables file dims. 
+ for (picked_common_var in names(picked_common_vars)) { + if (file_dim %in% names(dim(picked_common_vars[[picked_common_var]]))) { + picked_common_vars[[picked_common_var]] <- Subset(picked_common_vars[[picked_common_var]], file_dim, indices_to_keep) + } + } + for (transformed_common_var in names(transformed_common_vars)) { + if (file_dim %in% names(dim(transformed_common_vars[[transformed_common_var]]))) { + transformed_common_vars[[transformed_common_var]] <- Subset(transformed_common_vars[[transformed_common_var]], file_dim, indices_to_keep) + } + } + } + # Calculate the size of the final array. + total_inner_dims <- NULL + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + inner_dims <- expected_inner_dims[[i]] + inner_dims <- sapply(inner_dims, + function(x) { + if (!all(sapply(dat[[i]][['selectors']][[x]][['sri']], is.null))) { + max(sapply(dat[[i]][['selectors']][[x]][['sri']], length)) + } else { + if (length(var_params[[x]]) > 0) { + if (var_params[[x]] %in% names(transformed_vars[[i]])) { + length(transformed_vars[[i]][[var_params[[x]]]]) + } else if (var_params[[x]] %in% names(transformed_common_vars)) { + length(transformed_common_vars[[var_params[[x]]]]) + } else { + max(sapply(dat[[i]][['selectors']][[x]][['fri']], length)) + } + } else { + max(sapply(dat[[i]][['selectors']][[x]][['fri']], length)) + } + } + }) + names(inner_dims) <- expected_inner_dims[[i]] + if (is.null(total_inner_dims)) { + total_inner_dims <- inner_dims + } else { + new_dims <- .MergeArrayDims(total_inner_dims, inner_dims) + total_inner_dims <- new_dims[[3]] + } + } + } + new_dims <- .MergeArrayDims(dim(array_of_files_to_load), total_inner_dims) + final_dims <- new_dims[[3]][dim_names] + # final_dims_fake is the vector of final dimensions after having merged the + # 'across' file dimensions with the respective 'across' inner dimensions, and + # after having broken into multiple dimensions those dimensions for which + # multidimensional selectors have been provided. 
+ # final_dims will be used for collocation of data, whereas final_dims_fake + # will be used for shaping the final array to be returned to the user. + final_dims_fake <- final_dims + if (merge_across_dims) { + if (!is.null(inner_dims_across_files)) { + for (file_dim_across in names(inner_dims_across_files)) { + inner_dim_pos <- which(names(final_dims_fake) == inner_dims_across_files[[file_dim_across]]) + new_dims <- c() + if (inner_dim_pos > 1) { + new_dims <- c(new_dims, final_dims_fake[1:(inner_dim_pos - 1)]) + } + new_dims <- c(new_dims, setNames(prod(final_dims_fake[c(inner_dim_pos, inner_dim_pos + 1)]), + inner_dims_across_files[[file_dim_across]])) + if (inner_dim_pos + 1 < length(final_dims_fake)) { + new_dims <- c(new_dims, final_dims_fake[(inner_dim_pos + 2):length(final_dims_fake)]) + } + final_dims_fake <- new_dims + } + } + } + all_split_dims <- NULL + if (split_multiselected_dims) { + for (dim_param in 1:length(dim_params)) { + if (!is.null(dim(dim_params[[dim_param]]))) { + if (length(dim(dim_params[[dim_param]])) > 1) { + split_dims <- dim(dim_params[[dim_param]]) + all_split_dims <- c(all_split_dims, setNames(list(split_dims), + names(dim_params)[dim_param])) + if (is.null(names(split_dims))) { + names(split_dims) <- paste0(names(dim_params)[dim_param], + 1:length(split_dims)) + } + old_dim_pos <- which(names(final_dims_fake) == names(dim_params)[dim_param]) + new_dims <- c() + if (old_dim_pos > 1) { + new_dims <- c(new_dims, final_dims_fake[1:(old_dim_pos - 1)]) + } + new_dims <- c(new_dims, split_dims) + if (old_dim_pos < length(final_dims_fake)) { + new_dims <- c(new_dims, final_dims_fake[(old_dim_pos + 1):length(final_dims_fake)]) + } + final_dims_fake <- new_dims + } + } + } + } + + if (!silent) { + .message("Detected dimension sizes:") + longest_dim_len <- max(sapply(names(final_dims_fake), nchar)) + longest_size_len <- max(sapply(paste0(final_dims_fake, ''), nchar)) + sapply(names(final_dims_fake), + function(x) { + message(paste0("* ", 
paste(rep(' ', longest_dim_len - nchar(x)), collapse = ''), + x, ": ", paste(rep(' ', longest_size_len - nchar(paste0(final_dims_fake[x], ''))), collapse = ''), + final_dims_fake[x])) + }) + bytes <- prod(c(final_dims_fake, 8)) + dim_sizes <- paste(final_dims_fake, collapse = ' x ') + if (retrieve) { + .message(paste("Total size of requested data:")) + } else { + .message(paste("Total size of involved data:")) + } + .message(paste(dim_sizes, " x 8 bytes =", + format(structure(bytes, class = "object_size"), units = "auto")), + indent = 2) + } + + # The following several lines will only be run if retrieve = TRUE + if (retrieve) { +library(parallel) +library(future) +library(bigmemory) + ########## CREATING THE SHARED MATRIX AND DISPATCHING WORK PIECES ########### + # TODO: try performance of storing all in cols instead of rows + # Create the shared memory array, and a pointer to it, to be sent + # to the work pieces. + data_array <- big.matrix(nrow = prod(final_dims), ncol = 1) + shared_matrix_pointer <- describe(data_array) + if (is.null(num_procs)) { + num_procs <- availableCores() + } + # Creating a shared tmp folder to store metadata from each chunk + array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load)) + if (!is.null(metadata_dims)) { + metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load)))) + names(metadata_indices_to_load) <- names(dim(array_of_files_to_load)) + metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims))) + array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags), metadata_indices_to_load, + list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims]))))) + } + metadata_file_counter <- 0 + metadata_folder <- tempfile('metadata') + dir.create(metadata_folder) + # Build the work pieces, each with: + # - file path + # - total size (dims) of store array + # - start position in store array + # - file selectors (to provide extra info. useful e.g. 
to select variable) + # - indices to take from file + work_pieces <- list() + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + selectors <- dat[[i]][['selectors']] + file_dims <- found_file_dims[[i]] + inner_dims <- expected_inner_dims[[i]] + sub_array_dims <- final_dims[file_dims] + sub_array_dims[found_pattern_dim] <- 1 + sub_array_of_files_to_load <- array(1:prod(sub_array_dims), + dim = sub_array_dims) + names(dim(sub_array_of_files_to_load)) <- names(sub_array_dims) + # Detect which of the dimensions of the dataset go across files. + file_dim_across_files <- lapply(inner_dims, + function(x) { + dim_across <- sapply(inner_dims_across_files, function(y) x %in% y) + if (any(dim_across)) { + names(inner_dims_across_files)[which(dim_across)[1]] + } else { + NULL + } + }) + names(file_dim_across_files) <- inner_dims + j <- 1 + while (j <= prod(sub_array_dims)) { + # Work out file path. + file_to_load_sub_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ] + names(file_to_load_sub_indices) <- names(sub_array_dims) + file_to_load_sub_indices[found_pattern_dim] <- i + big_dims <- rep(1, length(dim(array_of_files_to_load))) + names(big_dims) <- names(dim(array_of_files_to_load)) + file_to_load_indices <- .MergeArrayDims(file_to_load_sub_indices, big_dims)[[1]] + file_to_load <- do.call('[[', c(list(array_of_files_to_load), + as.list(file_to_load_indices))) + not_found_file <- do.call('[[', c(list(array_of_not_found_files), + as.list(file_to_load_indices))) + load_file_metadata <- do.call('[', c(list(array_of_metadata_flags), + as.list(file_to_load_indices))) + if (load_file_metadata) { + metadata_file_counter <- metadata_file_counter + 1 + } + if (!is.na(file_to_load) && !not_found_file) { + # Work out indices to take + first_round_indices <- lapply(inner_dims, + function (x) { + if (is.null(file_dim_across_files[[x]])) { + selectors[[x]][['fri']][[1]] + } else { + which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] + 
selectors[[x]][['fri']][[which_chunk]] + } + }) + names(first_round_indices) <- inner_dims + second_round_indices <- lapply(inner_dims, + function (x) { + if (is.null(file_dim_across_files[[x]])) { + selectors[[x]][['sri']][[1]] + } else { + which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] + selectors[[x]][['sri']][[which_chunk]] + } + }) +if (debug) { +print("-> BUILDING A WORK PIECE") +#print(str(selectors)) +} + names(second_round_indices) <- inner_dims + if (!any(sapply(first_round_indices, length) == 0)) { + work_piece <- list() + work_piece[['first_round_indices']] <- first_round_indices + work_piece[['second_round_indices']] <- second_round_indices + work_piece[['file_indices_in_array_of_files']] <- file_to_load_indices + work_piece[['file_path']] <- file_to_load + work_piece[['store_dims']] <- final_dims + # Work out store position + store_position <- final_dims + store_position[names(file_to_load_indices)] <- file_to_load_indices + store_position[inner_dims] <- rep(1, length(inner_dims)) + work_piece[['store_position']] <- store_position + # Work out file selectors + file_selectors <- sapply(file_dims, + function (x) { + vector_to_pick <- 1 + if (x %in% names(depending_file_dims)) { + vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]] + } + selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]] + }) + names(file_selectors) <- file_dims + work_piece[['file_selectors']] <- file_selectors + # Send variables for transformation + if (!is.null(transform) && (length(transform_vars) > 0)) { + vars_to_transform <- NULL + picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars) + if (length(picked_vars_to_transform) > 0) { + picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform] + vars_to_transform <- c(vars_to_transform, picked_vars[[i]][picked_vars_to_transform]) + if (any(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))) { + picked_vars_ordered_to_transform 
<- picked_vars_to_transform[which(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))] + vars_to_transform[picked_vars_ordered_to_transform] <- picked_vars_ordered[[i]][picked_vars_ordered_to_transform] + } + } + picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars) + if (length(picked_common_vars_to_transform) > 0) { + picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform] + vars_to_transform <- c(vars_to_transform, picked_common_vars[picked_common_vars_to_transform]) + if (any(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))) { + picked_common_vars_ordered_to_transform <- picked_common_vars_to_transform[which(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))] + vars_to_transform[picked_common_vars_ordered_to_transform] <- picked_common_vars_ordered[picked_common_vars_ordered_to_transform] + } + } + work_piece[['vars_to_transform']] <- vars_to_transform + } + # Send flag to load metadata + if (load_file_metadata) { + work_piece[['save_metadata_in']] <- paste0(metadata_folder, '/', metadata_file_counter) + } + work_pieces <- c(work_pieces, list(work_piece)) + } + } + j <- j + 1 + } + } + } +#print("N") +if (debug) { +print("-> WORK PIECES BUILT") +} + + # Calculate the progress %s that will be displayed and assign them to + # the appropriate work pieces. 
+ if (length(work_pieces) / num_procs >= 2 && !silent) { + if (length(work_pieces) / num_procs < 10) { + amount <- 100 / ceiling(length(work_pieces) / num_procs) + reps <- ceiling(length(work_pieces) / num_procs) + } else { + amount <- 10 + reps <- 10 + } + progress_steps <- rep(amount, reps) + if (length(work_pieces) < (reps + 1)) { + selected_pieces <- length(work_pieces) + progress_steps <- c(sum(head(progress_steps, reps)), + tail(progress_steps, reps)) + } else { + selected_pieces <- round(seq(1, length(work_pieces), + length.out = reps + 1))[-1] + } + progress_steps <- paste0(' + ', round(progress_steps, 2), '%') + progress_message <- 'Progress: 0%' + } else { + progress_message <- '' + selected_pieces <- NULL + } + piece_counter <- 1 + step_counter <- 1 + work_pieces <- lapply(work_pieces, + function (x) { + if (piece_counter %in% selected_pieces) { + wp <- c(x, list(progress_amount = progress_steps[step_counter])) + step_counter <<- step_counter + 1 + } else { + wp <- x + } + piece_counter <<- piece_counter + 1 + wp + }) + if (!silent) { + .message("If the size of the requested data is close to or above the free shared RAM memory, R may crash.") + .message("If the size of the requested data is close to or above the half of the free RAM memory, R may crash.") + .message(paste0("Will now proceed to read and process ", length(work_pieces), " data files:")) + if (length(work_pieces) < 30) { + lapply(work_pieces, function (x) .message(x[['file_path']], indent = 2)) + } else { + .message("The list of files is long. You can check it after Start() finishes in the output '$Files'.", indent = 2, exdent = 5) + } + } + +# Build the cluster of processes that will do the work and dispatch work pieces + +# The function .LoadDataFile is applied to each work piece. This function will + + # open the data file, regrid if needed, subset, apply the mask, + # compute and apply the weights if needed, + # disable extreme values and store in the shared memory matrix. 
+#print("O") + if (!silent) { + .message("Loading... This may take several minutes...") + if (progress_message != '') { + .message(progress_message, appendLF = FALSE) + } + } + if (num_procs == 1) { + found_files <- lapply(work_pieces, .LoadDataFile, + shared_matrix_pointer = shared_matrix_pointer, + file_data_reader = file_data_reader, + synonims = synonims, + transform = transform, + transform_params = transform_params, + silent = silent, debug = debug) + } else { + cluster <- makeCluster(num_procs, outfile = "") + # Send the heavy work to the workers + work_errors <- try({ + found_files <- clusterApplyLB(cluster, work_pieces, .LoadDataFile, + shared_matrix_pointer = shared_matrix_pointer, + file_data_reader = file_data_reader, + synonims = synonims, + transform = transform, + transform_params = transform_params, + silent = silent, debug = debug) + }) + stopCluster(cluster) + } + + if (!silent) { + if (progress_message != '') { + .message("\n", tag = '') + } + } +#print("P") + data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake) + gc() + + # Load metadata and remove the metadata folder + if (!is.null(metadata_dims)) { + loaded_metadata_files <- list.files(metadata_folder) + loaded_metadata <- lapply(paste0(metadata_folder, '/', loaded_metadata_files), readRDS) + unlink(metadata_folder, recursive = TRUE) + return_metadata <- vector('list', length = prod(dim(array_of_metadata_flags)[metadata_dims])) + return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata + dim(return_metadata) <- dim(array_of_metadata_flags[metadata_dims]) + attr(data_array, 'Variables') <- return_metadata + # TODO: Try to infer data type from loaded_metadata + # as.integer(data_array) + } + + failed_pieces <- work_pieces[which(unlist(found_files))] + for (failed_piece in failed_pieces) { + array_of_not_found_files <- do.call('[<-', + c(list(array_of_not_found_files), + as.list(failed_piece[['file_indices_in_array_of_files']]), + list(value = TRUE))) + } + if 
(any(array_of_not_found_files)) { + for (i in 1:prod(dim(array_of_files_to_load))) { + if (is.na(array_of_not_found_files[i])) { + array_of_files_to_load[i] <- NA + } else { + if (array_of_not_found_files[i]) { + array_of_not_found_files[i] <- array_of_files_to_load[i] + array_of_files_to_load[i] <- NA + } else { + array_of_not_found_files[i] <- NA + } + } + } + } else { + array_of_not_found_files <- NULL + } + + } # End if (retrieve) + + # Replace the vars and common vars by the transformed vars and common vars + for (i in 1:length(dat)) { + if (length(names(transformed_vars[[i]])) > 0) { + picked_vars[[i]][names(transformed_vars[[i]])] <- transformed_vars[[i]] + } else if (length(names(picked_vars_ordered[[i]])) > 0) { + picked_vars[[i]][names(picked_vars_ordered[[i]])] <- picked_vars_ordered[[i]] + } + } + if (length(names(transformed_common_vars)) > 0) { + picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars + } else if (length(names(picked_common_vars_ordered)) > 0) { + picked_common_vars[names(picked_common_vars_ordered)] <- picked_common_vars_ordered + } +if (debug) { +print("-> THE TRANSFORMED VARS:") +print(str(transformed_vars)) +print("-> THE PICKED VARS:") +print(str(picked_vars)) +} + + file_selectors <- NULL + for (i in 1:length(dat)) { + file_selectors[[dat[[i]][['name']]]] <- dat[[i]][['selectors']][which(names(dat[[i]][['selectors']]) %in% found_file_dims[[i]])] + } + if (retrieve) { + if (!silent) { + .message("Successfully retrieved data.") + } + var_backup <- attr(data_array, 'Variables')[[1]] + attr(data_array, 'Variables') <- NULL + attributes(data_array) <- c(attributes(data_array), + list(Variables = c(list(common = c(picked_common_vars, var_backup)), + picked_vars), + Files = array_of_files_to_load, + NotFoundFiles = array_of_not_found_files, + FileSelectors = file_selectors, + PatternDim = found_pattern_dim) + ) + attr(data_array, 'class') <- c('startR_array', attr(data_array, 'class')) + data_array + } else { + if 
(!silent) { + .message("Successfully discovered data dimensions.") + } + start_call <- match.call() + for (i in 2:length(start_call)) { + if (class(start_call[[i]]) %in% c('name', 'call')) { + start_call[[i]] <- eval.parent(start_call[[i]]) + } + } + start_call[['retrieve']] <- TRUE + attributes(start_call) <- c(attributes(start_call), + list(Dimensions = final_dims_fake, + Variables = c(list(common = picked_common_vars), picked_vars), + ExpectedFiles = array_of_files_to_load, + FileSelectors = file_selectors, + PatternDim = found_pattern_dim, + MergedDims = if (merge_across_dims) { + inner_dims_across_files + } else { + NULL + }, + SplitDims = if (split_multiselected_dims) { + all_split_dims + } else { + NULL + }) + ) + attr(start_call, 'class') <- c('startR_cube', attr(start_call, 'class')) + start_call + } +} + +# This function is the responsible for loading the data of each work +# piece. +.LoadDataFile <- function(work_piece, shared_matrix_pointer, + file_data_reader, synonims, + transform, transform_params, + silent = FALSE, debug = FALSE) { +# suppressPackageStartupMessages({library(bigmemory)}) +### TODO: Specify dependencies as parameter +# suppressPackageStartupMessages({library(ncdf4)}) + +#print("1") + store_indices <- as.list(work_piece[['store_position']]) + first_round_indices <- work_piece[['first_round_indices']] + second_round_indices <- work_piece[['second_round_indices']] +#print("2") + file_to_open <- work_piece[['file_path']] + sub_array <- file_data_reader(file_to_open, NULL, + work_piece[['file_selectors']], + first_round_indices, synonims) +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> LOADING A WORK PIECE") +print("-> STRUCTURE OF READ UNTRANSFORMED DATA:") +print(str(sub_array)) +print("-> STRUCTURE OF VARIABLES TO TRANSFORM:") +print(str(work_piece[['vars_to_transform']])) +print("-> COMMON ARRAY DIMENSIONS:") +print(str(work_piece[['store_dims']])) +} +} + if (!is.null(sub_array)) { + # Apply data transformation 
once we have the data arrays. + if (!is.null(transform)) { +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> PROCEEDING TO TRANSFORM ARRAY") +print("-> DIMENSIONS OF ARRAY RIGHT BEFORE TRANSFORMING:") +print(dim(sub_array)) +} +} + sub_array <- do.call(transform, c(list(data_array = sub_array, + variables = work_piece[['vars_to_transform']], + file_selectors = work_piece[['file_selectors']]), + transform_params)) +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> STRUCTURE OF ARRAY AND VARIABLES RIGHT AFTER TRANSFORMING:") +print(str(sub_array)) +print("-> DIMENSIONS OF ARRAY RIGHT AFTER TRANSFORMING:") +print(dim(sub_array$data_array)) +} +} + sub_array <- sub_array$data_array + # Subset with second round of indices + dims_to_crop <- which(!sapply(second_round_indices, is.null)) + if (length(dims_to_crop) > 0) { + dimnames_to_crop <- names(second_round_indices)[dims_to_crop] + sub_array <- Subset(sub_array, dimnames_to_crop, + second_round_indices[dimnames_to_crop]) + } +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> STRUCTURE OF ARRAY AND VARIABLES RIGHT AFTER SUBSETTING WITH 2nd ROUND INDICES:") +print(str(sub_array)) +} +} + } + + metadata <- attr(sub_array, 'variables') + + names_bk <- names(store_indices) + store_indices <- lapply(names(store_indices), + function (x) { + if (!(x %in% names(first_round_indices))) { + store_indices[[x]] + } else if (is.null(second_round_indices[[x]])) { + 1:dim(sub_array)[x] + } else { + if (is.numeric(second_round_indices[[x]])) { + ## TODO: Review carefully this line. Inner indices are all + ## aligned to the left-most positions. 
If dataset A has longitudes + ## 1, 2, 3, 4 but dataset B has only longitudes 3 and 4, then + ## they will be stored as follows: + ## 1, 2, 3, 4 + ## 3, 4, NA, NA + ##x - min(x) + 1 + 1:length(second_round_indices[[x]]) + } else { + 1:length(second_round_indices[[x]]) + } + } + }) + names(store_indices) <- names_bk +if (debug) { +if (all(unlist(store_indices) == 1)) { +print("-> STRUCTURE OF FIRST ROUND INDICES FOR THIS WORK PIECE:") +print(str(first_round_indices)) +print("-> STRUCTURE OF SECOND ROUND INDICES FOR THIS WORK PIECE:") +print(str(second_round_indices)) +print("-> STRUCTURE OF STORE INDICES FOR THIS WORK PIECE:") +print(str(store_indices)) +} +} + + store_indices <- lapply(store_indices, as.integer) + store_dims <- work_piece[['store_dims']] + + # split the storage work of the loaded subset in parts + largest_dim_name <- names(dim(sub_array))[which.max(dim(sub_array))] + max_parts <- length(store_indices[[largest_dim_name]]) + + # Indexing a data file of N MB with expand.grid takes 30*N MB + # The peak ram of Start is, minimum, 2 * total data to load from all files + # due to inefficiencies in other regions of the code + # The more parts we split the indexing done below in, the lower + # the memory footprint of the indexing and the fast. + # But more than 10 indexing iterations (parts) for each MB processed + # makes the iteration slower (tested empirically on BSC workstations). + subset_size_in_mb <- prod(dim(sub_array)) * 8 / 1024 / 1024 + best_n_parts <- ceiling(subset_size_in_mb * 10) + # We want to set n_parts to a greater value than the one that would + # result in a memory footprint (of the subset indexing code below) equal + # to 2 * total data to load from all files. 
+ # s = subset size in MB + # p = number of parts to break it in + # T = total size of data to load + # then, s / p * 30 = 2 * T + # then, p = s * 15 / T + min_n_parts <- ceiling(prod(dim(sub_array)) * 15 / prod(store_dims)) + # Make sure we pick n_parts much greater than the minimum calculated + n_parts <- min_n_parts * 10 + if (n_parts > best_n_parts) { + n_parts <- best_n_parts + } + # Boundary checks + if (n_parts < 1) { + n_parts <- 1 + } + if (n_parts > max_parts) { + n_parts <- max_parts + } + + if (n_parts > 1) { + make_parts <- function(length, n) { + clusters <- cut(1:length, n, labels = FALSE) + lapply(1:n, function(y) which(clusters == y)) + } + part_indices <- make_parts(max_parts, n_parts) + parts <- lapply(part_indices, + function(x) { + store_indices[[largest_dim_name]][x] + }) + } else { + part_indices <- list(1:max_parts) + parts <- store_indices[largest_dim_name] + } + + # do the storage work + weights <- sapply(1:length(store_dims), + function(i) prod(c(1, store_dims)[1:i])) + part_indices_in_sub_array <- as.list(rep(TRUE, length(dim(sub_array)))) + names(part_indices_in_sub_array) <- names(dim(sub_array)) + data_array <- bigmemory::attach.big.matrix(shared_matrix_pointer) + for (i in 1:n_parts) { + store_indices[[largest_dim_name]] <- parts[[i]] + # Converting array indices to vector indices + matrix_indices <- do.call("expand.grid", store_indices) + # Given a matrix where each row is a set of array indices of an element + # the vector indices are computed + matrix_indices <- 1 + colSums(t(matrix_indices - 1) * weights) + part_indices_in_sub_array[[largest_dim_name]] <- part_indices[[i]] + data_array[matrix_indices] <- as.vector(do.call('[', + c(list(x = sub_array), + part_indices_in_sub_array))) + } + rm(data_array) + gc() + + if (!is.null(work_piece[['save_metadata_in']])) { + saveRDS(metadata, file = work_piece[['save_metadata_in']]) + } + } + if (!is.null(work_piece[['progress_amount']]) && !silent) { + 
message(work_piece[['progress_amount']], appendLF = FALSE) + } + is.null(sub_array) +} + + diff --git a/R/AddStep.R b/R/AddStep.R new file mode 100644 index 0000000..955ed84 --- /dev/null +++ b/R/AddStep.R @@ -0,0 +1,105 @@ +AddStep <- function(inputs, step_fun, ...) { + # Check step_fun + if (!('startR_step_fun' %in% class(step_fun))) { + stop("Parameter 'step_fun' must be a startR step function as returned by Step.") + } + + # Check inputs + if (any(c('startR_cube', 'startR_workflow') %in% class(inputs))) { + inputs <- list(inputs) + names(inputs) <- 'input1' + } + else if (is.list(inputs)) { + if (any(!sapply(inputs, + function(x) any(c('startR_cube', + 'startR_workflow') %in% class(x))))) { + stop("Parameter 'inputs' must be one or a list of objects of the class ", + "'startR_cube' or 'startR_workflow'.") + } + } else { + stop("Parameter 'inputs' must be one or a list of objects of the class ", + "'startR_cube' or 'startR_workflow'.") + } + + # Consistency checks + if (!is.null(attr(step_fun, "UseAttributes"))) { + if (!all(names(inputs) == names(attr(step_fun, "UseAttributes")))) { + names(inputs) <- names(attr(step_fun, "UseAttributes")) + .warning(paste("The name of inputs is not assigned or differs from", + "name of use_attributes list in Step(). 
Force inputs", + "name to be consistent with use_attributes list")) + } + } + + if (length(inputs) != length(attr(step_fun, 'TargetDims'))) { + stop("The number of provided 'inputs' (", length(inputs), ") does not ", + "match the number of expected inputs by the provided 'step_fun' (", + length(attr(step_fun, 'TargetDims')), ").") + } + + # Work out the total target dims of the step + previous_target_dims <- NULL + all_input_dims <- NULL + for (input in 1:length(inputs)) { + dims_to_compare <- names(attr(inputs[[input]], 'Dimensions')) + if (!all(attr(step_fun, 'TargetDims')[[input]] %in% dims_to_compare)) { + stop("The target dimensions required by 'step_fun' for the input ", input, + " are not present in the corresponding provided object in 'inputs'.") + } + if ('startR_workflow' %in% class(inputs[[input]])) { + if (is.null(previous_target_dims)) { + previous_target_dims <- attr(inputs[[input]], 'TargetDims') + } else { + dims1 <- rep(1, length(previous_target_dims)) + names(dims1) <- previous_target_dims + dims2 <- rep(1, length(attr(inputs[[input]], 'TargetDims'))) + names(dims2) <- attr(inputs[[input]], 'TargetDims') + previous_target_dims <- names(startR:::.MergeArrayDims(dims1, dims2)[[1]]) + } + } + new_input_dims <- attr(inputs[[input]], 'Dimensions') + if (any(is.na(new_input_dims))) { + new_input_dims[which(is.na(new_input_dims))] <- rep(1, length(which(is.na(new_input_dims)))) + } + if (is.null(all_input_dims)) { + all_input_dims <- new_input_dims + } else { + all_input_dims <- startR:::.MergeArrayDims(all_input_dims, new_input_dims)[[1]] + } + } + + new_target_dims <- unique(unlist(attr(step_fun, 'TargetDims'))) + result <- list() + dims1 <- rep(1, length(previous_target_dims)) + names(dims1) <- previous_target_dims + dims2 <- rep(1, length(new_target_dims)) + names(dims2) <- new_target_dims + target_dims <- names(startR:::.MergeArrayDims(dims1, dims2)[[1]]) + for (output in 1:length(attr(step_fun, 'OutputDims'))) { + workflow <- list(inputs = inputs, 
+ fun = step_fun, + params = list(...)) + if (!is.null(attr(step_fun, 'OutputDims')[[output]])) { + dimensions <- rep(NA, length(attr(step_fun, 'OutputDims')[[output]])) + names(dimensions) <- attr(step_fun, 'OutputDims')[[output]] + } else { + dimensions <- NULL + } + in_dims_to_remove <- which(names(all_input_dims) %in% new_target_dims) + if (length(in_dims_to_remove) > 0) { + dimensions <- c(dimensions, all_input_dims[-in_dims_to_remove]) + } else { + dimensions <- c(dimensions, all_input_dims) + } + attr(workflow, 'Dimensions') <- dimensions + attr(workflow, 'AllTargetDims') <- target_dims + class(workflow) <- 'startR_workflow' + result[[names(attr(step_fun, 'OutputDims'))[output]]] <- workflow + } + + if (length(result) == 1) { + result[[1]] + } else { + result + } +} diff --git a/R/ByChunks.R b/R/ByChunks.R new file mode 100644 index 0000000..26d6662 --- /dev/null +++ b/R/ByChunks.R @@ -0,0 +1,906 @@ +ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', + threads_load = 2, threads_compute = 1, + cluster = NULL, + ecflow_suite_dir = NULL, + ecflow_server = NULL, + silent = FALSE, debug = FALSE, + wait = TRUE) { + # Build object to store profiling timings + t_begin_total <- Sys.time() + t_begin_bychunks_setup <- t_begin_total + timings <- list(nchunks = NULL, + concurrent_chunks = NULL, + cores_per_job = NULL, + threads_load = NULL, + threads_compute = NULL, + bychunks_setup = NULL, + transfer = NULL, + queue = NULL, + job_setup = NULL, + load = NULL, + compute = NULL, + transfer_back = NULL, + merge = NULL, + total = NULL) + + MergeArrays <- startR:::.MergeArrays + + # Check input headers + if ('startR_cube' %in% class(cube_headers)) { + cube_headers <- list(cube_headers) + } + if (!all(sapply(lapply(cube_headers, class), + function(x) 'startR_cube' %in% x))) { + stop("All objects passed in 'cube_headers' must be of class 'startR_cube', ", + "as returned by Start().") + } + + # Check step_fun + if (!is.function(step_fun)) { + stop("Parameter 
'step_fun' must be a function.") + } + + # Check cores + if (!is.numeric(threads_load)) { + stop("Parameter 'threads_load' must be a numeric value.") + } + threads_load <- round(threads_load) + if (!is.numeric(threads_compute)) { + stop("Parameter 'threads_compute' must be a numeric value.") + } + threads_compute <- round(threads_compute) + timings[['threads_load']] <- threads_load + timings[['threads_compute']] <- threads_compute + + on_cluster <- !is.null(cluster) + + # Check ecflow_suite_dir + suite_id <- sample(10 ^ 10, 1) + ecflow_suite_dir_suite <- '' + if (on_cluster) { + if (is.null(ecflow_suite_dir)) { + stop("Parameter 'ecflow_suite_dir' must be specified when dispatching on a cluster.") + } + if (!is.character(ecflow_suite_dir)) { + stop("Parameter 'ecflow_suite_dir' must be a character string.") + } + ecflow_suite_dir_suite <- paste0(ecflow_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + dir.create(ecflow_suite_dir_suite, recursive = TRUE) + if (!dir.exists(ecflow_suite_dir_suite)) { + stop("Could not find or create the directory in ", + "parameter 'ecflow_suite_dir'.") + } + } + + # Check cluster + default_cluster <- list(queue_host = NULL, + queue_type = 'slurm', + data_dir = NULL, + temp_dir = NULL, + lib_dir = NULL, + init_commands = list(''), + r_module = 'R', + CDO_module = NULL, + ecflow_module = 'ecFlow', + node_memory = NULL, + cores_per_job = NULL, + job_wallclock = '01:00:00', + max_jobs = 6, + extra_queue_params = list(''), + bidirectional = TRUE, + polling_period = 10, + special_setup = 'none') + if (on_cluster) { + if (!is.list(cluster)) { + stop("Parameter 'cluster' must be a named list.") + } + if (is.null(names(cluster))) { + stop("Parameter 'cluster' must be a named list.") + } + if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', + 'temp_dir', 'lib_dir', 'init_commands', + 'r_module', 'CDO_module', + 'ecflow_module', 'node_memory', + 'cores_per_job', 'job_wallclock', 'max_jobs', + 'extra_queue_params', 
'bidirectional', + 'polling_period', 'special_setup')))) { + stop("Found invalid component names in parameter 'cluster'.") + } + default_cluster[names(cluster)] <- cluster + } + localhost_name <- NULL + cluster <- default_cluster + remote_ecflow_suite_dir <- ecflow_suite_dir + is_data_dir_shared <- FALSE + is_ecflow_suite_dir_shared <- FALSE + if (on_cluster) { + #localhost_name <- Sys.info()[['nodename']] + localhost_name <- system('hostname -f', intern = TRUE) + if (Sys.which('ecflow_client') == '') { + stop("ecFlow must be installed in order to run the computation on clusters.") + } + if (is.null(cluster[['queue_host']])) { + queue_host <- localhost_name + } else if ((cluster[['queue_host']] %in% c('localhost', '127.0.0.1', localhost_name)) || + grepl(paste0('^', localhost_name), cluster[['queue_host']])) { + queue_host <- localhost_name + } + if (!(cluster[['queue_type']] %in% c('slurm', 'pbs', 'lsf', 'host'))) { + stop("The only supported 'queue_type's are 'slurm', 'pbs', 'lsf' and 'host'.") + } + if (is.null(cluster[['data_dir']])) { + is_data_dir_shared <- TRUE + } else { + if (!is.character(cluster[['data_dir']])) { + stop("The component 'data_dir' of the parameter 'cluster' must be a character string.") + } + remote_data_dir <- cluster[['data_dir']] + } + if (is.null(cluster[['temp_dir']])) { + is_ecflow_suite_dir_shared <- TRUE + } else { + if (!is.character(cluster[['temp_dir']])) { + stop("The component 'temp_dir' of the parameter 'cluster' must be a character string.") + } + remote_ecflow_suite_dir <- cluster[['temp_dir']] + } + if (!is.null(cluster[['lib_dir']])) { + if (!is.character(cluster[['lib_dir']])) { + stop("The component 'lib_dir', of the parameter 'cluster' must be NULL or ", + "a character string.") + } + } + if (!is.logical(cluster[['bidirectional']])) { + stop("The component 'bidirectional' of the parameter 'cluster' must be a logical value.") + } + if (cluster[['bidirectional']]) { + cluster[['init_commands']] <- 
c(cluster[['init_commands']], + list(paste('module load', cluster[['ecflow_module']]))) + } + if (!is.list(cluster[['init_commands']]) || + !all(sapply(cluster[['init_commands']], is.character))) { + stop("The component 'init_commands' of the parameter 'cluster' must be a list of ", + "character strings.") + } + if (!is.character(cluster[['r_module']])) { + stop("The component 'r_module' of the parameter 'cluster' must be a character string.") + } + if ((nchar(cluster[['r_module']]) < 1) || (grepl(' ', cluster[['r_module']]))) { + stop("The component 'r_module' of the parameter 'cluster' must have at least one character ", + "and contain no blank spaces.") + } + if (!is.null(cluster[['CDO_module']])) { + if (!is.character(cluster[['CDO_module']])) { + stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.") + } + if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) { + warning("The component 'CDO_module' of the parameter 'cluster' must have ", + "at least one character and contain no blank spaces.") + } + cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']]) + } + if (!is.character(cluster[['ecflow_module']])) { + stop("The component 'ecflow_module' of the parameter 'cluster' must be a character string.") + } + if ((nchar(cluster[['ecflow_module']]) < 1) || + (grepl(' ', cluster[['ecflow_module']]))) { + stop("The component 'ecflow_module' of the parameter 'cluster' must have at least ", + "one character, and contain no blank spaces.") + } + if (is.null(cluster[['cores_per_job']])) { + cluster[['cores_per_job']] <- threads_compute + } + if (!is.numeric(cluster[['cores_per_job']])) { + stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") + } + cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) + if (cluster[['cores_per_job']] > threads_compute) { + startR:::.message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") + } + if
(!is.list(cluster[['extra_queue_params']]) || + !all(sapply(cluster[['extra_queue_params']], is.character))) { + stop("The component 'extra_queue_params' of the parameter 'cluster' must be a list of ", + "character strings.") + } + if (!(cluster[['special_setup']] %in% c('none', 'marenostrum4'))) { + stop("The value provided for the component 'special_setup' of the parameter ", + "'cluster' is not recognized.") + } + } + + # Check ecflow_suite_dir + remote_ecflow_suite_dir_suite <- '' + if (on_cluster) { + remote_ecflow_suite_dir_suite <- paste0(remote_ecflow_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + } + + # Check ecflow_server + if (!is.null(ecflow_server) && !(is.character(ecflow_server))) { + stop("Parameter 'ecflow_server' must be a character string if specified.") + } + + # Check silent + if (!is.logical(silent)) { + stop("Parameter 'silent' must be logical.") + } + + # Check debug + if (!is.logical(debug)) { + stop("Parameter 'debug' must be logical.") + } + if (silent) { + debug <- FALSE + } + + # Check wait + if (!is.logical(wait)) { + stop("Parameter 'wait' must be logical.") + } + + # Work out chunked dimensions and target dimensions + all_dims <- lapply(cube_headers, attr, 'Dimensions') + all_dims_merged <- NULL + for (i in all_dims) { + if (is.null(all_dims_merged)) { + all_dims_merged <- i + } else { + all_dims_merged <- startR:::.MergeArrayDims(all_dims_merged, i)[[3]] + } + } + all_dimnames <- names(all_dims_merged) + + target_dims_indices <- which(all_dimnames %in% unlist(attr(step_fun, 'TargetDims'))) + target_dims <- NULL + if (length(target_dims_indices) > 0) { + target_dims <- all_dimnames[target_dims_indices] + } + + chunked_dims <- all_dimnames + if (length(target_dims_indices) > 0) { + chunked_dims <- chunked_dims[-target_dims_indices] + } + if (length(chunked_dims) < 1) { + stop("Not possible to process input by chunks. 
All input dimensions are ", + "target dimensions.") + } + + if (length(cube_headers) != length(attr(step_fun, 'TargetDims'))) { + stop("Number of inputs in parameter 'cube_headers' must be equal to the ", + "number of inputs expected by the function 'step_fun'.") + } + # Check all input headers have matching dimensions + cube_index <- 1 + for (cube_header in cube_headers) { + if (!all(attr(cube_header, 'Dimensions') == all_dims_merged[names(attr(cube_header, 'Dimensions'))])) { + stop("All provided 'cube_headers' must have matching dimension lengths ", + "with each other.") + } + if (!all(attr(step_fun, 'TargetDims')[[cube_index]] %in% names(attr(cube_header, 'Dimensions')))) { + stop("All provided 'cube_headers' must contain at least the target dimensions ", + "expected by 'step_fun'.") + } + cube_index <- cube_index + 1 + # work out expected result dimensions + } + + # Check chunks + default_chunks <- as.list(rep(1, length(chunked_dims))) + names(default_chunks) <- chunked_dims + if (length(chunks) == 1 && chunks == 'auto') { + chunks <- default_chunks + } + if (!is.list(chunks)) { + stop("Parameter 'chunks' must be a named list or 'auto'.") + } + if (is.null(names(chunks))) { + stop("Parameter 'chunks' must be a named list or 'auto'.") + } + if (any(!(names(chunks) %in% chunked_dims))) { + stop("All names in parameter 'chunks' must be one of the non-target dimensions ", + "present in the cubes in 'cube_headers'. The target dimensions are ", + paste(paste0("'", target_dims, "'"), collapse = ', '), ". 
The non-target ", + "dimensions (margins) are ", paste(paste0("'", chunked_dims, "'"), collapse = ', '), ".") + } + if (any(!(((unlist(chunks) %% 1) == 0) | (unlist(chunks) == 'all')))) { + stop("All values in parameter 'chunks' must take a numeric value or 'all'.") + } + if (any(unlist(chunks) < 1)) { + stop("All values in parameter 'chunks' must be >= 1.") + } + for (chunk_spec in 1:length(chunks)) { + if (chunks[[chunk_spec]] > all_dims_merged[names(chunks)[chunk_spec]]) { + stop("Too many chunks requested for the dimension ", names(chunks)[chunk_spec], + ". Maximum allowed is ", all_dims_merged[names(chunks)[chunk_spec]]) + } + } + default_chunks[names(chunks)] <- chunks + chunks <- default_chunks + timings[['nchunks']] <- prod(unlist(chunks)) + + # Check step_fun + if (!('startR_step_fun' %in% class(step_fun))) { + stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ", + "by the function Step.") + } + + # Replace 'all's + chunks_all <- which(unlist(chunks) == 'all') + if (length(chunks_all) > 0) { + chunks[chunks_all] <- all_dims[names(chunks)[chunks_all]] + } + # Mount the ecFlow suite + if (on_cluster) { + .message(paste0("ATTENTION: Dispatching chunks on a remote cluster", + ". 
Make sure passwordless ", + "access is properly set in both directions.")) + + # Copy load_process_save_chunk.R into shared folder + chunk_script <- file(system.file('chunking/load_process_save_chunk.R', + package = 'startR')) + chunk_script_lines <- readLines(chunk_script) + close(chunk_script) + chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', + paste(deparse(cluster[['lib_dir']]), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^out_dir <- *', paste0('out_dir <- ', + paste(deparse(remote_ecflow_suite_dir_suite), collapse = '\n')), chunk_script_lines) + chunk_script_lines <- gsub('^debug <- *', paste0('debug <- ', paste(deparse(debug), collapse = '\n')), + chunk_script_lines) + deparsed_calls <- paste0('start_calls <- list(') + extra_path <- '' + if (cluster[['special_setup']] == 'marenostrum4') { + extra_path <- '/gpfs/archive/bsc32/' + } + for (cube_header in 1:length(cube_headers)) { + pattern_dim <- attr(cube_headers[[cube_header]], 'PatternDim') + bk_pattern_dim <- cube_headers[[cube_header]][[pattern_dim]] + bk_expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + if (!is_data_dir_shared) { + cube_headers[[cube_header]][[pattern_dim]] <- paste0(remote_data_dir, '/', + extra_path, '/', cube_headers[[cube_header]][[pattern_dim]]) + for (file_n in 1:length(bk_expected_files)) { + attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n] <- paste0(remote_data_dir, '/', + extra_path, '/', attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n]) + } + } + deparsed_calls <- paste0(deparsed_calls, '\nquote(', + paste(deparse(cube_headers[[cube_header]]), collapse = '\n'), + ')') + cube_headers[[cube_header]][[pattern_dim]] <- bk_pattern_dim + attr(cube_headers[[cube_header]], 'ExpectedFiles') <- bk_expected_files + if (cube_header < length(cube_headers)) { + deparsed_calls <- paste0(deparsed_calls, ', ') + } + } + deparsed_calls <- paste0(deparsed_calls, '\n)') + chunk_script_lines <- gsub('^start_calls 
<- *', deparsed_calls, chunk_script_lines) + chunk_script_lines <- gsub('^start_calls_attrs <- *', paste0('start_calls_attrs <- ', paste(deparse(lapply(cube_headers, attributes)), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^param_dimnames <- *', paste0('param_dimnames <- ', paste(deparse(chunked_dims), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_load <- *', paste0('threads_load <- ', threads_load), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_compute <- *', paste0('threads_compute <- ', threads_compute), + chunk_script_lines) + chunk_script_lines <- gsub('^fun <- *', paste0('fun <- ', paste(deparse(step_fun), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), + chunk_script_lines) + writeLines(chunk_script_lines, paste0(ecflow_suite_dir_suite, '/load_process_save_chunk.R')) + + # Copy Chunk.ecf into shared folder + chunk_ecf_script <- file(system.file('chunking/Chunk.ecf', + package = 'startR')) + chunk_ecf_script_lines <- readLines(chunk_ecf_script) + close(chunk_ecf_script) + if (cluster[['queue_type']] == 'host') { + chunk_ecf_script_lines <- gsub('^include_queue_header', + '', + chunk_ecf_script_lines) + } else { + chunk_ecf_script_lines <- gsub('^include_queue_header', + paste0('%include "./', cluster[['queue_type']], '.h"'), + chunk_ecf_script_lines) + } + chunk_ecf_script_lines <- gsub('^include_init_commands', + paste0(paste0(cluster[['init_commands']], collapse = '\n'), '\n'), + chunk_ecf_script_lines) + chunk_ecf_script_lines <- gsub('^include_module_load', + paste0('module load ', cluster[['r_module']]), + chunk_ecf_script_lines) + ecf_vars <- paste0('%', as.vector(sapply(chunked_dims, + function(x) { + c(toupper(x), paste0(toupper(x), '_N')) + })), '%') +# if (!is_ecflow_suite_dir_shared && (cluster[['queue_host']] != localhost_name)) { +# #transfer_back_line <- paste0('rsync 
-rav %REMOTE_ECF_HOME% ', localhost_name, +# # ':%ECF_HOME%\nrm -f %ECF_HOME%/', +# # paste0('*', paste(ecf_vars[((1:(length(ecf_vars) / 2)) * 2) - 1], collapse = '*'), '*.Rds')) + result_file_id <- paste0('*', + paste(paste0('_', ecf_vars[((1:(length(ecf_vars) / 2)) * 2) - 1], '__'), + collapse = '*'), '*') +# transfer_back_line <- paste0('rsync -rav %REMOTE_ECF_HOME%/%SUITE%/ ', +# localhost_name, +# ':%ECF_HOME%/%SUITE%/\nscp %REMOTE_ECF_HOME%/', +# result_file_id, ' ', localhost_name, +# ':%ECF_HOME%\nrm -f %REMOTE_ECF_HOME%/', +# result_file_id) +# } else { +# transfer_back_line <- '' +# } + chunk_ecf_script_lines <- gsub('^Rscript load_process_save_chunk.R --args \\$task_path insert_indices', + paste0('Rscript load_process_save_chunk.R --args $task_path ', paste(ecf_vars, collapse = ' ')), + chunk_ecf_script_lines) + #chunk_ecf_script_lines <- gsub('^include_transfer_back_and_rm', transfer_back_line, chunk_ecf_script_lines) + writeLines(chunk_ecf_script_lines, paste0(ecflow_suite_dir_suite, '/Chunk.ecf')) + + # Copy merge_chunks.R into tmp folder +# merge_script <- file(system.file('chunking/merge_chunks.R', +# package = 'startR')) +# merge_script_lines <- readLines(merge_script) +# close(merge_script) +# merge_script_lines <- gsub('^shared_dir <- *', paste0('shared_dir <- ', +# paste(deparse(shared_dir_suite), collapse = '\n')), merge_script_lines) +# writeLines(merge_script_lines, paste0(shared_dir_suite, '/merge_chunks.R')) + + # Copy Merge.ecf into tmp folder + #TODO: Modify chain of parameters sent to r script when merging + #chunks progressively +# merge_ecf_script <- file(system.file('chunking/Merge.ecf', +# package = 'startR')) +# merge_ecf_script_lines <- readLines(merge_ecf_script) +# close(merge_ecf_script) +# writeLines(merge_ecf_script_lines, paste0(shared_dir_suite, '/Merge.ecf')) + + # Copy queue header into shared folder + #file.copy(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR'), + # 
ecflow_suite_dir_suite) + chunk_queue_header <- file(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR')) + chunk_queue_header_lines <- readLines(chunk_queue_header) + close(chunk_queue_header) + chunk_queue_header_lines <- gsub('^include_extra_queue_params', + paste0(paste0(cluster[['extra_queue_params']], collapse = '\n'), '\n'), + chunk_queue_header_lines) + writeLines(chunk_queue_header_lines, paste0(ecflow_suite_dir_suite, '/', cluster[['queue_type']], '.h')) + + # Copy headers + file.copy(system.file('chunking/head.h', package = 'startR'), + ecflow_suite_dir_suite) + file.copy(system.file('chunking/tail.h', package = 'startR'), + ecflow_suite_dir_suite) + } + + add_line <- function(suite, line, tabs) { + c(suite, paste0(paste(rep(' ', tabs), collapse = ''), line)) + } + suite <- NULL + tabs <- 0 + suite <- add_line(suite, paste0('suite STARTR_CHUNKING_', suite_id), tabs) + tabs <- tabs + 2 + submit_command <- '' + if (cluster[['queue_type']] == 'slurm') { + submit_command <- 'sbatch' + } else if (cluster[['queue_type']] == 'pbs') { + submit_command <- 'qsub' + } else if (cluster[['queue_type']] == 'lsf') { + submit_command <- 'bsub <' + } else if (cluster[['queue_type']] == 'host') { + submit_command <- 'bash' + } + if (on_cluster) { + suite <- add_line(suite, paste0("edit BIDIRECTIONAL '", cluster[['bidirectional']], "'"), tabs) + suite <- add_line(suite, paste0("edit QUEUE_HOST '", cluster[['queue_host']], "'"), tabs) + suite <- add_line(suite, paste0("edit ECF_HOST '", localhost_name, "'"), tabs) + suite <- add_line(suite, paste0("edit EC_HOST_FULL '", localhost_name, "'"), tabs) + suite <- add_line(suite, paste0("edit RESULT_FILE_ID '", result_file_id, "'"), tabs) + #} else { + # suite <- add_line(suite, paste0("edit ECF_JOB_CMD '", submit_command, " %ECF_JOB% > %ECF_JOBOUT% 2>&1 &'"), tabs) + } + suite <- add_line(suite, paste0("edit ECF_HOME '", ecflow_suite_dir_suite, "'"), tabs) + suite <- add_line(suite, paste0("edit 
REMOTE_ECF_HOME '", remote_ecflow_suite_dir_suite, "'"), tabs) + suite <- add_line(suite, paste0("edit CORES_PER_JOB ", cluster[['cores_per_job']], ""), tabs) + suite <- add_line(suite, paste0("edit JOB_WALLCLOCK '", cluster[['job_wallclock']], "'"), tabs) + suite <- add_line(suite, paste0("limit max_jobs ", cluster[['max_jobs']]), tabs) + suite <- add_line(suite, paste0("inlimit max_jobs"), tabs) + suite <- add_line(suite, "family computation", tabs) + tabs <- tabs + 2 + + if (on_cluster) { + # source $HOME/.profile ; + sync_command <- '' + if (!is_ecflow_suite_dir_shared) { + sync_command <- paste0("rsync -rav ", + "%ECF_HOME%/ ", + "%QUEUE_HOST%:%REMOTE_ECF_HOME%/ ; ") + } + suite <- add_line(suite, paste0("edit ECF_JOB_CMD '", + #"mkdir -p %REMOTE_ECF_HOME%/%SUITE%/ ; ", + sync_command, + "ssh %QUEUE_HOST% \"", + "date --rfc-3339=seconds > %REMOTE_ECF_HOME%/%ECF_NAME%.submit_time ; ", + submit_command, + " %REMOTE_ECF_HOME%/%ECF_NAME%.job%ECF_TRYNO% > ", + "%REMOTE_ECF_HOME%/%ECF_NAME%.%ECF_TRYNO% 2>&1 &\" ", + "2>&1'"), tabs) + if (is_ecflow_suite_dir_shared) { + suite <- add_line(suite, paste0("edit REPORT_BACK 'FALSE'"), tabs) + } else { + suite <- add_line(suite, paste0("edit REPORT_BACK 'TRUE'"), tabs) + } + } + + # Open nested ecFlow families + for (i in length(chunked_dims):1) { + suite <- add_line(suite, paste0('family ', chunked_dims[i], '_CHUNK_', 1), tabs) + tabs <- tabs + 2 + suite <- add_line(suite, paste0('edit ', toupper(chunked_dims[i]), ' ', 1), tabs) + suite <- add_line(suite, paste0('edit ', toupper(chunked_dims[i]), '_N ', chunks[[chunked_dims[i]]]), tabs) + } + + # Iterate through chunks + chunk_array <- array(1:prod(unlist(chunks)), dim = (unlist(chunks))) + arrays_of_results <- vector('list', length(attr(step_fun, 'OutputDims'))) + names(arrays_of_results) <- names(attr(step_fun, 'OutputDims')) + for (component in 1:length(arrays_of_results)) { + arrays_of_results[[component]] <- vector('list', prod((unlist(chunks)))) + 
dim(arrays_of_results[[component]]) <- (unlist(chunks)) + } + if (!on_cluster) { + t_end_bychunks_setup <- Sys.time() + timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, + t_begin_bychunks_setup, units = 'secs')) + timings[['transfer']] <- 0 + timings[['queue']] <- 0 + timings[['job_setup']] <- 0 + timings[['transfer_back']] <- 0 + if (!silent) { + startR:::.message(paste0("Processing chunks... ", + "remaining time estimate soon...")) + } + time_before_first_chunk <- Sys.time() + time_after_first_chunk <- NULL + } + previous_chunk_indices <- rep(1, length(chunks)) + found_first_result <- FALSE + for (i in 1:length(chunk_array)) { + chunk_indices <- which(chunk_array == i, arr.ind = TRUE)[1, ] + names(chunk_indices) <- names(dim(chunk_array)) + # ADD CHUNK SCRIPT TO SUITE + families_to_jump <- which(chunk_indices != previous_chunk_indices) + if (length(families_to_jump) > 0) { + families_to_jump <- max(families_to_jump) + # Close ecFlow families + for (j in 1:families_to_jump) { + tabs <- tabs - 2 + suite <- add_line(suite, paste0('endfamily'), tabs) + } + # Open ecFlow families + for (j in families_to_jump:1) { + suite <- add_line(suite, paste0('family ', (chunked_dims)[j], '_CHUNK_', chunk_indices[j]), tabs) + tabs <- tabs + 2 + suite <- add_line(suite, paste0('edit ', toupper((chunked_dims)[j]), ' ', chunk_indices[j]), tabs) + suite <- add_line(suite, paste0('edit ', toupper((chunked_dims)[j]), '_N ', chunks[[(chunked_dims)[j]]]), tabs) + } + } + suite <- add_line(suite, "task Chunk", tabs) + + if (!on_cluster) { + if (!silent) { + startR:::.message(paste("Loading chunk", i, + "out of", length(chunk_array), "...")) + } + data <- vector('list', length(cube_headers)) + t_begin_load <- Sys.time() + for (input in 1:length(data)) { + start_call <- cube_headers[[input]] + dims_to_alter <- which(names(attr(start_call, 'Dimensions')) %in% names(chunks)) + names_dims_to_alter <- names(attr(start_call, 'Dimensions'))[dims_to_alter] + # If any 
dimension comes from split dimensions + split_dims <- attr(start_call, 'SplitDims') + + if (length(split_dims) != 0){ + + for (k in 1:length(split_dims)) { + if (any(names(split_dims[[k]]) %in% names_dims_to_alter)) { + chunks_split_dims <- rep(1, length(split_dims[[k]])) + names(chunks_split_dims) <- names(split_dims[[k]]) + chunks_indices_split_dims <- chunks_split_dims + split_dims_to_alter <- which(names(split_dims[[k]]) %in% names_dims_to_alter) + chunks_split_dims[split_dims_to_alter] <- unlist(chunks[names(split_dims[[k]])[split_dims_to_alter]]) + chunks_indices_split_dims[split_dims_to_alter] <- chunk_indices[names(split_dims[[k]])[split_dims_to_alter]] + start_call[[names(split_dims)[k]]] <- chunk(chunks_indices_split_dims, chunks_split_dims, + eval(start_call[[names(split_dims)[k]]])) + dims_to_alter_to_remove <- which(names_dims_to_alter %in% names(split_dims[[k]])) + if (length(dims_to_alter_to_remove) > 0) { + dims_to_alter <- dims_to_alter[-dims_to_alter_to_remove] + names_dims_to_alter <- names_dims_to_alter[-dims_to_alter_to_remove] + } + } + } + } + + if (length(dims_to_alter) > 0) { + for (call_dim in names(attr(start_call, 'Dimensions'))[dims_to_alter]) { + start_call[[call_dim]] <- chunk(chunk_indices[call_dim], chunks[[call_dim]], + eval(start_call[[call_dim]])) + } + } + start_call[['silent']] <- !debug + if (!('num_procs' %in% names(start_call))) { + start_call[['num_procs']] <- threads_load + } + data[[input]] <- eval(start_call) + } + t_end_load <- Sys.time() + timings[['load']] <- c(timings[['load']], + as.numeric(difftime(t_end_load, t_begin_load, units = 'secs'))) + if (!silent) { + startR:::.message(paste("Processing...")) + } + #TODO: Find a better way to assign the names of data. When multiple steps for Compute is available, this way may fail. 
+ names(data) <- names(cube_headers) + t_begin_compute <- Sys.time() + result <- multiApply::Apply(data, + target_dims = attr(step_fun, 'TargetDims'), + fun = step_fun, ..., + output_dims = attr(step_fun, 'OutputDims'), + use_attributes = attr(step_fun, 'UseAttributes'), + ncores = threads_compute) + if (!found_first_result) { + names(arrays_of_results) <- names(result) + found_first_result <- TRUE + } + for (component in 1:length(result)) { + arrays_of_results[[component]][[i]] <- result[[component]] + } + rm(data) + gc() + t_end_compute <- Sys.time() + timings[['compute']] <- c(timings[['compute']], + as.numeric(difftime(t_end_compute, + t_begin_compute, units = 'secs'))) + } + + # Time estimate + if (!on_cluster) { + if (is.null(time_after_first_chunk)) { + time_after_first_chunk <- Sys.time() + if (!silent) { + estimate <- (time_after_first_chunk - + time_before_first_chunk) * + (length(chunk_array) - 1) + units(estimate) <- 'mins' + startR:::.message( + paste0("Remaining time estimate (at ", format(time_after_first_chunk), ") ", + "(neglecting merge time): ", format(estimate)) + ) + } + } + } + previous_chunk_indices <- chunk_indices + } + + # Close nested ecFlow families + for (i in length(chunked_dims):1) { + tabs <- tabs - 2 + suite <- add_line(suite, paste0('endfamily'), tabs) + } + + # Close the ecFlow suite + tabs <- tabs - 2 + suite <- add_line(suite, paste0('endfamily'), tabs) +# suite <- add_line(suite, "family merge", tabs) +# tabs <- tabs + 2 +# suite <- add_line(suite, "trigger computation == complete", tabs) +# suite <- add_line(suite, "edit ECF_JOB_CMD 'bash %ECF_JOB% > %ECF_JOBOUT% 2>&1 &'", tabs) +# suite <- add_line(suite, "task Merge", tabs) +# tabs <- tabs - 2 +# suite <- add_line(suite, paste0('endfamily'), tabs) + + tabs <- tabs - 2 + suite <- add_line(suite, "endsuite", tabs) + + # Run ecFlow suite if needed + if (on_cluster) { + timings[['cores_per_job']] <- cluster[['cores_per_job']] + timings[['concurrent_chunks']] <- 
cluster[['max_jobs']] + suite_file <- paste0(ecflow_suite_dir_suite, '/startR_chunking.def') + suite_file_o <- file(suite_file) + writeLines(suite, suite_file_o) + close(suite_file_o) + + default_ecflow_server <- list(host = localhost_name, port = '5678') + if (is.null(ecflow_server)) { + .warning("Parameter 'ecflow_server' has not been specified but execution on ", + "cluster has been requested. An ecFlow server instance will ", + "be created on localhost:5678.") + } else { + if ('host' %in% names(ecflow_server)) { + stop("A host has been specified for the 'ecflow_server', but this option is not available yet.") + } + default_ecflow_server[names(ecflow_server)] <- ecflow_server + } + ecflow_server <- default_ecflow_server + system(paste0("ecflow_start.sh -p ", ecflow_server[['port']])) + system(paste0("ecflow_client --load=", suite_file, " --host=", + ecflow_server[['host']], " --port=", ecflow_server[['port']])) + if (!is_ecflow_suite_dir_shared) { + system(paste0('ssh ', cluster[['queue_host']], ' "mkdir -p ', + remote_ecflow_suite_dir_suite, '"')) + system(paste0('rsync -ra ', ecflow_suite_dir_suite, + ' ', cluster[['queue_host']], ':', + remote_ecflow_suite_dir_suite)) + } + t_end_bychunks_setup <- Sys.time() + timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, + t_begin_bychunks_setup, units = 'secs')) + if (!is_data_dir_shared) { + t_begin_transfer <- Sys.time() + startR:::.message("Sending involved files to the cluster file system...") + files_to_send <- NULL + #files_to_check <- NULL + for (cube_header in 1:length(cube_headers)) { + expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + #files_to_check <- c(files_to_check, expected_files) + #if (cluster[['special_setup']] == 'marenostrum4') { + # expected_files <- paste0('/gpfs/archive/bsc32/', expected_files) + #} + files_to_send <- c(files_to_send, expected_files) + } + #which_files_exist <- sapply(files_to_check, file.exists) + which_files_exist <- 
sapply(files_to_send, file.exists) + files_to_send <- files_to_send[which_files_exist] + if (cluster[['special_setup']] == 'marenostrum4') { + file_spec <- paste(paste0("/gpfs/archive/bsc32/", + files_to_send), collapse = ' ') + system(paste0("ssh ", cluster[['queue_host']], " 'mkdir -p ", remote_data_dir, + ' ; module load transfer ; cd ', remote_ecflow_suite_dir_suite, + ' ; dtrsync -Rrav ', '\'', file_spec, '\' "', remote_data_dir, '/"', + " ; sleep 1 ; ", + "while [[ ! $(ls dtrsync_*.out 2>/dev/null | wc -l) -ge 1 ]] ; ", + "do sleep 2 ; done", + " ; sleep 1 ; ", + 'while [[ ! $(grep "total size is" dtrsync_*.out | ', + "wc -l) -ge 1 ]] ; ", + "do sleep 5 ; done", "'")) + } else { + file_spec <- paste(files_to_send, collapse = ' :') + system(paste0("ssh ", cluster[['queue_host']], ' "mkdir -p ', + remote_data_dir, '"')) + system(paste0("rsync -Rrav '", file_spec, "' '", + cluster[['queue_host']], ":", remote_data_dir, "/'")) + } + startR:::.message("Files sent successfully.") + t_end_transfer <- Sys.time() + timings[['transfer']] <- as.numeric(difftime(t_end_transfer, t_begin_transfer, units = 'secs')) + } else { + timings[['transfer']] <- 0 + } + if (!silent) { + startR:::.message(paste0("Processing chunks... ")) + } + time_begin_first_chunk <- Sys.time() +# time_after_first_chunk <- NULL + system(paste0("ecflow_client --begin=STARTR_CHUNKING_", suite_id, + " --host=", ecflow_server[['host']], " --port=", + ecflow_server[['port']])) + + timings[['total']] <- t_begin_total + startr_exec <- list(cluster = cluster, ecflow_server = ecflow_server, + suite_id = suite_id, chunks = chunks, + num_outputs = length(arrays_of_results), + ecflow_suite_dir = ecflow_suite_dir, + timings = timings) + class(startr_exec) <- 'startR_exec' + if (wait) { + if (!silent) { + startR:::.message(paste0("Remaining time estimate soon... 
")) +# while (is.null(time_after_first_chunk)) { +# if (any(grepl('.*\\.Rds$', list.files(ecflow_suite_dir_suite)))) { +# time_after_first_chunk <- Sys.time() +# estimate <- (time_after_first_chunk - +# time_before_first_chunk) * +# ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / +# cluster[['max_jobs']]) +# units(estimate) <- 'mins' +# startR:::.message( +# paste0('Remaining time estimate (neglecting queue and ', +# 'merge time) (at ', format(time_after_first_chunk), +# '): ', format(estimate), ' (', +# format(time_after_first_chunk - +# time_before_first_chunk), ' per chunk)') +# ) +# } else if (!cluster[['bidirectional']]) { +# rsync_output <- tryCatch({ +# system(paste0("rsync -ra --ignore-missing-args ", +# cluster[['queue_host']], ":", +# remote_ecflow_suite_dir_suite, "/*.Rds ", +# ecflow_suite_dir_suite, "/"), intern = TRUE) +# }, error = function(e) { +# message("Warning: rsync from remote server to collect results failed. ", +# "Retrying soon.") +# failed <- TRUE +# }) +# Sys.sleep(cluster[['polling_period']]) +# } +# } + startr_exec[['t_begin_first_chunk']] <- time_begin_first_chunk + } + result <- Collect(startr_exec, wait = TRUE) + startR:::.message("Computation ended successfully.") + result + } else { + startr_exec + } + } else { + timings[['cores_per_job']] <- NA + timings[['concurrent_chunks']] <- 1 + t_begin_merge <- Sys.time() + for (component in 1:length(arrays_of_results)) { + arrays_of_results[[component]] <- startR:::.MergeArrayOfArrays(arrays_of_results[[component]]) + } + t_end_merge <- Sys.time() + timings[['merge']] <- as.numeric(difftime(t_end_merge, t_begin_merge, units = 'secs')) + t_end_total <- t_end_merge + timings[['total']] <- as.numeric(difftime(t_end_total, t_begin_total, units = 'secs')) + message(paste0("* Computation ended successfully.")) + message(paste0("* Number of chunks: ", + timings[['nchunks']])) + message(paste0("* Max. 
number of concurrent chunks (jobs): ", + timings[['concurrent_chunks']])) + message(paste0("* Requested cores per job: ", + timings[['cores_per_job']])) + message(paste0("* Load threads per chunk: ", + timings[['threads_load']])) + message(paste0("* Compute threads per chunk: ", + timings[['threads_compute']])) + message(paste0("* Total time (s): ", + timings[['total']])) + message(paste0("* Chunking setup: ", + timings[['bychunks_setup']])) + message(paste0("* Data upload to cluster: ", + timings[['transfer']])) + message(paste0("* All chunks: ", + timings[['total']] - + timings[['bychunks_setup']] - + timings[['transfer']] - + timings[['transfer_back']] - + timings[['merge']])) + message(paste0("* Transfer results from cluster: ", + timings[['transfer_back']])) + message(paste0("* Merge: ", + timings[['merge']])) + message(paste0("* Each chunk: ")) + message(paste0("* queue: ")) + message(paste0("* mean: ", + mean(timings[['queue']]))) + message(paste0("* min: ", + min(timings[['queue']]))) + message(paste0("* max: ", + max(timings[['queue']]))) + message(paste0("* job setup: ")) + message(paste0("* mean: ", + mean(timings[['job_setup']]))) + message(paste0("* min: ", + min(timings[['job_setup']]))) + message(paste0("* max: ", + max(timings[['job_setup']]))) + message(paste0("* load: ")) + message(paste0("* mean: ", + mean(timings[['load']]))) + message(paste0("* min: ", + min(timings[['load']]))) + message(paste0("* max: ", + max(timings[['load']]))) + message(paste0("* compute: ")) + message(paste0("* mean: ", + mean(timings[['compute']]))) + message(paste0("* min: ", + min(timings[['compute']]))) + message(paste0("* max: ", + max(timings[['compute']]))) + attr(arrays_of_results, 'startR_compute_profiling') <- timings + arrays_of_results + } + #TODO: check result dimensions match expected dimensions +} diff --git a/R/CDORemapper.R b/R/CDORemapper.R new file mode 100644 index 0000000..ab35234 --- /dev/null +++ b/R/CDORemapper.R @@ -0,0 +1,46 @@ +CDORemapper <- 
function(data_array, variables, file_selectors = NULL, ...) { + file_dims <- names(file_selectors) + known_lon_names <- s2dverification:::.KnownLonNames() + known_lat_names <- s2dverification:::.KnownLatNames() + if (!any(known_lon_names %in% names(variables)) || + !any(known_lat_names %in% names(variables))) { + stop("The longitude and latitude variables must be requested in ", + "'return_vars' and specified in 'transform_vars' for the ", + "CDORemapper to work.") + } + lon_name <- names(variables)[which(names(variables) %in% known_lon_names)[1]] + lons <- variables[[lon_name]] + if (!is.null(dim(lons))) { + dims_to_subset <- which(names(dim(lons)) %in% file_dims) + if (length(dims_to_subset) > 0) { + lons_to_use <- as.list(rep(TRUE, length(dim(lons)))) + names(lons_to_use) <- names(dim(lons)) + lons_to_use[dims_to_subset] <- as.list(rep(1, length(dims_to_subset))) + attr_bk <- attributes(lons) + lons <- do.call('[', c(list(x = lons), lons_to_use, list(drop = TRUE))) + attributes(lons) <- attr_bk + } + } + lat_name <- names(variables)[which(names(variables) %in% known_lat_names)[1]] + lats <- variables[[lat_name]] + if (!is.null(dim(lats))) { + dims_to_subset <- which(names(dim(lats)) %in% file_dims) + if (length(dims_to_subset) > 0) { + lats_to_use <- as.list(rep(TRUE, length(dim(lats)))) + names(lats_to_use) <- names(dim(lats)) + lats_to_use[dims_to_subset] <- as.list(rep(1, length(dims_to_subset))) + attr_bk <- attributes(lons) + lats <- do.call('[', c(list(x = lats), lats_to_use, list(drop = TRUE))) + attributes(lats) <- attr_bk + } + } + extra_params <- list(...) + if (!all(c('grid', 'method') %in% names(extra_params))) { + stop("Parameters 'grid' and 'method' must be specified for the ", + "CDORemapper, via the 'transform_params' argument.") + } + result <- s2dverification::CDORemap(data_array, lons, lats, ...) 
+ return_variables <- list(result$lons, result$lats) + names(return_variables) <- c(lon_name, lat_name) + list(data_array = result$data_array, variables = return_variables) +} diff --git a/R/Collect.R b/R/Collect.R new file mode 100644 index 0000000..2325d55 --- /dev/null +++ b/R/Collect.R @@ -0,0 +1,275 @@ +Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { + if (!('startR_exec' %in% class(startr_exec))) { + stop("Parameter 'startr_exec' must be an object of the class ", + "'startR_exec', as returned by Collect(..., wait = FALSE).") + } + if (Sys.which('ecflow_client') == '') { + stop("ecFlow must be installed in order to collect results from a ", + "Compute() execution.") + } + cluster <- startr_exec[['cluster']] + ecflow_server <- startr_exec[['ecflow_server']] + suite_id <- startr_exec[['suite_id']] + chunks <- startr_exec[['chunks']] + num_outputs <- startr_exec[['num_outputs']] + ecflow_suite_dir <- startr_exec[['ecflow_suite_dir']] + timings <- startr_exec[['timings']] + ecflow_suite_dir_suite <- paste0(ecflow_suite_dir, '/STARTR_CHUNKING_', + suite_id, '/') + if (!is.null(cluster[['temp_dir']])) { + remote_ecflow_suite_dir_suite <- paste0(cluster[['temp_dir']], + '/STARTR_CHUNKING_', + suite_id, '/') + } + find_task_name <- function(received_file) { + file_name <- received_file + parts <- strsplit(file_name, '__')[[1]] + parts <- parts[c(2:(length(parts) - 1))] + chunk_indices <- rev(sapply(parts, function(x) { + as.numeric(strsplit(x, '_')[[1]][2]) + })) + task_pattern <- paste(paste0('*_', chunk_indices, '/'), + collapse = '') + task_glob <- paste0(ecflow_suite_dir_suite, '/*/*/', + task_pattern) + task_path <- Sys.glob(task_glob) + if (length(task_path) != 1) { + stop("Unexpected error while receiving results.") + } + task_name <- strsplit(task_path, 'computation')[[1]][2] + task_name <- paste0('/STARTR_CHUNKING_', suite_id, + '/computation', task_name) + task_name + } + done <- FALSE + attempt <- 1 + sum_received_chunks <- 
sum(grepl('output.*\\.Rds', + list.files(ecflow_suite_dir_suite))) + if (cluster[['bidirectional']]) { + t_transfer_back <- NA + } else { + t_transfer_back <- 0 + } + time_before_first_chunk <- startr_exec[['t_begin_first_chunk']] + first_chunk_received <- FALSE + rsync_petition_file_lines <- c('+ *.Rds', '+ *.timings', '+ *.crashed', + '+ *.running', '- *') + rsync_petition_file <- tempfile() + writeLines(rsync_petition_file_lines, rsync_petition_file) + Sys.sleep(2) + while (!done) { + failed <- FALSE + if (cluster[['bidirectional']]) { + status <- system(paste0("ecflow_client --get_state=STARTR_CHUNKING_", + suite_id, " --host=", + ecflow_server[['host']], " --port=", ecflow_server[['port']]), + intern = TRUE) + if (any(grepl(paste0("suite STARTR_CHUNKING_", suite_id, " #.* state:complete"), status))) { + done <- TRUE + } else if (!wait) { + stop("Computation in progress...") + } + if (!first_chunk_received) { + if (any(grepl('state:complete', status))) { + if (!is.null(time_before_first_chunk)) { + time_after_first_chunk <- Sys.time() + estimate <- (time_after_first_chunk - + time_before_first_chunk) * + ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / + cluster[['max_jobs']]) + units(estimate) <- 'mins' + startR:::.message( + paste0('Remaining time estimate (neglecting queue and ', + 'merge time) (at ', format(time_after_first_chunk), + '): ', format(estimate), ' (', + format(time_after_first_chunk - + time_before_first_chunk), ' per chunk)') + ) + } + first_chunk_received <- TRUE + } + } + Sys.sleep(min(sqrt(attempt), 5)) + } else { + #if (sum_received_chunks == 0) { + # # Accounting for the fist chunk received in ByChunks and + # # setting it to complete + # # ByChunks needs the first chunk to calculate remaining time + # received_files <- list.files(ecflow_suite_dir_suite) + # received_chunks <- received_files[grepl('Rds$', + # received_files)] + #} + t_begin_transfer_back <- Sys.time() + rsync_output <- tryCatch({ + system(paste0("rsync -rav 
--include-from=", rsync_petition_file, " '", + cluster[['queue_host']], ":", remote_ecflow_suite_dir_suite, "' ", + ecflow_suite_dir_suite, "/"), intern = TRUE) + }, error = function(e) { + message("Warning: rsync from remote server to collect results failed. ", + "Retrying soon.") + failed <- TRUE + }) + t_end_transfer_back <- Sys.time() + t_transfer_back <- t_transfer_back + as.numeric(difftime(t_end_transfer_back, + t_begin_transfer_back, units = 'secs')) + if (!failed) { + #if (sum_received_chunks == 0) { + # rsync_output <- c(rsync_output, received_chunks) + #} + received_running <- grepl('running$', rsync_output) + for (received_chunk_index in which(received_running)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=active recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + } + received_crashed <- grepl('crashed$', rsync_output) + for (received_chunk_index in which(received_crashed)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=aborted recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + } + received_chunks <- grepl('Rds$', rsync_output) + for (received_chunk_index in which(received_chunks)) { + file_name <- rsync_output[received_chunk_index] + task_name <- find_task_name(file_name) + system(paste0('ecflow_client --force=complete recursive ', + task_name, + " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + sum_received_chunks <- sum_received_chunks + 1 + if (!first_chunk_received) { + if (!is.null(time_before_first_chunk)) { + time_after_first_chunk <- Sys.time() + estimate <- (time_after_first_chunk - + time_before_first_chunk) * + ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / + cluster[['max_jobs']]) + units(estimate) <- 'mins' + 
startR:::.message( + paste0('Remaining time estimate (neglecting queue and ', + 'merge time) (at ', format(time_after_first_chunk), + '): ', format(estimate), ' (', + format(time_after_first_chunk - + time_before_first_chunk), ' per chunk)') + ) + } + first_chunk_received <- TRUE + } + } + if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { + done <- TRUE + } else if (!wait) { + stop("Computation in progress...") + } + } + Sys.sleep(cluster[['polling_period']]) + } + attempt <- attempt + 1 + } + file.remove(rsync_petition_file) + timings[['transfer_back']] <- t_transfer_back + if (!is.null(cluster[['temp_dir']])) { + system(paste0('ssh ', cluster[['queue_host']], ' "rm -rf ', + remote_ecflow_suite_dir_suite, '"')) + } + if (remove) { + .warning("ATTENTION: The source chunks will be removed from the ", + "system. Store the result after Collect() ends if needed.") + } + t_begin_merge <- Sys.time() + result <- startR:::.MergeChunks(ecflow_suite_dir, suite_id, remove) + t_end_merge <- Sys.time() + timings[['merge']] <- as.numeric(difftime(t_end_merge, t_begin_merge, units = 'secs')) + received_files <- list.files(ecflow_suite_dir_suite, full.names = TRUE) + received_timings_files <- received_files[grepl('timings$', received_files)] + for (timings_file in received_timings_files) { + times <- readRDS(timings_file) + timings[['queue']] <- c(timings[['queue']], times['queue']) + timings[['job_setup']] <- c(timings[['job_setup']], times['job_setup']) + timings[['load']] <- c(timings[['load']], times['load']) + timings[['compute']] <- c(timings[['compute']], times['compute']) + } + if (remove) { + system(paste0("ecflow_client --delete=force yes /STARTR_CHUNKING_", + suite_id, " --host=", ecflow_server[['host']], + " --port=", ecflow_server[['port']])) + unlink(paste0(ecflow_suite_dir_suite), + recursive = TRUE) + } + if (attempt > 2) { + t_end_total <- Sys.time() + timings[['total']] <- as.numeric(difftime(t_end_total, timings[['total']], units = 'secs')) + } 
else { + # When attempt <= 2, it means all results were ready possibly from + # long ago, so is not straightfowrard to work out total time. + timings[['total']] <- NA + } + message(paste0("* Computation ended successfully.")) + message(paste0("* Number of chunks: ", + timings[['nchunks']])) + message(paste0("* Max. number of concurrent chunks (jobs): ", + timings[['concurrent_chunks']])) + message(paste0("* Requested cores per job: ", + timings[['cores_per_job']])) + message(paste0("* Load threads per chunk: ", + timings[['threads_load']])) + message(paste0("* Compute threads per chunk: ", + timings[['threads_compute']])) + message(paste0("* Total time (s): ", + timings[['total']])) + message(paste0("* Chunking setup: ", + timings[['bychunks_setup']])) + message(paste0("* Data upload to cluster: ", + timings[['transfer']])) + message(paste0("* All chunks: ", + timings[['total']] - + timings[['bychunks_setup']] - + timings[['transfer']] - + timings[['transfer_back']] - + timings[['merge']])) + message(paste0("* Transfer results from cluster: ", + timings[['transfer_back']])) + message(paste0("* Merge: ", + timings[['merge']])) + message(paste0("* Each chunk: ")) + message(paste0("* queue: ")) + message(paste0("* mean: ", + mean(timings[['queue']]))) + message(paste0("* min: ", + min(timings[['queue']]))) + message(paste0("* max: ", + max(timings[['queue']]))) + message(paste0("* job setup: ")) + message(paste0("* mean: ", + mean(timings[['job_setup']]))) + message(paste0("* min: ", + min(timings[['job_setup']]))) + message(paste0("* max: ", + max(timings[['job_setup']]))) + message(paste0("* load: ")) + message(paste0("* mean: ", + mean(timings[['load']]))) + message(paste0("* min: ", + min(timings[['load']]))) + message(paste0("* max: ", + max(timings[['load']]))) + message(paste0("* compute: ")) + message(paste0("* mean: ", + mean(timings[['compute']]))) + message(paste0("* min: ", + min(timings[['compute']]))) + message(paste0("* max: ", + 
max(timings[['compute']]))) + #system("ecflow_client --shutdown --port=5678") + #system("ecflow_stop.sh -p 5678") + #result <- readRDS(paste0(ecflow_output_dir, '/result.Rds')) + #file.remove(paste0(ecflow_output_dir, '/result.Rds')) + attr(result, 'startR_compute_profiling') <- timings + result +} diff --git a/R/Compute.R b/R/Compute.R new file mode 100644 index 0000000..12c2dce --- /dev/null +++ b/R/Compute.R @@ -0,0 +1,75 @@ +Compute <- function(workflow, chunks = 'auto', + threads_load = 1, threads_compute = 1, + cluster = NULL, ecflow_suite_dir = NULL, + ecflow_server = NULL, silent = FALSE, debug = FALSE, + wait = TRUE) { + # Check workflow + if (!any(c('startR_cube', 'startR_workflow') %in% class(workflow))) { + stop("Parameter 'workflow' must be an object of class 'startR_cube' as ", + "returned by Start or of class 'startR_workflow' as returned by ", + "AddStep.") + } + + if ('startR_cube' %in% class(workflow)) { + #machine_free_ram <- 1000000000 + #max_ram_ratio <- 0.5 + #data_size <- prod(c(attr(workflow, 'Dimensions'), 8)) + #if (data_size > (machine_free_ram * max_ram_ratio)) { + # stop("It is not possible to fit the requested data (", data_size, + # " bytes) into the maximum allowed free ram (", max_ram_ratio, + # " x ", machine_free_ram, ").") + #} + eval(workflow) + } else { + # TODO: + #explore tree of operations and identify set of operations that reduce dimensionality as much as possible + # while being able to fit in (cluster and to exploit number of available nodes) | (machine) + #combine set of operations into a single function + #Goal: to build manually a function following this pattern: + #operation <- function(input1, input2) { + # fun1 <- workflow$fun + # fun1(input1, input2, names(workflow$params)[1] = workflow$params[[1]]) + #} + op_text <- "function(" + op_text <- paste0(op_text, + paste(paste0('input', 1:length(workflow$inputs)), + collapse = ', ')) + op_text <- paste0(op_text, ") {") + op_text <- paste0(op_text, "\n fun1 <- ", 
paste(deparse(workflow$fun), collapse = '\n')) + op_text <- paste0(op_text, "\n res <- fun1(", + paste(paste0('input', 1:length(workflow$inputs)), + collapse = ", ")) + if (length(workflow$params) > 0) { + for (j in 1:length(workflow$params)) { + op_text <- paste0(op_text, ", ") + op_text <- paste0(op_text, names(workflow$params)[j], " = ", + paste(deparse(workflow$params[[j]]), collapse = '\n')) + } + } + op_text <- paste0(op_text, ")") + op_text <- paste0(op_text, "\n}") + operation <- eval(parse(text = op_text)) + operation <- Step(operation, + attr(workflow$fun, 'TargetDims'), + attr(workflow$fun, 'OutputDims'), + attr(workflow$fun, 'UseLibraries'), + attr(workflow$fun, 'UseAttributes')) + + if (!all(sapply(workflow$inputs, class) == 'startR_cube')) { + stop("Workflows with only one step supported by now.") + } + # Run ByChunks with the combined operation + res <- ByChunks(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + ecflow_suite_dir = ecflow_suite_dir, + ecflow_server = ecflow_server, + silent = silent, debug = debug, wait = wait) + # TODO: carry out remaining steps locally, using multiApply + # Return results + res + } +} diff --git a/R/NcCloser.R b/R/NcCloser.R new file mode 100644 index 0000000..bb5e892 --- /dev/null +++ b/R/NcCloser.R @@ -0,0 +1,3 @@ +NcCloser <- function(file_object) { + easyNCDF::NcClose(file_object) +} diff --git a/R/NcDataReader.R b/R/NcDataReader.R new file mode 100644 index 0000000..8213128 --- /dev/null +++ b/R/NcDataReader.R @@ -0,0 +1,182 @@ +# Parameter 'file_selectos' expects a named character vector of single +# file dimension selectors. +# Parameter 'inner_indices' expects a named list of numeric vectors. 
+NcDataReader <- function(file_path = NULL, file_object = NULL, + file_selectors = NULL, inner_indices = NULL, + synonims) { + close <- FALSE + if (!is.null(file_object)) { + file_to_read <- file_object + file_path <- file_object$filename + } else if (!is.null(file_path)) { + file_to_read <- NcOpener(file_path) + close <- TRUE + } else { + stop("Either 'file_path' or 'file_object' must be provided.") + } + + if (is.null(file_to_read)) { + return(NULL) + } + + var_requested <- is.null(inner_indices) + + drop_var_dim <- FALSE + if (any(c('var', 'variable') %in% names(file_selectors))) { + if (!any(c('var', 'variable') %in% names(inner_indices))) { + inner_indices <- c(inner_indices, + list(var = file_selectors[[which(names(file_selectors) %in% + c('var', 'variable'))[1]]])) + drop_var_dim <- TRUE + } + } + + vars_in_file <- easyNCDF::NcReadVarNames(file_to_read) + if (any(names(inner_indices) %in% c('var', 'variable'))) { + position_of_var <- which(names(inner_indices) %in% c('var', 'variable'))[1] + } else if (length(vars_in_file) == 1) { + inner_indices <- c(inner_indices, + list(var = vars_in_file)) + drop_var_dim <- TRUE + position_of_var <- length(inner_indices) + } else { + stop("A 'var'/'variable' file dimension or inner dimension must be ", + "requested for NcDataReader() to read NetCDF files with more than ", + "one variable.") + } + + inner_indices[[position_of_var]] <- sapply(inner_indices[[position_of_var]], + function(x) { + if (x %in% names(synonims)) { + x_in_file <- which(synonims[[x]] %in% vars_in_file) + if (length(x_in_file) < 1) { + stop("Could not find variable '", x, "' (or its synonims if ", + "specified) in the file ", file_path) + } + if (length(x_in_file) > 1) { + stop("Found more than one matches for the synonims of the ", + "variable '", x, "' in the file ", file_path) + } + synonims[[x]][x_in_file] + } else { + if (is.character(x) && !(x %in% c('all', 'first', 'last'))) { + if (!(x %in% vars_in_file)) { + stop("Could not find variable '", 
x, "' (or its synonims if ", + "specified) in the file ", file_path) + } + } + x + } + }) + #inner_indices[[position_of_var]] <- SelectorChecker(inner_indices[[position_of_var]], vars_in_file) + dims_in_file <- NcDimReader(NULL, file_to_read, NULL, + inner_indices[position_of_var], synonims) + names(inner_indices) <- sapply(names(inner_indices), + function(x) { + if (x %in% names(synonims)) { + x_in_file <- which(synonims[[x]] %in% names(dims_in_file)) + if (length(x_in_file) < 1) { + stop("Could not find dimension '", x, "' (or its synonims if ", + "specified) in the file ", file_path) + } + if (length(x_in_file) > 1) { + stop("Found more than one matches for the synonims of the ", + "dimension '", x, "' in the file ", file_path) + } + synonims[[x]][x_in_file] + } else { + if (!(x %in% names(dims_in_file))) { + stop("Could not find dimension '", x, "' (or its synonims if ", + "specified) in the file ", file_path) + } + x + } + }) + if (drop_var_dim) { + dims_in_file <- dims_in_file[-which(names(dims_in_file) %in% c('var', 'variable'))] + } + singleton_unspecified_dims <- which((dims_in_file == 1) & + !(names(dims_in_file) %in% names(inner_indices))) + if (length(singleton_unspecified_dims) > 0) { + dims_in_file <- dims_in_file[-singleton_unspecified_dims] + } + if (var_requested) { + result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim, + expect_all_indices = FALSE, allow_out_of_range = TRUE) + } else { + if (any(!(names(dims_in_file) %in% names(inner_indices)))) { + expected_dim_names <- names(inner_indices) + if (drop_var_dim) { + expected_dim_names <- expected_dim_names[-position_of_var] + } + stop("Unexpected extra dimensions (of length > 1) in the file.\nExpected: ", + paste(expected_dim_names, collapse = ', '), "\n", + "Found: ", paste(names(dims_in_file), collapse = ', '), "\n", + file_path) + } + result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim, + expect_all_indices = TRUE, 
allow_out_of_range = TRUE) + } + if (!is.null(dim(result))) { + names(dim(result)) <- sapply(names(dim(result)), + function(x) { + which_entry <- which(sapply(synonims, function(y) x %in% y)) + if (length(which_entry) > 0) { + names(synonims)[which_entry] + } else { + x + } + }) + } + if (!is.null(result)) { + names(attr(result, 'variables')) <- sapply(names(attr(result, 'variables')), + function(x) { + which_entry <- which(sapply(synonims, function(y) x %in% y)) + if (length(which_entry) > 0) { + names(synonims)[which_entry] + } else { + x + } + }) + if (length(names(attr(result, 'variables'))) == 1) { + var_name <- names(attr(result, 'variables')) + units <- attr(result, 'variables')[[var_name]][['units']] + if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) { + if (units == 'seconds') { + units <- 'secs' + } else if (units == 'minutes') { + units <- 'mins' + } + result[] <- paste(result[], units) + } else if (grepl(' since ', units)) { + parts <- strsplit(units, ' since ')[[1]] + units <- parts[1] + if (units %in% c('second', 'seconds')) { + units <- 'secs' + } else if (units %in% c('minute', 'minutes')) { + units <- 'mins' + } else if (units == 'day') { + units <- 'days' + } else if (units %in% c('month', 'months')) { + result <- result * 30.5 + units <- 'days' + } + + new_array <- rep(as.POSIXct(parts[2]), length(result)) + + as.difftime(result[], units = units) + #new_array <- seq(as.POSIXct(parts[2]), + # length = max(result, na.rm = TRUE) + 1, + # by = units)[result[] + 1] + dim(new_array) <- dim(result) + attr(new_array, 'variables') <- attr(result, 'variables') + result <- new_array + } + } + } + + if (close) { + NcCloser(file_to_read) + } + + result +} diff --git a/R/NcDimReader.R b/R/NcDimReader.R new file mode 100644 index 0000000..e93de95 --- /dev/null +++ b/R/NcDimReader.R @@ -0,0 +1,77 @@ +# Parameter 'file_selectors' expects a named character vector of single +# file dimension selectors. 
+# Parameter 'inner_indices' expects a named list of numeric or +# character string vectors. +NcDimReader <- function(file_path = NULL, file_object = NULL, + file_selectors = NULL, inner_indices = NULL, + synonims) { + close <- FALSE + if (!is.null(file_object)) { + file_to_read <- file_object + file_path <- file_object$filename + } else if (!is.null(file_path)) { + file_to_read <- NcOpener(file_path) + close <- TRUE + } else { + stop("Either 'file_path' or 'file_object' must be provided.") + } + + vars_in_file <- easyNCDF::NcReadVarNames(file_to_read) + if (any(c('var', 'variable') %in% names(inner_indices))) { + vars_to_read <- inner_indices[[which(names(inner_indices) %in% c('var', 'variable'))[1]]] + var_tag <- names(inner_indices)[[which(names(inner_indices) %in% c('var', 'variable'))[1]]] + } else if (any(c('var', 'variable') %in% names(file_selectors))) { + vars_to_read <- file_selectors[[which(names(file_selectors) %in% c('var', 'variable'))[1]]] + var_tag <- names(file_selectors)[[which(names(file_selectors) %in% c('var', 'variable'))[1]]] + } else if (length(vars_in_file) == 1) { + vars_to_read <- vars_in_file + file_selectors <- c(file_selectors, list(var = vars_in_file)) + var_tag <- 'var' + } else { + stop("NcDimReader expected to find a requested 'var' or 'variable' in 'file_selectors'.") + } + + if ((length(vars_to_read) == 1) && (vars_to_read[1] == 'var_names')) { + result <- setNames(length(vars_in_file), var_tag) + } else { + vars_to_read <- sapply(vars_to_read, + function(x) { + if (x %in% names(synonims)) { + x_in_file <- which(synonims[[x]] %in% vars_in_file) + if (length(x_in_file) < 1) { + stop("Could not find variable '", x, "' (or its synonims if ", + "specified) in the file ", file_path) + } + if (length(x_in_file) > 1) { + stop("Found more than one matches for the synonims of the ", + "variable '", x, "' in the file ", file_path) + } + synonims[[x]][x_in_file] + } else { + if (is.character(x) && !(x %in% c('all', 'last', 'first'))) { + if 
(!(x %in% vars_in_file)) { + stop("Could not find variable '", x, "' (or its synonims if ", + "specified) in the file ", file_path) + } + } + x + } + }) + vars_to_read <- SelectorChecker(vars_to_read, vars_in_file, + return_indices = FALSE) + read_dims <- easyNCDF::NcReadDims(file_to_read, vars_to_read) + if (any(c('var', 'variable') %in% names(inner_indices))) { + names(read_dims)[which(names(read_dims) == 'var')] <- var_tag + read_dims[var_tag] <- length(vars_in_file) + } else { + read_dims <- read_dims[-which(names(read_dims) == 'var')] + } + result <- read_dims + } + + if (close) { + NcCloser(file_to_read) + } + + result +} diff --git a/R/NcOpener.R b/R/NcOpener.R new file mode 100644 index 0000000..ca6e9ed --- /dev/null +++ b/R/NcOpener.R @@ -0,0 +1,3 @@ +NcOpener <- function(file_path) { + easyNCDF::NcOpen(file_path) +} diff --git a/R/NcVarReader.R b/R/NcVarReader.R new file mode 100644 index 0000000..c794244 --- /dev/null +++ b/R/NcVarReader.R @@ -0,0 +1,27 @@ +NcVarReader <- function(file_path = NULL, file_object = NULL, + file_selectors = NULL, var_name = NULL, + synonims) { + if (!is.null(file_object)) { + file_to_read <- file_object + } else if (!is.null(file_path)) { + file_to_read <- file_path + } else { + stop("Either 'file_path' or 'file_object' must be provided.") + } + if (var_name %in% c('var_names')) { + vars_in_file <- easyNCDF::NcReadVarNames(file_to_read) + vars_in_file <- sapply(vars_in_file, + function(x) { + which_entry <- which(sapply(synonims, function(y) x %in% y)) + if (length(which_entry) > 0) { + names(synonims)[which_entry] + } else { + x + } + }) + dim(vars_in_file) <- c(var_names = length(vars_in_file)) + vars_in_file + } else { + NcDataReader(file_path, file_object, list(var = var_name), NULL, synonims) + } +} diff --git a/R/SelectorChecker.R b/R/SelectorChecker.R new file mode 100644 index 0000000..14bae7e --- /dev/null +++ b/R/SelectorChecker.R @@ -0,0 +1,269 @@ +SelectorChecker <- function(selectors, var = NULL, return_indices 
= TRUE, + tolerance = NULL) { + if (length(selectors) == 0) { + stop("No selectors provided in 'selectors'.") + } + if (return_indices) { + if (is.list(selectors)) { + if (length(selectors) != 2) { + stop("'selectors' provided in a wrong format.") + } + crescent_selectors <- TRUE + if (all(sapply(selectors, + function(x) { + any(c('numeric', "POSIXct", "POSIXlt", "POSIXt", "Date") %in% class(x)) + }))) { + if (selectors[[2]] < selectors[[1]]) { + crescent_selectors <- FALSE + } + } + for (i in 1:length(selectors)) { + if (is.null(var)) { + if (!is.numeric(selectors[[i]])) { + stop("No selector values provided in 'var'.") + } else { + selectors[[i]] <- round(selectors[[i]]) + } + } else if (is.na(selectors[[i]])) { + if (i == 1) { + if (crescent_selectors) { + selectors[[i]] <- 1 + } else { + selectors[[i]] <- length(var) + } + } + else { + if (crescent_selectors) { + selectors[[i]] <- length(var) + } else { + selectors[[i]] <- 1 + } + } + } else if (is.character(selectors[[i]])) { + if (is.character(var)) { + candidate <- which(var == selectors[[i]]) + if (length(candidate) > 0) { + selectors[[i]] <- candidate[1] + } else { + stop("Selector value not found in 'var'.") + } + } else { + stop("Character selectors provided but possible values in 'var' are not character.") + } + } else if (is.numeric(selectors[[i]])) { + if (is.numeric(var)) { + + tol <- 0 + if (!is.null(tolerance)) { + if (!any(class(tolerance) %in% "numeric")) { + stop("Expected a numeric *_tolerance.") + } + tol <- tolerance + } + + val <- selectors[[i]] + + if (i == 1) { + if (crescent_selectors) { + val <- val - tol + if (var[1] < var[2]) { + selectors[[i]] <- which(var >= val)[1] + } else if (var[1] > var[2]) { + selectors[[i]] <- rev(which(var >= val))[1] + } + + } else { + val <- val + tol + if (var[1] < var[2]) { + selectors[[i]] <- rev(which(var <= val))[1] + } else if (var[1] > var[2]) { + selectors[[i]] <- which(var <= val)[1] + } + } + } + else if (i == 2) { + if (crescent_selectors) { + 
val <- val + tol + if (var[1] < var[2]) { + selectors[[i]] <- rev(which(var <= val))[1] + } else if (var[1] > var[2]) { + selectors[[i]] <- which(var <= val)[1] + } + + } else { + val <- val - tol + if (var[1] < var[2]) { + selectors[[i]] <- which(var >= val)[1] + } else if (var[1] > var[2]) { + selectors[[i]] <- rev(which(var >= val))[1] + } + } + } + + + } else { + stop("Numeric selectors provided but possible values in 'var' are not numeric.") + } + } else if (any(c("POSIXct", "POSIXlt", "POSIXt", "Date") %in% class(selectors[[i]]))) { + # TODO: Here, change to as above (numeric part). + if (any(c("POSIXct", "POSIXlt", "POSIXt", "Date") %in% class(var))) { + val <- selectors[[i]] + tol <- 0 + if (!is.null(tolerance)) { + if (!any(class(tolerance) %in% "difftime")) { + stop("Expected a difftime *_tolerance.") + } + tol <- tolerance + } + if (i == 1) { + if (crescent_selectors) { + val <- val - tol + selectors[[i]] <- which(var >= val)[1] + } else { + val <- val + tol + selectors[[i]] <- rev(which(var <= val))[1] + } + } + else { + if (crescent_selectors) { + val <- val + tol + selectors[[i]] <- rev(which(var <= val))[1] + } else { + val <- val - tol + selectors[[i]] <- which(var >= val)[1] + } + } + } else { + stop("Datetime selectors provided but possible values in 'var' are not datetime.") + } + } + } + + # The checker is returning a list of two indices. + ##selectors[[1]]:selectors[[2]] + selectors + } else if (is.numeric(selectors)) { + if (is.null(var)) { + ## TODO: Crash if negative indices? + round(selectors) + } else { + if (is.numeric(var)) { + if (!all(selectors %in% var)) { + .warning(paste0("Numeric selectors have been ", + "provided for a dimension defined along a ", + "numeric variable, but no exact match ", + "found for all the selectors. 
Taking the index of the ", + "nearest values.")) + } + if (!is.null(tolerance)) { + if (!any(class(tolerance) %in% 'numeric')) { + stop("Expected a numeric *_tolerance.") + } + } + sapply(selectors, function(x) { + dif <- abs(var - x) + res <- which.min(dif)[1] + if (!is.null(tolerance)) { + if (dif[res] > tolerance) { + stop("Could not find a value in 'var' close ", + "enough to one of the 'selectors', ", + "according to 'tolerance'.") + } + } + res + }) + } else { + stop("Numeric selectors provided but possible values in 'var' are not numeric.") + } + } + } else if (any(c('POSIXct', 'POSIXlt', 'POSIXt', 'Date') %in% class(selectors))) { + if (is.null(var)) { + stop("Numeric selectors have been provided for a dimension ", + "defined along a date variable, but no possible values ", + "provided in 'var'.") + } + if (!all(selectors %in% var)) { + .warning(paste0("Date selectors have been ", + "provided for a dimension defined along a ", + "date variable, but no exact match ", + "found for all the selectors. 
Taking the index of the ", + "nearest values.")) + } + if (!is.null(tolerance)) { + if (!any(class(tolerance) %in% 'difftime')) { + stop("Expected a difftime *_tolerance.") + } + } + sapply(selectors, function(x) { + dif <- abs(var - x) + res <- which.min(dif)[1] + if (!is.null(tolerance)) { + if (dif[res] > tolerance) { + res <- NA + #stop("Could not find a value in 'var' close ", + # "enough to one of the 'selectors', ", + # "according to 'tolerance'.") + } + } + res + }) + } else { + if (is.null(var)) { + stop("No selector values provided in 'var'.") + } else { + if ((length(selectors) == 1) && + (selectors %in% c('all', 'first', 'last'))) { + if (selectors == 'all') { + 1:length(var) + } else if (selectors == 'first') { + 1 + } else { + length(var) + } + } else { + if (!identical(class(var), class(selectors))) { + stop("Class of provided selectors does not match class of 'var'.") + } + candidates <- match(as.vector(selectors), as.vector(var)) + if (length(candidates) == 0 | any(is.na(candidates))) { + stop("Selectors do not match values in 'var'.") + } else if (length(candidates) != length(selectors)) { + stop("Some selectors do not match values in 'var'.") + } + candidates + } + } + } + } else { + if (!is.null(var)) { + if (is.list(selectors)) { + if (length(selectors) != 2) { + stop("'selectors' provided in a wrong format.") + } else { + var[selectors[[1]]:selectors[[2]]] + } + } else if (is.numeric(selectors)) { + if (length(selectors) > 0) { + var[selectors] + } else { + stop("No selectors provided.") + } + } else { + if ((length(selectors) == 1) && + (selectors %in% c('all', 'first', 'last'))) { + if (selectors == 'all') { + var + } else if (selectors == 'first') { + head(var, 1) + } else { + tail(var, 1) + } + } else { + selectors + } + } + } else { + selectors + } + } +} diff --git a/R/Sort.R b/R/Sort.R new file mode 100644 index 0000000..bff2654 --- /dev/null +++ b/R/Sort.R @@ -0,0 +1,36 @@ +Sort <- function(...) { + params <- list(...) 
+ f <- "function(x) { + dim_bk <- dim(x) + x <- do.call(sort, c(list(x, index.return = TRUE), + PARAMS)) + dim(x$x) <- dim_bk + dim(x$ix) <- dim_bk + x + }" + f <- gsub("PARAMS", deparse(params), f) + r <- eval(parse(text = f)) + attr(r, 'circular') <- FALSE + r +} + +CircularSort <- function(start, end, ...) { + params <- list(...) + f <- "function (x) { + start <- START + end <- END + dim_bk <- dim(x) + x <- do.call(sort, c(list((x - start) %% (end - start) + start, + index.return = TRUE), + PARAMS)) + dim(x$x) <- dim_bk + dim(x$ix) <- dim_bk + x + }" + f <- gsub("START", deparse(start), f) + f <- gsub("END", deparse(end), f) + f <- gsub("PARAMS", deparse(params), f) + r <- eval(parse(text = f)) + attr(r, 'circular') <- TRUE + r +} diff --git a/R/Start.R b/R/Start.R new file mode 100644 index 0000000..9f40576 --- /dev/null +++ b/R/Start.R @@ -0,0 +1,3286 @@ +Start <- function(..., # dim = indices/selectors, + # dim_var = 'var', + # dim_reorder = Sort/CircularSort, + # dim_tolerance = number, + # dim_depends = 'file_dim', + # dim_across = 'file_dim', + return_vars = NULL, + synonims = NULL, + file_opener = NcOpener, + file_var_reader = NcVarReader, + file_dim_reader = NcDimReader, + file_data_reader = NcDataReader, + file_closer = NcCloser, + transform = NULL, + transform_params = NULL, + transform_vars = NULL, + transform_extra_cells = 2, + apply_indices_after_transform = FALSE, + pattern_dims = NULL, + metadata_dims = NULL, + selector_checker = SelectorChecker, + merge_across_dims = FALSE, + split_multiselected_dims = FALSE, + path_glob_permissive = FALSE, + retrieve = FALSE, + num_procs = 1, + silent = FALSE, debug = FALSE) { + #, config_file = NULL + #dictionary_dim_names = , + #dictionary_var_names = + dim_params <- list(...) 
+ + # Take *_var parameters apart + var_params_ind <- grep('_var$', names(dim_params)) + var_params <- dim_params[var_params_ind] + # Check all *_var are NULL or vectors of character strings, and + # that they all have a matching dimension param. + i <- 1 + for (var_param in var_params) { + if (!is.character(var_param)) { + stop("All '*_var' parameters must be character strings.") + } else if (!any(grepl(paste0('^', strsplit(names(var_params)[i], + '_var$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_var' parameters must be associated to a dimension parameter. Found parameter '", + names(var_params)[i], "' but no parameter '", + strsplit(names(var_params)[i], '_var$')[[1]][1], "'.")) + } + i <- i + 1 + } + # Make the keys of 'var_params' to be the name of + # the corresponding dimension. + if (length(var_params) < 1) { + var_params <- NULL + } else { + names(var_params) <- gsub('_var$', '', names(var_params)) + } + + # Take *_reorder parameters apart + dim_reorder_params_ind <- grep('_reorder$', names(dim_params)) + dim_reorder_params <- dim_params[dim_reorder_params_ind] + # Make the keys of 'dim_reorder_params' to be the name of + # the corresponding dimension. + if (length(dim_reorder_params) < 1) { + dim_reorder_params <- NULL + } else { + names(dim_reorder_params) <- gsub('_reorder$', '', names(dim_reorder_params)) + } + + # Take *_tolerance parameters apart + tolerance_params_ind <- grep('_tolerance$', names(dim_params)) + tolerance_params <- dim_params[tolerance_params_ind] + + # Take *_depends parameters apart + depends_params_ind <- grep('_depends$', names(dim_params)) + depends_params <- dim_params[depends_params_ind] + # Check all *_depends are NULL or vectors of character strings, and + # that they all have a matching dimension param. 
+ i <- 1 + for (depends_param in depends_params) { +print(depends_param) + if (!is.character(depends_param) || (length(depends_param) > 1)) { + stop("All '*_depends' parameters must be single character strings.") + } else if (!any(grepl(paste0('^', strsplit(names(depends_params)[i], + '_depends$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_depends' parameters must be associated to a dimension parameter. Found parameter '", + names(depends_params)[i], "' but no parameter '", + strsplit(names(depends_params)[i], '_depends$')[[1]][1], "'.")) + } + i <- i + 1 + } + # Make the keys of 'depends_params' to be the name of + # the corresponding dimension. + if (length(depends_params) < 1) { + depends_params <- NULL + } else { + names(depends_params) <- gsub('_depends$', '', names(depends_params)) + } +print(depends_params) + # Change name to depending_file_dims + depending_file_dims <- depends_params +print(depending_file_dims) + # Take *_across parameters apart + across_params_ind <- grep('_across$', names(dim_params)) + across_params <- dim_params[across_params_ind] + # Check all *_across are NULL or vectors of character strings, and + # that they all have a matching dimension param. + i <- 1 + for (across_param in across_params) { + if (!is.character(across_param) || (length(across_param) > 1)) { + stop("All '*_across' parameters must be single character strings.") + } else if (!any(grepl(paste0('^', strsplit(names(across_params)[i], + '_across$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_across' parameters must be associated to a dimension parameter. Found parameter '", + names(across_params)[i], "' but no parameter '", + strsplit(names(across_params)[i], '_across$')[[1]][1], "'.")) + } + i <- i + 1 + } + # Make the keys of 'across_params' to be the name of + # the corresponding dimension. 
+ if (length(across_params) < 1) { + across_params <- NULL + } else { + names(across_params) <- gsub('_across$', '', names(across_params)) + } + # Change name to inner_dims_across_files + inner_dims_across_files <- across_params + + # Check merge_across_dims + if (!is.logical(merge_across_dims)) { + stop("Parameter 'merge_across_dims' must be TRUE or FALSE.") + } + + # Leave alone the dimension parameters in the variable dim_params + if (length(c(var_params_ind, dim_reorder_params_ind, tolerance_params_ind, + depends_params_ind, across_params_ind)) > 0) { + dim_params <- dim_params[-c(var_params_ind, dim_reorder_params_ind, + tolerance_params_ind, depends_params_ind, + across_params_ind)] +print("Dimensions:") +print(dim_params) + # Reallocating pairs of across file and inner dimensions if they have + # to be merged. They are put one next to the other to ease merge later. + if (merge_across_dims) { + for (inner_dim_across in names(inner_dims_across_files)) { + inner_dim_pos <- which(names(dim_params) == inner_dim_across) + file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) + new_pos <- inner_dim_pos + if (file_dim_pos < inner_dim_pos) { + new_pos <- new_pos - 1 + } + dim_params_to_move <- dim_params[c(inner_dim_pos, file_dim_pos)] + dim_params <- dim_params[-c(inner_dim_pos, file_dim_pos)] + new_dim_params <- list() + if (new_pos > 1) { + new_dim_params <- c(new_dim_params, dim_params[1:(new_pos - 1)]) + } + new_dim_params <- c(new_dim_params, dim_params_to_move) + if (length(dim_params) >= new_pos) { + new_dim_params <- c(new_dim_params, dim_params[new_pos:length(dim_params)]) + } + dim_params <- new_dim_params + } + } + } + dim_names <- names(dim_params) + if (is.null(dim_names)) { + stop("At least one pattern dim must be specified.") + } + + # Look for chunked dims + chunks <- vector('list', length(dim_names)) + names(chunks) <- dim_names + for (dim_name in dim_names) { + if (!is.null(attr(dim_params[[dim_name]], 'chunk'))) { 
+ chunks[[dim_name]] <- attr(dim_params[[dim_name]], 'chunk') + attributes(dim_params[[dim_name]]) <- attributes(dim_params[[dim_name]])[-which(names(attributes(dim_params[[dim_name]])) == 'chunk')] + } else { + chunks[[dim_name]] <- c(chunk = 1, n_chunks = 1) + } + } + # This is a helper function to compute the chunk indices to take once the total + # number of indices for a dimension has been discovered. + chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) { + if (n_chunks > n_indices) { + stop("Requested to divide dimension '", dim_name, "' of length ", + n_indices, " in ", n_chunks, " chunks, which is not possible.") + } + chunk_sizes <- rep(floor(n_indices / n_chunks), n_chunks) + chunks_to_extend <- n_indices - chunk_sizes[1] * n_chunks + if (chunks_to_extend > 0) { + chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1 + } + chunk_size <- chunk_sizes[chunk] + offset <- 0 + if (chunk > 1) { + offset <- sum(chunk_sizes[1:(chunk - 1)]) + } + indices <- 1:chunk_sizes[chunk] + offset + array(indices, dim = setNames(length(indices), dim_name)) + } + + # Check pattern_dims + if (is.null(pattern_dims)) { + .warning(paste0("Parameter 'pattern_dims' not specified. Taking the first dimension, '", + dim_names[1], "' as 'pattern_dims'.")) + pattern_dims <- dim_names[1] + } else if (is.character(pattern_dims) && (length(pattern_dims) > 0)) { + pattern_dims <- unique(pattern_dims) + } else { + stop("Parameter 'pattern_dims' must be a vector of character strings.") + } + if (any(names(var_params) %in% pattern_dims)) { + stop("'*_var' parameters specified for pattern dimensions. 
Remove or fix them.") + } + # Find the pattern dimension with the pattern specifications + found_pattern_dim <- NULL + for (pattern_dim in pattern_dims) { + # Check all specifications in pattern_dim are valid + dat <- datasets <- dim_params[[pattern_dim]] + if (is.null(dat) || !(is.character(dat) && all(nchar(dat) > 0)) && !is.list(dat)) { + stop(paste0("Parameter '", pattern_dim, + "' must be a list of lists with pattern specifications or a vector of character strings.")) + } + if (!is.null(dim_reorder_params[[pattern_dim]])) { + .warning(paste0("A reorder for the selectors of '", pattern_dim, + "' has been specified, but it is a pattern dimension and the reorder will be ignored.")) + } + if (is.list(dat) || any(sapply(dat, is.list))) { + if (is.null(found_pattern_dim)) { + found_pattern_dim <- pattern_dim + } else { + stop("Found more than one pattern dim with pattern specifications (list of lists). One and only one pattern dim must contain pattern specifications.") + } + } + } + if (is.null(found_pattern_dim)) { + .warning(paste0("Could not find any pattern dim with explicit data set descriptions (in the form of list of lists). Taking the first pattern dim, '", pattern_dims[1], "', as dimension with pattern specifications.")) + found_pattern_dim <- pattern_dims[1] + } + + # Check all *_reorder are NULL or functions, and that they all have + # a matching dimension param. + i <- 1 + for (dim_reorder_param in dim_reorder_params) { + if (!is.function(dim_reorder_param)) { + stop("All '*_reorder' parameters must be functions.") + } else if (!any(grepl(paste0('^', strsplit(names(dim_reorder_params)[i], + '_reorder$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_reorder' parameters must be associated to a dimension parameter. 
Found parameter '", + names(dim_reorder_params)[i], "' but no parameter '", + strsplit(names(dim_reorder_params)[i], '_reorder$')[[1]][1], "'.")) + #} else if (!any(grepl(paste0('^', strsplit(names(dim_reorder_params)[i], + # '_reorder$')[[1]][1], '$'), + # names(var_params)))) { + # stop(paste0("All '*_reorder' parameters must be associated to a dimension parameter associated to a ", + # "variable. Found parameter '", names(dim_reorder_params)[i], "' and dimension parameter '", + # strsplit(names(dim_reorder_params)[i], '_reorder$')[[1]][1], "' but did not find variable ", + # "parameter '", strsplit(names(dim_reorder_params)[i], '_reorder$')[[1]][1], "_var'.")) + } + i <- i + 1 + } + + # Check all *_tolerance are NULL or vectors of character strings, and + # that they all have a matching dimension param. + i <- 1 + for (tolerance_param in tolerance_params) { + if (!any(grepl(paste0('^', strsplit(names(tolerance_params)[i], + '_tolerance$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_tolerance' parameters must be associated to a dimension parameter. Found parameter '", + names(tolerance_params)[i], "' but no parameter '", + strsplit(names(tolerance_params)[i], '_tolerance$')[[1]][1], "'.")) + #} else if (!any(grepl(paste0('^', strsplit(names(tolerance_params)[i], + # '_tolerance$')[[1]][1], '$'), + # names(var_params)))) { + # stop(paste0("All '*_tolerance' parameters must be associated to a dimension parameter associated to a ", + # "variable. Found parameter '", names(tolerance_params)[i], "' and dimension parameter '", + # strsplit(names(tolerance_params)[i], '_tolerance$')[[1]][1], "' but did not find variable ", + # "parameter '", strsplit(names(tolerance_params)[i], '_tolerance$')[[1]][1], "_var'.")) + } + i <- i + 1 + } + # Make the keys of 'tolerance_params' to be the name of + # the corresponding dimension. 
+ if (length(tolerance_params) < 1) { + tolerance_params <- NULL + } else { + names(tolerance_params) <- gsub('_tolerance$', '', names(tolerance_params)) + } + + # Check metadata_dims + if (!is.null(metadata_dims)) { + if (is.na(metadata_dims)) { + metadata_dims <- NULL + } else if (!is.character(metadata_dims) || (length(metadata_dims) < 1)) { + stop("Parameter 'metadata' dims must be a vector of at least one character string.") + } + } else { + metadata_dims <- pattern_dims + } + + # Once the pattern dimension with dataset specifications is found, + # the variable 'dat' is mounted with the information of each + # dataset. + # Take only the datasets for the requested chunk + dats_to_take <- chunk_indices(length(dim_params[[found_pattern_dim]]), + chunks[[found_pattern_dim]]['chunk'], + chunks[[found_pattern_dim]]['n_chunks'], + found_pattern_dim) + dim_params[[found_pattern_dim]] <- dim_params[[found_pattern_dim]][dats_to_take] + dat <- datasets <- dim_params[[found_pattern_dim]] + dat_info_names <- c('name', 'path')#, 'nc_var_name', 'suffix', 'var_min', 'var_max', 'dimnames') + dat_to_fetch <- c() + dat_names <- c() + if (!is.list(dat)) { + dat <- as.list(dat) + } else { + if (!any(sapply(dat, is.list))) { + dat <- list(dat) + } + } + for (i in 1:length(dat)) { + if (is.character(dat[[i]]) && length(dat[[i]]) == 1 && nchar(dat[[i]]) > 0) { + if (grepl('^(\\./|\\.\\./|/.*/|~/)', dat[[i]])) { + dat[[i]] <- list(path = dat[[i]]) + } else { + dat[[i]] <- list(name = dat[[i]]) + } + } else if (!is.list(dat[[i]])) { + stop(paste0("Parameter '", pattern_dim, + "' is incorrect. It must be a list of lists or character strings.")) + } + #if (!(all(names(dat[[i]]) %in% dat_info_names))) { + # stop("Error: parameter 'dat' is incorrect. There are unrecognized components in the information of some of the datasets. 
Check 'dat' in ?Load for details.") + #} + if (!('name' %in% names(dat[[i]]))) { + dat[[i]][['name']] <- paste0('dat', i) + if (!('path' %in% names(dat[[i]]))) { + stop(paste0("Parameter '", found_pattern_dim, + "' is incorrect. A 'path' should be provided for each dataset if no 'name' is provided.")) + } + } else if (!('path' %in% names(dat[[i]]))) { + dat_to_fetch <- c(dat_to_fetch, i) + } + #if ('path' %in% names(dat[[i]])) { + # if (!('nc_var_name' %in% names(dat[[i]]))) { + # dat[[i]][['nc_var_name']] <- '$var_name$' + # } + # if (!('suffix' %in% names(dat[[i]]))) { + # dat[[i]][['suffix']] <- '' + # } + # if (!('var_min' %in% names(dat[[i]]))) { + # dat[[i]][['var_min']] <- '' + # } + # if (!('var_max' %in% names(dat[[i]]))) { + # dat[[i]][['var_max']] <- '' + # } + #} + dat_names <- c(dat_names, dat[[i]][['name']]) + } + if ((length(dat_to_fetch) > 0) && (length(dat_to_fetch) < length(dat))) { + .warning("'path' has been provided for some datasets. Any information in the configuration file related to these will be ignored.") + } + if (length(dat_to_fetch) > 0) { + stop("Specified only the name for some data sets, but not the path ", + "pattern. This option has not been yet implemented.") + } + + # Reorder inner_dims_across_files (to make the keys be the file dimensions, + # and the values to be the inner dimensions that go across it). 
+ if (!is.null(inner_dims_across_files)) { + # Reorder: example, convert list(ftime = 'chunk', ensemble = 'member', xx = 'chunk') + # to list(chunk = c('ftime', 'xx'), member = 'ensemble') + new_idaf <- list() + for (i in names(inner_dims_across_files)) { + if (!(inner_dims_across_files[[i]] %in% names(new_idaf))) { + new_idaf[[inner_dims_across_files[[i]]]] <- i + } else { + new_idaf[[inner_dims_across_files[[i]]]] <- c(new_idaf[[inner_dims_across_files[[i]]]], i) + } + } + inner_dims_across_files <- new_idaf + } + + # Check return_vars + if (is.null(return_vars)) { + return_vars <- list() +# if (length(var_params) > 0) { +# return_vars <- as.list(paste0(names(var_params), '_var')) +# } else { +# return_vars <- list() +# } + } + if (!is.list(return_vars)) { + stop("Parameter 'return_vars' must be a list or NULL.") + } + if (length(return_vars) > 0 && is.null(names(return_vars))) { +# names(return_vars) <- rep('', length(return_vars)) + stop("Parameter 'return_vars' must be a named list.") + } + i <- 1 + while (i <= length(return_vars)) { +# if (names(return_vars)[i] == '') { +# if (!(is.character(return_vars[[i]]) && (length(return_vars[[i]]) == 1))) { +# stop("The ", i, "th specification in 'return_vars' is malformed.") +# } +# if (!grepl('_var$', return_vars[[i]])) { +# stop("The ", i, "th specification in 'return_vars' is malformed.") +# } +# dim_name <- strsplit(return_vars[[i]], '_var$')[[1]][1] +# if (!(dim_name %in% names(var_params))) { +# stop("'", dim_name, "_var' requested in 'return_vars' but ", +# "no '", dim_name, "_var' specified in the .Load call.") +# } +# names(return_vars)[i] <- var_params[[dim_name]] +# return_vars[[i]] <- found_pattern_dim +# } else + if (length(return_vars[[i]]) > 0) { + if (!is.character(return_vars[[i]])) { + stop("The ", i, "th specification in 'return_vars' is malformed. 
It ", + "must be a vector of character strings of valid file dimension ", + "names.") + } + } + i <- i + 1 + } + + # Check synonims + if (!is.null(synonims)) { + error <- FALSE + if (!is.list(synonims)) { + error <- TRUE + } + for (synonim_entry in names(synonims)) { + if (!(synonim_entry %in% names(dim_params)) && + !(synonim_entry %in% names(return_vars))) { + error <- TRUE + } + if (!is.character(synonims[[synonim_entry]]) || + length(synonims[[synonim_entry]]) < 1) { + error <- TRUE + } + } + if (error) { + stop("Parameter 'synonims' must be a named list, where the names are ", + "a name of a requested dimension or variable and the values are ", + "vectors of character strings with at least one alternative name ", + " for each dimension or variable in 'synonims'.") + } + } + if (length(unique(names(synonims))) < length(names(synonims))) { + stop("There must not be repeated entries in 'synonims'.") + } + if (length(unique(unlist(synonims))) < length(unlist(synonims))) { + stop("There must not be repeated values in 'synonims'.") + } + # Make that all dims and vars have an entry in synonims, even if only dim_name = dim_name + dim_entries_to_add <- which(!(names(dim_params) %in% names(synonims))) + if (length(dim_entries_to_add) > 0) { + synonims[names(dim_params)[dim_entries_to_add]] <- as.list(names(dim_params)[dim_entries_to_add]) + } + var_entries_to_add <- which(!(names(var_params) %in% names(synonims))) + if (length(var_entries_to_add) > 0) { + synonims[names(var_params)[var_entries_to_add]] <- as.list(names(var_params)[var_entries_to_add]) + } + + # Check selector_checker + if (is.null(selector_checker) || !is.function(selector_checker)) { + stop("Parameter 'selector_checker' must be a function.") + } + + # Check file_opener + if (is.null(file_opener) || !is.function(file_opener)) { + stop("Parameter 'file_opener' must be a function.") + } + + # Check file_var_reader + if (!is.null(file_var_reader) && !is.function(file_var_reader)) { + stop("Parameter 
'file_var_reader' must be a function.") + } + + # Check file_dim_reader + if (!is.null(file_dim_reader) && !is.function(file_dim_reader)) { + stop("Parameter 'file_dim_reader' must be a function.") + } + + # Check file_data_reader + if (is.null(file_data_reader) || !is.function(file_data_reader)) { + stop("Parameter 'file_data_reader' must be a function.") + } + + # Check file_closer + if (is.null(file_closer) || !is.function(file_closer)) { + stop("Parameter 'file_closer' must be a function.") + } + + # Check transform + if (!is.null(transform)) { + if (!is.function(transform)) { + stop("Parameter 'transform' must be a function.") + } + } + + # Check transform_params + if (!is.null(transform_params)) { + if (!is.list(transform_params)) { + stop("Parameter 'transform_params' must be a list.") + } + if (is.null(names(transform_params))) { + stop("Parameter 'transform_params' must be a named list.") + } + } + + # Check transform_vars + if (!is.null(transform_vars)) { + if (!is.character(transform_vars)) { + stop("Parameter 'transform_vars' must be a vector of character strings.") + } + } + if (any(!(transform_vars %in% names(return_vars)))) { + stop("All the variables specified in 'transform_vars' must also be specified in 'return_vars'.") + } + + # Check apply_indices_after_transform + if (!is.logical(apply_indices_after_transform)) { + stop("Parameter 'apply_indices_after_transform' must be either TRUE or FALSE.") + } + aiat <- apply_indices_after_transform + + # Check transform_extra_cells + if (!is.numeric(transform_extra_cells)) { + stop("Parameter 'transform_extra_cells' must be numeric.") + } + transform_extra_cells <- round(transform_extra_cells) + + # Check split_multiselected_dims + if (!is.logical(split_multiselected_dims)) { + stop("Parameter 'split_multiselected_dims' must be TRUE or FALSE.") + } + + # Check path_glob_permissive + if (!is.numeric(path_glob_permissive) && !is.logical(path_glob_permissive)) { + stop("Parameter 'path_glob_permissive' must 
be TRUE, FALSE or an integer.") + } + if (length(path_glob_permissive) != 1) { + stop("Parameter 'path_glob_permissive' must be of length 1.") + } + + # Check retrieve + if (!is.logical(retrieve)) { + stop("Parameter 'retrieve' must be TRUE or FALSE.") + } + + # Check num_procs + if (!is.null(num_procs)) { + if (!is.numeric(num_procs)) { + stop("Parameter 'num_procs' must be numeric.") + } else { + num_procs <- round(num_procs) + } + } + + # Check silent + if (!is.logical(silent)) { + stop("Parameter 'silent' must be logical.") + } + + dim_params[[found_pattern_dim]] <- dat_names + + if (!silent) { + .message(paste0("Exploring files... This will take a variable amount ", + "of time depending on the issued request and the ", + "performance of the file server...")) + } + +if (!is.character(debug)) { +dims_to_check <- c('time') +} else { +dims_to_check <- debug +debug <- TRUE +} + + ############################## READING FILE DIMS ############################ + # Check that no unrecognized variables are present in the path patterns + # and also that no file dimensions are requested to THREDDs catalogs. + # And in the mean time, build all the work pieces and look for the + # first available file of each dataset. 
+ array_of_files_to_load <- NULL + array_of_not_found_files <- NULL + indices_of_first_files_with_data <- vector('list', length(dat)) + selectors_of_first_files_with_data <- vector('list', length(dat)) + dataset_has_files <- rep(FALSE, length(dat)) + found_file_dims <- vector('list', length(dat)) + expected_inner_dims <- vector('list', length(dat)) + +#print("A") + for (i in 1:length(dat)) { +#print("B") + dat_selectors <- dim_params + dat_selectors[[found_pattern_dim]] <- dat_selectors[[found_pattern_dim]][i] + dim_vars <- paste0('$', dim_names, '$') + file_dims <- which(sapply(dim_vars, grepl, dat[[i]][['path']], fixed = TRUE)) + if (length(file_dims) > 0) { + file_dims <- dim_names[file_dims] + } + file_dims <- unique(c(pattern_dims, file_dims)) + found_file_dims[[i]] <- file_dims + expected_inner_dims[[i]] <- dim_names[which(!(dim_names %in% file_dims))] + # (Check the depending_file_dims). + if (any(c(names(depending_file_dims), unlist(depending_file_dims)) %in% + expected_inner_dims[[i]])) { + stop(paste0("The dimension dependancies specified in ", + "'depending_file_dims' can only be between file ", + "dimensions, but some inner dimensions found in ", + "dependancies for '", dat[[i]][['name']], "', which ", + "has the following file dimensions: ", + paste(paste0("'", file_dims, "'"), collapse = ', '), ".")) + } else { + a <- names(depending_file_dims) %in% file_dims + b <- unlist(depending_file_dims) %in% file_dims + ab <- a & b + if (any(!ab)) { + .warning(paste0("Detected some dependancies in 'depending_file_dims' with ", + "non-existing dimension names. These will be disregarded.")) + depending_file_dims <- depending_file_dims[-which(!ab)] + } + if (any(names(depending_file_dims) == unlist(depending_file_dims))) { + depending_file_dims <- depending_file_dims[-which(names(depending_file_dims) == unlist(depending_file_dims))] + } + } +print("Depending file dims:") +print(depending_file_dims) + # (Check the inner_dims_across_files). 
+ if (any(!(names(inner_dims_across_files) %in% file_dims)) || + any(!(unlist(inner_dims_across_files) %in% expected_inner_dims[[i]]))) { + stop(paste0("All relationships specified in ", + "'_across' parameters must be between a inner ", + "dimension and a file dimension. Found wrong ", + "specification for '", dat[[i]][['name']], "', which ", + "has the following file dimensions: ", + paste(paste0("'", file_dims, "'"), collapse = ', '), + ", and the following inner dimensions: ", + paste(paste0("'", expected_inner_dims[[i]], "'"), + collapse = ', '), ".")) + } + # (Check the return_vars). + j <- 1 + while (j <= length(return_vars)) { + if (any(!(return_vars[[j]] %in% file_dims))) { + if (any(return_vars[[j]] %in% expected_inner_dims[[i]])) { + stop("Found variables in 'return_vars' requested ", + "for some inner dimensions (for dataset '", + dat[[i]][['name']], "'), but variables can only be ", + "requested for file dimensions.") + } else { + stop("Found variables in 'return_vars' requested ", + "for non-existing dimensions.") + } + } + j <- j + 1 + } + # (Check the metadata_dims). + if (!is.null(metadata_dims)) { + if (any(!(metadata_dims %in% file_dims))) { + stop("All dimensions in 'metadata_dims' must be file dimensions.") + } + } + ## Look for _var params that should be requested automatically. + for (dim_name in dim_names) { + if (!(dim_name %in% pattern_dims)) { + if (is.null(attr(dat_selectors[[dim_name]], 'values')) || + is.null(attr(dat_selectors[[dim_name]], 'indices'))) { + flag <- ((dat_selectors[[dim_name]] %in% c('all', 'first', 'last')) || + (is.numeric(unlist(dat_selectors[[dim_name]])))) + attr(dat_selectors[[dim_name]], 'values') <- !flag + attr(dat_selectors[[dim_name]], 'indices') <- flag + } + ## The following code 'rewrites' var_params for all datasets. If providing different + ## path pattern repositories with different file/inner dimensions, var_params might + ## have to be handled for each dataset separately. 
+ if ((attr(dat_selectors[[dim_name]], 'values') || (dim_name %in% c('var', 'variable'))) && + !(dim_name %in% names(var_params)) && !(dim_name %in% file_dims)) { + if (dim_name %in% c('var', 'variable')) { + var_params <- c(var_params, setNames(list('var_names'), dim_name)) + .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", + dim_name, "_var' requested. ", '"', dim_name, "_var = '", + 'var_names', "'", '"', " has been automatically added to ", + "the Start call.")) + } else { + var_params <- c(var_params, setNames(list(dim_name), dim_name)) + .warning(paste0("Found specified values for dimension '", dim_name, "' but no '", + dim_name, "_var' requested. ", '"', dim_name, "_var = '", + dim_name, "'", '"', " has been automatically added to ", + "the Start call.")) + } + } + } + } + ## (Check the *_var parameters). + if (any(!(unlist(var_params) %in% names(return_vars)))) { + vars_to_add <- which(!(unlist(var_params) %in% names(return_vars))) + new_return_vars <- vector('list', length(vars_to_add)) + names(new_return_vars) <- unlist(var_params)[vars_to_add] + return_vars <- c(return_vars, new_return_vars) + .warning(paste0("All '*_var' params must associate a dimension to one of the ", + "requested variables in 'return_vars'. 
The following variables", + " have been added to 'return_vars': ", + paste(paste0("'", unlist(var_params), "'"), collapse = ', '))) + } + + replace_values <- vector('list', length = length(file_dims)) + names(replace_values) <- file_dims + # Take the first selector for all possible file dimensions + for (file_dim in file_dims) { + if (file_dim %in% names(var_params)) { + .warning(paste0("The '", file_dim, "_var' param will be ignored since '", + file_dim, "' is a file dimension (for the dataset with pattern ", + dat[[i]][['path']], ").")) + } + if (!is.list(dat_selectors[[file_dim]]) || + (is.list(dat_selectors[[file_dim]]) && + length(dat_selectors[[file_dim]]) == 2 && + is.null(names(dat_selectors[[file_dim]])))) { + dat_selectors[[file_dim]] <- list(dat_selectors[[file_dim]]) + } + first_class <- class(dat_selectors[[file_dim]][[1]]) + first_length <- length(dat_selectors[[file_dim]][[1]]) + for (j in 1:length(dat_selectors[[file_dim]])) { + sv <- selector_vector <- dat_selectors[[file_dim]][[j]] + if (!identical(first_class, class(sv)) || + !identical(first_length, length(sv))) { + stop("All provided selectors for depending dimensions must ", + "be vectors of the same length and of the same class.") + } + if (is.character(sv) && !((length(sv) == 1) && (sv[1] %in% c('all', 'first', 'last')))) { + dat_selectors[[file_dim]][[j]] <- selector_checker(selectors = sv, + return_indices = FALSE) + # Take chunk if needed + dat_selectors[[file_dim]][[j]] <- dat_selectors[[file_dim]][[j]][chunk_indices(length(dat_selectors[[file_dim]][[j]]), + chunks[[file_dim]]['chunk'], + chunks[[file_dim]]['n_chunks'], + file_dim)] + } else if (!(is.numeric(sv) || + (is.character(sv) && (length(sv) == 1) && (sv %in% c('all', 'first', 'last'))) || + (is.list(sv) && (length(sv) == 2) && (all(sapply(sv, is.character)) || + all(sapply(sv, is.numeric)))))) { + stop("All explicitly provided selectors for file dimensions must be character strings.") + } + } + sv <- 
dat_selectors[[file_dim]][[1]] + if (is.character(sv) && !((length(sv) == 1) && (sv[1] %in% c('all', 'first', 'last')))) { + replace_values[[file_dim]] <- dat_selectors[[file_dim]][[1]][1] + } + } +#print("C") + # Now we know which dimensions whose selectors are provided non-explicitly. + undefined_file_dims <- file_dims[which(sapply(replace_values, is.null))] + defined_file_dims <- file_dims[which(!(file_dims %in% undefined_file_dims))] + # Quickly check if the depending dimensions are provided properly. + for (file_dim in file_dims) { + if (file_dim %in% names(depending_file_dims)) { + ## TODO: Detect multi-dependancies and forbid. +print(file_dim) +print(depending_file_dims[[file_dim]]) + if (all(c(file_dim, depending_file_dims[[file_dim]]) %in% defined_file_dims)) { + if (length(dat_selectors[[file_dim]]) != length(dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { + stop(paste0("If providing selectors for the depending ", + "dimension '", file_dim, "', a ", + "vector of selectors must be provided for ", + "each selector of the dimension it depends on, '", + depending_file_dims[[file_dim]], "'.")) + } else if (!all(names(dat_selectors[[file_dim]]) == dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { + stop(paste0("If providing selectors for the depending ", + "dimension '", file_dim, "', the name of the ", + "provided vectors of selectors must match ", + "exactly the selectors of the dimension it ", + "depends on, '", depending_file_dims[[file_dim]], "'.")) + } + } + } + } + # Find the possible values for the selectors that are provided as + # indices. If the requested file is on server, impossible operation. + if (length(grep("^http", dat[[i]][['path']])) > 0) { + if (length(undefined_file_dims) > 0) { + stop(paste0("All selectors for the file dimensions must be ", + "character strings if requesting data to a remote ", + "server. 
Found invalid selectors for the file dimensions ", + paste(paste0("'", undefined_file_dims, "'"), collapse = ', '), ".")) + } + dataset_has_files[i] <- TRUE + } else { + dat[[i]][['path']] <- path.expand(dat[[i]][['path']]) + # Iterate over the known dimensions to find the first existing file. + # The path to the first existing file will be used to find the + # values for the non explicitly defined selectors. + first_file <- NULL + first_file_selectors <- NULL + if (length(undefined_file_dims) > 0) { + replace_values[undefined_file_dims] <- '*' + } + ## TODO: What if length of defined_file_dims is 0? code might crash (in practice it worked for an example case) + files_to_check <- sapply(dat_selectors[defined_file_dims], function(x) length(x[[1]])) + sub_array_of_files_to_check <- array(1:prod(files_to_check), dim = files_to_check) + j <- 1 +#print("D") + while (j <= prod(files_to_check) && is.null(first_file)) { + selector_indices <- which(sub_array_of_files_to_check == j, arr.ind = TRUE)[1, ] + selectors <- sapply(1:length(defined_file_dims), + function (x) { + vector_to_pick <- 1 + if (defined_file_dims[x] %in% names(depending_file_dims)) { + vector_to_pick <- selector_indices[which(defined_file_dims == depending_file_dims[[defined_file_dims[x]]])] + } + dat_selectors[defined_file_dims][[x]][[vector_to_pick]][selector_indices[x]] + }) + replace_values[defined_file_dims] <- selectors + file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values) + file_path <- Sys.glob(file_path) + if (length(file_path) > 0) { + first_file <- file_path[1] + first_file_selectors <- selectors + } + j <- j + 1 + } +print("E") + # Start looking for values for the non-explicitly defined selectors. + if (is.null(first_file)) { + .warning(paste0("No found files for the datset '", dat[[i]][['name']], + "'. 
Provide existing selectors for the file dimensions ", + " or check and correct its path pattern: ", dat[[i]][['path']])) + } else { + dataset_has_files[i] <- TRUE + ## TODO: Improve message here if no variable found: + if (length(undefined_file_dims) > 0) { + # Looking for the first values, parsed from first_file. + first_values <- vector('list', length = length(undefined_file_dims)) + names(first_values) <- undefined_file_dims + found_values <- 0 + stop <- FALSE + try_dim <- 1 + last_success <- 1 + while ((found_values < length(undefined_file_dims)) && !stop) { + u_file_dim <- undefined_file_dims[try_dim] + if (is.null(first_values[[u_file_dim]])) { + path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']], + replace_values[-which(file_dims == u_file_dim)], + allow_undefined_key_vars = TRUE) + found_value <- .FindTagValue(path_with_globs_and_tag, + first_file, u_file_dim) + if (!is.null(found_value)) { + found_values <- found_values + 1 + last_success <- try_dim + first_values[[u_file_dim]] <- found_value + replace_values[[u_file_dim]] <- found_value + } + } + try_dim <- (try_dim %% length(undefined_file_dims)) + 1 + if (try_dim == last_success) { + stop <- TRUE + } + } + if (found_values < length(undefined_file_dims)) { + stop(paste0("Path pattern of dataset '", dat[[i]][['name']], + "' is too complex. Could not automatically ", + "detect values for all non-explicitly defined ", + "indices. Check its pattern: ", dat[[i]][['path']])) + } + ## TODO: Replace ReplaceGlobExpressions by looped call to FindTagValue? As done above + ## Maybe it can solve more cases actually. I got warnings in ReplGlobExp with a typical + ## cmor case, requesting all members and chunks for fixed var and sdate. Not fixing + ## sdate raised 'too complex' error. 
+ # Replace shell globs in path pattern and keep the file_dims as tags + dat[[i]][['path']] <- .ReplaceGlobExpressions(dat[[i]][['path']], first_file, replace_values, + file_dims, dat[[i]][['name']], path_glob_permissive) + # Now time to look for the available values for the non + # explicitly defined selectors for the file dimensions. +#print("H") + # Check first the ones that do not depend on others. + ufd <- c(undefined_file_dims[which(!(undefined_file_dims %in% names(depending_file_dims)))], + undefined_file_dims[which(undefined_file_dims %in% names(depending_file_dims))]) + + for (u_file_dim in ufd) { + replace_values[undefined_file_dims] <- first_values + replace_values[[u_file_dim]] <- '*' + depended_dim <- NULL + depended_dim_values <- NA + selectors <- dat_selectors[[u_file_dim]][[1]] + if (u_file_dim %in% names(depending_file_dims)) { + depended_dim <- depending_file_dims[[u_file_dim]] + depended_dim_values <- dat_selectors[[depended_dim]][[1]] + dat_selectors[[u_file_dim]] <- vector('list', length = length(depended_dim_values)) + names(dat_selectors[[u_file_dim]]) <- depended_dim_values + } else { + dat_selectors[[u_file_dim]] <- list() + } + if (u_file_dim %in% unlist(depending_file_dims)) { + depending_dims <- names(depending_file_dims)[which(sapply(depending_file_dims, function(x) u_file_dim %in% x))] + replace_values[depending_dims] <- rep('*', length(depending_dims)) + } + for (j in 1:length(depended_dim_values)) { + parsed_values <- c() + if (!is.null(depended_dim)) { + replace_values[[depended_dim]] <- depended_dim_values[j] + } + path_with_globs <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values) + found_files <- Sys.glob(path_with_globs) + ## TODO: Enhance this error message, or change by warning. + ## Raises if a wrong sdate is specified, for example. 
+ if (length(found_files) == 0) { + .warning(paste0("Could not find files for any '", u_file_dim, + "' for '", depended_dim, "' = '", + depended_dim_values[j], "'.")) + dat_selectors[[u_file_dim]][[j]] <- NA + } else { + for (found_file in found_files) { + path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']], + replace_values[-which(file_dims == u_file_dim)], + allow_undefined_key_vars = TRUE) + parsed_values <- c(parsed_values, + .FindTagValue(path_with_globs_and_tag, found_file, + u_file_dim)) + } + dat_selectors[[u_file_dim]][[j]] <- selector_checker(selectors = selectors, + var = unique(parsed_values), + return_indices = FALSE) + # Take chunk if needed + dat_selectors[[u_file_dim]][[j]] <- dat_selectors[[u_file_dim]][[j]][chunk_indices(length(dat_selectors[[u_file_dim]][[j]]), + chunks[[u_file_dim]]['chunk'], + chunks[[u_file_dim]]['n_chunks'], + u_file_dim)] + } + } + } +#print("I") + } else { + dat[[i]][['path']] <- .ReplaceGlobExpressions(dat[[i]][['path']], first_file, replace_values, + defined_file_dims, dat[[i]][['name']], path_glob_permissive) + } + } + } + # Now fetch for the first available file + if (dataset_has_files[i]) { + known_dims <- file_dims + } else { + known_dims <- defined_file_dims + } + replace_values <- vector('list', length = length(known_dims)) + names(replace_values) <- known_dims + files_to_load <- sapply(dat_selectors[known_dims], function(x) length(x[[1]])) + files_to_load[found_pattern_dim] <- 1 + sub_array_of_files_to_load <- array(1:prod(files_to_load), + dim = files_to_load) + names(dim(sub_array_of_files_to_load)) <- known_dims + sub_array_of_not_found_files <- array(!dataset_has_files[i], + dim = files_to_load) + names(dim(sub_array_of_not_found_files)) <- known_dims + j <- 1 + selector_indices_save <- vector('list', prod(files_to_load)) + while (j <= prod(files_to_load)) { + selector_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ] + names(selector_indices) <- known_dims + 
selector_indices_save[[j]] <- selector_indices + selectors <- sapply(1:length(known_dims), + function (x) { + vector_to_pick <- 1 + if (known_dims[x] %in% names(depending_file_dims)) { + vector_to_pick <- selector_indices[which(known_dims == depending_file_dims[[known_dims[x]]])] + } + dat_selectors[known_dims][[x]][[vector_to_pick]][selector_indices[x]] + }) + names(selectors) <- known_dims + replace_values[known_dims] <- selectors + if (!dataset_has_files[i]) { + if (any(is.na(selectors))) { + replace_values <- replace_values[-which(names(replace_values) %in% names(selectors[which(is.na(selectors))]))] + } + file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values, TRUE) + sub_array_of_files_to_load[j] <- file_path + #sub_array_of_not_found_files[j] <- TRUE??? + } else { + if (any(is.na(selectors))) { + replace_values <- replace_values[-which(names(replace_values) %in% names(selectors[which(is.na(selectors))]))] + file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values, TRUE) + sub_array_of_files_to_load[j] <- file_path + sub_array_of_not_found_files[j] <- TRUE + } else { + file_path <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values) + if (!(length(grep("^http", file_path)) > 0)) { + if (grepl(file_path, '*', fixed = TRUE)) { + file_path_full <- Sys.glob(file_path)[1] + if (nchar(file_path_full) > 0) { + file_path <- file_path_full + } + } + } + sub_array_of_files_to_load[j] <- file_path + if (is.null(indices_of_first_files_with_data[[i]])) { + if (!(length(grep("^http", file_path)) > 0)) { + if (!file.exists(file_path)) { + file_path <- NULL + } + } + if (!is.null(file_path)) { + test_file <- NULL + ## TODO: suppress error messages + test_file <- file_opener(file_path) + if (!is.null(test_file)) { + selector_indices[which(known_dims == found_pattern_dim)] <- i + indices_of_first_files_with_data[[i]] <- selector_indices + selectors_of_first_files_with_data[[i]] <- selectors + file_closer(test_file) + } + } + } + } 
+ } + j <- j + 1 + } + # Extend array as needed progressively + if (is.null(array_of_files_to_load)) { + array_of_files_to_load <- sub_array_of_files_to_load + array_of_not_found_files <- sub_array_of_not_found_files + } else { + array_of_files_to_load <- .MergeArrays(array_of_files_to_load, sub_array_of_files_to_load, + along = found_pattern_dim) + ## TODO: file_dims, and variables like that.. are still ok now? I don't think so + array_of_not_found_files <- .MergeArrays(array_of_not_found_files, sub_array_of_not_found_files, + along = found_pattern_dim) + } + dat[[i]][['selectors']] <- dat_selectors + } + if (all(sapply(indices_of_first_files_with_data, is.null))) { + stop("No data files found for any of the specified datasets.") + } + + ########################### READING INNER DIMS. ############################# +#print("J") + ## TODO: To be run in parallel (local multi-core) + # Now time to work out the inner file dimensions. + # First pick the requested variables. + dims_to_iterate <- NULL + for (return_var in names(return_vars)) { + dims_to_iterate <- unique(c(dims_to_iterate, return_vars[[return_var]])) + } + if (found_pattern_dim %in% dims_to_iterate) { + dims_to_iterate <- dims_to_iterate[-which(dims_to_iterate == found_pattern_dim)] + } + common_return_vars <- NULL + common_first_found_file <- NULL + common_return_vars_pos <- NULL + if (length(return_vars) > 0) { + common_return_vars_pos <- which(sapply(return_vars, function(x) !(found_pattern_dim %in% x))) + } + if (length(common_return_vars_pos) > 0) { + common_return_vars <- return_vars[common_return_vars_pos] + return_vars <- return_vars[-common_return_vars_pos] + common_first_found_file <- rep(FALSE, length(which(sapply(common_return_vars, length) == 0))) + names(common_first_found_file) <- names(common_return_vars[which(sapply(common_return_vars, length) == 0)]) + } + return_vars <- lapply(return_vars, + function(x) { + if (found_pattern_dim %in% x) { + x[-which(x == found_pattern_dim)] + } else { + 
x + } + }) + if (length(common_return_vars) > 0) { + picked_common_vars <- vector('list', length = length(common_return_vars)) + names(picked_common_vars) <- names(common_return_vars) + } else { + picked_common_vars <- NULL + } + picked_common_vars_ordered <- picked_common_vars + picked_common_vars_unorder_indices <- picked_common_vars + picked_vars <- vector('list', length = length(dat)) + names(picked_vars) <- dat_names + picked_vars_ordered <- picked_vars + picked_vars_unorder_indices <- picked_vars + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + # Put all selectors in a list of a single list/vector of selectors. + # The dimensions that go across files will later be extended to have + # lists of lists/vectors of selectors. + for (inner_dim in expected_inner_dims[[i]]) { + if (!is.list(dat[[i]][['selectors']][[inner_dim]]) || + (is.list(dat[[i]][['selectors']][[inner_dim]]) && + length(dat[[i]][['selectors']][[inner_dim]]) == 2 && + is.null(names(dat[[i]][['selectors']][[inner_dim]])))) { + dat[[i]][['selectors']][[inner_dim]] <- list(dat[[i]][['selectors']][[inner_dim]]) + } + } + if (length(return_vars) > 0) { + picked_vars[[i]] <- vector('list', length = length(return_vars)) + names(picked_vars[[i]]) <- names(return_vars) + picked_vars_ordered[[i]] <- picked_vars[[i]] + picked_vars_unorder_indices[[i]] <- picked_vars[[i]] + } + indices_of_first_file <- as.list(indices_of_first_files_with_data[[i]]) + array_file_dims <- sapply(dat[[i]][['selectors']][found_file_dims[[i]]], function(x) length(x[[1]])) + names(array_file_dims) <- found_file_dims[[i]] + if (length(dims_to_iterate) > 0) { + indices_of_first_file[dims_to_iterate] <- lapply(array_file_dims[dims_to_iterate], function(x) 1:x) + } + array_of_var_files <- do.call('[', c(list(x = array_of_files_to_load), indices_of_first_file, list(drop = FALSE))) + array_of_var_indices <- array(1:length(array_of_var_files), dim = dim(array_of_var_files)) + array_of_not_found_var_files <- do.call('[', 
c(list(x = array_of_not_found_files), indices_of_first_file, list(drop = FALSE))) + previous_indices <- rep(-1, length(indices_of_first_file)) + names(previous_indices) <- names(indices_of_first_file) + first_found_file <- NULL + if (length(return_vars) > 0) { + first_found_file <- rep(FALSE, length(which(sapply(return_vars, length) == 0))) + names(first_found_file) <- names(return_vars[which(sapply(return_vars, length) == 0)]) + } + for (j in 1:length(array_of_var_files)) { + current_indices <- which(array_of_var_indices == j, arr.ind = TRUE)[1, ] + names(current_indices) <- names(indices_of_first_file) + if (!is.na(array_of_var_files[j]) && !array_of_not_found_var_files[j]) { + changed_dims <- which(current_indices != previous_indices) + vars_to_read <- NULL + if (length(return_vars) > 0) { + vars_to_read <- names(return_vars)[sapply(return_vars, function(x) any(names(changed_dims) %in% x))] + } + if (!is.null(first_found_file)) { + if (any(!first_found_file)) { + vars_to_read <- c(vars_to_read, names(first_found_file[which(!first_found_file)])) + } + } + if ((i == 1) && (length(common_return_vars) > 0)) { + vars_to_read <- c(vars_to_read, names(common_return_vars)[sapply(common_return_vars, function(x) any(names(changed_dims) %in% x))]) + } + if (!is.null(common_first_found_file)) { + if (any(!common_first_found_file)) { + vars_to_read <- c(vars_to_read, names(common_first_found_file[which(!common_first_found_file)])) + } + } + file_object <- file_opener(array_of_var_files[j]) + if (!is.null(file_object)) { + for (var_to_read in vars_to_read) { + if (var_to_read %in% unlist(var_params)) { + associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)] + } + var_name_to_reader <- var_to_read + names(var_name_to_reader) <- 'var' + var_dims <- file_dim_reader(NULL, file_object, var_name_to_reader, NULL, + synonims) + # file_dim_reader returns dimension names as found in the file. 
+ # Need to translate accoridng to synonims: + names(var_dims) <- sapply(names(var_dims), + function(x) { + which_entry <- which(sapply(synonims, function(y) x %in% y)) + if (length(which_entry) > 0) { + names(synonims)[which_entry] + } else { + x + } + }) + if (!is.null(var_dims)) { + var_file_dims <- NULL + if (var_to_read %in% names(common_return_vars)) { + var_to_check <- common_return_vars[[var_to_read]] + } else { + var_to_check <- return_vars[[var_to_read]] + } + if (any(names(dim(array_of_files_to_load)) %in% var_to_check)) { + var_file_dims <- dim(array_of_files_to_load)[which(names(dim(array_of_files_to_load)) %in% + var_to_check)] + } + if (((var_to_read %in% names(common_return_vars)) && + is.null(picked_common_vars[[var_to_read]])) || + ((var_to_read %in% names(return_vars)) && + is.null(picked_vars[[i]][[var_to_read]]))) { + if (any(names(var_file_dims) %in% names(var_dims))) { + stop("Found a requested var in 'return_var' requested for a ", + "file dimension which also appears in the dimensions of ", + "the variable inside the file.\n", array_of_var_files[j]) + } + special_types <- list('POSIXct' = as.POSIXct, 'POSIXlt' = as.POSIXlt, + 'Date' = as.Date) + first_sample <- file_var_reader(NULL, file_object, NULL, + var_to_read, synonims) + if (any(class(first_sample) %in% names(special_types))) { + array_size <- prod(c(var_file_dims, var_dims)) + new_array <- rep(special_types[[class(first_sample)[1]]](NA), array_size) + dim(new_array) <- c(var_file_dims, var_dims) + } else { + new_array <- array(dim = c(var_file_dims, var_dims)) + } + attr(new_array, 'variables') <- attr(first_sample, 'variables') + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars[[var_to_read]] <- new_array + pick_ordered <- FALSE + if (var_to_read %in% unlist(var_params)) { + if (associated_dim_name %in% names(dim_reorder_param) && !aiat) { + picked_common_vars_ordered[[var_to_read]] <- new_array + pick_ordered <- TRUE + } + } + if (!pick_ordered) { + 
picked_common_vars_ordered[[var_to_read]] <- NULL + } + } else { + picked_vars[[i]][[var_to_read]] <- new_array + pick_ordered <- FALSE + if (var_to_read %in% unlist(var_params)) { + if (associated_dim_name %in% names(dim_reorder_params) && !aiat) { + picked_vars_ordered[[i]][[var_to_read]] <- new_array + pick_ordered <- TRUE + } + } + if (!pick_ordered) { + picked_vars_ordered[[i]][[var_to_read]] <- NULL + } + } + } else { + if (var_to_read %in% names(common_return_vars)) { + array_var_dims <- dim(picked_common_vars[[var_to_read]]) + } else { + array_var_dims <- dim(picked_vars[[i]][[var_to_read]]) + } + full_array_var_dims <- array_var_dims + if (any(names(array_var_dims) %in% names(var_file_dims))) { + array_var_dims <- array_var_dims[-which(names(array_var_dims) %in% names(var_file_dims))] + } + if (names(array_var_dims) != names(var_dims)) { + stop("Error while reading the variable '", var_to_read, "' from ", + "the file. Dimensions do not match.\nExpected ", + paste(paste0("'", names(array_var_dims), "'"), + collapse = ', '), " but found ", + paste(paste0("'", names(var_dims), "'"), + collapse = ', '), ".\n", array_of_var_files[j]) + } + if (any(var_dims > array_var_dims)) { + longer_dims <- which(var_dims > array_var_dims) + if (length(longer_dims) == 1) { + longer_dims_in_full_array <- longer_dims + if (any(names(full_array_var_dims) %in% names(var_file_dims))) { + candidates <- (1:length(full_array_var_dims))[-which(names(full_array_var_dims) %in% names(var_file_dims))] + longer_dims_in_full_array <- candidates[longer_dims] + } + padding_dims <- full_array_var_dims + padding_dims[longer_dims_in_full_array] <- var_dims[longer_dims] - + array_var_dims[longer_dims] + special_types <- list('POSIXct' = as.POSIXct, 'POSIXlt' = as.POSIXlt, + 'Date' = as.Date) + if (var_to_read %in% names(common_return_vars)) { + var_class <- class(picked_common_vars[[var_to_read]]) + } else { + var_class <- class(picked_vars[[i]][[var_to_read]]) + } + if (any(var_class %in% 
names(special_types))) { + padding_size <- prod(padding_dims) + padding <- rep(special_types[[var_class[1]]](NA), padding_size) + dim(padding) <- padding_dims + } else { + padding <- array(dim = padding_dims) + } + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars[[var_to_read]] <- .abind2( + picked_common_vars[[var_to_read]], + padding, + names(full_array_var_dims)[longer_dims_in_full_array] + ) + } else { + picked_vars[[i]][[var_to_read]] <- .abind2( + picked_vars[[i]][[var_to_read]], + padding, + names(full_array_var_dims)[longer_dims_in_full_array] + ) + } + } else { + stop("Error while reading the variable '", var_to_read, "' from ", + "the file. Found size (", paste(var_dims, collapse = ' x '), + ") is greater than expected maximum size (", + array_var_dims, ").") + } + } + } + var_store_indices <- c(as.list(current_indices[names(var_file_dims)]), lapply(var_dims, function(x) 1:x)) + var_values <- file_var_reader(NULL, file_object, NULL, var_to_read, synonims) + if (var_to_read %in% unlist(var_params)) { + if ((associated_dim_name %in% names(dim_reorder_params)) && !aiat) { + ## Is this check really needed? + if (length(dim(var_values)) > 1) { + stop("Requested a '", associated_dim_name, "_reorder' for a dimension ", + "whose coordinate variable that has more than 1 dimension. This is ", + "not supported.") + } + ordered_var_values <- dim_reorder_params[[associated_dim_name]](var_values) + attr(ordered_var_values$x, 'variables') <- attr(var_values, 'variables') + if (!all(c('x', 'ix') %in% names(ordered_var_values))) { + stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.") + } + # Save the indices to reorder back the ordered variable values. + # This will be used to define the first round indices. 
+ unorder <- sort(ordered_var_values$ix, index.return = TRUE)$ix + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars_ordered[[var_to_read]] <- do.call('[<-', + c(list(x = picked_common_vars_ordered[[var_to_read]]), + var_store_indices, + list(value = ordered_var_values$x))) + picked_common_vars_unorder_indices[[var_to_read]] <- do.call('[<-', + c(list(x = picked_common_vars_unorder_indices[[var_to_read]]), + var_store_indices, + list(value = unorder))) + } else { + picked_vars_ordered[[i]][[var_to_read]] <- do.call('[<-', + c(list(x = picked_vars_ordered[[i]][[var_to_read]]), + var_store_indices, + list(value = ordered_var_values$x))) + picked_vars_unorder_indices[[i]][[var_to_read]] <- do.call('[<-', + c(list(x = picked_vars_unorder_indices[[i]][[var_to_read]]), + var_store_indices, + list(value = unorder))) + } + } + } + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars[[var_to_read]] <- do.call('[<-', + c(list(x = picked_common_vars[[var_to_read]]), + var_store_indices, + list(value = var_values))) + } else { + picked_vars[[i]][[var_to_read]] <- do.call('[<-', + c(list(x = picked_vars[[i]][[var_to_read]]), + var_store_indices, + list(value = var_values))) + } + if (var_to_read %in% names(first_found_file)) { + first_found_file[var_to_read] <- TRUE + } + if (var_to_read %in% names(common_first_found_file)) { + common_first_found_file[var_to_read] <- TRUE + } + } else { + stop("Could not find variable '", var_to_read, + "' in the file ", array_of_var_files[j]) + } + } + file_closer(file_object) + } + } + previous_indices <- current_indices + } + } + } + # Once we have the variable values, we can work out the indices + # for the implicitly defined selectors. + # + # Trnasforms a vector of indices v expressed in a world of + # length N from 1 to N, into a world of length M, from + # 1 to M. Repeated adjacent indices are collapsed. + transform_indices <- function(v, n, m) { + #unique2 turns e.g. 
1 1 2 2 2 3 3 1 1 1 into 1 2 3 1 + unique2 <- function(v) { + if (length(v) < 2) { + v + } else { + v[c(1, v[2:length(v)] - v[1:(length(v) - 1)]) != 0] + } + } + unique2(round(((v - 1) / (n - 1)) * (m - 1))) + 1 # this rounding may generate 0s. what then? + } + beta <- transform_extra_cells + dims_to_crop <- vector('list') + transformed_vars <- vector('list', length = length(dat)) + names(transformed_vars) <- dat_names + transformed_vars_ordered <- transformed_vars + transformed_vars_unorder_indices <- transformed_vars + transformed_common_vars <- NULL + transformed_common_vars_ordered <- NULL + transformed_common_vars_unorder_indices <- NULL + + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + indices <- indices_of_first_files_with_data[[i]] + if (!is.null(indices)) { + file_path <- do.call("[", c(list(array_of_files_to_load), as.list(indices_of_first_files_with_data[[i]]))) + # The following 5 lines should go several lines below, but were moved + # here for better performance. + # If any of the dimensions comes without defining variable, then we read + # the data dimensions. + data_dims <- NULL + if (length(unlist(var_params[expected_inner_dims[[i]]])) < length(expected_inner_dims[[i]])) { + file_to_open <- file_path + data_dims <- file_dim_reader(file_to_open, NULL, selectors_of_first_files_with_data[[i]], + lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1), + synonims) + # file_dim_reader returns dimension names as found in the file. + # Need to translate accoridng to synonims: + names(data_dims) <- sapply(names(data_dims), + function(x) { + which_entry <- which(sapply(synonims, function(y) x %in% y)) + if (length(which_entry) > 0) { + names(synonims)[which_entry] + } else { + x + } + }) + } + # Transform the variables if needed and keep them apart. 
+ if (!is.null(transform) && (length(transform_vars) > 0)) { + if (!all(transform_vars %in% c(names(picked_vars[[i]]), names(picked_common_vars)))) { + stop("Could not find all the required variables in 'transform_vars' ", + "for the dataset '", dat[[i]][['name']], "'.") + } + vars_to_transform <- NULL + picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars) + if (length(picked_vars_to_transform) > 0) { + picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform] + new_vars_to_transform <- picked_vars[[i]][picked_vars_to_transform] + which_are_ordered <- which(!sapply(picked_vars_ordered[[i]][picked_vars_to_transform], is.null)) + +##NOTE: The following 'if' replaces the original with reordering vector + if (length(which_are_ordered) > 0) { + tmp <- which(!is.na(match(names(picked_vars_ordered[[i]]), names(which_are_ordered)))) + new_vars_to_transform[which_are_ordered] <- picked_vars_ordered[[i]][tmp] + + } + vars_to_transform <- c(vars_to_transform, new_vars_to_transform) + } + +##NOTE: Above is non-common vars, here is common vars (ie, return_vars = NULL). 
+ picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars) + if (length(picked_common_vars_to_transform) > 0) { + picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform] + + new_vars_to_transform <- picked_common_vars[picked_common_vars_to_transform] + which_are_ordered <- which(!sapply(picked_common_vars_ordered[picked_common_vars_to_transform], is.null)) + + if (length(which_are_ordered) > 0) { + + tmp <- which(!is.na(match(names(picked_common_vars_ordered), names(which_are_ordered)))) + new_vars_to_transform[which_are_ordered] <- picked_common_vars_ordered[tmp] + } + vars_to_transform <- c(vars_to_transform, new_vars_to_transform) + } + + # Transform the variables + transformed_data <- do.call(transform, c(list(data_array = NULL, + variables = vars_to_transform, + file_selectors = selectors_of_first_files_with_data[[i]]), + transform_params)) + # Discard the common transformed variables if already transformed before + if (!is.null(transformed_common_vars)) { + common_ones <- which(names(picked_common_vars) %in% names(transformed_data$variables)) + if (length(common_ones) > 0) { + transformed_data$variables <- transformed_data$variables[-common_ones] + } + } + transformed_vars[[i]] <- list() + transformed_vars_ordered[[i]] <- list() + transformed_vars_unorder_indices[[i]] <- list() + # Order the transformed variables if needed + # 'var_to_read' should be 'transformed_var', but is kept to reuse the same code as above. + for (var_to_read in names(transformed_data$variables)) { + if (var_to_read %in% unlist(var_params)) { + associated_dim_name <- names(var_params)[which(unlist(var_params) == var_to_read)] + if ((associated_dim_name %in% names(dim_reorder_params)) && aiat) { + ## Is this check really needed? 
+ if (length(dim(transformed_data$variables[[associated_dim_name]])) > 1) { + stop("Requested a '", associated_dim_name, "_reorder' for a dimension ", + "whose coordinate variable that has more than 1 dimension (after ", + "transform). This is not supported.") + } + ordered_var_values <- dim_reorder_params[[associated_dim_name]](transformed_data$variables[[associated_dim_name]]) + attr(ordered_var_values, 'variables') <- attr(transformed_data$variables[[associated_dim_name]], 'variables') + if (!all(c('x', 'ix') %in% names(ordered_var_values))) { + stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.") + } + # Save the indices to reorder back the ordered variable values. + # This will be used to define the first round indices. + unorder <- sort(ordered_var_values$ix, index.return = TRUE)$ix + if (var_to_read %in% names(picked_common_vars)) { + transformed_common_vars_ordered[[var_to_read]] <- ordered_var_values$x + transformed_common_vars_unorder_indices[[var_to_read]] <- unorder + } else { + transformed_vars_ordered[[i]][[var_to_read]] <- ordered_var_values$x + transformed_vars_unorder_indices[[i]][[var_to_read]] <- unorder + } + } + } + } + transformed_picked_vars <- which(names(picked_vars[[i]]) %in% names(transformed_data$variables)) + if (length(transformed_picked_vars) > 0) { + transformed_picked_vars <- names(picked_vars[[i]])[transformed_picked_vars] + transformed_vars[[i]][transformed_picked_vars] <- transformed_data$variables[transformed_picked_vars] + } + if (is.null(transformed_common_vars)) { + transformed_picked_common_vars <- which(names(picked_common_vars) %in% names(transformed_data$variables)) + if (length(transformed_picked_common_vars) > 0) { + transformed_picked_common_vars <- names(picked_common_vars)[transformed_picked_common_vars] + transformed_common_vars <- transformed_data$variables[transformed_picked_common_vars] + } + } + } + # Once the variables are transformed, we compute the indices to be + # 
taken for each inner dimension. + # In all cases, indices will have to be computed to know which data + # values to take from the original data for each dimension (if a + # variable is specified for that dimension, it will be used to + # convert the provided selectors into indices). These indices are + # referred to as 'first round of indices'. + # The taken data will then be transformed if needed, together with + # the dimension variable if specified, and, in that case, indices + # will have to be computed again to know which values to take from the + # transformed data. These are the 'second round of indices'. In the + # case there is no transformation, the second round of indices will + # be all the available indices, i.e. from 1 to the number of taken + # values with the first round of indices. + for (inner_dim in expected_inner_dims[[i]]) { +if (debug) { +print("-> DEFINING INDICES FOR INNER DIMENSION:") +print(inner_dim) +} + file_dim <- NULL + if (inner_dim %in% unlist(inner_dims_across_files)) { + file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]] + chunk_amount <- length(dat[[i]][['selectors']][[file_dim]][[1]]) + names(chunk_amount) <- file_dim + } else { + chunk_amount <- 1 + } + # In the special case that the selectors for a dimension are 'all', 'first', ... + # and chunking (dividing in more than 1 chunk) is requested, the selectors are + # replaced for equivalent indices. 
+ if ((dat[[i]][['selectors']][[inner_dim]][[1]] %in% c('all', 'first', 'last')) && + (chunks[[inner_dim]]['n_chunks'] != 1)) { + selectors <- dat[[i]][['selectors']][[inner_dim]][[1]] + if (selectors == 'all') { + selectors <- indices(1:(data_dims[[inner_dim]] * chunk_amount)) + } else if (selectors == 'first') { + selectors <- indices(1) + } else { + selectors <- indices(data_dims[[inner_dim]] * chunk_amount) + } + dat[[i]][['selectors']][[inner_dim]][[1]] <- selectors + } + # The selectors for the inner dimension are taken. + selector_array <- dat[[i]][['selectors']][[inner_dim]][[1]] +if (debug) { +if (inner_dim %in% dims_to_check) { +print(paste0("-> DEBUG MESSAGES FOR THE DATASET", i, " AND INNER DIMENSION '", inner_dim, "':")) +print("-> STRUCTURE OF SELECTOR ARRAY:") +print(str(selector_array)) +print("-> PICKED VARS:") +print(picked_vars) +print("-> TRANSFORMED VARS:") +print(transformed_vars) +} +} + if (is.null(dim(selector_array))) { + dim(selector_array) <- length(selector_array) + } + if (is.null(names(dim(selector_array)))) { + if (length(dim(selector_array)) == 1) { + names(dim(selector_array)) <- inner_dim + } else { + stop("Provided selector arrays must be provided with dimension ", + "names. 
Found an array of selectors without dimension names ", + "for the dimension '", inner_dim, "'.") + } + } + selectors_are_indices <- FALSE + if (!is.null(attr(selector_array, 'indices'))) { + if (!is.logical(attr(selector_array, 'indices'))) { + stop("The atribute 'indices' for the selectors for the dimension '", + inner_dim, "' must be TRUE or FALSE.") + } + selectors_are_indices <- attr(selector_array, 'indices') + } + taken_chunks <- rep(FALSE, chunk_amount) + selector_file_dims <- 1 + if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) { + selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])] + } + selector_inner_dims <- dim(selector_array)[which(!(names(dim(selector_array)) %in% found_file_dims[[i]]))] + var_with_selectors <- NULL + var_with_selectors_name <- var_params[[inner_dim]] + var_ordered <- NULL + var_unorder_indices <- NULL + with_transform <- FALSE + # If the selectors come with an associated variable + if (!is.null(var_with_selectors_name)) { + if ((var_with_selectors_name %in% transform_vars) && (!is.null(transform))) { + with_transform <- TRUE + if (!is.null(file_dim)) { + stop("Requested a transformation over the dimension '", + inner_dim, "', wich goes across files. This feature ", + "is not supported. 
Either do the request without the ", + "transformation or request it over dimensions that do ", + "not go across files.") + } + } +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> NAME OF THE VARIABLE WITH THE SELECTOR VALUES FOR THE CURRENT INNER DIMENSION:") +print(var_with_selectors_name) +print("-> NAMES OF THE VARIABLES TO BE TRANSFORMED:") +print(transform_vars) +print("-> STRUCTURE OF THE TRANSFORMATION FUNCTION:") +print(str(transform)) +} +} + if (var_with_selectors_name %in% names(picked_vars[[i]])) { + var_with_selectors <- picked_vars[[i]][[var_with_selectors_name]] + var_ordered <- picked_vars_ordered[[i]][[var_with_selectors_name]] + var_unorder_indices <- picked_vars_unorder_indices[[i]][[var_with_selectors_name]] + } else if (var_with_selectors_name %in% names(picked_common_vars)) { + var_with_selectors <- picked_common_vars[[var_with_selectors_name]] + var_ordered <- picked_common_vars_ordered[[var_with_selectors_name]] + var_unorder_indices <- picked_common_vars_unorder_indices[[var_with_selectors_name]] + } + n <- prod(dim(var_with_selectors)) + if (is.null(var_unorder_indices)) { + var_unorder_indices <- 1:n + } + if (with_transform) { + if (var_with_selectors_name %in% names(transformed_vars[[i]])) { + m <- prod(dim(transformed_vars[[i]][[var_with_selectors_name]])) + if (aiat) { + var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]] + var_ordered <- transformed_vars_ordered[[i]][[var_with_selectors_name]] + var_unorder_indices <- transformed_vars_unorder_indices[[i]][[var_with_selectors_name]] + } + } else if (var_with_selectors_name %in% names(transformed_common_vars)) { + m <- prod(dim(transformed_common_vars[[var_with_selectors_name]])) + if (aiat) { + var_with_selectors <- transformed_common_vars[[var_with_selectors_name]] + var_ordered <- transformed_common_vars_ordered[[var_with_selectors_name]] + var_unorder_indices <- transformed_common_vars_unorder_indices[[var_with_selectors_name]] + } + } + if 
(is.null(var_unorder_indices)) { + var_unorder_indices <- 1:m + } + } +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> SIZE OF ORIGINAL VARIABLE:") +print(n) +print("-> SIZE OF TRANSFORMED VARIABLE:") +if (with_transform) print(m) +print("-> STRUCTURE OF ORDERED VAR:") +print(str(var_ordered)) +print("-> UNORDER INDICES:") +print(var_unorder_indices) +} +} + var_dims <- dim(var_with_selectors) + var_file_dims <- 1 + if (any(names(var_dims) %in% found_file_dims[[i]])) { + if (with_transform) { + stop("Requested transformation for inner dimension '", + inner_dim, "' but provided selectors for such dimension ", + "over one or more file dimensions. This is not ", + "supported. Either request no transformation for the ", + "dimension '", inner_dim, "' or specify the ", + "selectors for this dimension without the file dimensions.") + } + var_file_dims <- var_dims[which(names(var_dims) %in% found_file_dims[[i]])] + var_dims <- var_dims[-which(names(var_dims) %in% found_file_dims[[i]])] + } +## # Keep the selectors if they correspond to a variable that will be transformed. 
+## if (with_transform) { +## if (var_with_selectors_name %in% names(picked_vars[[i]])) { +## transformed_var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]] +## } else if (var_with_selectors_name %in% names(picked_common_vars)) { +## transformed_var_with_selectors <- transformed_common_vars[[var_with_selectors_name]] +## } +## transformed_var_dims <- dim(transformed_var_with_selectors) +## transformed_var_file_dims <- 1 +## if (any(names(transformed_var_dims) %in% found_file_dims[[i]])) { +## transformed_var_file_dims <- transformed_var_dims[which(names(transformed_var_dims) %in% found_file_dims[[i]])] +## transformed_var_dims <- tranasformed_var_dims[-which(names(transformed_var_dims) %in% found_file_dims[[i]])] +## } +##if (inner_dim %in% dims_to_check) { +##print("111m") +##print(str(transformed_var_dims)) +##} +## +## m <- prod(transformed_var_dims) +## } + # Work out var file dims and inner dims. + if (inner_dim %in% unlist(inner_dims_across_files)) { + #TODO: if (chunk_amount != number of chunks in selector_file_dims), crash + if (length(var_dims) > 1) { + stop("Specified a '", inner_dim, "_var' for the dimension '", + inner_dim, "', which goes across files (across '", file_dim, + "'). The specified variable, '", var_with_selectors_name, "', has more ", + "than one dimension and can not be used as selector variable. ", + "Select another variable or fix it in the files.") + } + } + ## TODO HERE:: + #- indices_of_first_files_with_data may change, because array is now extended + var_full_dims <- dim(var_with_selectors) + if (!(inner_dim %in% names(var_full_dims))) { + stop("Could not find the dimension '", inner_dim, "' in ", + "the file. 
Either change the dimension name in ", + "your request, adjust the parameter ", + "'dim_names_in_files' or fix the dimension name in ", + "the file.\n", file_path) + } + } else if (((is.numeric(selector_array) || is.list(selector_array)) && selectors_are_indices) || + (is.character(selector_array) && (length(selector_array) == 1) && + (selector_array %in% c('all', 'first', 'last')) && + !is.null(file_dim_reader))) { + #### TODO HERE:: + ###- indices_of_first_files_with_data may change, because array is now extended + # Lines moved above for better performance. + ##data_dims <- file_dim_reader(file_path, NULL, selectors_of_first_files_with_data[[i]], + ## lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1)) + if (!(inner_dim %in% names(data_dims))) { + stop("Could not find the dimension '", inner_dim, "' in ", + "the file. Either change the dimension name in ", + "your request, adjust the parameter ", + "'dim_names_in_files' or fix the dimension name in ", + "the file.\n", file_path) + } + } else { + stop(paste0("Can not translate the provided selectors for '", inner_dim, + "' to numeric indices. Provide numeric indices and a ", + "'file_dim_reader' function, or a '", inner_dim, + "_var' in order to calculate the indices.")) + } + # At this point, if no selector variable was provided, the variable + # data_dims has been populated. If a selector variable was provided, + # the variables var_dims, var_file_dims and var_full_dims have been + # populated instead. + fri <- first_round_indices <- NULL + sri <- second_round_indices <- NULL + # This variable will keep the indices needed to crop the transformed + # variable (the one that has been transformed without being subset + # with the first round indices). 
+ tvi <- tranaformed_variable_indices <- NULL + ordered_fri <- NULL + ordered_sri <- NULL + if ((length(selector_array) == 1) && is.character(selector_array) && + (selector_array %in% c('all', 'first', 'last')) && + (chunks[[inner_dim]]['n_chunks'] == 1)) { + if (is.null(var_with_selectors_name)) { + fri <- vector('list', length = chunk_amount) + dim(fri) <- c(chunk_amount) + sri <- vector('list', length = chunk_amount) + dim(sri) <- c(chunk_amount) + if (selector_array == 'all') { + fri[] <- replicate(chunk_amount, list(1:(data_dims[inner_dim]))) + taken_chunks <- rep(TRUE, chunk_amount) + #sri <- NULL + } else if (selector_array == 'first') { + fri[[1]] <- 1 + taken_chunks[1] <- TRUE + #sri <- NULL + } else if (selector_array == 'last') { + fri[[chunk_amount]] <- data_dims[inner_dim] + taken_chunks[length(taken_chunks)] <- TRUE + #sri <- NULL + } + } else { + if ((!is.null(file_dim)) && !(file_dim %in% names(var_file_dims))) { + stop("The variable '", var_with_selectors_name, "' must also be ", + "requested for the file dimension '", file_dim, "' in ", + "this configuration.") + } + fri <- vector('list', length = prod(var_file_dims)) + dim(fri) <- var_file_dims + ordered_fri <- fri + sri <- vector('list', length = prod(var_file_dims)) + dim(sri) <- var_file_dims + ordered_sri <- sri + if (selector_array == 'all') { +# TODO: Populate ordered_fri + ordered_fri[] <- replicate(prod(var_file_dims), list(1:n)) + fri[] <- replicate(prod(var_file_dims), list(var_unorder_indices[1:n])) + taken_chunks <- rep(TRUE, chunk_amount) + if (!with_transform) { + #fri[] <- replicate(prod(var_file_dims), list(1:n)) + #taken_chunks <- rep(TRUE, chunk_amount) + #sri <- NULL + } else { + ordered_sri[] <- replicate(prod(var_file_dims), list(1:m)) + sri[] <- replicate(prod(var_file_dims), list(1:m)) + ## var_file_dims instead?? 
+ #if (!aiat) { + #fri[] <- replicate(prod(var_file_dims), list(1:n)) + #taken_chunks <- rep(TRUE, chunk_amount) + #sri[] <- replicate(prod(transformed_var_file_dims), list(1:m)) + #} else { + #fri[] <- replicate(prod(var_file_dims), list(1:n)) + #taken_chunks <- rep(TRUE, chunk_amount) + #sri[] <- replicate(prod(transformed_var_file_dims), list(1:m)) + #} + tvi <- 1:m + } + } else if (selector_array == 'first') { + taken_chunks[1] <- TRUE + if (!with_transform) { + ordered_fri[[1]] <- 1 + fri[[1]] <- var_unorder_indices[1] + #taken_chunks[1] <- TRUE + #sri <- NULL + } else { + if (!aiat) { + ordered_fri[[1]] <- 1 + fri[[1]] <- var_unorder_indices[1] +# TODO: TO BE IMPROVED + #taken_chunks[1] <- TRUE + ordered_sri[[1]] <- 1:ceiling(m / n) + sri[[1]] <- 1:ceiling(m / n) + tvi <- 1:ceiling(m / n) + } else { + ordered_fri[[1]] <- 1:ceiling(m / n) + fri[[1]] <- var_unorder_indices[1:ceiling(m / n)] + #taken_chunks[1] <- TRUE + ordered_sri[[1]] <- 1 + sri[[1]] <- 1 + tvi <- 1 + } + } + } else if (selector_array == 'last') { + taken_chunks[length(taken_chunks)] <- TRUE + if (!with_transform) { + ordered_fri[[prod(var_file_dims)]] <- n + fri[[prod(var_file_dims)]] <- var_unorder_indices[n] + #taken_chunks[length(taken_chunks)] <- TRUE + #sri <- NULL + } else { + if (!aiat) { + ordered_fri[[prod(var_file_dims)]] <- prod(var_dims) + fri[[prod(var_file_dims)]] <- var_unorder_indices[prod(var_dims)] + #taken_chunks[length(taken_chunks)] <- TRUE + ordered_sri[[prod(var_file_dims)]] <- 1:ceiling(m / n) + sri[[prod(var_file_dims)]] <- 1:ceiling(m / n) +# TODO: TO BE IMPROVED. THE TVI MAY BE WRONG IF THERE'S BEEN A REORDERING. 
+ tvi <- 1:ceiling(m / n) + } else { + ordered_fri[[prod(var_file_dims)]] <- (n - ceiling(m / n) + 1):n + fri[[prod(var_file_dims)]] <- var_unorder_indices[(n - ceiling(m / n) + 1):n] + #taken_chunks[length(taken_chunks)] <- TRUE + ordered_sri[[prod(var_file_dims)]] <- 1 + sri[[prod(var_file_dims)]] <- 1 + tvi <- 1 + } + } + } + } + # If the selectors are not 'all', 'first', 'last', ... + } else { + if (!is.null(var_with_selectors_name)) { + unmatching_file_dims <- which(!(names(var_file_dims) %in% names(selector_file_dims))) + if ((length(unmatching_file_dims) > 0)) { + raise_error <- FALSE + if (is.null(file_dim)) { + raise_error <- TRUE + } else { + if (!((length(unmatching_file_dims) == 1) && + (names(var_file_dims)[unmatching_file_dims] == file_dim) && + (inner_dim %in% names(selector_inner_dims)))) { + raise_error <- TRUE + } + } + if (raise_error) { + stop("Provided selectors for the dimension '", inner_dim, "' must have as many ", + "file dimensions as the variable the dimension is defined along, '", + var_with_selectors_name, "', with the exceptions of the file pattern dimension ('", + found_pattern_dim, "') and any depended file dimension (if specified as ", + "depended dimension in parameter 'inner_dims_across_files' and the ", + "depending file dimension is present in the provided selector array).") + } + } + if (any(names(selector_file_dims) %in% names(dim(var_with_selectors)))) { + if (any(dim(var_with_selectors)[names(selector_file_dims)] != selector_file_dims)) { + stop("Size of selector file dimensions must mach size of requested ", + "variable dimensions.") + } + } + } + ## TODO: If var dimensions are not in the same order as selector dimensions, reorder + if (is.null(names(selector_file_dims))) { + if (is.null(file_dim)) { + fri_dims <- 1 + } else { + fri_dims <- chunk_amount + names(fri_dims) <- file_dim + } + } else { + fri_dim_names <- names(selector_file_dims) + if (!is.null(file_dim)) { + fri_dim_names <- c(fri_dim_names, file_dim) + } + 
fri_dim_names <- found_file_dims[[i]][which(found_file_dims[[i]] %in% fri_dim_names)] + fri_dims <- rep(NA, length(fri_dim_names)) + names(fri_dims) <- fri_dim_names + fri_dims[names(selector_file_dims)] <- selector_file_dims + if (!is.null(file_dim)) { + fri_dims[file_dim] <- chunk_amount + } + } + fri <- vector('list', length = prod(fri_dims)) + dim(fri) <- fri_dims + sri <- vector('list', length = prod(fri_dims)) + dim(sri) <- fri_dims + selector_file_dim_array <- array(1:prod(selector_file_dims), dim = selector_file_dims) + selector_store_position <- fri_dims + for (j in 1:prod(dim(selector_file_dim_array))) { + selector_indices_to_take <- which(selector_file_dim_array == j, arr.ind = TRUE)[1, ] + names(selector_indices_to_take) <- names(selector_file_dims) + selector_store_position[names(selector_indices_to_take)] <- selector_indices_to_take + sub_array_of_selectors <- Subset(selector_array, names(selector_indices_to_take), + as.list(selector_indices_to_take), drop = 'selected') +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> ITERATING OVER FILE DIMENSIONS OF THE SELECTORS.") +print("-> STRUCTURE OF A SUB ARRAY:") +print(str(sub_array_of_selectors)) +print("-> STRUCTURE OF THE VARIABLE WITH SELECTORS:") +print(str(var_with_selectors)) +print(dim(var_with_selectors)) +} +} + if (selectors_are_indices) { + sub_array_of_values <- NULL + #} else if (!is.null(var_ordered)) { + # sub_array_of_values <- var_ordered + } else { + if (length(var_file_dims) > 0) { + var_indices_to_take <- selector_indices_to_take[which(names(selector_indices_to_take) %in% names(var_file_dims))] + sub_array_of_values <- Subset(var_with_selectors, names(var_indices_to_take), + as.list(var_indices_to_take), drop = 'selected') + } else { + sub_array_of_values <- var_with_selectors + } + } +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> STRUCTURE OF THE SUB ARRAY FROM THE VARIABLE CORRESPONDING TO THE SUB ARRAY OF SELECTORS") +print(str(sub_array_of_values)) 
+print(dim(sub_array_of_values)) +print("-> NAME OF THE FILE DIMENSION THE CURRENT INNER DIMENSION EXTENDS ALONG:") +print(file_dim) +} +} + if ((!is.null(file_dim) && (file_dim %in% names(selector_file_dims))) || is.null(file_dim)) { + if (length(sub_array_of_selectors) > 0) { +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> THE INNER DIMENSION DOES NOT GO ACROSS ANY FILE DIMENSION OR IT DOES BUT IS IN THE PROVIDED SELECTOR ARRAY.") +} +} + if (selectors_are_indices) { + if (!is.null(var_with_selectors_name)) { + max_allowed <- ifelse(aiat, m, n) + } else { + max_allowed <- data_dims[inner_dim] + } + if (any(na.omit(unlist(sub_array_of_selectors)) > max_allowed) || + any(na.omit(unlist(sub_array_of_selectors)) < 1)) { + stop("Provided indices out of range for dimension '", inner_dim, "' ", + "for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ", + max_allowed, ").") + } + } + + # The selector_checker will return either a vector of indices or a list + # with the first and last desired indices. + goes_across_prime_meridian <- FALSE + if (!is.null(var_ordered) && !selectors_are_indices) { + if (!is.null(dim_reorder_params[[inner_dim]])) { + if (is.list(sub_array_of_selectors)) { + +## NOTE: The check of 'goes_across_prime_meridian' is moved forward to here. + is_circular_dim <- attr(dim_reorder_params[[inner_dim]], "circular") + if (!is.null(is_circular_dim)) { + if (is_circular_dim) { + +# NOTE: Use CircularSort() to put the values in the assigned range, and get the order. +# For example, [-10, 20] in CircularSort(0, 360) is [350, 20]. The $ix list is [2, 1]. +# 'goes_across_prime_meridian' means the selector range across the border. For example, +# CircularSort(-180, 180) with selector [170, 190] -> goes_across_prime_meridian = TRUE. + tmp <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$ix + goes_across_prime_meridian <- tmp[1] > tmp[2] + } + } + + # HERE change to the same code as below (under 'else'). 
Not sure why originally + #it uses additional lines, which make reorder not work. + sub_array_of_selectors <- as.list(dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors))$x) + #sub_array_reordered <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors)) + #sub_array_unorder <- sort(sub_array_reordered$ix, index.return = TRUE)$ix + #sub_array_of_selectors <- as.list(sub_array_reordered$x[sub_array_unorder]) + +# Add warning if the boundary is out of range + if (sub_array_of_selectors[1] < range(var_ordered)[1] | sub_array_of_selectors[1] > range(var_ordered)[2]) { + .warning(paste0("The lower boundary of selector of ", + inner_dim, + " is out of range [", + min(var_ordered), ", ", max(var_ordered), "]. ", + "Check if the desired range is all included.")) + } + if (sub_array_of_selectors[2] < range(var_ordered)[1] | sub_array_of_selectors[2] > range(var_ordered)[2]) { + .warning(paste0("The upper boundary of selector of ", + inner_dim, + " is out of range [", + min(var_ordered), ", ", max(var_ordered), "]. ", + "Check if the desired range is all included.")) + } + + + } else { + sub_array_of_selectors <- dim_reorder_params[[inner_dim]](sub_array_of_selectors)$x + } + } + +# NOTE: The ideal solution for selecting indices in goes_across_prime_meridian case +# is modified SelectorCheckor.R. But now SelectorCheckor doesn't know the info of +#goes_across_prime_meridian, so I do the adjustion after calling SelectorCheckor(). + sub_array_of_indices <- selector_checker(sub_array_of_selectors, var_ordered, + tolerance = if (aiat) { + NULL + } else { + tolerance_params[[inner_dim]] + }) + + if (goes_across_prime_meridian & sub_array_of_indices[[1]] < sub_array_of_indices[[2]]) { + if (!(sub_array_of_selectors[[1]] %in% var_ordered)){ + sub_array_of_indices[[1]] <- sub_array_of_indices[[1]] - 1 + } + + if (!(sub_array_of_selectors[[2]] %in% var_ordered)){ + sub_array_of_indices[[2]] <- sub_array_of_indices[[2]] + 1 + } + } + +#NOTE: the possible case? 
+ if (goes_across_prime_meridian & sub_array_of_indices[[1]] > sub_array_of_indices[[2]]) { + .stop("The case is goes_across_prime_meridian but no adjustion for the indices!") + } + + if (any(is.na(sub_array_of_indices))) { + + stop(paste0("The selectors of ", inner_dim, + " are out of range [", min(var_ordered), + ", ", max(var_ordered), "].")) + } + + } else { + +# Add warning if the boundary is out of range + if (is.list(sub_array_of_selectors)) { + if (sub_array_of_selectors[1] < + min(sub_array_of_values) | sub_array_of_selectors[1] > + max(sub_array_of_values)) { + .warning(paste0("The lower boundary of selector of ", + inner_dim, " is out of range [", + min(sub_array_of_values), ", ", + max(sub_array_of_values), "]. ", + "Check if the desired range is all included.")) + } + if (sub_array_of_selectors[2] < + min(sub_array_of_values) | sub_array_of_selectors[2] > + max(sub_array_of_values)) { + .warning(paste0("The upper boundary of selector of ", + inner_dim, " is out of range [", + min(sub_array_of_values), ", ", + max(sub_array_of_values), "]. ", + "Check if the desired range is all included.")) + } + } + + sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values, + tolerance = if (aiat) { + NULL + } else { + tolerance_params[[inner_dim]] + }) + + if (any(is.na(sub_array_of_indices))) { + + stop(paste0("The selectors of ", inner_dim, + " are out of range [", min(sub_array_of_values), + ", ", max(sub_array_of_values), "].")) + } + + } + ## This 'if' runs in both Start() and Compute(). In Start(), it doesn't have any effect (no chunk). + ## In Compute(), it creates the indices for each chunk. For example, if 'sub_array_of_indices' + ## is c(5:10) and chunked into 2, 'sub_array_of_indices' becomes c(5:7) for chunk = 1, c(8:10) + ## for chunk = 2. If 'sub_array_of_indices' is list(55, 62) and chunked into 2, it becomes + ## list(55, 58) for chunk = 1 and list(59, 62) for chunk = 2. + ## TODO: The list can be turned into vector here? 
So afterward no need to judge if it is list + ## or vector. + if (!is.list(sub_array_of_indices)) { + sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices), + chunks[[inner_dim]]["chunk"], + chunks[[inner_dim]]["n_chunks"], + inner_dim)] + } else { + tmp <- chunk_indices(length(sub_array_of_indices[[1]]:sub_array_of_indices[[2]]), + chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"], + inner_dim) + vect <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + sub_array_of_indices[[1]] <- vect[tmp[1]] + sub_array_of_indices[[2]] <- vect[tmp[length(tmp)]] + } + # The sub_array_of_indices now contains numeric indices of the values to be taken by each chunk. + +# Check if all the files have the selectors assigned (e.g., region = 'Grnland') _20191015 +if (is.character(sub_array_of_selectors)) { + array_of_var_files_check <- vector('list', length(selector_indices)) + for (k in 1:length(selector_indices)) { + asdasd <- selector_indices[[k]] + array_of_var_files_check <- do.call('[', c(list(x = array_of_files_to_load), asdasd, list(drop = FALSE)))[j] + file_object <- file_opener(array_of_var_files_check) + var_values_check <- file_var_reader(NULL, file_object, NULL, var_to_read, synonims) + if (any(as.vector(var_values_check)[sub_array_of_indices] != sub_array_of_selectors)) { + .warning('Not all the files has correponding selectors. Check the selector attributes') + } + } +} + +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> TRANSFORMATION REQUESTED?") +print(with_transform) +print("-> BETA:") +print(beta) +} +} + if (with_transform) { + # If there is a transformation and selector values are provided, these + # selectors will be processed in the same way either if aiat = TRUE or + # aiat = FALSE. +## TODO: If sub_array_of_selectors was integer and aiat then... do what's commented 50 lines below. +## otherwise, do what's coded. 
+if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> SELECTORS REQUESTED BEFORE TRANSFORM.") +} +} + +###NOTE: Here, the transform, is different from the below part of non-transform. +# search 'if (goes_across_prime_meridian' to find the lines below. + if (goes_across_prime_meridian) { +# NOTE: before changing, the return is already correct. sub_array_of_fri is defined +# again afterward. Not sure if here is redundant. + sub_array_of_fri <- c(1:min(unlist(sub_array_of_indices)), + max(unlist(sub_array_of_indices)):n) + + + #gap_width <- sub_array_of_indices[[1]] - sub_array_of_indices[[2]] - 1 + #sub_array_of_fri <- c((1:(sub_array_of_indices[[2]] + min(gap_width, beta))), + # (sub_array_of_indices[[1]] - min(gap_width, beta)):n) + } else { + if (is.list(sub_array_of_indices)) { + sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + } + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + + start_padding <- min(beta, first_index - 1) + end_padding <- min(beta, n - last_index) + sub_array_of_fri <- (first_index - start_padding):(last_index + end_padding) + if (start_padding != beta | end_padding != beta) { + .warning(paste0("Adding parameter transform_extra_cells = ", + transform_extra_cells, " to the transformed index excesses ", + "the border. The border index is used for transformation.")) + } + + } + subset_vars_to_transform <- vars_to_transform + if (!is.null(var_ordered)) { + +##NOTE: if var_ordered is common_vars, it doesn't have attributes and it is a vector. +## Turn it into array and add dimension name. + if (!is.array(var_ordered)) { + var_ordered <- as.array(var_ordered) + names(dim(var_ordered)) <- inner_dim + } + + subset_vars_to_transform[[var_with_selectors_name]] <- Subset(var_ordered, inner_dim, sub_array_of_fri) + } else { +##NOTE: It should be redundant because without reordering the var should remain array +## But just stay same with above... 
+ if (!is.array(sub_array_of_values)) { + sub_array_of_values <- as.array(sub_array_of_values) + names(dim(sub_array_of_values)) <- inner_dim + } + + subset_vars_to_transform[[var_with_selectors_name]] <- Subset(sub_array_of_values, inner_dim, sub_array_of_fri) + } +## NOTE: Remove 'crop' from transform_params if no reorder. It causes error. +## But 'crop' has effect on reorder cases... need further investigation + if (is.null(dim_reorder_params[[inner_dim]])) { + if ('crop' %in% names(transform_params)) { + transform_params <- transform_params[-which(names(transform_params) == 'crop')] + } + } + + transformed_subset_var <- do.call(transform, c(list(data_array = NULL, + variables = subset_vars_to_transform, + file_selectors = selectors_of_first_files_with_data[[i]]), + transform_params))$variables[[var_with_selectors_name]] + # Sorting the transformed variable and working out the indices again after transform. + if (!is.null(dim_reorder_params[[inner_dim]])) { + transformed_subset_var_reorder <- dim_reorder_params[[inner_dim]](transformed_subset_var) + transformed_subset_var <- transformed_subset_var_reorder$x + transformed_subset_var_unorder <- sort(transformed_subset_var_reorder$ix, index.return = TRUE)$ix + } else { + transformed_subset_var_unorder <- 1:length(transformed_subset_var) + } + sub_array_of_sri <- selector_checker(sub_array_of_selectors, transformed_subset_var, + tolerance = if (aiat) { + tolerance_params[[inner_dim]] + } else { + NULL + }) + +# Check if selectors fall out of the range of the transform grid +# It may happen when original lon is [-180, 180] while want to regrid to +# [0, 360], and lon selector = [-20, -10]. + if (any(is.na(sub_array_of_sri))) { + stop(paste0("The selectors of ", + inner_dim, " are out of range of transform grid '", + transform_params$grid, "'. 
Use parameter '", + inner_dim, "_reorder' or change ", inner_dim, + " selectors.")) + } + + if (goes_across_prime_meridian) { + # Because sub_array_of_sri order is exchanged due to + # previous development, here [[1]] and [[2]] should exchange + sub_array_of_sri <- c(1:sub_array_of_sri[[1]], + sub_array_of_sri[[2]]:length(transformed_subset_var)) + + } else if (is.list(sub_array_of_sri)) { + sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]] + } + ordered_sri <- sub_array_of_sri + sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri] + # In this case, the tvi are not defined and the 'transformed_subset_var' + # will be taken instead of the var transformed before in the code. +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> FIRST INDEX:") +print(first_index) +print("-> LAST INDEX:") +print(last_index) +print("-> STRUCTURE OF FIRST ROUND INDICES:") +print(str(sub_array_of_fri)) +print("-> STRUCTURE OF SECOND ROUND INDICES:") +print(str(sub_array_of_sri)) +print("-> STRUCTURE OF TRANSFORMED VARIABLE INDICES:") +print(str(tvi)) +} +} +### # If the selectors are expressed after transformation +### } else { +###if (debug) { +###if (inner_dim %in% dims_to_check) { +###print("-> SELECTORS REQUESTED AFTER TRANSFORM.") +###} +###} +### if (goes_across_prime_meridian) { +### sub_array_of_indices <- c(sub_array_of_indices[[1]]:m, +### 1:sub_array_of_indices[[2]]) +### } +### first_index <- min(unlist(sub_array_of_indices)) +### last_index <- max(unlist(sub_array_of_indices)) +### first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1) +### last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n) +### sub_array_of_fri <- first_index_before_transform:last_index_before_transform +### n_of_extra_cells <- round(beta / n * m) +### if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) { +### sub_array_of_sri <- 1:(last_index - first_index + 1) +### if (is.null(tvi)) { +### 
tvi <- sub_array_of_sri + first_index - 1 +### } +### } else { +### sub_array_of_sri <- sub_array_of_indices - first_index + 1 +### if (is.null(tvi)) { +### tvi <- sub_array_of_indices +### } +### } +### sub_array_of_sri <- sub_array_of_sri + n_of_extra_cells + sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position), + list(value = sub_array_of_sri))) + } else { + if (goes_across_prime_meridian) { + sub_array_of_fri <- c(1:min(unlist(sub_array_of_indices)), + max(unlist(sub_array_of_indices)):n) + + } else if (is.list(sub_array_of_indices)) { + sub_array_of_fri <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + } else { + sub_array_of_fri <- sub_array_of_indices + } + } + if (!is.null(var_unorder_indices)) { + if (is.null(ordered_fri)) { + ordered_fri <- sub_array_of_fri + } + sub_array_of_fri <- var_unorder_indices[sub_array_of_fri] + } + fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position), + list(value = sub_array_of_fri))) + if (!is.null(file_dim)) { + taken_chunks[selector_store_position[[file_dim]]] <- TRUE + } else { + taken_chunks <- TRUE + } + } + } else { +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.") +} +} + if (inner_dim %in% names(dim(sub_array_of_selectors))) { + if (is.null(var_with_selectors_name)) { + if (any(na.omit(unlist(sub_array_of_selectors)) < 1) || + any(na.omit(unlist(sub_array_of_selectors)) > data_dims[inner_dim] * chunk_amount)) { + stop("Provided indices out of range for dimension '", inner_dim, "' ", + "for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ", + data_dims[inner_dim] * chunk_amount, ").") + } + } else { + if (inner_dim %in% names(dim(sub_array_of_values))) { + inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_values)) == inner_dim) + if (inner_dim_pos_in_sub_array != 1) { + new_sub_array_order <- (1:length(dim(sub_array_of_values)))[-inner_dim_pos_in_sub_array] + new_sub_array_order <- 
c(inner_dim_pos_in_sub_array, new_sub_array_order) + sub_array_of_values <- .aperm2(sub_array_of_values, new_sub_array_order) + } + } + } + inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_selectors)) == inner_dim) + if (inner_dim_pos_in_sub_array != 1) { + new_sub_array_order <- (1:length(dim(sub_array_of_selectors)))[-inner_dim_pos_in_sub_array] + new_sub_array_order <- c(inner_dim_pos_in_sub_array, new_sub_array_order) + sub_array_of_selectors <- .aperm2(sub_array_of_selectors, new_sub_array_order) + } + sub_array_of_indices <- selector_checker(sub_array_of_selectors, sub_array_of_values, + tolerance = tolerance_params[[inner_dim]]) + # It is needed to expand the indices here, otherwise for + # values(list(date1, date2)) only 2 values are picked. + if (is.list(sub_array_of_indices)) { + sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + } + sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices), + chunks[[inner_dim]]['chunk'], + chunks[[inner_dim]]['n_chunks'], + inner_dim)] + sub_array_is_list <- FALSE + if (is.list(sub_array_of_indices)) { + sub_array_is_list <- TRUE + sub_array_of_indices <- unlist(sub_array_of_indices) + } + if (is.null(var_with_selectors_name)) { + indices_chunk <- floor((sub_array_of_indices - 1) / data_dims[inner_dim]) + 1 + transformed_indices <- ((sub_array_of_indices - 1) %% data_dims[inner_dim]) + 1 + } else { + indices_chunk <- floor((sub_array_of_indices - 1) / var_full_dims[inner_dim]) + 1 + transformed_indices <- ((sub_array_of_indices - 1) %% var_full_dims[inner_dim]) + 1 + } + if (sub_array_is_list) { + sub_array_of_indices <- as.list(sub_array_of_indices) + } +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> GOING TO ITERATE ALONG CHUNKS.") +} +} + for (chunk in 1:chunk_amount) { + if (!is.null(names(selector_store_position))) { + selector_store_position[file_dim] <- chunk + } else { + selector_store_position <- chunk + } + chunk_selectors <- 
transformed_indices[which(indices_chunk == chunk)] + sub_array_of_indices <- chunk_selectors + if (with_transform) { + # If the provided selectors are expressed in the world + # before transformation + if (!aiat) { + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + sub_array_of_fri <- max(c(first_index - beta, 1)):min(c(last_index + beta, n)) + sub_array_of_sri <- transform_indices(unlist(sub_array_of_indices) - first_index + 1, n, m) + if (is.list(sub_array_of_indices)) { + if (length(sub_array_of_sri) > 1) { + sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]] + } + } +##TODO: TRANSFORM SUBSET VARIABLE AS ABOVE, TO COMPUTE SRI + # If the selectors are expressed after transformation + } else { + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + first_index_before_transform <- max(transform_indices(first_index, m, n) - beta, 1) + last_index_before_transform <- min(transform_indices(last_index, m, n) + beta, n) + sub_array_of_fri <- first_index_before_transform:last_index_before_transform + if (is.list(sub_array_of_indices) && (length(sub_array_of_indices) > 1)) { + sub_array_of_sri <- 1:(last_index - first_index + 1) + + round(beta / n * m) + } else { + sub_array_of_sri <- sub_array_of_indices - first_index + 1 + + round(beta / n * m) + } +##TODO: FILL IN TVI + } + sri <- do.call('[[<-', c(list(x = sri), as.list(selector_store_position), + list(value = sub_array_of_sri))) + if (length(sub_array_of_sri) > 0) { + taken_chunks[chunk] <- TRUE + } + } else { + sub_array_of_fri <- sub_array_of_indices + if (length(sub_array_of_fri) > 0) { + taken_chunks[chunk] <- TRUE + } + } + if (!is.null(var_unorder_indices)) { + ordered_fri <- sub_array_of_fri + sub_array_of_fri <- var_unorder_indices[sub_array_of_fri] + } + fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position), + list(value = sub_array_of_fri))) + } +if (debug) { +if (inner_dim %in% 
dims_to_check) { +print("-> FINISHED ITERATING ALONG CHUNKS") +} +} + } else { + stop("Provided array of indices for dimension '", inner_dim, "', ", + "which goes across the file dimension '", file_dim, "', but ", + "the provided array does not have the dimension '", inner_dim, + "', which is mandatory.") + } + } + } + } +if (debug) { +if (inner_dim %in% dims_to_check) { +print("-> PROCEEDING TO CROP VARIABLES") +} +} + #if ((length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last'))) { + #if (!is.null(var_with_selectors_name) || (is.null(var_with_selectors_name) && is.character(selector_array) && + # (length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last')))) { + empty_chunks <- which(!taken_chunks) + if (length(empty_chunks) >= length(taken_chunks)) { + stop("Selectors do not match any of the possible values for the dimension '", inner_dim, "'.") + } + if (length(empty_chunks) > 0) { +# # Get the first group of chunks to remove, and remove them. +# # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 1 and 2 +# dist <- abs(rev(empty_chunks) - c(rev(empty_chunks)[1] - 1, head(rev(empty_chunks), length(rev(empty_chunks)) - 1))) +# if (all(dist == 1)) { +# start_chunks_to_remove <- NULL +# } else { +# first_chunk_to_remove <- tail(which(dist > 1), 1) +# start_chunks_to_remove <- rev(rev(empty_chunks)[first_chunk_to_remove:length(empty_chunks)]) +# } +# # Get the last group of chunks to remove, and remove them. 
+# # E.g., from c(1, 2, 4, 5, 6, 8, 9) remove only 8 and 9 +# dist <- abs(empty_chunks - c(empty_chunks[1] - 1, head(empty_chunks, length(empty_chunks) - 1))) +# if (all(dist == 1)) { +# first_chunk_to_remove <- 1 +# } else { +# first_chunk_to_remove <- tail(which(dist > 1), 1) +# } +# end_chunks_to_remove <- empty_chunks[first_chunk_to_remove:length(empty_chunks)] +# chunks_to_keep <- which(!((1:length(taken_chunks)) %in% c(start_chunks_to_remove, end_chunks_to_remove))) + chunks_to_keep <- which(taken_chunks) + dims_to_crop[[file_dim]] <- c(dims_to_crop[[file_dim]], list(chunks_to_keep)) +# found_indices <- Subset(found_indices, file_dim, chunks_to_keep) +# # Crop dataset variables file dims. +# for (picked_var in names(picked_vars[[i]])) { +# if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) { +# picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, chunks_to_keep) +# } +# } + } + #} + dat[[i]][['selectors']][[inner_dim]] <- list(fri = fri, sri = sri) + # Crop dataset variables inner dims. + # Crop common variables inner dims. 
+ types_of_var_to_crop <- 'picked' + if (with_transform) { + types_of_var_to_crop <- c(types_of_var_to_crop, 'transformed') + } + if (!is.null(dim_reorder_params[[inner_dim]])) { + types_of_var_to_crop <- c(types_of_var_to_crop, 'reordered') + } + for (type_of_var_to_crop in types_of_var_to_crop) { + if (type_of_var_to_crop == 'transformed') { + if (is.null(tvi)) { + if (!is.null(dim_reorder_params[[inner_dim]])) { + crop_indices <- unique(unlist(ordered_sri)) + } else { + crop_indices <- unique(unlist(sri)) + } + } else { + crop_indices <- unique(unlist(tvi)) + } + vars_to_crop <- transformed_vars[[i]] + common_vars_to_crop <- transformed_common_vars + } else if (type_of_var_to_crop == 'reordered') { + crop_indices <- unique(unlist(ordered_fri)) + vars_to_crop <- picked_vars_ordered[[i]] + common_vars_to_crop <- picked_common_vars_ordered + } else { + crop_indices <- unique(unlist(fri)) + vars_to_crop <- picked_vars[[i]] + common_vars_to_crop <- picked_common_vars + } + for (var_to_crop in names(vars_to_crop)) { + if (inner_dim %in% names(dim(vars_to_crop[[var_to_crop]]))) { + if (!is.null(crop_indices)) { + if (type_of_var_to_crop == 'transformed') { + if (!aiat) { + vars_to_crop[[var_to_crop]] <- Subset(transformed_subset_var, inner_dim, crop_indices) + } else { + vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices) + } + } else { + vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], inner_dim, crop_indices) + } + } + } + } + if (i == length(dat)) { + for (common_var_to_crop in names(common_vars_to_crop)) { + if (inner_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { + common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices) + } + } + } + if (type_of_var_to_crop == 'transformed') { + if (!is.null(vars_to_crop)) { + transformed_vars[[i]] <- vars_to_crop + } + if (i == length(dat)) { + transformed_common_vars <- common_vars_to_crop 
+ } + } else if (type_of_var_to_crop == 'reordered') { + if (!is.null(vars_to_crop)) { + picked_vars_ordered[[i]] <- vars_to_crop + } + if (i == length(dat)) { + picked_common_vars_ordered <- common_vars_to_crop + } + } else { + if (!is.null(vars_to_crop)) { + picked_vars[[i]] <- vars_to_crop + } + if (i == length(dat)) { + picked_common_vars <- common_vars_to_crop + } + } + } + #} + } + # After the selectors have been picked (using the original variables), + # the variables are transformed. At that point, the original selectors + # for the transformed variables are also kept in the variable original_selectors. +#print("L") + } + } + } +# if (!is.null(transformed_common_vars)) { +# picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars +# } + # Remove the trailing chunks, if any. + for (file_dim in names(dims_to_crop)) { +# indices_to_keep <- min(sapply(dims_to_crop[[file_dim]], min)):max(sapply(dims_to_crop[[file_dim]], max)) + ## TODO: Merge indices in dims_to_crop with some advanced mechanism? 
+ indices_to_keep <- unique(unlist(dims_to_crop[[file_dim]])) + array_of_files_to_load <- Subset(array_of_files_to_load, file_dim, indices_to_keep) + array_of_not_found_files <- Subset(array_of_not_found_files, file_dim, indices_to_keep) + for (i in 1:length(dat)) { + # Crop selectors + for (selector_dim in names(dat[[i]][['selectors']])) { + if (selector_dim == file_dim) { + for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['fri']])) { + dat[[i]][['selectors']][[selector_dim]][['fri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['fri']][[j]][indices_to_keep] + } + for (j in 1:length(dat[[i]][['selectors']][[selector_dim]][['sri']])) { + dat[[i]][['selectors']][[selector_dim]][['sri']][[j]] <- dat[[i]][['selectors']][[selector_dim]][['sri']][[j]][indices_to_keep] + } + } + if (file_dim %in% names(dim(dat[[i]][['selectors']][[selector_dim]][['fri']]))) { + dat[[i]][['selectors']][[selector_dim]][['fri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['fri']], file_dim, indices_to_keep) + dat[[i]][['selectors']][[selector_dim]][['sri']] <- Subset(dat[[i]][['selectors']][[selector_dim]][['sri']], file_dim, indices_to_keep) + } + } + # Crop dataset variables file dims. + for (picked_var in names(picked_vars[[i]])) { + if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) { + picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, indices_to_keep) + } + } + for (transformed_var in names(transformed_vars[[i]])) { + if (file_dim %in% names(dim(transformed_vars[[i]][[transformed_var]]))) { + transformed_vars[[i]][[transformed_var]] <- Subset(transformed_vars[[i]][[transformed_var]], file_dim, indices_to_keep) + } + } + } + # Crop common variables file dims. 
+ for (picked_common_var in names(picked_common_vars)) { + if (file_dim %in% names(dim(picked_common_vars[[picked_common_var]]))) { + picked_common_vars[[picked_common_var]] <- Subset(picked_common_vars[[picked_common_var]], file_dim, indices_to_keep) + } + } + for (transformed_common_var in names(transformed_common_vars)) { + if (file_dim %in% names(dim(transformed_common_vars[[transformed_common_var]]))) { + transformed_common_vars[[transformed_common_var]] <- Subset(transformed_common_vars[[transformed_common_var]], file_dim, indices_to_keep) + } + } + } + # Calculate the size of the final array. + total_inner_dims <- NULL + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + inner_dims <- expected_inner_dims[[i]] + inner_dims <- sapply(inner_dims, + function(x) { + if (!all(sapply(dat[[i]][['selectors']][[x]][['sri']], is.null))) { + max(sapply(dat[[i]][['selectors']][[x]][['sri']], length)) + } else { + if (length(var_params[[x]]) > 0) { + if (var_params[[x]] %in% names(transformed_vars[[i]])) { + length(transformed_vars[[i]][[var_params[[x]]]]) + } else if (var_params[[x]] %in% names(transformed_common_vars)) { + length(transformed_common_vars[[var_params[[x]]]]) + } else { + max(sapply(dat[[i]][['selectors']][[x]][['fri']], length)) + } + } else { + max(sapply(dat[[i]][['selectors']][[x]][['fri']], length)) + } + } + }) + names(inner_dims) <- expected_inner_dims[[i]] + if (is.null(total_inner_dims)) { + total_inner_dims <- inner_dims + } else { + new_dims <- .MergeArrayDims(total_inner_dims, inner_dims) + total_inner_dims <- new_dims[[3]] + } + } + } + new_dims <- .MergeArrayDims(dim(array_of_files_to_load), total_inner_dims) + final_dims <- new_dims[[3]][dim_names] + # final_dims_fake is the vector of final dimensions after having merged the + # 'across' file dimensions with the respective 'across' inner dimensions, and + # after having broken into multiple dimensions those dimensions for which + # multidimensional selectors have been provided. 
+ # final_dims will be used for collocation of data, whereas final_dims_fake + # will be used for shaping the final array to be returned to the user. + final_dims_fake <- final_dims + if (merge_across_dims) { + if (!is.null(inner_dims_across_files)) { + for (file_dim_across in names(inner_dims_across_files)) { + inner_dim_pos <- which(names(final_dims_fake) == inner_dims_across_files[[file_dim_across]]) + new_dims <- c() + if (inner_dim_pos > 1) { + new_dims <- c(new_dims, final_dims_fake[1:(inner_dim_pos - 1)]) + } + new_dims <- c(new_dims, setNames(prod(final_dims_fake[c(inner_dim_pos, inner_dim_pos + 1)]), + inner_dims_across_files[[file_dim_across]])) + if (inner_dim_pos + 1 < length(final_dims_fake)) { + new_dims <- c(new_dims, final_dims_fake[(inner_dim_pos + 2):length(final_dims_fake)]) + } + final_dims_fake <- new_dims + } + } + } + all_split_dims <- NULL + if (split_multiselected_dims) { + for (dim_param in 1:length(dim_params)) { + if (!is.null(dim(dim_params[[dim_param]]))) { + if (length(dim(dim_params[[dim_param]])) > 1) { + split_dims <- dim(dim_params[[dim_param]]) + all_split_dims <- c(all_split_dims, setNames(list(split_dims), + names(dim_params)[dim_param])) + if (is.null(names(split_dims))) { + names(split_dims) <- paste0(names(dim_params)[dim_param], + 1:length(split_dims)) + } + old_dim_pos <- which(names(final_dims_fake) == names(dim_params)[dim_param]) + new_dims <- c() + if (old_dim_pos > 1) { + new_dims <- c(new_dims, final_dims_fake[1:(old_dim_pos - 1)]) + } + new_dims <- c(new_dims, split_dims) + if (old_dim_pos < length(final_dims_fake)) { + new_dims <- c(new_dims, final_dims_fake[(old_dim_pos + 1):length(final_dims_fake)]) + } + final_dims_fake <- new_dims + } + } + } + } + + if (!silent) { + .message("Detected dimension sizes:") + longest_dim_len <- max(sapply(names(final_dims_fake), nchar)) + longest_size_len <- max(sapply(paste0(final_dims_fake, ''), nchar)) + sapply(names(final_dims_fake), + function(x) { + message(paste0("* ", 
paste(rep(' ', longest_dim_len - nchar(x)), collapse = ''), + x, ": ", paste(rep(' ', longest_size_len - nchar(paste0(final_dims_fake[x], ''))), collapse = ''), + final_dims_fake[x])) + }) + bytes <- prod(c(final_dims_fake, 8)) + dim_sizes <- paste(final_dims_fake, collapse = ' x ') + if (retrieve) { + .message(paste("Total size of requested data:")) + } else { + .message(paste("Total size of involved data:")) + } + .message(paste(dim_sizes, " x 8 bytes =", + format(structure(bytes, class = "object_size"), units = "auto")), + indent = 2) + } + + # The following several lines will only be run if retrieve = TRUE + if (retrieve) { + + ########## CREATING THE SHARED MATRIX AND DISPATCHING WORK PIECES ########### + # TODO: try performance of storing all in cols instead of rows + # Create the shared memory array, and a pointer to it, to be sent + # to the work pieces. + data_array <- big.matrix(nrow = prod(final_dims), ncol = 1) + shared_matrix_pointer <- describe(data_array) + if (is.null(num_procs)) { + num_procs <- availableCores() + } + # Creating a shared tmp folder to store metadata from each chunk + array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load)) + if (!is.null(metadata_dims)) { + metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load)))) + names(metadata_indices_to_load) <- names(dim(array_of_files_to_load)) + metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims))) + array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags), metadata_indices_to_load, + list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims]))))) + } + metadata_file_counter <- 0 + metadata_folder <- tempfile('metadata') + dir.create(metadata_folder) + # Build the work pieces, each with: + # - file path + # - total size (dims) of store array + # - start position in store array + # - file selectors (to provide extra info. useful e.g. 
to select variable) + # - indices to take from file + work_pieces <- list() + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + selectors <- dat[[i]][['selectors']] + file_dims <- found_file_dims[[i]] + inner_dims <- expected_inner_dims[[i]] + sub_array_dims <- final_dims[file_dims] + sub_array_dims[found_pattern_dim] <- 1 + sub_array_of_files_to_load <- array(1:prod(sub_array_dims), + dim = sub_array_dims) + names(dim(sub_array_of_files_to_load)) <- names(sub_array_dims) + # Detect which of the dimensions of the dataset go across files. + file_dim_across_files <- lapply(inner_dims, + function(x) { + dim_across <- sapply(inner_dims_across_files, function(y) x %in% y) + if (any(dim_across)) { + names(inner_dims_across_files)[which(dim_across)[1]] + } else { + NULL + } + }) + names(file_dim_across_files) <- inner_dims + j <- 1 + while (j <= prod(sub_array_dims)) { + # Work out file path. + file_to_load_sub_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ] + names(file_to_load_sub_indices) <- names(sub_array_dims) + file_to_load_sub_indices[found_pattern_dim] <- i + big_dims <- rep(1, length(dim(array_of_files_to_load))) + names(big_dims) <- names(dim(array_of_files_to_load)) + file_to_load_indices <- .MergeArrayDims(file_to_load_sub_indices, big_dims)[[1]] + file_to_load <- do.call('[[', c(list(array_of_files_to_load), + as.list(file_to_load_indices))) + not_found_file <- do.call('[[', c(list(array_of_not_found_files), + as.list(file_to_load_indices))) + load_file_metadata <- do.call('[', c(list(array_of_metadata_flags), + as.list(file_to_load_indices))) + if (load_file_metadata) { + metadata_file_counter <- metadata_file_counter + 1 + } + if (!is.na(file_to_load) && !not_found_file) { + # Work out indices to take + first_round_indices <- lapply(inner_dims, + function (x) { + if (is.null(file_dim_across_files[[x]])) { + selectors[[x]][['fri']][[1]] + } else { + which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] + 
selectors[[x]][['fri']][[which_chunk]] + } + }) + names(first_round_indices) <- inner_dims + second_round_indices <- lapply(inner_dims, + function (x) { + if (is.null(file_dim_across_files[[x]])) { + selectors[[x]][['sri']][[1]] + } else { + which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] + selectors[[x]][['sri']][[which_chunk]] + } + }) +if (debug) { +print("-> BUILDING A WORK PIECE") +#print(str(selectors)) +} + names(second_round_indices) <- inner_dims + if (!any(sapply(first_round_indices, length) == 0)) { + work_piece <- list() + work_piece[['first_round_indices']] <- first_round_indices + work_piece[['second_round_indices']] <- second_round_indices + work_piece[['file_indices_in_array_of_files']] <- file_to_load_indices + work_piece[['file_path']] <- file_to_load + work_piece[['store_dims']] <- final_dims + # Work out store position + store_position <- final_dims + store_position[names(file_to_load_indices)] <- file_to_load_indices + store_position[inner_dims] <- rep(1, length(inner_dims)) + work_piece[['store_position']] <- store_position + # Work out file selectors + file_selectors <- sapply(file_dims, + function (x) { + vector_to_pick <- 1 + if (x %in% names(depending_file_dims)) { + vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]] + } + selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]] + }) + names(file_selectors) <- file_dims + work_piece[['file_selectors']] <- file_selectors + # Send variables for transformation + if (!is.null(transform) && (length(transform_vars) > 0)) { + vars_to_transform <- NULL + picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars) + if (length(picked_vars_to_transform) > 0) { + picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform] + vars_to_transform <- c(vars_to_transform, picked_vars[[i]][picked_vars_to_transform]) + if (any(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))) { + picked_vars_ordered_to_transform 
<- picked_vars_to_transform[which(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))] + vars_to_transform[picked_vars_ordered_to_transform] <- picked_vars_ordered[[i]][picked_vars_ordered_to_transform] + } + } + picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars) + if (length(picked_common_vars_to_transform) > 0) { + picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform] + vars_to_transform <- c(vars_to_transform, picked_common_vars[picked_common_vars_to_transform]) + if (any(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))) { + picked_common_vars_ordered_to_transform <- picked_common_vars_to_transform[which(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))] + vars_to_transform[picked_common_vars_ordered_to_transform] <- picked_common_vars_ordered[picked_common_vars_ordered_to_transform] + } + } + work_piece[['vars_to_transform']] <- vars_to_transform + } + # Send flag to load metadata + if (load_file_metadata) { + work_piece[['save_metadata_in']] <- paste0(metadata_folder, '/', metadata_file_counter) + } + work_pieces <- c(work_pieces, list(work_piece)) + } + } + j <- j + 1 + } + } + } +#print("N") +if (debug) { +print("-> WORK PIECES BUILT") +} + + # Calculate the progress %s that will be displayed and assign them to + # the appropriate work pieces. 
+ if (length(work_pieces) / num_procs >= 2 && !silent) { + if (length(work_pieces) / num_procs < 10) { + amount <- 100 / ceiling(length(work_pieces) / num_procs) + reps <- ceiling(length(work_pieces) / num_procs) + } else { + amount <- 10 + reps <- 10 + } + progress_steps <- rep(amount, reps) + if (length(work_pieces) < (reps + 1)) { + selected_pieces <- length(work_pieces) + progress_steps <- c(sum(head(progress_steps, reps)), + tail(progress_steps, reps)) + } else { + selected_pieces <- round(seq(1, length(work_pieces), + length.out = reps + 1))[-1] + } + progress_steps <- paste0(' + ', round(progress_steps, 2), '%') + progress_message <- 'Progress: 0%' + } else { + progress_message <- '' + selected_pieces <- NULL + } + piece_counter <- 1 + step_counter <- 1 + work_pieces <- lapply(work_pieces, + function (x) { + if (piece_counter %in% selected_pieces) { + wp <- c(x, list(progress_amount = progress_steps[step_counter])) + step_counter <<- step_counter + 1 + } else { + wp <- x + } + piece_counter <<- piece_counter + 1 + wp + }) + if (!silent) { + .message("If the size of the requested data is close to or above the free shared RAM memory, R may crash.") + .message("If the size of the requested data is close to or above the half of the free RAM memory, R may crash.") + .message(paste0("Will now proceed to read and process ", length(work_pieces), " data files:")) + if (length(work_pieces) < 30) { + lapply(work_pieces, function (x) .message(x[['file_path']], indent = 2)) + } else { + .message("The list of files is long. You can check it after Start() finishes in the output '$Files'.", indent = 2, exdent = 5) + } + } + + # Build the cluster of processes that will do the work and dispatch work pieces. + # The function .LoadDataFile is applied to each work piece. This function will + # open the data file, regrid if needed, subset, apply the mask, + # compute and apply the weights if needed, + # disable extreme values and store in the shared memory matrix. 
+#print("O") + if (!silent) { + .message("Loading... This may take several minutes...") + if (progress_message != '') { + .message(progress_message, appendLF = FALSE) + } + } + if (num_procs == 1) { + found_files <- lapply(work_pieces, .LoadDataFile, + shared_matrix_pointer = shared_matrix_pointer, + file_data_reader = file_data_reader, + synonims = synonims, + transform = transform, + transform_params = transform_params, + silent = silent, debug = debug) + } else { + cluster <- makeCluster(num_procs, outfile = "") + # Send the heavy work to the workers + work_errors <- try({ + found_files <- clusterApplyLB(cluster, work_pieces, .LoadDataFile, + shared_matrix_pointer = shared_matrix_pointer, + file_data_reader = file_data_reader, + synonims = synonims, + transform = transform, + transform_params = transform_params, + silent = silent, debug = debug) + }) + stopCluster(cluster) + } + + if (!silent) { + if (progress_message != '') { + .message("\n", tag = '') + } + } +#print("P") + data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake) + gc() + + # Load metadata and remove the metadata folder + if (!is.null(metadata_dims)) { + loaded_metadata_files <- list.files(metadata_folder) + loaded_metadata <- lapply(paste0(metadata_folder, '/', loaded_metadata_files), readRDS) + unlink(metadata_folder, recursive = TRUE) + return_metadata <- vector('list', length = prod(dim(array_of_metadata_flags)[metadata_dims])) + return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata + dim(return_metadata) <- dim(array_of_metadata_flags[metadata_dims]) + attr(data_array, 'Variables') <- return_metadata + # TODO: Try to infer data type from loaded_metadata + # as.integer(data_array) + } + + failed_pieces <- work_pieces[which(unlist(found_files))] + for (failed_piece in failed_pieces) { + array_of_not_found_files <- do.call('[<-', + c(list(array_of_not_found_files), + as.list(failed_piece[['file_indices_in_array_of_files']]), + list(value = TRUE))) + } + if 
(any(array_of_not_found_files)) { + for (i in 1:prod(dim(array_of_files_to_load))) { + if (is.na(array_of_not_found_files[i])) { + array_of_files_to_load[i] <- NA + } else { + if (array_of_not_found_files[i]) { + array_of_not_found_files[i] <- array_of_files_to_load[i] + array_of_files_to_load[i] <- NA + } else { + array_of_not_found_files[i] <- NA + } + } + } + } else { + array_of_not_found_files <- NULL + } + + } # End if (retrieve) + + # Replace the vars and common vars by the transformed vars and common vars + for (i in 1:length(dat)) { + if (length(names(transformed_vars[[i]])) > 0) { + picked_vars[[i]][names(transformed_vars[[i]])] <- transformed_vars[[i]] + } else if (length(names(picked_vars_ordered[[i]])) > 0) { + picked_vars[[i]][names(picked_vars_ordered[[i]])] <- picked_vars_ordered[[i]] + } + } + if (length(names(transformed_common_vars)) > 0) { + picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars + } else if (length(names(picked_common_vars_ordered)) > 0) { + picked_common_vars[names(picked_common_vars_ordered)] <- picked_common_vars_ordered + } +if (debug) { +print("-> THE TRANSFORMED VARS:") +print(str(transformed_vars)) +print("-> THE PICKED VARS:") +print(str(picked_vars)) +} + + file_selectors <- NULL + for (i in 1:length(dat)) { + file_selectors[[dat[[i]][['name']]]] <- dat[[i]][['selectors']][which(names(dat[[i]][['selectors']]) %in% found_file_dims[[i]])] + } + if (retrieve) { + if (!silent) { + .message("Successfully retrieved data.") + } + var_backup <- attr(data_array, 'Variables')[[1]] + attr(data_array, 'Variables') <- NULL + attributes(data_array) <- c(attributes(data_array), + list(Variables = c(list(common = c(picked_common_vars, var_backup)), + picked_vars), + Files = array_of_files_to_load, + NotFoundFiles = array_of_not_found_files, + FileSelectors = file_selectors, + PatternDim = found_pattern_dim) + ) + attr(data_array, 'class') <- c('startR_array', attr(data_array, 'class')) + data_array + } else { + if 
(!silent) { + .message("Successfully discovered data dimensions.") + } + start_call <- match.call() + for (i in 2:length(start_call)) { + if (class(start_call[[i]]) %in% c('name', 'call')) { + start_call[[i]] <- eval.parent(start_call[[i]]) + } + } + start_call[['retrieve']] <- TRUE + attributes(start_call) <- c(attributes(start_call), + list(Dimensions = final_dims_fake, + Variables = c(list(common = picked_common_vars), picked_vars), + ExpectedFiles = array_of_files_to_load, + FileSelectors = file_selectors, + PatternDim = found_pattern_dim, + MergedDims = if (merge_across_dims) { + inner_dims_across_files + } else { + NULL + }, + SplitDims = if (split_multiselected_dims) { + all_split_dims + } else { + NULL + }) + ) + attr(start_call, 'class') <- c('startR_cube', attr(start_call, 'class')) + start_call + } +} + +# This function is the responsible for loading the data of each work +# piece. +.LoadDataFile <- function(work_piece, shared_matrix_pointer, + file_data_reader, synonims, + transform, transform_params, + silent = FALSE, debug = FALSE) { +# suppressPackageStartupMessages({library(bigmemory)}) +### TODO: Specify dependencies as parameter +# suppressPackageStartupMessages({library(ncdf4)}) + +#print("1") + store_indices <- as.list(work_piece[['store_position']]) + first_round_indices <- work_piece[['first_round_indices']] + second_round_indices <- work_piece[['second_round_indices']] +#print("2") + file_to_open <- work_piece[['file_path']] + sub_array <- file_data_reader(file_to_open, NULL, + work_piece[['file_selectors']], + first_round_indices, synonims) +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> LOADING A WORK PIECE") +print("-> STRUCTURE OF READ UNTRANSFORMED DATA:") +print(str(sub_array)) +print("-> STRUCTURE OF VARIABLES TO TRANSFORM:") +print(str(work_piece[['vars_to_transform']])) +print("-> COMMON ARRAY DIMENSIONS:") +print(str(work_piece[['store_dims']])) +} +} + if (!is.null(sub_array)) { + # Apply data transformation 
once we have the data arrays. + if (!is.null(transform)) { +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> PROCEEDING TO TRANSFORM ARRAY") +print("-> DIMENSIONS OF ARRAY RIGHT BEFORE TRANSFORMING:") +print(dim(sub_array)) +} +} + sub_array <- do.call(transform, c(list(data_array = sub_array, + variables = work_piece[['vars_to_transform']], + file_selectors = work_piece[['file_selectors']]), + transform_params)) +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> STRUCTURE OF ARRAY AND VARIABLES RIGHT AFTER TRANSFORMING:") +print(str(sub_array)) +print("-> DIMENSIONS OF ARRAY RIGHT AFTER TRANSFORMING:") +print(dim(sub_array$data_array)) +} +} + sub_array <- sub_array$data_array + # Subset with second round of indices + dims_to_crop <- which(!sapply(second_round_indices, is.null)) + if (length(dims_to_crop) > 0) { + dimnames_to_crop <- names(second_round_indices)[dims_to_crop] + sub_array <- Subset(sub_array, dimnames_to_crop, + second_round_indices[dimnames_to_crop]) + } +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> STRUCTURE OF ARRAY AND VARIABLES RIGHT AFTER SUBSETTING WITH 2nd ROUND INDICES:") +print(str(sub_array)) +} +} + } + + metadata <- attr(sub_array, 'variables') + + names_bk <- names(store_indices) + store_indices <- lapply(names(store_indices), + function (x) { + if (!(x %in% names(first_round_indices))) { + store_indices[[x]] + } else if (is.null(second_round_indices[[x]])) { + 1:dim(sub_array)[x] + } else { + if (is.numeric(second_round_indices[[x]])) { + ## TODO: Review carefully this line. Inner indices are all + ## aligned to the left-most positions. 
If dataset A has longitudes + ## 1, 2, 3, 4 but dataset B has only longitudes 3 and 4, then + ## they will be stored as follows: + ## 1, 2, 3, 4 + ## 3, 4, NA, NA + ##x - min(x) + 1 + 1:length(second_round_indices[[x]]) + } else { + 1:length(second_round_indices[[x]]) + } + } + }) + names(store_indices) <- names_bk +if (debug) { +if (all(unlist(store_indices) == 1)) { +print("-> STRUCTURE OF FIRST ROUND INDICES FOR THIS WORK PIECE:") +print(str(first_round_indices)) +print("-> STRUCTURE OF SECOND ROUND INDICES FOR THIS WORK PIECE:") +print(str(second_round_indices)) +print("-> STRUCTURE OF STORE INDICES FOR THIS WORK PIECE:") +print(str(store_indices)) +} +} + + store_indices <- lapply(store_indices, as.integer) + store_dims <- work_piece[['store_dims']] + + # split the storage work of the loaded subset in parts + largest_dim_name <- names(dim(sub_array))[which.max(dim(sub_array))] + max_parts <- length(store_indices[[largest_dim_name]]) + + # Indexing a data file of N MB with expand.grid takes 30*N MB + # The peak ram of Start is, minimum, 2 * total data to load from all files + # due to inefficiencies in other regions of the code + # The more parts we split the indexing done below in, the lower + # the memory footprint of the indexing and the fast. + # But more than 10 indexing iterations (parts) for each MB processed + # makes the iteration slower (tested empirically on BSC workstations). + subset_size_in_mb <- prod(dim(sub_array)) * 8 / 1024 / 1024 + best_n_parts <- ceiling(subset_size_in_mb * 10) + # We want to set n_parts to a greater value than the one that would + # result in a memory footprint (of the subset indexing code below) equal + # to 2 * total data to load from all files. 
+ # s = subset size in MB + # p = number of parts to break it in + # T = total size of data to load + # then, s / p * 30 = 2 * T + # then, p = s * 15 / T + min_n_parts <- ceiling(prod(dim(sub_array)) * 15 / prod(store_dims)) + # Make sure we pick n_parts much greater than the minimum calculated + n_parts <- min_n_parts * 10 + if (n_parts > best_n_parts) { + n_parts <- best_n_parts + } + # Boundary checks + if (n_parts < 1) { + n_parts <- 1 + } + if (n_parts > max_parts) { + n_parts <- max_parts + } + + if (n_parts > 1) { + make_parts <- function(length, n) { + clusters <- cut(1:length, n, labels = FALSE) + lapply(1:n, function(y) which(clusters == y)) + } + part_indices <- make_parts(max_parts, n_parts) + parts <- lapply(part_indices, + function(x) { + store_indices[[largest_dim_name]][x] + }) + } else { + part_indices <- list(1:max_parts) + parts <- store_indices[largest_dim_name] + } + + # do the storage work + weights <- sapply(1:length(store_dims), + function(i) prod(c(1, store_dims)[1:i])) + part_indices_in_sub_array <- as.list(rep(TRUE, length(dim(sub_array)))) + names(part_indices_in_sub_array) <- names(dim(sub_array)) + data_array <- bigmemory::attach.big.matrix(shared_matrix_pointer) + for (i in 1:n_parts) { + store_indices[[largest_dim_name]] <- parts[[i]] + # Converting array indices to vector indices + matrix_indices <- do.call("expand.grid", store_indices) + # Given a matrix where each row is a set of array indices of an element + # the vector indices are computed + matrix_indices <- 1 + colSums(t(matrix_indices - 1) * weights) + part_indices_in_sub_array[[largest_dim_name]] <- part_indices[[i]] + data_array[matrix_indices] <- as.vector(do.call('[', + c(list(x = sub_array), + part_indices_in_sub_array))) + } + rm(data_array) + gc() + + if (!is.null(work_piece[['save_metadata_in']])) { + saveRDS(metadata, file = work_piece[['save_metadata_in']]) + } + } + if (!is.null(work_piece[['progress_amount']]) && !silent) { + 
message(work_piece[['progress_amount']], appendLF = FALSE) + } + is.null(sub_array) +} + diff --git a/R/Step.R b/R/Step.R new file mode 100644 index 0000000..1f54d08 --- /dev/null +++ b/R/Step.R @@ -0,0 +1,83 @@ +Step <- function(fun, target_dims, output_dims, + use_libraries = NULL, use_attributes = NULL) { + # Check fun + if (!is.function(fun)) { + stop("Parameter 'fun' must be a function.") + } + + # Check target_dims + if (is.character(target_dims)) { + target_dims <- list(target_dims) + names(target_dims) <- 'input1' + } + if (is.list(target_dims)) { + sapply(target_dims, + function(x) { + if (!(is.character(x) && (length(x) > 0))) { + stop("Parameter 'target_dims' must be one or a list of vectors ", + "of target dimension names for each data array input in ", + "the function 'fun'.") + } + }) + if (is.null(names(target_dims))) { + names(target_dims) <- paste0('input', 1:length(target_dims)) + } + } + + # Check output_dims + if (is.character(output_dims) || is.null(output_dims)) { + output_dims <- list(output_dims) + names(output_dims) <- 'output1' + } + if (is.list(output_dims)) { + sapply(output_dims, + function(x) { + if (!(is.character(x) || is.null(x))) { + stop("Parameter 'output_dims' must be one or a list of vectors ", + "of target dimension names for each data array input in ", + "the function 'fun'.") + } + }) + if (is.null(names(output_dims))) { + names(output_dims) <- paste0('output', 1:length(output_dims)) + } + } + + # Check use_libraries + if (!is.null(use_libraries)) { + if (!is.character(use_libraries)) { + stop("Parameter 'use_libraries' must be a vector of character ", + "strings.") + } + } + + # Check use_attributes + if (!is.null(use_attributes)) { + raise_error <- FALSE + if (!is.list(use_attributes)) { + raise_error <- TRUE + } + if (!all(sapply(use_attributes, + function(x) { + is.character(x) || + (is.list(x) && all(sapply(x, is.character))) + }))) { + raise_error <- TRUE + } + if (raise_error) { + stop("Parameter 'use_attributes' must 
be a list of vectors of ", + "character strings or of lists of vectors of character ", + "strings.") + } + } + + attr(fun, 'TargetDims') <- target_dims + attr(fun, 'OutputDims') <- output_dims + attr(fun, 'UseLibraries') <- use_libraries + attr(fun, 'UseAttributes') <- use_attributes + + # TODO: Add provenance info + class(fun) <- 'startR_step_fun' + + fun +} diff --git a/R/Subset.R b/R/Subset.R new file mode 100644 index 0000000..0e6b52a --- /dev/null +++ b/R/Subset.R @@ -0,0 +1,97 @@ +Subset <- function(x, along, indices, drop = FALSE) { + # Check x + if (!is.array(x)) { + stop("Input array 'x' must be a numeric array.") + } + + # Take the input array dimension names + dim_names <- attr(x, 'dimensions') + if (!is.character(dim_names)) { + dim_names <- names(dim(x)) + } + if (!is.character(dim_names)) { + if (any(sapply(along, is.character))) { + stop("The input array 'x' doesn't have labels for the dimensions but the parameter 'along' contains dimension names.") + } + } + + # Check along + if (any(sapply(along, function(x) !is.numeric(x) && !is.character(x)))) { + stop("All provided dimension indices in 'along' must be integers or character strings.") + } + if (any(sapply(along, is.character))) { + req_dimnames <- along[which(sapply(along, is.character))] + if (length(unique(req_dimnames)) < length(req_dimnames)) { + stop("The parameter 'along' must not contain repeated dimension names.") + } + along[which(sapply(along, is.character))] <- match(req_dimnames, dim_names) + if (any(is.na(along))) { + stop("Could not match all dimension names in 'indices' with dimension names in input array 'x'.") + } + along <- as.numeric(along) + } + + # Check indices + if (!is.list(indices)) { + indices <- list(indices) + } + + # Check parameter drop + dims_to_drop <- c() + if (is.character(drop)) { + if (drop == 'all') { + drop <- TRUE + } else if (any(drop %in% c('selected', 'non-selected', 'none'))) { + if (drop == 'selected') { + dims_to_drop <- along[which(sapply(indices, 
length) == 1)] + } else if (drop == 'non-selected') { + dims_to_drop <- dim(x) == 1 + dims_to_drop[along] <- FALSE + dims_to_drop <- which(dims_to_drop) + } + drop <- FALSE + } else { + stop("Parameter 'drop' must be one of TRUE, FALSE, 'all', 'selected', 'non-selected', 'none'.") + } + } else if (!is.logical(drop)) { + stop("Parameter 'drop' must be one of TRUE, FALSE, 'all', 'selected', 'non-selected', 'none'.") + } + + # Take the subset + nd <- length(dim(x)) + index <- as.list(rep(TRUE, nd)) + index[along] <- indices + subset <- eval(as.call(c(as.name("["), as.name("x"), index, drop = drop))) + # If dropped all dimensions, need to drop dimnames too + if (is.character(dim_names) && drop == TRUE) { + dim_names_to_remove <- unique(c(along[which(sapply(indices, length) == 1)], + which(dim(x) == 1))) + if (length(dim_names_to_remove) > 0) { + dim_names <- dim_names[-dim_names_to_remove] + } + } + + # Amend the final dimensions and put dimnames and attributes + metadata <- attributes(x) + metadata[['dim']] <- dim(subset) + if (length(dims_to_drop) > 0) { + metadata[['dim']] <- metadata[['dim']][-dims_to_drop] + if (is.character(dim_names)) { + names(metadata[['dim']]) <- dim_names[-dims_to_drop] + if ('dimensions' %in% names(attributes(x))) { + metadata[['dimensions']] <- dim_names[-dims_to_drop] + } + } + if (length(metadata[['dim']]) == 0) { + metadata['dim'] <- list(NULL) + metadata['dimensions'] <- list(NULL) + } + } else if (is.character(dim_names)) { + names(metadata[['dim']]) <- dim_names + if ('dimensions' %in% names(attributes(x))) { + metadata[['dimensions']] <- dim_names + } + } + attributes(subset) <- metadata + subset +} diff --git a/R/Utils.R b/R/Utils.R new file mode 100644 index 0000000..0d2b2fd --- /dev/null +++ b/R/Utils.R @@ -0,0 +1,841 @@ +indices <- function(x) { + attr(x, 'indices') <- TRUE + attr(x, 'values') <- FALSE + attr(x, 'chunk') <- c(chunk = 1, n_chunks = 1) + x +} + +values <- function(x) { + attr(x, 'indices') <- FALSE + attr(x, 
'values') <- TRUE + attr(x, 'chunk') <- c(chunk = 1, n_chunks = 1) + x +} + +chunk <- function(chunk, n_chunks, selectors) { + if (any(chunk > n_chunks)) { + stop("Requested chunk index out of bounds.") + } + if (length(chunk) == 1 && length(n_chunks) == 1) { + if (!is.null(attr(selectors, 'chunk'))) { + attr(selectors, 'chunk') <- c((attr(selectors, 'chunk')['chunk'] - 1) * n_chunks + + chunk, + attr(selectors, 'chunk')['n_chunks'] * n_chunks) + } else { + attr(selectors, 'chunk') <- c(chunk = unname(chunk), n_chunks = unname(n_chunks)) + } + } else { + # Chunking arrays of multidimensional selectors. + # This should be done in Start.R but implies modifications. + if (length(chunk) != length(n_chunks)) { + stop("Wrong chunk specification.") + } + if (!is.null(attr(selectors, 'values'))) { + stop("Multidimensional chunking only available when selector ", + "values provided.") + } + if (is.null(dim(selectors))) { + stop("Multidimensional chunking only available when multidimensional ", + "selector values provided.") + } + if (length(dim(selectors)) != length(chunk)) { + stop("As many chunk indices and chunk lengths as dimensions in the ", + "multidimensional selector array must be specified.") + } + old_indices <- attr(selectors, 'indices') + old_values <- attr(selectors, 'values') + selectors <- Subset(selectors, 1:length(chunk), + lapply(1:length(chunk), + function(x) { + n_indices <- dim(selectors)[x] + chunk_sizes <- rep(floor(n_indices / n_chunks[x]), n_chunks[x]) + chunks_to_extend <- n_indices - chunk_sizes[1] * n_chunks[x] + if (chunks_to_extend > 0) { + chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1 + } + chunk_size <- chunk_sizes[chunk[x]] + offset <- 0 + if (chunk[x] > 1) { + offset <- sum(chunk_sizes[1:(chunk[x] - 1)]) + } + 1:chunk_sizes[chunk[x]] + offset + })) + attr(selectors, 'indices') <- old_indices + attr(selectors, 'values') <- old_values + } + selectors +} + +.ReplaceVariablesInString <- function(string, replace_values, 
allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. + if (length(strsplit(string, "\\$")[[1]]) > 1) { + parts <- strsplit(string, "\\$")[[1]] + output <- "" + i <- 0 + for (part in parts) { + if (i %% 2 == 0) { + output <- paste(output, part, sep = "") + } else { + if (part %in% names(replace_values)) { + output <- paste(output, .ReplaceVariablesInString(replace_values[[part]], replace_values, allow_undefined_key_vars), sep = "") + } else if (allow_undefined_key_vars) { + output <- paste0(output, "$", part, "$") + } else { + stop(paste('Error: The variable $', part, '$ was not defined in the configuration file.', sep = '')) + } + } + i <- i + 1 + } + output + } else { + string + } +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. The path with expressions and tags, e.g.: + # /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc + # and a complete known path to one of the matching files, e.g.: + # /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc + # and it returns the path pattern but without shell globbing expressions: + # /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc + # + # To do that, it needs also as inputs the list of replace values (the + # association of each tag to their value). + # + # All the tags not present in the parameter tags_to_keep will be repalced. 
+ # + # Not all cases can be resolved with the implemented algorithm. In an + # unsolvable case a warning is given and one possible guess is returned. + # + # In some cases it is interesting to replace only the expressions in the + # path to the file, but not the ones in the file name itself. To keep the + # expressions in the file name, the parameter permissive can be set to + # TRUE. To replace all the expressions it can be set to FALSE. + + # Tests + #a <- "/esarchive/exp/ecearth/a13c/3hourly/$var$_*/$var$_*-LR_historical_r1i1p1f1_gr_$chunk$.nc" + #b <- "/esarchive/exp/ecearth/a13c/3hourly/psl_f6h/psl_E3hrPt_EC-Earth3-LR_historical_r1i1p1f1_gr_195001010000-195001312100.nc" + #c <- list(dat = 'dat1', var = 'psl', chunk = '195001010000-195001312100') + #d <- c('dat', 'var', 'chunk') + #e <- 'dat1' + #f <- FALSE #TRUE/0/1/2/3 + #r <- .ReplaceGlobExpressions(a, b, c, d, e, f) + + clean <- function(x) { + if (nchar(x) > 0) { + x <- gsub('\\\\', '', x) + x <- gsub('\\^', '', x) + x <- gsub('\\$', '', x) + x <- unname(sapply(strsplit(x, '[',fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y))) + do.call(paste0, as.list(x)) + } else { + x + } + } + + strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "") + + if (permissive == 0) { + permissive <- FALSE + } else { + if (permissive == TRUE) { + permissive_levels <- 1 + } else { + permissive_levels <- round(permissive[1]) + permissive <- TRUE + } + } + + if (permissive) { + actual_path_chunks <- strsplit(actual_path, '/')[[1]] + if (permissive_levels >= length(actual_path_chunks)) { + stop("Error: Provided levels out of scope in parameter 'permissive'.") + } + permissive_levels <- 1:permissive_levels + permissive_levels <- length(actual_path_chunks) - (rev(permissive_levels) - 1) + actual_path <- paste(actual_path_chunks[-permissive_levels], collapse = '/') + file_name <- paste(actual_path_chunks[permissive_levels], collapse = '/') + if (length(actual_path_chunks) > 1) { + file_name <- 
paste0('/', file_name) + } + path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]] + path_with_globs <- paste(path_with_globs_chunks[-permissive_levels], + collapse = '/') + path_with_globs_no_tags <- .ReplaceVariablesInString(path_with_globs, replace_values) + file_name_with_globs <- paste(path_with_globs_chunks[permissive_levels], collapse = '/') + if (length(path_with_globs_chunks) > 1) { + file_name_with_globs <- paste0('/', file_name_with_globs) + } + right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1) + right_known_no_tags <- .ReplaceVariablesInString(right_known, replace_values) + path_with_globs_no_tags_rx <- utils::glob2rx(paste0(path_with_globs_no_tags, right_known_no_tags)) + match <- regexpr(gsub('$', '', path_with_globs_no_tags_rx, fixed = TRUE), paste0(actual_path, file_name)) + if (match != 1) { + stop("Incorrect parameters to replace glob expressions. The path with expressions does not match the actual path.") + } + #if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + # path_with_globs_no_tags <- paste0(path_with_globs_no_tags, right_known_no_tags, '*') + # file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + #} + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- c() + tags_to_replace_starts <- c() + tags_to_replace_ends <- c() + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in 1:length(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- .ReplaceVariablesInString(substr(path_with_globs_rx, 1, matches[i] - 1), replace_values) + left_known <- strReverse(head(strsplit(strReverse(left), strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- 
.ReplaceVariablesInString(substr(path_with_globs_rx, matches[i] + lengths[i], nchar(path_with_globs_rx)), replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + final_match <- NULL + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- c(left_match + match_len - 1 - nchar(clean(right_known)) - nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + actual_path_with_tags <- actual_path + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path_with_tags <- paste0(substr(actual_path_with_tags, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path_with_tags, head(tags_to_replace_ends, 1) + 1, nchar(actual_path_with_tags))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - (head(tags_to_replace_ends, 1) - head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". 
Double check carefully the '$Files' fetched for this dataset or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path_with_tags, file_name_with_globs) + } else { + actual_path_with_tags + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + + addition_warning = FALSE + + if (!all(sapply(c(path_with_globs_and_tag, actual_path, tag), is.character))) { + stop("All 'path_with_globs_and_tag', 'actual_path' and 'tag' must be character strings.") + } + + if (grepl('$', tag, fixed = TRUE)) { + stop("The provided 'tag' must not contain '$' symbols.") + } + full_tag <- paste0('$', tag, '$') + + if (!grepl(full_tag, path_with_globs_and_tag, fixed = TRUE)) { + stop("The provided 'path_with_globs_and_tag' must contain the tag in 'tag' surrounded by '$' symbols.") + } + + parts <- strsplit(path_with_globs_and_tag, full_tag, fixed = TRUE)[[1]] + if (length(parts) == 1) { + parts <- c(parts, '') + } + parts[1] <- paste0('^', parts[1]) + parts[length(parts)] <- paste0(parts[length(parts)], '$') + + # Group the parts in 2 groups, in a way that both groups have a number + # of characters as similar as possible. + part_lengths <- sapply(parts, nchar) + group_len_diffs <- sapply(1:(length(parts) - 1), + function(x) { + sum(part_lengths[(x + 1):length(parts)]) - sum(part_lengths[1:x]) + } + ) + clp <- chosen_left_part <- which.min(group_len_diffs)[1] + + left_expr <- paste(parts[1:clp], collapse = full_tag) + + #because ? means sth, use . (any char) to substitute ? + left_expr <- gsub('?', '.', left_expr, fixed = TRUE) + test_left_expr <- left_expr + + # because * means zero or more char, use . to substitute *. + # And the * behind . means zero or more char. '?' for lazy evaluation. + left_expr <- gsub('*', '.*?', left_expr, fixed = TRUE) + left_expr <- gsub(full_tag, '.*?', left_expr, fixed = TRUE) + + # To test if the pattern matches only one... 
dont use lazy evaluation + test_left_expr <- gsub('*', '.*', test_left_expr, fixed = TRUE) + test_left_expr <- gsub(full_tag, '.*', test_left_expr, fixed = TRUE) + + # Find the match chars from left + left_match <- regexec(left_expr, actual_path)[[1]] + test_left_match <- regexec(test_left_expr, actual_path)[[1]] + + if (left_match < 0) { + stop("Unexpected error in .FindTagValue.") + } + + if (attr(test_left_match, "match.length") != attr(left_match, "match.length")) { + addition_warning = TRUE + warning("Detect more than one possibility derived from the global expression of path.") + } + + #Cut down the left match part + actual_path_sub <- substr(actual_path, + attr(left_match, 'match.length') + 1, + nchar(actual_path)) + + #----------Search match chars from right + right_expr <- paste(parts[(clp + 1):(length(parts))], collapse = full_tag) + right_expr <- gsub('?', '.', right_expr, fixed = TRUE) + + test_right_expr <- right_expr + # For lazy evaulation to work, pattern and string have to be reversed. + right_expr <- gsub('*', '.*?', right_expr, fixed = TRUE) + right_expr <- gsub(full_tag, '.*?', right_expr, fixed = TRUE) + right_expr <- gsub('$', '^', right_expr, fixed = TRUE) + + # To test if the pattern matches only one... 
dont use lazy evaluation + test_right_expr <- gsub('*', '.*', test_right_expr, fixed = TRUE) + test_right_expr <- gsub(full_tag, '.*', test_right_expr, fixed = TRUE) + test_right_expr <- gsub('$', '^', test_right_expr, fixed = TRUE) + + rev_str <- function(s) { + paste(rev(strsplit(s, NULL)[[1]]), collapse = '') + } + + right_expr <- rev_str(right_expr) + test_right_expr <- rev_str(test_right_expr) + + right_expr <- gsub('?*.', '.*?', right_expr, fixed = TRUE) + right_match <- regexec(right_expr, rev_str(actual_path))[[1]] + + test_right_expr <- gsub('*.', '.*', test_right_expr, fixed = TRUE) + test_right_match <- regexec(test_right_expr, rev_str(actual_path_sub))[[1]] + + if (right_match < 0) { + stop("Unexpected error in .FindTagValue.") + } + + if (attr(test_right_match, "match.length") != attr(right_match, "match.length")) { + addition_warning = TRUE + warning(paste0("Detect more than one possibility derived from the global ", + "expression of path.")) + } + + #-------------get tag value + right_match[] <- nchar(actual_path) - + (right_match[] + attr(right_match, 'match.length') - 1) + 1 + + if (addition_warning) { + warning(paste0("The extracted parameter ", full_tag, " is ", + substr(actual_path, left_match + attr(left_match, 'match.length'), + right_match - 1), + ". Check if all the desired files were read in. ", + "If not, specify parameter '", tag, + "' by values instead of indices, or set parameter ", + "'path_glob_permissive' as TRUE")) + } + + if ((left_match + attr(left_match, 'match.length')) > + (right_match - 1)) { + NULL + } else { + substr(actual_path, left_match + attr(left_match, 'match.length'), + right_match - 1) + } +} + +.message <- function(...) { + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) 
+ + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + message(paste0(tag, paste(strwrap( + args, indent = indent, exdent = exdent + ), collapse = collapse)), appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) 
+ + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + warning(paste0(tag, paste(strwrap( + args, indent = indent, exdent = exdent + ), collapse = collapse)), call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +# Function to permute arrays of non-atomic elements (e.g. POSIXct) +# Function to permute arrays of non-atomic elements (e.g. 
POSIXct) +.aperm2 <- function(x, new_order) { + old_dims <- dim(x) + attr_bk <- attributes(x) + if ('dim' %in% names(attr_bk)) { + attr_bk[['dim']] <- NULL + } + if (is.numeric(x)) { + x <- aperm(x, new_order) + } else { + y <- array(1:length(x), dim = dim(x)) + y <- aperm(y, new_order) + x <- x[as.vector(y)] + } + dim(x) <- old_dims[new_order] + attributes(x) <- c(attributes(x), attr_bk) + x +} + +# Function to bind arrays of non-atomic elements (e.g. POSIXct) +# 'x' and 'y' must have dimension names +# parameter 'along' must be a dimension name +.abind2 <- function(x, y, along) { + x_along <- which(names(dim(x)) == along) + if (x_along != length(dim(x))) { + tmp_order_x <- c((1:length(dim(x)))[-x_along], x_along) + x <- .aperm2(x, tmp_order_x) + } + y_along <- which(names(dim(y)) == along) + if (y_along != length(dim(y))) { + tmp_order_y <- c((1:length(dim(y)))[-y_along], y_along) + y <- .aperm2(y, tmp_order_y) + } + r <- c(x, y) + new_dims <- dim(x) + new_dims[length(new_dims)] <- dim(x)[length(dim(x))] + dim(y)[length(dim(y))] + dim(r) <- new_dims + if (x_along != length(dim(x))) { + final_order <- NULL + if (x_along > 1) { + final_order <- c(final_order, (1:length(dim(r)))[1:(x_along - 1)]) + } + final_order <- c(final_order, length(dim(r))) + final_order <- c(final_order, (1:length(dim(r)))[x_along:(length(dim(r)) - 1)]) + r <- .aperm2(r, final_order) + } + r +} + +# This function is a helper for the function .MergeArrays. +# It expects as inputs two named numeric vectors, and it extends them +# with dimensions of length 1 until an ordered common dimension +# format is reached. +# The first output is dims1 extended with 1s. +# The second output is dims2 extended with 1s. +# The third output is a merged dimension vector. If dimensions with +# the same name are found in the two inputs, and they have a different +# length, the maximum is taken. 
# Helper for .MergeArrays: extends two named dimension vectors with
# length-1 dimensions until both follow one common ordered layout.
# Returns a list of three elements:
#   [[1]] dims1 extended with 1s,
#   [[2]] dims2 extended with 1s,
#   [[3]] the merged dimension vector (pmax where a dimension appears in
#         both inputs with different lengths).
.MergeArrayDims <- function(dims1, dims2) {
  new_dims1 <- c()
  new_dims2 <- c()
  # Consume dims1 from the front, aligning each name against dims2.
  while (length(dims1) > 0) {
    if (names(dims1)[1] %in% names(dims2)) {
      pos <- which(names(dims2) == names(dims1)[1])
      # Dimensions of dims2 preceding the match become length-1 dims in
      # dims1, keeping both extended vectors aligned.
      dims_to_add <- rep(1, pos - 1)
      if (length(dims_to_add) > 0) {
        names(dims_to_add) <- names(dims2[1:(pos - 1)])
      }
      new_dims1 <- c(new_dims1, dims_to_add, dims1[1])
      new_dims2 <- c(new_dims2, dims2[1:pos])
      dims1 <- dims1[-1]
      dims2 <- dims2[-c(1:pos)]
    } else {
      # Dimension only present in dims1: pad dims2 with a length-1 dim.
      new_dims1 <- c(new_dims1, dims1[1])
      new_dims2 <- c(new_dims2, 1)
      names(new_dims2)[length(new_dims2)] <- names(dims1)[1]
      dims1 <- dims1[-1]
    }
  }
  # Dimensions remaining in dims2 exist only there: pad dims1.
  if (length(dims2) > 0) {
    dims_to_add <- rep(1, length(dims2))
    names(dims_to_add) <- names(dims2)
    new_dims1 <- c(new_dims1, dims_to_add)
    new_dims2 <- c(new_dims2, dims2)
  }
  list(new_dims1, new_dims2, pmax(new_dims1, new_dims2))
}

# This function takes two named arrays and merges them, filling with
# NA where needed.
# dim(array1)
# 'b' 'c' 'e' 'f'
#  1   3   7   9
# dim(array2)
# 'a' 'b' 'd' 'f' 'g'
#  2   3   5   9  11
# dim(.MergeArrays(array1, array2, 'b'))
# 'a' 'b' 'c' 'e' 'd' 'f' 'g'
#  2   4   3   7   5   9  11
# NOTE(review): relies on abind() from the 'abind' package being attached
# by the caller.
.MergeArrays <- function(array1, array2, along) {
  # A NULL input acts as the neutral element: return the other array.
  if (!(is.null(array1) || is.null(array2))) {
    if (!(identical(names(dim(array1)), names(dim(array2))) &&
          identical(dim(array1)[-which(names(dim(array1)) == along)],
                    dim(array2)[-which(names(dim(array2)) == along)]))) {
      # Layouts differ: extend both arrays to a common dimension format
      # and pad the shorter extents with NAs, dimension by dimension.
      new_dims <- .MergeArrayDims(dim(array1), dim(array2))
      dim(array1) <- new_dims[[1]]
      dim(array2) <- new_dims[[2]]
      for (j in 1:length(dim(array1))) {
        if (names(dim(array1))[j] != along) {
          if (dim(array1)[j] != dim(array2)[j]) {
            if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) {
              na_array_dims <- dim(array2)
              na_array_dims[j] <- dim(array1)[j] - dim(array2)[j]
              na_array <- array(dim = na_array_dims)
              array2 <- abind(array2, na_array, along = j)
              names(dim(array2)) <- names(na_array_dims)
            } else {
              na_array_dims <- dim(array1)
              na_array_dims[j] <- dim(array2)[j] - dim(array1)[j]
              na_array <- array(dim = na_array_dims)
              array1 <- abind(array1, na_array, along = j)
              names(dim(array1)) <- names(na_array_dims)
            }
          }
        }
      }
    }
    if (!(along %in% names(dim(array2)))) {
      stop("The dimension specified in 'along' is not present in the ",
           "provided arrays.")
    }
    array1 <- abind(array1, array2, along = which(names(dim(array1)) == along))
    names(dim(array1)) <- names(dim(array2))
  } else if (is.null(array1)) {
    array1 <- array2
  }
  array1
}

# Takes as input a list of arrays (a list carrying a 'dim' attribute with
# named dimensions) and merges all sub-arrays into one array, collapsing
# the list dimensions one at a time with .MergeArrays.
.MergeArrayOfArrays <- function(array_of_arrays) {
  MergeArrays <- .MergeArrays
  array_dims <- (dim(array_of_arrays))
  dim_names <- names(array_dims)

  # Merge the chunks.
  for (dim_index in 1:length(dim_names)) {
    dim_sub_array_of_chunks <- dim_sub_array_of_chunk_indices <- NULL
    if (dim_index < length(dim_names)) {
      # Dimensions still to be collapsed after this pass.
      dim_sub_array_of_chunks <- array_dims[(dim_index + 1):length(dim_names)]
      names(dim_sub_array_of_chunks) <- dim_names[(dim_index + 1):length(dim_names)]
      dim_sub_array_of_chunk_indices <- dim_sub_array_of_chunks
      sub_array_of_chunk_indices <- array(1:prod(dim_sub_array_of_chunk_indices),
                                          dim_sub_array_of_chunk_indices)
    } else {
      sub_array_of_chunk_indices <- NULL
    }
    # prod(NULL) is 1, so the last pass produces a single-cell list.
    sub_array_of_chunks <- vector('list', prod(dim_sub_array_of_chunks))
    dim(sub_array_of_chunks) <- dim_sub_array_of_chunks
    for (i in 1:prod(dim_sub_array_of_chunks)) {
      if (!is.null(sub_array_of_chunk_indices)) {
        chunk_sub_indices <- which(sub_array_of_chunk_indices == i, arr.ind = TRUE)[1, ]
      } else {
        chunk_sub_indices <- NULL
      }
      # Fold all chunks along the dimension being collapsed into one array.
      for (j in 1:(array_dims[dim_index])) {
        new_chunk <- do.call('[[', c(list(x = array_of_arrays),
                                     as.list(c(j, chunk_sub_indices))))
        if (is.null(new_chunk)) {
          stop("Chunks missing.")
        }
        if (is.null(sub_array_of_chunks[[i]])) {
          sub_array_of_chunks[[i]] <- new_chunk
        } else {
          sub_array_of_chunks[[i]] <- MergeArrays(sub_array_of_chunks[[i]],
                                                  new_chunk,
                                                  dim_names[dim_index])
        }
      }
    }
    array_of_arrays <- sub_array_of_chunks
    rm(sub_array_of_chunks)
    gc()
  }

  array_of_arrays[[1]]
}

# Collects the result chunks written by the distributed jobs of a startR
# Compute() call and merges them into one array per output component.
#   shared_dir: base directory shared with the HPC; the chunks live in
#               '<shared_dir>/STARTR_CHUNKING_<suite_id>'.
#   suite_id: identifier of the ecFlow suite whose chunks are merged.
#   remove: whether to delete the chunk files after the merge.
# Chunk files are expected to be named
#   '<component>__<dim1>_<index1>__<dim2>_<index2>__....Rds'.
# Returns a named list with one merged array per component.
# (The former unused local 'MergeArrays <- startR:::.MergeArrays' has
# been dropped.)
.MergeChunks <- function(shared_dir, suite_id, remove) {
  # Placeholder for a future partial-merge interface (see the
  # 'Feature not implemented' branch below).
  args <- NULL
  shared_dir <- paste0(shared_dir, '/STARTR_CHUNKING_', suite_id)

  all_chunk_files_original <- list.files(paste0(shared_dir, '/'), '.*\\.Rds$')
  all_chunk_files <- gsub('\\.Rds$', '', all_chunk_files_original)
  # File name layout: component first, then one '<dim>_<index>' token per
  # chunked dimension, all separated by '__'.
  chunk_filename_parts_all_components <- strsplit(all_chunk_files, '__')
  all_components <- sapply(chunk_filename_parts_all_components, '[[', 1)
  components <- unique(all_components)
  result <- vector('list', length(components))
  names(result) <- components
  for (component in components) {
    chunk_files_original <- all_chunk_files_original[which(all_components == component)]
    chunk_filename_parts <- chunk_filename_parts_all_components[which(all_components == component)]
    chunk_filename_parts <- lapply(chunk_filename_parts, '[', -1)
    if (length(unique(sapply(chunk_filename_parts, length))) != 1) {
      stop("Detected chunks with more dimensions than others.")
    }
    dim_names <- sapply(chunk_filename_parts[[1]],
                        # TODO: strsplit by the last '_' match, not the first.
                        function(x) strsplit(x, '_')[[1]][1])
    # TODO check all files have exactly the same dimnames
    found_chunk_indices <- sapply(chunk_filename_parts,
                                  function(x) as.numeric(sapply(strsplit(x, '_'), '[[', 2)))
    found_chunk_indices <- array(found_chunk_indices,
                                 dim = c(length(dim_names),
                                         length(found_chunk_indices) / length(dim_names))
                                )
    found_chunks_str <- apply(found_chunk_indices, 2, paste, collapse = '_')

    if (length(args) > 0) {
      # Dead branch ('args' is always NULL above): sketch of a future
      # partial-merge interface, kept as-is.
      if ((length(args) %% 2) != 0) {
        stop("Wrong number of parameters.")
      }
      expected_dim_names <- args[(1:(length(args) / 2) - 1) * 2 + 1]
      if (any(!is.character(expected_dim_names))) {
        stop("Expected dimension names in parameters at odd positions.")
      }
      dim_indices <- args[(1:(length(args) / 2) - 1) * 2 + 2]
      if (!any(dim_indices == 'all')) {
        stop("Expected one dimension index to be 'all'.")
      }
      dim_to_merge <- which(dim_indices == 'all')
      if (length(dim_indices) > 1) {
        if (!all(is.numeric(dim_indices[-dim_to_merge]))) {
          stop("Expected all dimension index but one to be numeric.")
        }
      }
      # Check expected dim names match dim names
      ## TODO
      # Merge indices that vary along dim_to_merge whereas other fixed by dim_indices
      # REMOVE FILES
      ## TODO
      stop("Feature not implemented.")
    } else {
      chunks_indices <- 1:length(dim_names)
      chunks_indices <- lapply(chunks_indices, function(x) sort(unique(found_chunk_indices[x, ])))
      names(chunks_indices) <- dim_names

      # Load all found chunks into the array 'array_of_chunks'.
      array_dims <- sapply(chunks_indices, length)
      names(array_dims) <- dim_names
      array_of_chunks <- vector('list', prod(array_dims))
      dim(array_of_chunks) <- array_dims
      array_of_chunks_indices <- array(1:prod(array_dims), array_dims)
      for (i in 1:prod(array_dims)) {
        chunk_indices <- which(array_of_chunks_indices == i, arr.ind = TRUE)[1, ]
        j <- 1
        chunk_indices_on_file <- sapply(chunk_indices,
                                        function(x) {
                                          r <- chunks_indices[[j]][x]
                                          j <<- j + 1
                                          r
                                        })
        found_chunk <- which(found_chunks_str == paste(chunk_indices_on_file,
                                                       collapse = '_'))[1]
        # BUGFIX: 'found_chunk' always has length 1 here ('[1]' yields NA
        # when there is no match), so the former 'length(found_chunk) > 0'
        # test was always TRUE and a missing chunk caused 5 futile retries
        # on a file literally named 'NA' plus a misleading error message.
        # Test for NA instead and fail fast.
        if (!is.na(found_chunk)) {
          num_tries <- 5
          found <- FALSE
          try_num <- 1
          # Retry a few times: the file may still be in transfer.
          while ((try_num <= num_tries) && !found) {
            array_of_chunks[[i]] <- try({
              readRDS(paste0(shared_dir, '/',
                             chunk_files_original[found_chunk]))
            })
            if (('try-error' %in% class(array_of_chunks[[i]]))) {
              message("Waiting for an incomplete file transfer...")
              Sys.sleep(5)
            } else {
              found <- TRUE
            }
            try_num <- try_num + 1
          }
          if (!found) {
            stop("Could not open one of the chunks. Might be a large chunk ",
                 "in transfer. Merge aborted, files have been preserved.")
          }
        } else {
          stop("Chunks missing.")
        }
      }

      result[[component]] <- startR:::.MergeArrayOfArrays(array_of_chunks)
      rm(array_of_chunks)
      gc()
    }
  }

  if (remove) {
    sapply(all_chunk_files_original,
           function(x) {
             file.remove(paste0(shared_dir, '/', x))
           })
  }

  result
}

# ---------------------------------------------------------------------------
# (patch metadata) diff --git a/Rissues/CST_LoadIssue_class_19.R
#                  b/Rissues/CST_LoadIssue_class_19.R
#                  new file mode 100644, index 0000000..dc042b3
# ---------------------------------------------------------------------------
# This code shows the problem with CST_Load, which only assigns the correct
# class if both exp and obs are loaded:
# https://earth.bsc.es/gitlab/external/cstools/issues/19

library(CSTools)
library(zeallot)

# 10 starting dates:
start <- as.Date(paste("1993", "11", "01", sep = ""), "%Y%m%d")
end <- as.Date(paste("2002", "11", "01", sep = ""), "%Y%m%d")
dateseq <- format(seq(start, end, by = "year"), "%Y%m%d")
# path structure:
glosea5 <- '/esnas/exp/glosea5/specs-seasonal_i1p1/$STORE_FREQ$_mean/$VAR_NAME$-allmemb/$VAR_NAME$_$START_DATE$.nc'
# CST_Load using zeallot package:
c(exp, obs) %<-%
  CST_Load(var = "tas", exp = list(list(name = 'glosea5', path = glosea5),
                                   list(name = 'ecmwf/system4_m1'),
                                   list(name = 'meteofrance/system5_m1')),
           obs = "erainterim", sdates = dateseq, leadtimemin = 2, leadtimemax = 4,
           lonmin = -20, lonmax = 70, latmin = 25, latmax = 75,
           storefreq = "monthly", sampleperiod = 1, nmember = 9,
           output = "lonlat", method = "bilinear",
           grid = "r256x128")
# Check class:
class(exp)
class(obs)
dim(exp$data)

exp$load_parameters$sdates


rm(list = c("obs", "exp"))


exp <- CST_Load(var = "tas", exp = list(list(name = 'glosea5', path = glosea5),
                                        list(name = 'ecmwf/system4_m1'),
                                        list(name = 'meteofrance/system5_m1')),
                #obs = "erainterim",
                sdates = dateseq, leadtimemin = 2, leadtimemax = 4,
                lonmin = -20, lonmax = 70, latmin = 25, latmax = 75,
                storefreq = "monthly", sampleperiod = 1, nmember = 9,
                output = "lonlat", method = "bilinear",
                grid = "r256x128")
class(exp)
class(obs)  # NOTE(review): 'obs' was removed above; this line errors when
            # the script is run top-to-bottom -- presumably intentional in
            # this issue-repro script.

str(lonlat_prec)
class(lonlat_prec)
str(lonlat_data)
class(lonlat_data)
str(lonlat_data$exp)

# ---------------------------------------------------------------------------
# (patch metadata) Files renamed from R/ to Rissues/ with 100% similarity:
#   R/CST_SaveNC.R               -> Rissues/CST_SaveNC.R
#   R/CST_SaveNC_v2.R            -> Rissues/CST_SaveNC_v2.R
#   R/CST_SaveNC_v3.R            -> Rissues/CST_SaveNC_v3.R
#   R/CST_SaveNC_v4.R            -> Rissues/CST_SaveNC_v4.R
#   R/CSTools_CRANDownloads.R    -> Rissues/CSTools_CRANDownloads.R
#   R/Image_test.R               -> Rissues/Image_test.R
#   R/JostRainFarm.R             -> Rissues/JostRainFarm.R
#   R/Medscope_splitingfunction.R -> Rissues/Medscope_splitingfunction.R
#   R/MonthsToString.R           -> Rissues/MonthsToString.R
#   R/SelectSector.R             -> Rissues/SelectSector.R
#   R/StartExampleClimateProj.R  -> Rissues/StartExampleClimateProj.R
#   R/script_demo_Prakash.R      -> Rissues/script_demo_Prakash.R
# ---------------------------------------------------------------------------
# (patch metadata) diff --git a/RowColIndices.R b/RowColIndices.R
#                  new file mode 100644, index 0000000..f1e1f6a
#
# Maps a linear (column-major) position inside a matrix to its row and
# column indices.
#   number: linear index, 1 <= number <= num_row * num_col.
#   num_row, num_col: matrix extents ('num_col' only documents the valid
#                     range of 'number'; it does not enter the formula).
# Returns list(row = ..., col = ...).
# Example: RowColIndices(8, num_row = 3, num_col = 3) -> row 2, col 3.
RowColIndices <- function(number, num_row, num_col) {
  pos_col <- ceiling(number / num_row)
  pos_row <- (number - pos_col * num_row) + num_row
  return(list(row = pos_row, col = pos_col))
}

# (patch metadata) diff --git a/esarchive_search.R b/esarchive_search.R
#                  new file mode 100644, index 0000000..f326a2f
#
# Inventories the esarchive experiment tree, looking for variable folders
# at the requested frequencies. For each folder containing NetCDF files,
# one row is returned with: variable, path, first/last year available,
# long_name and units (read from the first file in the folder).
#   folder: root of the archive (default "/esarchive/exp/").
#   institution: institution folder name(s); NULL inventories all of them.
#   variables: variable name prefixes, including the trailing underscore
#              used in file names (e.g. 'tas_').
#   freq: frequency folder name(s); NULL tries all the standard ones.
# Example:
#   res <- esarchive_search(institution = c('meteofrance'),
#                           variables = c('tas_', 'prlr_'), freq = 'daily')
esarchive_search <- function(folder = "/esarchive/exp/", institution = NULL,
                             variables, freq = NULL) {
  information <- NULL
  if (is.null(institution)) {
    # No institutions requested: inventory every top-level folder.
    institution <- list.dirs(folder, recursive = FALSE, full.names = FALSE)
    print(paste("Total", length(institution), "institutions."))
  }
  if (is.null(freq)) {
    freq <- c('daily', 'monthly', 'annual', 'hourly')
  }
  paths <- lapply(institution, function(x) {file.path(folder, x)})
  for (i in seq_along(paths)) {
    print("Inventorying esarchive...")
    dirs <- list.dirs(paths[[i]], recursive = TRUE)
    print("Subsetting ... ")
    # Keep only directories matching a requested variable and frequency.
    pos <- lapply(variables, function(x) {grep(x, dirs)})
    dirs <- lapply(pos, function(x) {dirs[x]})
    pos <- lapply(dirs, function(y) {
      lapply(freq, function(x) {grep(x, y)})})
    dirs <- lapply(seq_along(variables), function(x) {dirs[[x]][unlist(pos[[x]])]})
    for (k in seq_along(variables)) {
      for (j in dirs[[k]]) {
        # NOTE(review): ".nc" is an unanchored regex ('.' matches any
        # character); "\\.nc$" would be stricter -- kept as-is to preserve
        # the current results.
        files <- list.files(j, pattern = ".nc")
        if (length(files) > 0) {
          # File names start with '<variable><yyyy>...': extract the years
          # right after the variable prefix.
          period <- range(as.numeric(substr(files, nchar(variables[k]) + 1,
                                            nchar(variables[k]) + 4)))
          # BUGFIX: the original assigned 'meta' inside the error handler,
          # which only set a variable local to the handler function; after
          # a read failure 'meta' was undefined (or stale from a previous
          # iteration) in this scope. Assign the tryCatch() result instead.
          meta <- tryCatch({
            f <- ncdf4::nc_open(file.path(j, files[1]))
            atts <- ncdf4::ncatt_get(f,
                                     substr(variables[k], 1,
                                            nchar(variables[k]) - 1))
            ncdf4::nc_close(f)
            atts
          }, error = function(e) {
            list(long_name = NA, units = NA)
          })
          info <- c(variables[k], j, period, meta$long_name, meta$units)
          information <- rbind(information, info)
        }
      }
    }
  }
  return(information)
}

# (patch metadata) diff --git a/source_dir.R b/source_dir.R
#                  new file mode 100644, index 0000000..a1ed0c2
#
# Sources every file found in 'dir', invisibly.
# NOTE(review): no pattern filter is applied, so non-R files in 'dir'
# would be sourced too -- presumably the folder contains only .R files;
# confirm before pointing it elsewhere.
# Example: source_dir("/esarchive/scratch/nperez/git/startR/R/")
source_dir <- function(dir) {
  files <- list.files(dir)
  invisible(lapply(files, function(x) {
    source(file.path(dir, x))}))
}

# (patch metadata) diff --git a/source_lines.R b/source_lines.R
#                  new file mode 100644, index 0000000..9623893
#
# Sources only lines 'start' to 'end' (inclusive, 1-based) of 'file'.
# Extra arguments in '...' are forwarded to source() (e.g. 'local').
# Example:
#   source_lines("/esarchive/scratch/nperez/git/startR/inst/doc/usecase/ex2_1_timedim.R",
#                start = 4, end = 14)
source_lines <- function(file, start, end, ...) {
  file.lines <- scan(file, what = character(), skip = start - 1,
                     nlines = end - start + 1, sep = '\n')
  file.lines.collapsed <- paste(file.lines, collapse = '\n')
  source(textConnection(file.lines.collapsed), ...)
}
# -- GitLab (end of patch)