diff --git a/.Rbuildignore b/.Rbuildignore index 834c4fa124507946a4eb8131288fe93d2ed7cf00..eeac7b8d49f9b316dc301481288b0cef99db5909 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,5 +1,21 @@ -.git -.gitignore -.tar.gz -.pdf -./.nc +.*\.git$ +.*\.gitignore$ +.*\.tar.gz$ +.*\.pdf$ +.*^(?!inst)\.nc$ +^README\.md$ +#\..*\.RData$ +#^vignettes$ +^tests$ +^inst/doc$ +#^inst/doc/*$ +#^inst/doc/figures/$ +#^inst/doc/usecase/$ +#^inst/PlotProfiling\.R$ + +# Suggested by http://r-pkgs.had.co.nz/package.html +^.*\.Rproj$ # Automatically added by RStudio, +^\.Rproj\.user$ # used for temporary files. +^README\.Rmd$ # An Rmarkdown file used to generate README.md +^cran-comments\.md$ # Comments for CRAN submission +^NEWS\.md$ # A news file written in Markdown diff --git a/DESCRIPTION b/DESCRIPTION index 0cda1baa89cd345b7d9b55fcbd2cc4021e2f5a2e..b1b9425ba0dc5863286c1501c935a41ec169afbe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,23 +1,23 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 1.0.3 +Version: 2.0.0 Authors@R: c( person("BSC-CNS", role = c("aut", "cph")), person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), - person("An Chi", "Ho", , "aho@bsc.es", role = c("ctb")), - person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = c("ctb", "cre"), comment = c(ORCID = "0000-0001-8568-3071")), + person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("ctb", "cre")), + person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = c("ctb"), comment = c(ORCID = "0000-0001-8568-3071")), person("Javier", "Vegas", , "javier.vegas@bsc.es", role = c("ctb")), person("Pierre-Antoine", "Bretonniere", , "pierre-antoine.bretonniere@bsc.es", role = c("ctb")), person("Roberto", "Serrano", , "rsnotivoli@gmal.com", role = c("ctb"))) -Description: Tool to automatically fetch, transform and arrange subsets of - multidimensional data sets (collections of files) stored in local and/or remote +Description: Tool to automatically fetch, transform and arrange subsets of multi- + dimensional data sets (collections of files) stored in local and/or remote file systems or servers, using multicore capabilities where possible. The tool provides an interface to perceive a collection of data sets as a single large multidimensional data array, and enables the user to request for automatic retrieval, processing and arrangement of subsets of the large array. Wrapper functions to add support for custom file formats can be plugged in/out, making - the tool suitable for any research field where large multidimensional data sets - are involved. + the tool suitable for any research field where large multidimensional data + sets are involved. 
Depends: R (>= 3.2.0) Imports: @@ -25,12 +25,16 @@ Imports: bigmemory, future, multiApply (>= 2.1.1), - parallel -Suggests: + parallel, easyNCDF, - s2dverification + s2dverification, + ClimProjDiags +Suggests: + stats, + utils License: LGPL-3 URL: https://earth.bsc.es/gitlab/es/startR/ -BugReports: https://earth.bsc.es/gitlab/es/startR/issues +BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues LazyData: true +SystemRequirements: cdo RoxygenNote: 5.0.0 diff --git a/NAMESPACE b/NAMESPACE index 18987942da4869ebe47e9d2ca8ea226be8f22bec..a3ad75ff6f708fdbf186ab589ce3c267cb576c46 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,29 @@ -exportPattern("^[^\\.]") -import(stats, utils, abind, bigmemory, future, parallel) +# Generated by roxygen2: do not edit by hand + +export(AddStep) +export(CDORemapper) +export(CircularSort) +export(Collect) +export(Compute) +export(NcCloser) +export(NcDataReader) +export(NcDimReader) +export(NcOpener) +export(NcVarReader) +export(SelectorChecker) +export(Sort) +export(Start) +export(Step) +export(indices) +export(values) +import(abind) +import(bigmemory) +import(easyNCDF) +import(future) +import(multiApply) +import(parallel) +importFrom(ClimProjDiags,Subset) +importFrom(s2dverification,CDORemap) +importFrom(stats,na.omit) +importFrom(stats,setNames) +importFrom(utils,str) diff --git a/NEWS.md b/NEWS.md index baf436ea9ba95919a44911ee1f51256193bbec03..e38cd188132b8baefbf0ade788566a9eff43cd7a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# startR v2.0.0 (Release date: 2020-08-06) +- Adopt Roxygen2 documentation format +- Remove Subset() to avoid a duplicated function. Use ClimProjDiags::Subset instead. + # startR v1.0.3 (Release date: 2020-06-19) - Bugfix for requiring the repetitive values from a single file when using 'merge_across_dims' and 'split_multiselected_dims'. The value positions were not diff --git a/R/AddStep.R b/R/AddStep.R index 3ca5a3542399348b2509db84df549f83e2bbacf3..037bd58ca9aae1f11be3042dd046dc488d9ac005 100644 --- a/R/AddStep.R +++ b/R/AddStep.R @@ -1,3 +1,47 @@ +#'Create the workflow with the previously defined operation and data. +#' +#'The step that combines the previously declared data and operation together to +#'create the complete workflow. It is the final step before data processing. +#' +#'@param inputs One or a list of objects of the class 'startR_cube' returned by +#' Start(), indicating the data to be processed. +#'@param step_fun A startR step function as returned by Step(). +#'@param \dots Additional parameters for the inputs of the function defined in +#' 'step_fun' by Step(). +#' +#'@return A list of the class 'startR_workflow' containing all the objects +#' needed for the data operation. 
+#'@examples +#' data_path <- system.file('extdata', package = 'startR') +#' path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' sdates <- c('200011', '200012') +#' data <- Start(dat = list(list(path = path_obs)), +#' var = 'tos', +#' sdate = sdates, +#' time = 'all', +#' latitude = 'all', +#' longitude = 'all', +#' return_vars = list(latitude = 'dat', +#' longitude = 'dat', +#' time = 'sdate'), +#' retrieve = FALSE) +#' pi_short <- 3.14 +#' fun <- function(x, pi_val) { +#' lat = attributes(x)$Variables$dat1$latitude +#' weight = sqrt(cos(lat * pi_val / 180)) +#' corrected = Apply(list(x), target_dims = "latitude", +#' fun = function(x) {x * weight}) +#' } +#' +#' +#' step <- Step(fun = fun, +#' target_dims = 'latitude', +#' output_dims = 'latitude', +#' use_libraries = c('multiApply'), +#' use_attributes = list(data = "Variables")) +#' wf <- AddStep(data, step, pi_val = pi_short) +#' +#'@export AddStep <- function(inputs, step_fun, ...) { # Check step_fun if (!('startR_step_fun' %in% class(step_fun))) { @@ -54,7 +98,7 @@ AddStep <- function(inputs, step_fun, ...) { names(dims1) <- previous_target_dims dims2 <- rep(1, length(attr(inputs[[input]], 'TargetDims'))) names(dims2) <- attr(inputs[[input]], 'TargetDims') - previous_target_dims <- names(startR:::.MergeArrayDims(dims1, dims2)[[1]]) + previous_target_dims <- names(.MergeArrayDims(dims1, dims2)[[1]]) } } new_input_dims <- attr(inputs[[input]], 'Dimensions') @@ -64,7 +108,7 @@ AddStep <- function(inputs, step_fun, ...) { if (is.null(all_input_dims)) { all_input_dims <- new_input_dims } else { - all_input_dims <- startR:::.MergeArrayDims(all_input_dims, new_input_dims)[[1]] + all_input_dims <- .MergeArrayDims(all_input_dims, new_input_dims)[[1]] } } @@ -74,7 +118,7 @@ AddStep <- function(inputs, step_fun, ...) { names(dims1) <- previous_target_dims dims2 <- rep(1, length(new_target_dims)) names(dims2) <- new_target_dims - target_dims <- names(startR:::.MergeArrayDims(dims1, dims2)[[1]]) + target_dims <- names(.MergeArrayDims(dims1, dims2)[[1]]) for (output in 1:length(attr(step_fun, 'OutputDims'))) { workflow <- list(inputs = inputs, fun = step_fun, diff --git a/R/ByChunks.R b/R/ByChunks.R index 22b8667fa612160a215111ff450f90d9be4e50fc..5f0bba5783974ee912825b25c2449e26893d5f72 100644 --- a/R/ByChunks.R +++ b/R/ByChunks.R @@ -1,3 +1,86 @@ +#'Execute the operation by chunks +#' +#'This is an internal function used in Compute(), executing the operation by +#'the chunks specified in Compute(). It also returns the configuration details +#'and profiling information. +#' +#'@param step_fun A function with the class 'startR_step_fun' containing the +#' details of the operation. +#'@param cube_headers A list with the class 'startR_cube' returned by Start(). +#' It contains the details of the data to be operated on. +#'@param \dots Additional parameters for the inputs of 'step_fun'. +#'@param chunks A named list indicating the dimensions along which to split the +#' data and the number of chunks to make for each. The chunked dimensions can +#' only be those not required as target dimensions in the function Step(). The +#' default value is 'auto', which lists all the non-target dimensions, each +#' with one chunk. +#'@param threads_load An integer indicating the number of execution threads to +#' use for the data retrieval stage. The default value is 2. +#'@param threads_compute An integer indicating the number of execution threads +#' to use for the computation. The default value is 1. 
+#'@param cluster A list of components that define the configuration of the +#' machine to be run on. The components vary depending on the machine. +#' Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more +#' details and examples. +#' Only needed when the computation is not run locally. The default value is +#' NULL. +#'@param ecflow_suite_dir A character string indicating the path to a folder in +#' the local workstation in which to store temporary files generated for the +#' automatic management of the workflow. Only needed when the execution is run +#' remotely. The default value is NULL. +#'@param ecflow_server A named vector indicating the host and port of the +#' EC-Flow server. The vector form should be +#' \code{c(host = 'hostname', port = port_number)}. Only needed when the +#' execution is run remotely. The default value is NULL. +#'@param silent A logical value deciding whether to print the computation +#' progress in the R session (FALSE) or not (TRUE). It only works when the +#' execution runs locally or the parameter 'wait' is TRUE. The default value +#' is FALSE. +#'@param debug A logical value deciding whether to return detailed messages on +#' the progress and operations in a Compute() call (TRUE) or not (FALSE). +#' Automatically changed to FALSE if parameter 'silent' is TRUE. The default +#' value is FALSE. +#'@param wait A logical value deciding whether the R session waits for the +#' Compute() call to finish (TRUE) or not (FALSE). If FALSE, it will return an +#' object with all the information of the startR execution that can be stored +#' on your disk. After that, the R session can be closed and the results can +#' be collected later with the Collect() function. The default value is TRUE. +#' +#'@return A list of data arrays for the output returned by the last step in the +#' specified workflow. The configuration details and profiling information are +#' attached as attributes to the returned list of arrays. +#' +#'@examples +#' # ByChunks() is used internally in Compute() and is not intended to be used +#' # by users. The example just illustrates the inputs of ByChunks(). 
+#' # data_path <- system.file('extdata', package = 'startR') +#' # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' # sdates <- c('200011', '200012') +#' # data <- Start(dat = list(list(path = path_obs)), +#' # var = 'tos', +#' # sdate = sdates, +#' # time = 'all', +#' # latitude = 'all', +#' # longitude = 'all', +#' # return_vars = list(latitude = 'dat', +#' # longitude = 'dat', +#' # time = 'sdate'), +#' # retrieve = FALSE) +#' # fun <- function(x) { +#' # lat = attributes(x)$Variables$dat1$latitude +#' # weight = sqrt(cos(lat * pi / 180)) +#' # corrected = Apply(list(x), target_dims = "latitude", +#' # fun = function(x) {x * weight}) +#' # } +#' # step <- Step(fun = fun, +#' # target_dims = 'latitude', +#' # output_dims = 'latitude', +#' # use_libraries = c('multiApply'), +#' # use_attributes = list(data = "Variables")) +#' #ByChunks(step, data) +#' +#'@import multiApply +#'@noRd ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', threads_load = 2, threads_compute = 1, cluster = NULL, @@ -23,7 +106,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', merge = NULL, total = NULL) - MergeArrays <- startR:::.MergeArrays + MergeArrays <- .MergeArrays # Check input headers if ('startR_cube' %in% class(cube_headers)) { @@ -195,7 +278,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', } cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) if (cluster[['cores_per_job']] > threads_compute) { - startR:::.message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") + .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") } if (!is.list(cluster[['extra_queue_params']]) || !all(sapply(cluster[['extra_queue_params']], is.character))) { @@ -244,7 +327,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', if (is.null(all_dims_merged)) { all_dims_merged <- i } else { - all_dims_merged <- startR:::.MergeArrayDims(all_dims_merged, i)[[3]] + all_dims_merged <- .MergeArrayDims(all_dims_merged, i)[[3]] } } all_dimnames <- names(all_dims_merged) @@ -554,7 +637,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', timings[['job_setup']] <- 0 timings[['transfer_back']] <- 0 if (!silent) { - startR:::.message(paste0("Processing chunks... ", + .message(paste0("Processing chunks... 
", "remaining time estimate soon...")) } time_before_first_chunk <- Sys.time() @@ -586,7 +669,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', if (!on_cluster) { if (!silent) { - startR:::.message(paste("Loading chunk", i, + .message(paste("Loading chunk", i, "out of", length(chunk_array), "...")) } data <- vector('list', length(cube_headers)) @@ -608,7 +691,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', split_dims_to_alter <- which(names(split_dims[[k]]) %in% names_dims_to_alter) chunks_split_dims[split_dims_to_alter] <- unlist(chunks[names(split_dims[[k]])[split_dims_to_alter]]) chunks_indices_split_dims[split_dims_to_alter] <- chunk_indices[names(split_dims[[k]])[split_dims_to_alter]] - start_call[[names(split_dims)[k]]] <- chunk(chunks_indices_split_dims, chunks_split_dims, + start_call[[names(split_dims)[k]]] <- .chunk(chunks_indices_split_dims, chunks_split_dims, eval(start_call[[names(split_dims)[k]]])) dims_to_alter_to_remove <- which(names_dims_to_alter %in% names(split_dims[[k]])) if (length(dims_to_alter_to_remove) > 0) { @@ -621,7 +704,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', if (length(dims_to_alter) > 0) { for (call_dim in names(attr(start_call, 'Dimensions'))[dims_to_alter]) { - start_call[[call_dim]] <- chunk(chunk_indices[call_dim], chunks[[call_dim]], + start_call[[call_dim]] <- .chunk(chunk_indices[call_dim], chunks[[call_dim]], eval(start_call[[call_dim]])) } } @@ -635,7 +718,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', timings[['load']] <- c(timings[['load']], as.numeric(difftime(t_end_load, t_begin_load, units = 'secs'))) if (!silent) { - startR:::.message(paste("Processing...")) + .message(paste("Processing...")) } #TODO: Find a better way to assign the names of data. When multiple steps for Compute is available, this way may fail. names(data) <- names(cube_headers) @@ -670,7 +753,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', time_before_first_chunk) * (length(chunk_array) - 1) units(estimate) <- 'mins' - startR:::.message( + .message( paste0("Remaining time estimate (at ", format(time_after_first_chunk), ") ", "(neglecting merge time): ", format(estimate)) ) @@ -736,7 +819,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', t_begin_bychunks_setup, units = 'secs')) if (!is_data_dir_shared) { t_begin_transfer <- Sys.time() - startR:::.message("Sending involved files to the cluster file system...") + .message("Sending involved files to the cluster file system...") files_to_send <- NULL #files_to_check <- NULL for (cube_header in 1:length(cube_headers)) { @@ -770,14 +853,14 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', system(paste0("rsync -Rrav '", file_spec, "' '", cluster[['queue_host']], ":", remote_data_dir, "/'")) } - startR:::.message("Files sent successfully.") + .message("Files sent successfully.") t_end_transfer <- Sys.time() timings[['transfer']] <- as.numeric(difftime(t_end_transfer, t_begin_transfer, units = 'secs')) } else { timings[['transfer']] <- 0 } if (!silent) { - startR:::.message(paste0("Processing chunks... ")) + .message(paste0("Processing chunks... ")) } time_begin_first_chunk <- Sys.time() # time_after_first_chunk <- NULL @@ -794,7 +877,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', class(startr_exec) <- 'startR_exec' if (wait) { if (!silent) { - startR:::.message(paste0("Remaining time estimate soon... 
")) + .message(paste0("Remaining time estimate soon... ")) # while (is.null(time_after_first_chunk)) { # if (any(grepl('.*\\.Rds$', list.files(ecflow_suite_dir_suite)))) { # time_after_first_chunk <- Sys.time() @@ -803,7 +886,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', # ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / # cluster[['max_jobs']]) # units(estimate) <- 'mins' - # startR:::.message( + # .message( # paste0('Remaining time estimate (neglecting queue and ', # 'merge time) (at ', format(time_after_first_chunk), # '): ', format(estimate), ' (', @@ -827,7 +910,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', startr_exec[['t_begin_first_chunk']] <- time_begin_first_chunk } result <- Collect(startr_exec, wait = TRUE) - startR:::.message("Computation ended successfully.") + .message("Computation ended successfully.") result } else { startr_exec @@ -837,7 +920,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', timings[['concurrent_chunks']] <- 1 t_begin_merge <- Sys.time() for (component in 1:length(arrays_of_results)) { - arrays_of_results[[component]] <- startR:::.MergeArrayOfArrays(arrays_of_results[[component]]) + arrays_of_results[[component]] <- .MergeArrayOfArrays(arrays_of_results[[component]]) } t_end_merge <- Sys.time() timings[['merge']] <- as.numeric(difftime(t_end_merge, t_begin_merge, units = 'secs')) diff --git a/R/CDORemapper.R b/R/CDORemapper.R index b109f548f3ec8b1faa4c243b09ef27169af9f417..815b92a651d409a2ab1917125d486729ebeda52f 100644 --- a/R/CDORemapper.R +++ b/R/CDORemapper.R @@ -1,7 +1,61 @@ +#'CDO Remap Data Transformation for 'startR' +#' +#'This is a transform function that uses CDO software to remap longitude-latitude +#'data subsets onto a specified target grid, intended for use as parameter +#''transform' in a Start() call. This function complies with the input/output +#'interface required by Start() defined in the documentation for the parameter +#''transform' of function Start().\cr\cr +#'This function uses the function CDORemap() in the package 's2dverification' to +#'perform the interpolation, hence CDO is required to be installed. +#' +#'@param data_array A data array to be transformed. See details in the +#' documentation of the parameter 'transform' of the function Start(). +#'@param variables A list of auxiliary variables required for the transformation, +#' automatically provided by Start(). See details in the documentation of the +#' parameter 'transform' of the function Start(). +#'@param file_selectors A charcter vector indicating the information of the path of +#' the file parameter 'data_array' comes from. See details in the documentation of +#' the parameter 'transform' of the function Start(). The default value is NULL. +#'@param \dots A list of additional parameters to adjust the transform process, +#' as provided in the parameter 'transform_params' in a Start() call. See details +#' in the documentation of the parameter 'transform' of the function Start(). +#' +#'@return An array with the same amount of dimensions as the input data array, +#' potentially with different sizes, and potentially with the attribute +#' 'variables' with additional auxiliary data. See details in the documentation +#' of the parameter 'transform' of the function Start(). 
+#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@examples +#'# Used in Start(): +#' data_path <- system.file('extdata', package = 'startR') +#' path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' sdates <- c('200011') +#' \donttest{ +#' data <- Start(dat = list(list(path = path_obs)), +#' var = 'tos', +#' sdate = sdates, +#' time = 'all', +#' latitude = values(list(-60, 60)), +#' latitude_reorder = Sort(decreasing = TRUE), +#' longitude = values(list(-120, 120)), +#' longitude_reorder = CircularSort(-180, 180), +#' transform = CDORemapper, +#' transform_params = list(grid = 'r360x181', +#' method = 'conservative', +#' crop = c(-120, 120, -60, 60)), +#' transform_vars = c('latitude', 'longitude'), +#' return_vars = list(latitude = 'dat', +#' longitude = 'dat', +#' time = 'sdate'), +#' retrieve = FALSE) +#' } +#'@importFrom s2dverification CDORemap +#'@export CDORemapper <- function(data_array, variables, file_selectors = NULL, ...) { file_dims <- names(file_selectors) - known_lon_names <- s2dverification:::.KnownLonNames() - known_lat_names <- s2dverification:::.KnownLatNames() + known_lon_names <- .KnownLonNames() + known_lat_names <- .KnownLatNames() if (!any(known_lon_names %in% names(variables)) || !any(known_lat_names %in% names(variables))) { stop("The longitude and latitude variables must be requested in ", diff --git a/R/Collect.R b/R/Collect.R index f714899e55e56939b27edb5f13782828c5131fed..bf387297722c15e61e36d6b768624aec0b73c605 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -1,3 +1,76 @@ +#'Collect and merge the computation results +#' +#'The final step of the startR workflow after the data operation. It is used when +#'the parameter 'wait' of Compute() is FALSE, and the functionality includes +#'updating the job status shown on the EC-Flow GUI and collecting all the chunks +#'of results as one data array when the execution is done. See more details on +#'\href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab}. +#' +#'@param startr_exec An R object returned by Compute() when the parameter 'wait' +#' of Compute() is FALSE. It can be directly from a Compute() call or read from +#' the RDS file. +#'@param wait A logical value deciding whether the R session waits for the +#' Collect() call to finish (TRUE) or not (FALSE). If TRUE, it will be a +#' blocking call, in which Collect() will retrieve information from the HPC, +#' including signals and outputs, every polling_period seconds. The status +#' can be monitored on the EC-Flow GUI. Collect() will not return until the +#' results of all chunks have been received. If FALSE, Collect() will crash with +#' an error if the execution has not finished yet, otherwise it will return the +#' merged array. The default value is TRUE. +#'@param remove A logical value deciding whether to remove all data results +#' received from the HPC (and stored under 'ecflow_suite_dir', the parameter in +#' Compute()) after being collected. To preserve the data and be able to call +#' Collect() on it as many times as desired, set remove to FALSE. The default +#' value is TRUE. +#'@return A list of the merged data arrays. 
+#' +#'@examples +#' data_path <- system.file('extdata', package = 'startR') +#' path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' sdates <- c('200011', '200012') +#' data <- Start(dat = list(list(path = path_obs)), +#' var = 'tos', +#' sdate = sdates, +#' time = 'all', +#' latitude = 'all', +#' longitude = 'all', +#' return_vars = list(latitude = 'dat', +#' longitude = 'dat', +#' time = 'sdate'), +#' retrieve = FALSE) +#' fun <- function(x) { +#' lat = attributes(x)$Variables$dat1$latitude +#' weight = sqrt(cos(lat * pi / 180)) +#' corrected = Apply(list(x), target_dims = "latitude", +#' fun = function(x) {x * weight}) +#' } +#' step <- Step(fun = fun, +#' target_dims = 'latitude', +#' output_dims = 'latitude', +#' use_libraries = c('multiApply'), +#' use_attributes = list(data = "Variables")) +#' wf <- AddStep(data, step) +#' \dontrun{ +#' res <- Compute(wf, chunks = list(longitude = 2, sdate = 2), +#' threads_load = 1, +#' threads_compute = 4, +#' cluster = list(queue_host = 'nord3', +#' queue_type = 'lsf', +#' temp_dir = '/on_hpc/tmp_dir/', +#' cores_per_job = 2, +#' job_wallclock = '05:00', +#' max_jobs = 4, +#' extra_queue_params = list('#BSUB -q bsc_es'), +#' bidirectional = FALSE, +#' polling_period = 10 +#' ), +#' ecflow_suite_dir = '/on_local_machine/username/ecflow_dir/', +#' wait = FALSE) +#' saveRDS(res, file = 'test_collect.Rds') +#' collect_info <- readRDS('test_collect.Rds') +#' result <- Collect(collect_info, wait = TRUE) +#' } +#' +#'@export Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { if (!('startR_exec' %in% class(startr_exec))) { stop("Parameter 'startr_exec' must be an object of the class ", @@ -78,7 +151,7 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / cluster[['max_jobs']]) units(estimate) <- 'mins' - startR:::.message( + .message( paste0('Remaining time estimate (neglecting queue and ', 'merge time) (at ', format(time_after_first_chunk), '): ', format(estimate), ' (', @@ -151,7 +224,7 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { ceiling((prod(unlist(chunks)) - cluster[['max_jobs']]) / cluster[['max_jobs']]) units(estimate) <- 'mins' - startR:::.message( + .message( paste0('Remaining time estimate (neglecting queue and ', 'merge time) (at ', format(time_after_first_chunk), '): ', format(estimate), ' (', @@ -183,7 +256,7 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { "system. Store the result after Collect() ends if needed.") } t_begin_merge <- Sys.time() - result <- startR:::.MergeChunks(ecflow_suite_dir, suite_id, remove) + result <- .MergeChunks(ecflow_suite_dir, suite_id, remove) t_end_merge <- Sys.time() timings[['merge']] <- as.numeric(difftime(t_end_merge, t_begin_merge, units = 'secs')) received_files <- list.files(ecflow_suite_dir_suite, full.names = TRUE) diff --git a/R/Compute.R b/R/Compute.R index 570b16d4d5f52a90ec8ca3a5d33307f6efd1ad68..0e8d42cd134a3bbc66fc896a357db1e4f5cd65a5 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -1,3 +1,88 @@ +#'Specify the execution parameters and trigger the execution +#' +#'The step of the startR workflow after the complete workflow is defined by +#'AddStep(). This function specifies the execution parameters and triggers the +#'execution. The execution can be operated locally or on a remote machine. 
In the latter case, the configuration of the machine needs to be +#'specified in the function, and the EC-Flow server is required to be +#'installed.\cr\cr +#'The execution can be run by chunks to avoid overloading the RAM. +#'After all the chunks are finished, Compute() will gather and merge them, and +#'return a single data object, including one or multiple multidimensional data +#'arrays and additional metadata. +#' +#'@param workflow A list of the class 'startR_workflow' returned by function +#' AddStep() or of class 'startR_cube' returned by function Start(). It +#' contains all the objects needed for the execution. +#'@param chunks A named list indicating the dimensions along which to split the +#' data and the number of chunks to make for each. The chunked dimensions can +#' only be those not required as target dimensions in the function Step(). The +#' default value is 'auto', which lists all the non-target dimensions, each +#' with one chunk. +#'@param threads_load An integer indicating the number of execution threads to +#' use for the data retrieval stage. The default value is 1. +#'@param threads_compute An integer indicating the number of execution threads +#' to use for the computation. The default value is 1. +#'@param cluster A list of components that define the configuration of the +#' machine to be run on. The components vary depending on the machine. +#' Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more +#' details and examples. Only needed when the computation is not run locally. +#' The default value is NULL. +#'@param ecflow_suite_dir A character string indicating the path to a folder in +#' the local workstation in which to store temporary files generated for the +#' automatic management of the workflow. Only needed when the execution is run +#' remotely. The default value is NULL. +#'@param ecflow_server A named vector indicating the host and port of the +#' EC-Flow server. The vector form should be +#' \code{c(host = 'hostname', port = port_number)}. Only needed when the +#' execution is run remotely. The default value is NULL. +#'@param silent A logical value deciding whether to print the computation +#' progress in the R session (FALSE) or not (TRUE). It only works when the +#' execution runs locally or the parameter 'wait' is TRUE. The default value +#' is FALSE. +#'@param debug A logical value deciding whether to return detailed messages on +#' the progress and operations in a Compute() call (TRUE) or not (FALSE). +#' Automatically changed to FALSE if parameter 'silent' is TRUE. The default +#' value is FALSE. +#'@param wait A logical value deciding whether the R session waits for the +#' Compute() call to finish (TRUE) or not (FALSE). If FALSE, it will return an +#' object with all the information of the startR execution that can be stored +#' on your disk. After that, the R session can be closed and the results can +#' be collected later with the Collect() function. The default value is TRUE. +#' +#'@return A list of data arrays for the output returned by the last step in the +#' specified workflow (wait = TRUE), or an object with information about the +#' startR execution (wait = FALSE). The configuration details and profiling +#' information are attached as attributes to the returned list of arrays. 
+#'@examples +#' data_path <- system.file('extdata', package = 'startR') +#' path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' sdates <- c('200011', '200012') +#' data <- Start(dat = list(list(path = path_obs)), +#' var = 'tos', +#' sdate = sdates, +#' time = 'all', +#' latitude = 'all', +#' longitude = 'all', +#' return_vars = list(latitude = 'dat', +#' longitude = 'dat', +#' time = 'sdate'), +#' retrieve = FALSE) +#' fun <- function(x) { +#' lat = attributes(x)$Variables$dat1$latitude +#' weight = sqrt(cos(lat * pi / 180)) +#' corrected = Apply(list(x), target_dims = "latitude", +#' fun = function(x) {x * weight}) +#' } +#' step <- Step(fun = fun, +#' target_dims = 'latitude', +#' output_dims = 'latitude', +#' use_libraries = c('multiApply'), +#' use_attributes = list(data = "Variables")) +#' wf <- AddStep(data, step) +#' res <- Compute(wf, chunks = list(longitude = 4, sdate = 2)) +#' +#'@export Compute <- function(workflow, chunks = 'auto', threads_load = 1, threads_compute = 1, cluster = NULL, ecflow_suite_dir = NULL, diff --git a/R/NcCloser.R b/R/NcCloser.R index bb5e89248f4206fb2e880a148d2f6d76ac7d08e0..476592ee8e5b2ab00d2af5285b6eea8bd9c06ec2 100644 --- a/R/NcCloser.R +++ b/R/NcCloser.R @@ -1,3 +1,25 @@ +#'NetCDF file closer for 'startR' +#' +#'This is a file closer function for NetCDF files, intended for use as +#'parameter 'file_closer' in a Start() call. This function complies with the +#'input/output interface required by Start() defined in the documentation for +#'the parameter 'file_closer'.\cr\cr +#'This function uses the function NcClose() in the package 'easyNCDF', +#'which in turn uses nc_close() in the package 'ncdf4'. +#' +#'@param file_object An open connection to a NetCDF file, optionally with +#' additional header information. See details in the documentation of the +#' parameter 'file_closer' of the function Start(). +#'@return This function returns NULL. +#'@examples +#'data_path <- system.file('extdata', package = 'startR') +#'path_obs <- file.path(data_path, 'obs/monthly_mean/tos/tos_200011.nc') +#'connection <- NcOpener(path_obs) +#'NcCloser(connection) +#'@seealso \code{\link{NcOpener}} \code{\link{NcDataReader}} +#' \code{\link{NcDimReader}} \code{\link{NcVarReader}} +#'@import easyNCDF +#'@export NcCloser <- function(file_object) { easyNCDF::NcClose(file_object) } diff --git a/R/NcDataReader.R b/R/NcDataReader.R index cba05fac8a5f721eb41c209437d5900722acb297..0dfe0b2c0872a8043fbb399fde843c8d30d3c3de 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -1,6 +1,50 @@ -# Parameter 'file_selectos' expects a named character vector of single -# file dimension selectors. -# Parameter 'inner_indices' expects a named list of numeric vectors. +#'NetCDF file data reader for 'startR' +#' +#'This is a data reader function for NetCDF files, intended for use as parameter +#'file_data_reader in a Start() call. This function complies with the +#'input/output interface required by Start() defined in the documentation for +#'the parameter 'file_data_reader'.\cr\cr +#'This function uses the function NcToArray() in the package 'easyNCDF', which +#'in turn uses nc_var_get() in the package 'ncdf4'. +#' +#'@param file_path A character string indicating the path to the data file to +#' read. See details in the documentation of the parameter 'file_data_reader' +#' of the function Start(). The default value is NULL. +#'@param file_object An open connection to a NetCDF file, optionally with +#' additional header information. 
See details in the documentation of the +#' parameter 'file_data_reader' of the function Start(). The default value is +#' NULL. +#'@param file_selectors A named list containing the information of the path of +#' the file to read data from. It is automatically provided by Start(). See +#' details in the documentation of the parameter 'file_data_reader' of the +#' function Start(). The default value is NULL. +#'@param inner_indices A named list of numeric vectors indicating the indices +#' to take from each of the inner dimensions in the requested file. It is +#' automatically provided by Start(). See details in the documentation of the +#' parameter 'file_data_reader' of the function Start(). The default value is +#' NULL. +#'@param synonims A named list indicating the synonims for the dimension names +#' to look for in the requested file, exactly as provided in the parameter +#' 'synonims' in a Start() call. See details in the documentation of the +#' parameter 'file_data_reader' of the function Start(). +#' +#'@return A multidimensional data array with the named dimensions and indices +#' requested in 'inner_indices', potentially with the attribute 'variables' +#' with additional auxiliary data. See details in the documentation of the +#' parameter 'file_data_reader' of the function Start(). +#'@examples +#' data_path <- system.file('extdata', package = 'startR', mustWork = TRUE) +#' file_to_open <- file.path(data_path, 'obs/monthly_mean/tos/tos_200011.nc') +#' file_selectors <- c(dat = 'dat1', var = 'tos', sdate = '200011') +#' first_round_indices <- list(time = 1, latitude = 1:8, longitude = 1:16) +#' synonims <- list(dat = 'dat', var = 'var', sdate = 'sdate', time = 'time', +#' latitude = 'latitude', longitude = 'longitude') +#' sub_array <- NcDataReader(file_to_open, NULL, file_selectors, +#' first_round_indices, synonims) +#'@seealso \code{\link{NcOpener}} \code{\link{NcDimReader}} +#' \code{\link{NcCloser}} \code{\link{NcVarReader}} +#'@import easyNCDF +#'@export NcDataReader <- function(file_path = NULL, file_object = NULL, file_selectors = NULL, inner_indices = NULL, synonims) { diff --git a/R/NcDimReader.R b/R/NcDimReader.R index e93de95370d2170baa2f2161a82129aaae55742f..9b11a599ad48687f561b58b74a1a9816383483a2 100644 --- a/R/NcDimReader.R +++ b/R/NcDimReader.R @@ -1,7 +1,48 @@ -# Parameter 'file_selectors' expects a named character vector of single -# file dimension selectors. -# Parameter 'inner_indices' expects a named list of numeric or -# character string vectors. +#'NetCDF dimension reader for 'startR' +#' +#'A dimension reader function for NetCDF files, intended for use as parameter +#''file_dim_reader' in a Start() call. It complies with the input/output +#'interface required by Start() defined in the documentation for the parameter +#''file_dim_reader' of that function.\cr\cr +#'This function uses the function NcReadDims() in the package 'easyNCDF'. +#' +#'@param file_path A character string indicating the path to the data file to +#' read. See details in the documentation of the parameter 'file_dim_reader' +#' of the function Start(). The default value is NULL. +#'@param file_object An open connection to a NetCDF file, optionally with +#' additional header information. See details in the documentation of the +#' parameter 'file_dim_reader' of the function Start(). The default value is +#' NULL. +#'@param file_selectors A named list containing the information of the path of +#' the file to read data from. It is automatically provided by Start(). 
See +#' details in the documentation of the parameter 'file_dim_reader' of the +#' function Start(). The default value is NULL. +#'@param inner_indices A named list of numeric vectors indicating the indices +#' to take from each of the inner dimensions in the requested file. It is +#' automatically provided by Start(). See details in the documentation of the +#' parameter 'file_dim_reader' of the function Start(). The default value is +#' NULL. +#'@param synonims A named list indicating the synonims for the dimension names +#' to look for in the requested file, exactly as provided in the parameter +#' 'synonims' in a Start() call. See details in the documentation of the +#' parameter 'file_dim_reader' of the function Start(). +#' +#'@return A named numeric vector with the names and sizes of the dimensions of +#' the requested file. +#'@examples +#' data_path <- system.file('extdata', package = 'startR') +#' file_to_open <- file.path(data_path, 'obs/monthly_mean/tos/tos_200011.nc') +#' file_selectors <- c(dat = 'dat1', var = 'tos', sdate = '200011') +#' first_round_indices <- list(time = 1, latitude = 1:8, longitude = 1:16) +#' synonims <- list(dat = 'dat', var = 'var', sdate = 'sdate', time = 'time', +#' latitude = 'latitude', longitude = 'longitude') +#' dim_of_file <- NcDimReader(file_to_open, NULL, file_selectors, +#' first_round_indices, synonims) +#'@seealso \code{\link{NcOpener}} \code{\link{NcDataReader}} +#' \code{\link{NcCloser}} \code{\link{NcVarReader}} +#'@import easyNCDF +#'@importFrom stats setNames +#'@export NcDimReader <- function(file_path = NULL, file_object = NULL, file_selectors = NULL, inner_indices = NULL, synonims) { diff --git a/R/NcOpener.R b/R/NcOpener.R index ca6e9edbe3d31fdf8021cf269e7aa67073032b3e..a301432b0c3347f41bc8c8082e1b1a9250027bd0 100644 --- a/R/NcOpener.R +++ b/R/NcOpener.R @@ -1,3 +1,27 @@ +#'NetCDF file opener for 'startR' +#' +#'This is a file opener function for NetCDF files, intended for use as parameter +#''file_opener' in a Start() call. This function complies with the input/output +#'interface required by Start() defined in the documentation for the parameter +#''file_opener'.\cr\cr +#'This function uses the function NcOpen() in the package 'easyNCDF', which in +#'turn uses nc_open() in the package 'ncdf4'. +#' +#'@param file_path A character string indicating the path to the data file to +#' read. See details in the documentation of the parameter 'file_opener' of the +#' function Start(). +#'@return An open connection to a NetCDF file with additional header +#' information as returned by nc_open() in the package 'ncdf4'. See details in +#' the documentation of the parameter 'file_opener' of the function Start(). +#'@examples +#'data_path <- system.file('extdata', package = 'startR') +#'path_obs <- file.path(data_path, 'obs/monthly_mean/tos/tos_200011.nc') +#'connection <- NcOpener(path_obs) +#'NcCloser(connection) +#'@seealso \code{\link{NcDimReader}} \code{\link{NcDataReader}} +#' \code{\link{NcCloser}} \code{\link{NcVarReader}} +#'@import easyNCDF +#'@export NcOpener <- function(file_path) { easyNCDF::NcOpen(file_path) } diff --git a/R/NcVarReader.R b/R/NcVarReader.R index c79424439bd7d8da8074649c5c7904b48daa6a4e..b78e89eb060c76dc07fda3e534369fd8a0ca7497 100644 --- a/R/NcVarReader.R +++ b/R/NcVarReader.R @@ -1,3 +1,46 @@ +#'NetCDF variable reader for 'startR' +#' +#'This is an auxiliary variable reader function for NetCDF files, intended for +#'use as parameter 'file_var_reader' in a Start() call. 
It complies with the +#'input/output interface required by Start() defined in the documentation for +#'the parameter 'file_var_reader' of that function.\cr\cr +#'This function uses the function NcDataReader() in the package 'startR', +#'which in turn uses NcToArray() in the package 'easyNCDF', which in turn uses +#'nc_var_get() in the package 'ncdf4'. +#' +#'@param file_path A character string indicating the path to the data file to +#' read the variable from. See details in the documentation of the parameter +#' 'file_var_reader' of the function Start(). The default value is NULL. +#'@param file_object An open connection to a NetCDF file, optionally with +#' additional header information. See details in the documentation of the +#' parameter 'file_var_reader' of the function Start(). The default value is +#' NULL. +#'@param file_selectors A named list containing the information of the path of +#' the file to read data from. It is automatically provided by Start(). See +#' details in the documentation of the parameter 'file_var_reader' of the +#' function Start(). The default value is NULL. +#'@param var_name A character string with the name of the variable to be read. +#' The default value is NULL. +#'@param synonims A named list indicating the synonims for the dimension names +#' to look for in the requested file, exactly as provided in the parameter +#' 'synonims' in a Start() call. See details in the documentation of the +#' parameter 'file_var_reader' of the function Start(). +#' +#'@return A multidimensional data array with the named dimensions, potentially +#' with the attribute 'variables' with additional auxiliary data. See details +#' in the documentation of the parameter 'file_var_reader' of the function +#' Start(). +#'@examples +#' data_path <- system.file('extdata', package = 'startR') +#' file_to_open <- file.path(data_path, 'obs/monthly_mean/tos/tos_200011.nc') +#' file_selectors <- c(dat = 'dat1', var = 'tos', sdate = '200011') +#' synonims <- list(dat = 'dat', var = 'var', sdate = 'sdate', time = 'time', +#' latitude = 'latitude', longitude = 'longitude') +#' var <- NcVarReader(file_to_open, NULL, file_selectors, +#' 'tos', synonims) +#'@seealso \code{\link{NcOpener}} \code{\link{NcDataReader}} +#' \code{\link{NcCloser}} \code{\link{NcDimReader}} +#'@export NcVarReader <- function(file_path = NULL, file_object = NULL, file_selectors = NULL, var_name = NULL, synonims) { diff --git a/R/SelectorChecker.R b/R/SelectorChecker.R index 8765da287cba3277b7ac1874b056588d49252688..81ec4889ff344b808e1b9ab0569a05d893e80aa6 100644 --- a/R/SelectorChecker.R +++ b/R/SelectorChecker.R @@ -1,3 +1,41 @@ +#'Translate a set of selectors into a set of numeric indices +#' +#'This is a selector checker function intended for use as parameter +#''selector_checker' in a Start() call. It translates a set of selectors which +#'is the value for one dimension into a set of numeric indices corresponding to +#'the coordinate variable. The function complies with the input/output interface +#'required by Start() defined in the documentation for the parameter +#''selector_checker' of Start(). +#' +#'@param selectors A vector or a list of two of numeric indices or variable +#' values to be retrieved for a dimension, automatically provided by Start(). +#' See details in the documentation of the parameters 'selector_checker' and +#' '\dots' of the function Start(). 
+#'@param var A vector of values of a coordinate variable for which to search +#' matches with the provided indices or values in the parameter 'selectors', +#' automatically provided by Start(). See details in the documentation of the +#' parameters 'selector_checker' and '\dots' of the function Start(). The +#' default value is NULL. When not specified, SelectorChecker() simply returns +#' the input indices. +#'@param return_indices A logical value automatically configured by Start(), +#' telling whether to return the numeric indices or coordinate variable values +#' after the matching. The default value is TRUE. +#'@param tolerance A numeric value indicating a tolerance value to be used in +#' the matching of 'selectors' and 'var'. See the documentation on +#' '<dim_name>_tolerance' in the parameter \code{\dots} of the function +#' Start(). The default value is NULL. +#' +#'@return A vector of either the indices of the matching values (if +#' return_indices = TRUE) or the matching values themselves (if return_indices +#' = FALSE). +#'@examples +#'# Get the latitudes from 10 to 20 degrees +#'sub_array_of_selectors <- list(10, 20) +#'# The latitude values from the original file +#'sub_array_of_values <- seq(90, -90, length.out = 258)[2:257] +#'SelectorChecker(sub_array_of_selectors, sub_array_of_values) +#' +#'@export SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, tolerance = NULL) { if (length(selectors) == 0) { diff --git a/R/Sort.R b/R/Sort.R index 825272bf860d8ec569fdf9559bc21cfff9417554..4f74d68eb8273cd2edc46e83bec0316a070fc96f 100644 --- a/R/Sort.R +++ b/R/Sort.R @@ -1,3 +1,50 @@ +#'Sort the coordinate variable values in a Start() call +#' +#'A reorder function intended for use as parameter '<dim_name>_reorder' +#'in a call to the function Start(). This function complies with the +#'input/output interface required by Start() defined in the documentation +#'for the parameter \code{\dots} of that function.\cr\cr +#'Sort() applies an increasing or decreasing sort to the coordinate values. +#'It is useful for adjusting the latitude order.\cr\cr +#'CircularSort() applies a circular sort to the values: any value beyond the +#'limits specified in the parameters 'start' and 'end' is wrapped by a modulus +#'to fall within the specified range. This is useful for circular coordinates +#'such as the Earth longitudes. +#'@name Sort +#'@aliases CircularSort +#'@param start A numeric indicating the lower bound of the circular range. +#'@param end A numeric indicating the upper bound of the circular range. +#'@param \dots Additional parameters to adjust the reordering. See function +#' sort() for more details. +#' +#'@return +#'A list of 2 containing: +#'\item{$x}{ +#' The reordered values. +#'} +#'\item{$ix}{ +#' The permutation indices of $x in the original coordinate. +#'} +#'@examples +#' # Used in Start(): +#' data_path <- system.file('extdata', package = 'startR') +#' path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' sdates <- c('200011', '200012') +#' data <- Start(dat = list(list(path = path_obs)), +#' var = 'tos', +#' sdate = sdates, +#' time = 'all', +#' latitude = values(list(-60, 60)), +#' latitude_reorder = Sort(decreasing = TRUE), +#' longitude = values(list(-120, 120)), +#' longitude_reorder = CircularSort(-180, 180), +#' return_vars = list(latitude = 'dat', +#' longitude = 'dat', +#' time = 'sdate'), +#' retrieve = FALSE) +#' +#'@rdname Sort +#'@export Sort <- function(...) { params <- list(...) 
f <- "function(x) { @@ -14,6 +61,8 @@ Sort <- function(...) { r } +#'@rdname Sort +#'@export CircularSort <- function(start, end, ...) { params <- list(...) f <- "function (x) { diff --git a/R/Start.R b/R/Start.R index b23e0ebb82987abbaa6d968c33878198f68ff88f..62d7e8caaa87420da2463d1e0b26c61c713a7848 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1,3 +1,776 @@ +#'Declare, discover, subset and retrieve multidimensional distributed data sets +#' +#'See the \href{https://earth.bsc.es/gitlab/es/startR}{startR documentation and +#'tutorial} for a step-by-step explanation on how to use Start().\cr\cr +#'Nowadays in the era of big data, large multidimensional data sets from +#'diverse sources need to be combined and processed. Analysis of big data in any +#'field is often highly complex and time-consuming. Taking subsets of these data +#'sets and processing them efficiently has become an indispensable practice. This +#'technique is also known as Domain Decomposition, Map Reduce or, more commonly, +#''chunking'.\cr\cr +#'startR (Subset, TrAnsform, ReTrieve, arrange and process large +#'multidimensional data sets in R) is an R project started at BSC with the aim +#'of developing a tool that allows the user to automatically process large +#'multidimensional distributed data sets. It is an open source project that is +#'open to external collaboration and funding, and will continuously evolve to +#'support as many data set formats as possible while maximizing its efficiency.\cr\cr +#'startR provides a framework under which a data set (a collection of one +#'or multiple data files, potentially distributed over various remote servers) +#'is perceived as if it were a single large multidimensional +#'array. Once such a multidimensional array is declared, any user-defined function +#'can be applied to the data in an \code{apply}-like fashion, where startR +#'transparently implements the Map Reduce paradigm. The steps to follow in order +#'to process a collection of big data sets are as follows:\cr +#'\itemize{ +#' \item{ +#'Declaring the data set, i.e. declaring the distribution of the data files +#'involved, the dimensions and shape of the multidimensional array, and the +#'boundaries of the target data. This step can be performed with the +#'Start() function. Numeric indices or coordinate values can be used when +#'fixing the boundaries. It is common to need to apply transformations, +#'pre-processing or reordering to the data. Start() accepts user-defined +#'transformation or reordering functions to be applied for such purposes. Once a +#'data set is declared, a list of involved files, dimension lengths, memory size +#'and other metadata is made available. Optionally, the data set can be +#'retrieved and loaded onto the current R session if it is small enough. +#' } +#' \item{ +#'Declaring the workflow of operations to perform on the involved data set(s). +#'This step can be performed with the Step() and AddStep() functions. +#' } +#' \item{ +#'Defining the computation settings. The mandatory settings include a) how many +#'subsets to divide the data sets into and along which dimensions; b) which +#'platform to perform the workflow of operations on (local machine or remote +#'machine/HPC?), how to communicate with it (unidirectional or bidirectional +#'connection? shared or separate file systems?), which queuing system it uses +#'(slurm, PBS, LSF, none?); and c) how many parallel jobs and execution threads +#'per job to use when running the calculations. This step can be performed when
This step can be performed when +#'building up the call to the Compute() function. +#' } +#' \item{ +#'Running the computation. startR transparently implements the Map Reduce +#'paradigm, according to the settings in the previous steps. The progress can +#'optionally be monitored with the EC-Flow workflow management tool. When the +#'computation ends, a report of performance timings is displayed. This step can +#'be triggered with the Compute() function. +#' } +#'} +#'startR is not bound to a specific file format. Interface functions to +#'custom file formats can be provided for Start() to read them. As this +#'version, startR includes interface functions to the following file formats: +#'\itemize{ +#' \item{ +#'NetCDF +#' } +#'} +#'Metadata and auxilliary data is also preserved and arranged by Start() +#'in the measure that it is retrieved by the interface functions for a specific +#'file format. +#' +#'@param \dots A selection of custemized parameters depending on the data +#'format. When we retrieve data from one or a collection of data sets, +#'the involved data can be perceived as belonging to a large multi-dimensional +#'array. For instance, let us consider an example case. We want to retrieve data +#'from a source, which contains data for the number of monthly sales of various +#'items, and also for their retail price each month. The data on source is +#'stored as follows:\cr\cr +#'\command{ +#'\cr # /data/ +#'\cr # |-> sales/ +#'\cr # | |-> electronics +#'\cr # | | |-> item_a.data +#'\cr # | | |-> item_b.data +#'\cr # | | |-> item_c.data +#'\cr # | |-> clothing +#'\cr # | |-> item_d.data +#'\cr # | |-> idem_e.data +#'\cr # | |-> idem_f.data +#'\cr # |-> prices/ +#'\cr # |-> electronics +#'\cr # | |-> item_a.data +#'\cr # | |-> item_b.data +#'\cr # | |-> item_c.data +#'\cr # |-> clothing +#'\cr # |-> item_d.data +#'\cr # |-> item_e.data +#'\cr # |-> item_f.data +#'}\cr\cr +#'Each item file contains data, stored in whichever format, for the sales or +#'prices over a time period, e.g. for the past 24 months, registered at 100 +#'different stores over the world. Whichever the format it is stored in, each +#'file can be perceived as a container of a data array of 2 dimensions, time and +#'store. Let us assume the '.data' format allows to keep a name for each of +#'these dimensions, and the actual names are 'time' and 'store'.\cr\cr +#'The different item files for sales or prices can be perceived as belonging to +#'an 'item' dimension of length 3, and the two groups of three items to a +#''section' dimension of length 2, and the two groups of two sections (one with +#'the sales and the other with the prices) can be perceived as belonging also to +#'another dimension 'variable' of length 2. Even the source can be perceived as +#'belonging to a dimension 'source' of length 1.\cr\cr +#'All in all, in this example, the whole data could be perceived as belonging to +#'a multidimensional 'large array' of dimensions\cr +#'\command{ +#'\cr # source variable section item store month +#'\cr # 1 2 2 3 100 24 +#'} +#'\cr\cr +#'The dimensions of this 'large array' can be classified in two types. The ones +#'that group actual files (the file dimensions) and the ones that group data +#'values inside the files (the inner dimensions). In the example, the file +#'dimensions are 'source', 'variable', 'section' and 'item', whereas the inner +#'dimensions are 'store' and 'month'. 
+#'\cr\cr +#'With the dimensions of our target sources in mind, the parameter \code{\dots} +#'expects to receive information on: +#' \itemize{ +#' \item{ +#'The names of the expected dimensions of the 'large dataset' we want to +#'retrieve data from +#' } +#' \item{ +#'The indices to take from each dimension (and other constraints) +#' } +#' \item{ +#'How to reorder the dimension if needed +#' } +#' \item{ +#'The location and organization of the files of the data sets +#' } +#' } +#'For each dimension, the first 3 information items can be specified with a set +#'of parameters to be provided through \code{\dots}. For a given dimension +#''dimname', six parameters can be specified:\cr +#'\command{ +#'\cr # dimname = <indices to take>, # 'all' / 'first' / 'last' / +#'\cr # # indices(c(1, 10, 20)) / +#'\cr # # indices(c(1:20)) / +#'\cr # # indices(list(1, 20)) / +#'\cr # # c(1, 10, 20) / c(1:20) / +#'\cr # # list(1, 20) +#'\cr # dimname_var = <name of the associated coordinate variable>, +#'\cr # dimname_tolerance = <tolerance value>, +#'\cr # dimname_reorder = <reorder function>, +#'\cr # dimname_depends = <name of another file dimension>, +#'\cr # dimname_across = <name of another file dimension> +#'} +#'\cr\cr +#'The \bold{indices to take} can be specified in three possible formats (see +#'code comments above for examples). The first format consists of using +#'character tags, such as 'all' (take all the indices available for that +#'dimension), 'first' (take only the first) and 'last' (only the last). The +#'second format consists of using numeric indices, which have to be wrapped in a +#'call to the indices() helper function. For the second format, either a +#'vector of numeric indices can be provided, or a list with two numeric indices +#'can be provided to take all the indices in the range between the two specified +#'indices (both extremes inclusive). The third format consists of providing a +#'vector of character strings (for file dimensions) or of values of whichever type +#'(for inner dimensions). For the file dimensions, the provided character +#'strings in the third format will be used as components to build up the final +#'path to the files (read further). For inner dimensions, the provided values in +#'the third format will be compared to the values of an associated coordinate +#'variable (must be specified in '<dim_name>_var', read further), and the +#'indices of the closest values will be retrieved. When using the third format, +#'a list with two values can also be provided to take all the indices of the +#'values within the specified range. +#'\cr\cr +#'The \bold{name of the associated coordinate variable} must be a character +#'string with the name of an associated coordinate variable to be found in the +#'data files (in all* of them). For this to work, a 'file_var_reader' +#'function must be specified when calling Start() (see parameter +#''file_var_reader'). The coordinate variable must also be requested in the +#'parameter 'return_vars' (see its section for details). This feature only +#'works for inner dimensions. +#'\cr\cr +#'The \bold{tolerance value} is useful when indices for an inner dimension are +#'specified in the third format (values of whichever type). In that case, the +#'indices of the closest values in the coordinate variable are sought. However, +#'the closest value might be too distant, and we would want to consider that no +#'real match exists for the provided value. This is possible via the tolerance, +#'which allows specifying a threshold beyond which matching values are not +#'sought and the index is marked as a missing value. 
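+#'\cr\cr +#'For illustration, a minimal sketch of combining value selectors with a +#'tolerance, reusing the sample files shipped with startR rather than the store +#'example above (the 0.5 degree threshold is an arbitrary illustrative value, +#'not a recommended setting): +#'\cr +#'\command{ +#'\cr # data_path <- system.file('extdata', package = 'startR') +#'\cr # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#'\cr # data <- Start(dat = list(list(path = path_obs)), +#'\cr # var = 'tos', +#'\cr # sdate = '200011', +#'\cr # time = 'all', +#'\cr # latitude = values(list(10, 20)), +#'\cr # latitude_tolerance = 0.5, +#'\cr # longitude = 'all', +#'\cr # return_vars = list(latitude = 'dat'), +#'\cr # retrieve = FALSE) +#'}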
+#'\cr\cr
+#'The \bold{reorder function} is useful when indices for an inner dimension are
+#'specified in the third format, and the retrieved indices need to be reordered
+#'as a function of their provided associated variable values. A function can be
+#'provided, which receives as input a vector of values, and returns as output a
+#'list with the components \code{$x}, with the reordered values, and \code{$ix},
+#'with the permutation indices. Two reordering functions are included in
+#'startR, Sort() and CircularSort().
+#'\cr\cr
+#'The \bold{name of another dimension} to be specified in '_depends',
+#'only available for file dimensions, must be a character string with the name
+#'of another requested \bold{file dimension} in \code{\dots}, and will make
+#'Start() aware that the path components of a file dimension can vary as a
+#'function of the path component of another file dimension. For instance, in the
+#'example above, specifying \code{item_depends = 'section'} will make
+#'Start() aware that the item names vary as a function of the section, i.e.
+#'section 'electronics' has items 'a', 'b' and 'c' but section 'clothing' has
+#'items 'd', 'e', 'f'. Otherwise Start() would expect to find the same
+#'item names in all the sections.
+#'\cr\cr
+#'The \bold{name of another dimension} to be specified in '_across',
+#'only available for inner dimensions, must be a character string with the name
+#'of another requested \bold{inner dimension} in \code{\dots}, and will make
+#'Start() aware that an inner dimension extends along multiple files. For
+#'instance, let us imagine that in the example above, the records for each item
+#'are so large that it becomes necessary to split them into multiple files, each
+#'one containing the records for a different period of time, e.g. in 10 files
+#'with 100 months each ('item_a_period1.data', 'item_a_period2.data', and so on).
+#'In that case, the data can be perceived as having an extra file dimension, the
+#''period' dimension. The inner dimension 'month' would extend across multiple
+#'files, and providing the parameter \code{month = indices(list(1, 300))} would
+#'make Start() crash because it would perceive we have made a request out of
+#'bounds (each file contains 100 'month' indices, but we requested 1 to 300).
+#'This can be solved by specifying the parameter \code{month_across = 'period'}
+#'(along with the full specification of the dimension 'period').
+#'\cr\cr
+#'\bold{Defining the path pattern}
+#'\cr
+#'As mentioned above, the parameter \dots also expects to receive information
+#'with the location of the data files. In order to do this, a special dimension
+#'must be defined. In that special dimension, in place of specifying indices to
+#'take, a path pattern must be provided. The path pattern is a character string
+#'that encodes the way the files are organized in their source. It must be a
+#'path to one of the data set files in an accessible local or remote file system,
+#'or a URL to one of the files provided by a local or remote server. The regions
+#'of this path that vary across files (along the file dimensions) must be
+#'replaced by wildcards. The wildcards must match any of the defined file
+#'dimensions in the call to Start() and must be delimited with heading
+#'and trailing '$'. Shell globbing expressions can be used in the path pattern.
+#'See the next code snippet for an example of a path pattern.
+#'\cr\cr
+#'All in all, the call to Start() to load the entire data set in the
+#'example of store item sales would look as follows:
+#'\cr
+#'\command{
+#'\cr # data <- Start(source = paste0('/data/$variable$/',
+#'\cr # '$section$/$item$.data'),
+#'\cr # variable = 'all',
+#'\cr # section = 'all',
+#'\cr # item = 'all',
+#'\cr # item_depends = 'section',
+#'\cr # store = 'all',
+#'\cr # month = 'all')
+#'}
+#'\cr\cr
+#'Note that in this example the parameters 'file_opener', 'file_closer',
+#''file_dim_reader', 'file_var_reader' and 'file_data_reader' would still need
+#'to be properly defined for the '.data' file format
+#'(see the corresponding sections).
+#'\cr\cr
+#'The call to Start() will return a multidimensional R array with the
+#'following dimensions:
+#'\cr
+#'\command{
+#'\cr # source variable section item store month
+#'\cr # 1 2 2 3 100 24
+#'}
+#'\cr
+#'The dimension specifications in the \code{\dots} do not have to follow any
+#'particular order. The returned array will have the dimensions in the same order
+#'as they have been specified in the call. For example, the following call:
+#'\cr
+#'\command{
+#'\cr # data <- Start(source = paste0('/data/$variable$/',
+#'\cr # '$section$/$item$.data'),
+#'\cr # month = 'all',
+#'\cr # store = 'all',
+#'\cr # item = 'all',
+#'\cr # item_depends = 'section',
+#'\cr # section = 'all',
+#'\cr # variable = 'all')
+#'}
+#'\cr\cr
+#'would return an array with the following dimensions:
+#'\cr
+#'\command{
+#'\cr # source month store item section variable
+#'\cr # 1 24 100 3 2 2
+#'}
+#'\cr\cr
+#'Next, a more advanced example to retrieve data for only the sales records, for
+#'the first section ('electronics'), for the 1st and 3rd items and for the
+#'stores located in Barcelona (assuming the files contain the variable
+#''store_location' with the name of the city each of the 100 stores are located
+#'at):
+#'\cr
+#'\command{
+#'\cr # data <- Start(source = paste0('/data/$variable$/',
+#'\cr # '$section$/$item$.data'),
+#'\cr # variable = 'sales',
+#'\cr # section = 'first',
+#'\cr # item = indices(c(1, 3)),
+#'\cr # item_depends = 'section',
+#'\cr # store = 'Barcelona',
+#'\cr # store_var = 'store_location',
+#'\cr # month = 'all',
+#'\cr # return_vars = list(store_location = NULL))
+#'}
+#'\cr\cr
+#'The defined names for the dimensions do not necessarily have to match the
+#'names of the dimensions inside the file. Lists of alternative names to be
+#'sought can be defined in the parameter 'synonims'.
+#'\cr\cr
+#'If data from multiple sources (not necessarily following the same structure)
+#'has to be retrieved, it can be done by providing a vector of character strings
+#'with path pattern specifications, or, in the extended form, by providing a
+#'list of lists with the components 'name' and 'path', and the name of the
+#'dataset and path pattern as values, respectively.
For example:
+#'\cr
+#'\command{
+#'\cr # data <- Start(source = list(
+#'\cr # list(name = 'sourceA',
+#'\cr # path = paste0('/sourceA/$variable$/',
+#'\cr # '$section$/$item$.data')),
+#'\cr # list(name = 'sourceB',
+#'\cr # path = paste0('/sourceB/$section$/',
+#'\cr # '$variable$/$item$.data'))
+#'\cr # ),
+#'\cr # variable = 'sales',
+#'\cr # section = 'first',
+#'\cr # item = indices(c(1, 3)),
+#'\cr # item_depends = 'section',
+#'\cr # store = 'Barcelona',
+#'\cr # store_var = 'store_location',
+#'\cr # month = 'all',
+#'\cr # return_vars = list(store_location = NULL))
+#'}
+#'\cr
+#'
+#'@param return_vars A named list where the names are the names of the
+#'variables to be fetched in the files, and the values are vectors of
+#'character strings with the names of the file dimensions for which to retrieve
+#'each variable, or NULL if the variable has to be retrieved only once
+#'from any (the first) of the involved files.\cr\cr
+#'Apart from retrieving a multidimensional data array, retrieving auxiliary
+#'variables inside the files can also be needed. The parameter
+#''return_vars' allows for requesting such variables, as long as a
+#''file_var_reader' function is also specified in the call to
+#'Start() (see documentation on the corresponding parameter).
+#'\cr\cr
+#'In the case of the item sales example (see documentation on parameter
+#'\code{\dots}), the store location variable is requested with the parameter\cr
+#'\code{return_vars = list(store_location = NULL)}.\cr This will cause
+#'Start() to fetch the variable 'store_location' once and return it in
+#'the component\cr \code{$Variables$common$store_location},\cr which will be an
+#'array of character strings with the location names, with the dimensions
+#'\code{c('store' = 100)}. Although useless in this example, we could ask
+#'Start() to fetch and return such a variable for each file along the
+#'items dimension as follows: \cr
+#'\code{return_vars = list(store_location = c('item'))}.\cr In that case, the
+#'variable will be fetched once from a file of each of the items, and will be
+#'returned as an array with the dimensions \code{c('item' = 3, 'store' = 100)}.
+#'\cr\cr
+#'If a variable is requested along a file dimension that contains path pattern
+#'specifications ('source' in the example), the fetched variable values will be
+#'returned in the component\cr \code{$Variables$<dataset name>$<variable name>}.\cr
+#'For example:
+#'\cr
+#'\command{
+#'\cr # data <- Start(source = list(
+#'\cr # list(name = 'sourceA',
+#'\cr # path = paste0('/sourceA/$variable$/',
+#'\cr # '$section$/$item$.data')),
+#'\cr # list(name = 'sourceB',
+#'\cr # path = paste0('/sourceB/$section$/',
+#'\cr # '$variable$/$item$.data'))
+#'\cr # ),
+#'\cr # variable = 'sales',
+#'\cr # section = 'first',
+#'\cr # item = indices(c(1, 3)),
+#'\cr # item_depends = 'section',
+#'\cr # store = 'Barcelona',
+#'\cr # store_var = 'store_location',
+#'\cr # month = 'all',
+#'\cr # return_vars = list(store_location = c('source',
+#'\cr # 'item')))
+#'\cr # # Checking the structure of the returned variables
+#'\cr # str(data$Variables)
+#'\cr # Named list
+#'\cr # ..$common: NULL
+#'\cr # ..$sourceA: Named list
+#'\cr # .. ..$store_location: char[1:18(3d)] 'Barcelona' 'Barcelona' ...
+#'\cr # ..$sourceB: Named list
+#'\cr # .. ..$store_location: char[1:18(3d)] 'Barcelona' 'Barcelona' ...
+#'\cr # # Checking the dimensions of the returned variable
+#'\cr # # for the source A
+#'\cr # dim(data$Variables$sourceA$store_location)
+#'\cr # item store
+#'\cr # 3 3
+#'}
+#'\cr\cr
+#'The names of the requested variables do not necessarily have to match the
+#'actual variable names inside the files. A list of alternative names to be
+#'sought can be specified via the parameter 'synonims'.
+#'
+#'@param synonims A named list where the names are the requested variable or
+#'dimension names, and the values are vectors of character strings with
+#'alternative names to seek for such dimension or variable.\cr\cr
+#'In some requests, data from different sources may follow different naming
+#'conventions for the dimensions or variables, or even files in the same source
+#'could have varying names. This parameter helps Start() to
+#'properly identify the dimensions or variables with different names.
+#'\cr\cr
+#'In the example used in parameter 'return_vars', it may be the case that
+#'the two involved data sources follow slightly different naming conventions.
+#'For example, source A uses 'sect' as name for the sections dimension, whereas
+#'source B uses 'section'; source A uses 'store_loc' as variable name for the
+#'store locations, whereas source B uses 'store_location'. This can be taken
+#'into account as follows:
+#'\cr
+#'\command{
+#'\cr # data <- Start(source = list(
+#'\cr # list(name = 'sourceA',
+#'\cr # path = paste0('/sourceA/$variable$/',
+#'\cr # '$section$/$item$.data')),
+#'\cr # list(name = 'sourceB',
+#'\cr # path = paste0('/sourceB/$section$/',
+#'\cr # '$variable$/$item$.data'))
+#'\cr # ),
+#'\cr # variable = 'sales',
+#'\cr # section = 'first',
+#'\cr # item = indices(c(1, 3)),
+#'\cr # item_depends = 'section',
+#'\cr # store = 'Barcelona',
+#'\cr # store_var = 'store_location',
+#'\cr # month = 'all',
+#'\cr # return_vars = list(store_location = c('source',
+#'\cr # 'item')),
+#'\cr # synonims = list(
+#'\cr # section = c('sect', 'section'),
+#'\cr # store_location = c('store_loc',
+#'\cr # 'store_location')
+#'\cr # ))
+#'}
+#'\cr
+#'
+#'@param file_opener A function that receives as a single parameter
+#' 'file_path' a character string with the path to a file to be opened,
+#' and returns an object with an open connection to the file (optionally with
+#' header information) on success, or returns NULL on failure.
+#'\cr\cr
+#'This parameter takes by default NcOpener() (an opener function for NetCDF
+#'files).
+#'\cr\cr
+#'See NcOpener() for a template to build a file opener for your own file
+#'format.
+#'
+#'@param file_var_reader A function with the header \code{file_path = NULL},
+#' \code{file_object = NULL}, \code{file_selectors = NULL}, \code{var_name},
+#' \code{synonims} that returns an array with auxiliary data (i.e. data from a
+#' variable) inside a file. Start() will provide automatically either a
+#' 'file_path' or a 'file_object' to the 'file_var_reader'
+#' function (the function has to be ready to work whichever of these two is
+#' provided). The parameter 'file_selectors' will also be provided
+#' automatically to the variable reader, containing a named list where the
+#' names are the names of the file dimensions of the queried data set (see
+#' documentation on \code{\dots}) and the values are single character strings
+#' with the components used to build the path to the file being read (the one
+#' provided in 'file_path' or 'file_object').
The parameter 'var_name'
+#' will be filled in automatically by Start() also, with the name of one
+#' of the variables to be read. The parameter 'synonims' will be filled in
+#' with exactly the same value as provided in the parameter 'synonims' in
+#' the call to Start(), and has to be used in the code of the variable
+#' reader to check for alternative variable names inside the target file. The
+#' 'file_var_reader' must return a (multi)dimensional array with named
+#' dimensions, and optionally with the attribute 'variables' with other
+#' additional metadata on the retrieved variable.
+#'\cr\cr
+#'Usually, the 'file_var_reader' should be a degenerate case of the
+#''file_data_reader' (see documentation on the corresponding parameter),
+#'so it is recommended to code the 'file_data_reader' first.
+#'\cr\cr
+#'This parameter takes by default NcVarReader() (a variable reader function
+#'for NetCDF files).
+#'\cr\cr
+#'See NcVarReader() for a template to build a variable reader for your own
+#'file format.
+#'
+#'@param file_dim_reader A function with the header \code{file_path = NULL},
+#' \code{file_object = NULL}, \code{file_selectors = NULL}, \code{synonims}
+#' that returns a named numeric vector where the names are the names of the
+#' dimensions of the multidimensional data array in the file and the values are
+#' the sizes of such dimensions. Start() will provide automatically
+#' either a 'file_path' or a 'file_object' to the
+#' 'file_dim_reader' function (the function has to be ready to work
+#' whichever of these two is provided). The parameter 'file_selectors'
+#' will also be provided automatically to the dimension reader, containing a
+#' named list where the names are the names of the file dimensions of the
+#' queried data set (see documentation on \code{\dots}) and the values are
+#' single character strings with the components used to build the path to the
+#' file being read (the one provided in 'file_path' or 'file_object').
+#' The parameter 'synonims' will be filled in with exactly the same value
+#' as provided in the parameter 'synonims' in the call to Start(),
+#' and can optionally be used in advanced configurations.
+#'\cr\cr
+#'This parameter takes by default NcDimReader() (a dimension reader
+#'function for NetCDF files).
+#'\cr\cr
+#'See NcDimReader() for (an advanced) template to build a dimension reader
+#'for your own file format.
+#'
+#'@param file_data_reader A function with the header \code{file_path = NULL},
+#' \code{file_object = NULL}, \code{file_selectors = NULL},
+#' \code{inner_indices = NULL}, \code{synonims} that returns a subset of the
+#' multidimensional data array inside a file (even if internally it is not an
+#' array). Start() will provide automatically either a 'file_path'
+#' or a 'file_object' to the 'file_data_reader' function (the
+#' function has to be ready to work whichever of these two is provided). The
+#' parameter 'file_selectors' will also be provided automatically to the
+#' data reader, containing a named list where the names are the names of the
+#' file dimensions of the queried data set (see documentation on \code{\dots})
+#' and the values are single character strings with the components used to
+#' build the path to the file being read (the one provided in 'file_path' or
+#' 'file_object').
The parameter 'inner_indices' will be filled in
+#' automatically by Start() also, with a named list of numeric vectors,
+#' where the names are the names of all the expected inner dimensions in a file
+#' to be read, and the numeric vectors are the indices to be taken from the
+#' corresponding dimension (the indices may not be consecutive nor in order).
+#' The parameter 'synonims' will be filled in with exactly the same value
+#' as provided in the parameter 'synonims' in the call to Start(),
+#' and has to be used in the code of the data reader to check for alternative
+#' dimension names inside the target file. The 'file_data_reader' must
+#' return a (multi)dimensional array with named dimensions, and optionally with
+#' the attribute 'variables' with other additional metadata on the retrieved
+#' data.
+#'\cr\cr
+#'Usually, 'file_data_reader' should use 'file_dim_reader'
+#'(see documentation on the corresponding parameter), so it is recommended to
+#'code 'file_dim_reader' first.
+#'\cr\cr
+#'This parameter takes by default NcDataReader() (a data reader function
+#'for NetCDF files).
+#'\cr\cr
+#'See NcDataReader() for a template to build a data reader for your own
+#'file format.
+#'
+#'@param file_closer A function that receives as a single parameter
+#' 'file_object' an open connection (as returned by 'file_opener')
+#' to one of the files to be read, optionally with header information, and
+#' closes the open connection. Always returns NULL.
+#'\cr\cr
+#'This parameter takes by default NcCloser() (a closer function for NetCDF
+#'files).
+#'\cr\cr
+#'See NcCloser() for a template to build a file closer for your own file
+#'format.
+#'
+#'@param transform A function with the header \code{data_array},
+#' \code{variables}, \code{file_selectors = NULL}, \code{\dots}. It receives as
+#' input, through the parameter \code{data_array}, a subset of a
+#' multidimensional array (as returned by 'file_data_reader'), applies a
+#' transformation to it and returns it, preserving the number of dimensions but
+#' potentially modifying their size. This transformation may require data from
+#' other auxiliary variables, automatically provided to 'transform'
+#' through the parameter 'variables', in the form of a named list where
+#' the names are the variable names and the values are (multi)dimensional
+#' arrays. Which variables need to be sent to 'transform' can be specified
+#' with the parameter 'transform_vars' in Start(). The parameter
+#' 'file_selectors' will also be provided automatically to
+#' 'transform', containing a named list where the names are the names of
+#' the file dimensions of the queried data set (see documentation on
+#' \code{\dots}) and the values are single character strings with the
+#' components used to build the path to the file the subset being processed
+#' belongs to. The parameter \code{\dots} will be filled in with other
+#' additional parameters to adjust the transformation, exactly as provided in
+#' the call to Start() via the parameter 'transform_params'.
+#'@param transform_params A named list with additional parameters to be sent to
+#' the 'transform' function (if specified). See documentation on parameter
+#' 'transform' for details.
+#'@param transform_vars A vector of character strings with the names of
+#' auxiliary variables to be sent to the 'transform' function (if
+#' specified). All the variables to be sent to 'transform' must also
+#' have been requested as return variables in the parameter 'return_vars'
+#' of Start().
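+#'\cr\cr
+#'For instance, startR ships the CDORemapper() transform function, which
+#'follows this header and interpolates the data with CDO. A sketch of how
+#''transform' and its companion parameters could be combined in a Start()
+#'call (the target grid and interpolation method are illustrative) is:
+#'\command{
+#'\cr # # interpolate each read subset onto a regular 1-degree grid
+#'\cr # transform = CDORemapper,
+#'\cr # transform_params = list(grid = 'r360x181',
+#'\cr # method = 'conservative'),
+#'\cr # transform_vars = c('latitude', 'longitude')
+#'}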
+#'@param transform_extra_cells An integer of extra indices to retrieve from the
+#' data set, beyond the requested indices in \code{\dots}, in order for
+#' 'transform' to dispose of additional information to properly apply
+#' whichever transformation (if needed). As many as
+#' 'transform_extra_cells' will be retrieved beyond each of the limits for
+#' each of those inner dimensions associated to a coordinate variable and sent
+#' to 'transform' (i.e. present in 'transform_vars'). After
+#' 'transform' has finished, Start() will take again and return a
+#' subset of the result, for the returned data to fall within the specified
+#' bounds in \code{\dots}. The default value is 2.
+#'@param apply_indices_after_transform A logical value indicating, when a
+#' 'transform' is specified in Start() and numeric indices are
+#' provided for any of the inner dimensions that depend on coordinate variables,
+#' whether these numeric indices are made effective (retrieved) before applying
+#' the transformation or after. This flag allows adjusting that behaviour.
+#' It takes FALSE by default (numeric indices are applied before sending
+#' data to 'transform').
+#'@param pattern_dims A character string indicating the name of the dimension
+#' with path pattern specifications (see \code{\dots} for details). If not
+#' specified, Start() assumes the first provided dimension is the pattern
+#' dimension, with a warning.
+#'@param metadata_dims A vector of character strings with the names of the file
+#' dimensions for which to return metadata. As noted in 'file_data_reader',
+#' the data reader can optionally return auxiliary data via the attribute
+#' 'variables' of the returned array. Start() by default returns the
+#' auxiliary data read for only the first file of each source (or data set) in
+#' the pattern dimension (see \code{\dots} for info on what the pattern
+#' dimension is). However, it can be configured to return the metadata for all
+#' the files along any set of file dimensions. The parameter 'metadata_dims'
+#' allows configuring this level of granularity of the returned metadata.
+#'@param selector_checker A function used internally by Start() to
+#' translate a set of selectors (values for a dimension associated to a
+#' coordinate variable) into a set of numeric indices. It takes by default
+#' SelectorChecker() and, in principle, it should not be required to
+#' change it for customized file formats. The option to replace it is left open
+#' for more versatility. See the code of SelectorChecker() for details on
+#' the inputs, functioning and outputs of a selector checker.
+#'@param merge_across_dims A logical value indicating whether to merge
+#' dimensions across which another dimension extends (according to the
+#' '_across' parameters). It takes the value FALSE by default. For
+#' example, if the dimension 'time' extends across the dimension 'chunk' and
+#' \code{merge_across_dims = TRUE}, the resulting data array will contain only
+#' the dimension 'time', whose length is the total length of all the chunks
+#' together.
+#'@param merge_across_dims_narm A logical value indicating whether to remove
+#' the additional NAs from data when parameter 'merge_across_dims' is TRUE.
+#' It is helpful when the length of the to-be-merged dimension is different
+#' across another dimension. For example, if the dimension 'time' extends
+#' across dimension 'chunk', and the time length along the first chunk is 2
+#' while along the second chunk is 10.
Setting this parameter to TRUE can
+#' remove the additional 8 NAs at positions 3 to 10. The default value is FALSE.
+#' A combined example is sketched after these parameter descriptions.
+#'@param split_multiselected_dims A logical value indicating whether to split a
+#' dimension that has been selected with a multidimensional array of selectors
+#' into as many dimensions as present in the selector array. The default value
+#' is FALSE.
+#'@param path_glob_permissive A logical value or an integer specifying how many
+#' folder levels in the path pattern, beginning from the end, the shell glob
+#' expressions must be preserved and worked out for each file. The default
+#' value is FALSE, which is equivalent to 0. TRUE is equivalent to 1.\cr\cr
+#'When specifying a path pattern for a dataset, it might contain shell glob
+#'expressions. For each dataset, the first file matching the path pattern is
+#'found, and the found file is used to work out fixed values for the glob
+#'expressions that will be used for all the files of the dataset. However, in
+#'some cases the values of the shell glob expressions may not be constant for
+#'all files in a dataset, and they need to be worked out for each file
+#'involved.\cr\cr
+#'For example, a path pattern could be as follows: \cr
+#'\code{'/path/to/dataset/$var$_*/$date$_*_foo.nc'}. \cr Leaving
+#'\code{path_glob_permissive = FALSE} will trigger automatic seek of the
+#' contents to replace the asterisks (e.g. the first asterisk matches with
+#' \code{'bar'} and the second with \code{'baz'}). The found contents will be
+#' used for all files in the dataset (in the example, the path pattern will be
+#' fixed to\cr \code{'/path/to/dataset/$var$_bar/$date$_baz_foo.nc'}). However, if
+#' any of the files in the dataset have other contents in the position of the
+#' asterisks, Start() will not find them (in the example, a file like \cr
+#' \code{'/path/to/dataset/precipitation_bar/19901101_bin_foo.nc'} would not be
+#' found). Setting \code{path_glob_permissive = 1} would preserve the glob
+#' expressions in the last level (in the example, the fixed path pattern
+#' would be\cr \code{'/path/to/dataset/$var$_bar/$date$_*_foo.nc'}, and the
+#' problematic file mentioned before would be found), but of course this would
+#' slow down the Start() call if the dataset involves a large number of
+#' files. Setting \code{path_glob_permissive = 2} would leave the original path
+#' pattern with the original glob expressions in the 1st and 2nd levels (in the
+#' example, both asterisks would be preserved, thus allowing Start()
+#' to recognize files such as \cr
+#' \code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'}).
+#'@param retrieve A logical value indicating whether to retrieve the data
+#' defined in the Start() call or to explore only its dimension lengths
+#' and names, and the values for the file and inner dimensions. The default
+#' value is FALSE.
+#'@param num_procs An integer indicating the number of processes to be created
+#' for the parallel execution of the retrieval/transformation/arrangement of the
+#' multiple involved files in a call to Start(). If set to NULL,
+#' it takes the number of available cores (as detected by availableCores() in
+#' the package 'future'). The default value is 1 (no parallel execution).
+#'@param silent A logical value indicating whether to display progress messages
+#' (FALSE) or not (TRUE). The default value is FALSE.
+#'@param debug A logical value indicating whether to return detailed messages on
+#' the progress and operations in a Start() call (TRUE) or not (FALSE). The
+#' default value is FALSE.
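+#'\cr\cr
+#'As a brief sketch of how the '_across' and merging parameters described
+#'above could be combined (the dimension names are illustrative), a dimension
+#''time' spread over the files of a file dimension 'chunk' could be requested
+#'as a single merged dimension with:
+#'\command{
+#'\cr # # 'time' extends across the files of the 'chunk' dimension;
+#'\cr # # request it as one continuous dimension and drop the padding NAs
+#'\cr # chunk = 'all',
+#'\cr # time = 'all',
+#'\cr # time_across = 'chunk',
+#'\cr # merge_across_dims = TRUE,
+#'\cr # merge_across_dims_narm = TRUE
+#'}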
+#'
+#'@return If \code{retrieve = TRUE}, the involved data is loaded into RAM memory
+#' and an object of the class 'startR_cube' with the following components is
+#' returned:\cr
+#' \item{Data}{
+#' Multidimensional data array with named dimensions, with the data values
+#' requested via \code{\dots} and other parameters. This array can potentially
+#' contain metadata in the attribute 'variables'.
+#' }
+#' \item{Variables}{
+#' Named list of 1 + N components, containing lists of retrieved variables (as
+#' requested in 'return_vars') common to all the data sources (in the 1st
+#' component, \code{$common}), and for each of the N data sources (named after
+#' the source name, as specified in \dots, or, if not specified, \code{$dat1},
+#' \code{$dat2}, ..., \code{$datN}). Each of the variables is contained in a
+#' multidimensional array with named dimensions, and potentially with the
+#' attribute 'variables' with additional auxiliary data.
+#' }
+#' \item{Files}{
+#' Multidimensional character string array with named dimensions. Its dimensions
+#' are the file dimensions (as requested in \code{\dots}). Each cell in this
+#' array contains a path to a retrieved file, or NULL if the corresponding
+#' file was not found.
+#' }
+#' \item{NotFoundFiles}{
+#' Array with the same shape as \code{$Files} but with NULL in the
+#' positions for which the corresponding file was found, and a path to the
+#' expected file in the positions for which the corresponding file was not
+#' found.
+#' }
+#' \item{FileSelectors}{
+#' Multidimensional character string array with named dimensions, with the same
+#' shape as \code{$Files} and \code{$NotFoundFiles}, which contains the
+#' components used to build up the paths to each of the files in the data
+#' sources.
+#' }
+#'If \code{retrieve = FALSE}, the involved data is not loaded into RAM memory and
+#'an object of the class 'startR_header' with the following components is
+#' returned:\cr
+#' \item{Dimensions}{
+#' Named vector with the dimension lengths and names of the data involved in
+#' the Start() call.
+#' }
+#' \item{Variables}{
+#' Named list of 1 + N components, containing lists of retrieved variables (as
+#' requested in 'return_vars') common to all the data sources (in the 1st
+#' component, \code{$common}), and for each of the N data sources (named after
+#' the source name, as specified in \dots, or, if not specified, \code{$dat1},
+#' \code{$dat2}, ..., \code{$datN}). Each of the variables is contained in a
+#' multidimensional array with named dimensions, and potentially with the
+#' attribute 'variables' with additional auxiliary data.
+#' }
+#' \item{Files}{
+#' Multidimensional character string array with named dimensions. Its dimensions
+#' are the file dimensions (as requested in \dots). Each cell in this array
+#' contains a path to a file to be retrieved (which may exist or not).
+#' }
+#' \item{FileSelectors}{
+#' Multidimensional character string array with named dimensions, with the same
+#' shape as \code{$Files}, which contains the
+#' components used to build up the paths to each of the files in the data
+#' sources.
+#' }
+#' \item{StartRCall}{
+#' List of parameters sent to the Start() call, with the parameter
+#' 'retrieve' set to TRUE. Intended for calling in order to
+#' retrieve the associated data a posteriori with a call to do.call().
+#' } +#' +#'@examples +#' data_path <- system.file('extdata', package = 'startR') +#' path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' sdates <- c('200011', '200012') +#' data <- Start(dat = list(list(path = path_obs)), +#' var = 'tos', +#' sdate = sdates, +#' time = 'all', +#' latitude = 'all', +#' longitude = 'all', +#' return_vars = list(latitude = 'dat', +#' longitude = 'dat', +#' time = 'sdate'), +#' retrieve = FALSE) +#' +#'@import bigmemory multiApply parallel abind future +#'@importFrom utils str +#'@importFrom stats na.omit setNames +#'@importFrom ClimProjDiags Subset +#'@export Start <- function(..., # dim = indices/selectors, # dim_var = 'var', # dim_reorder = Sort/CircularSort, @@ -29,6 +802,10 @@ Start <- function(..., # dim = indices/selectors, #, config_file = NULL #dictionary_dim_names = , #dictionary_var_names = + + # Specify Subset() is from ClimProjDiags + Subset <- ClimProjDiags::Subset + dim_params <- list(...) # Take *_var parameters apart @@ -681,7 +1458,7 @@ Start <- function(..., # dim = indices/selectors, if (!(dim_name %in% pattern_dims)) { if (is.null(attr(dat_selectors[[dim_name]], 'values')) || is.null(attr(dat_selectors[[dim_name]], 'indices'))) { - flag <- ((dat_selectors[[dim_name]] %in% c('all', 'first', 'last')) || + flag <- ((any(dat_selectors[[dim_name]] %in% c('all', 'first', 'last'))) || (is.numeric(unlist(dat_selectors[[dim_name]])))) attr(dat_selectors[[dim_name]], 'values') <- !flag attr(dat_selectors[[dim_name]], 'indices') <- flag @@ -1542,7 +2319,7 @@ Start <- function(..., # dim = indices/selectors, # In the special case that the selectors for a dimension are 'all', 'first', ... # and chunking (dividing in more than 1 chunk) is requested, the selectors are # replaced for equivalent indices. - if ((dat[[i]][['selectors']][[inner_dim]][[1]] %in% c('all', 'first', 'last')) && + if ((any(dat[[i]][['selectors']][[inner_dim]][[1]] %in% c('all', 'first', 'last'))) && (chunks[[inner_dim]]['n_chunks'] != 1)) { selectors <- dat[[i]][['selectors']][[inner_dim]][[1]] if (selectors == 'all') { @@ -2052,7 +2829,7 @@ Start <- function(..., # dim = indices/selectors, #NOTE: the possible case? if (goes_across_prime_meridian & sub_array_of_indices[[1]] > sub_array_of_indices[[2]]) { - .stop("The case is goes_across_prime_meridian but no adjustion for the indices!") + stop("The case is goes_across_prime_meridian but no adjustion for the indices!") } if (any(is.na(sub_array_of_indices))) { @@ -2258,7 +3035,7 @@ Start <- function(..., # dim = indices/selectors, # Before changing crop, first we need to find the name of longitude. # NOTE: The potential bug here (also the bug for CDORemapper): the lon name # is limited (only the ones listed in .KnownLonNames() are available. - known_lon_names <- s2dverification:::.KnownLonNames() + known_lon_names <- .KnownLonNames() lon_name <- names(subset_vars_to_transform)[which(names(subset_vars_to_transform) %in% known_lon_names)[1]] # NOTE: The cases not considered: (1) if lon reorder(decreasing = T) @@ -2892,10 +3669,10 @@ Start <- function(..., # dim = indices/selectors, # TODO: try performance of storing all in cols instead of rows # Create the shared memory array, and a pointer to it, to be sent # to the work pieces. 
- data_array <- big.matrix(nrow = prod(final_dims), ncol = 1)
- shared_matrix_pointer <- describe(data_array)
+ data_array <- bigmemory::big.matrix(nrow = prod(final_dims), ncol = 1)
+ shared_matrix_pointer <- bigmemory::describe(data_array)
if (is.null(num_procs)) {
- num_procs <- availableCores()
+ num_procs <- future::availableCores()
}
# Creating a shared tmp folder to store metadata from each chunk
array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load))
@@ -3113,18 +3890,18 @@ Start <- function(..., # dim = indices/selectors,
transform_params = transform_params, silent = silent, debug = debug)
} else {
- cluster <- makeCluster(num_procs, outfile = "")
+ cluster <- parallel::makeCluster(num_procs, outfile = "")
# Send the heavy work to the workers
work_errors <- try({
- found_files <- clusterApplyLB(cluster, work_pieces, .LoadDataFile,
- shared_matrix_pointer = shared_matrix_pointer,
- file_data_reader = file_data_reader,
- synonims = synonims,
- transform = transform,
- transform_params = transform_params,
- silent = silent, debug = debug)
+ found_files <- parallel::clusterApplyLB(cluster, work_pieces, .LoadDataFile,
+ shared_matrix_pointer = shared_matrix_pointer,
+ file_data_reader = file_data_reader,
+ synonims = synonims,
+ transform = transform,
+ transform_params = transform_params,
+ silent = silent, debug = debug)
})
- stopCluster(cluster)
+ parallel::stopCluster(cluster)
}
if (!silent) {
diff --git a/R/Step.R b/R/Step.R
index 32c92738010a0ea564071ba3d679962e79f7e1df..e5d73042ef6ddd40afc266c17b17772de81c14a0 100644
--- a/R/Step.R
+++ b/R/Step.R
@@ -1,3 +1,59 @@
+#'Define the operation applied on declared data.
+#'
+#'The step of the startR workflow after declaring data by a Start() call. It
+#'identifies the operation (i.e., function) and the target and output
+#'dimensions of the data array for the function. Ideally, it expects the
+#'dimension names to be in the same order as the one requested in the Start()
+#'call. If a different order is specified, startR will reorder the subset
+#'dimensions to the expected order for this function.
+#'
+#'@param fun A function in R format defining the operation to be applied to the
+#' data declared by a Start() call. It should only work on the essential
+#' dimensions rather than all the data dimensions. Since the function will be
+#' called numerous times through all the non-essential dimensions, it is
+#' recommended to keep it as light as possible.
+#'@param target_dims A vector for a single input array, or a list of vectors
+#' for multiple input arrays, indicating the names of the dimensions along
+#' which 'fun' is to be applied.
+#'@param output_dims A vector for a single returned array, or a list of vectors
+#' for multiple returned arrays, indicating the dimension names of the function
+#' output.
+#'@param use_libraries A vector of character strings indicating the R library
+#' names to be used in 'fun'. The default value is NULL.
+#'@param use_attributes One or more lists of vectors of character strings
+#' indicating the data attributes to be used in 'fun'. The list name should be
+#' consistent with the list name of 'data' in AddStep(). The default value is
+#' NULL.
+#'@return A closure that contains all the objects assigned. It serves as the
+#' input of AddStep().
+#'@examples +#' data_path <- system.file('extdata', package = 'startR') +#' path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' sdates <- c('200011', '200012') +#' data <- Start(dat = list(list(path = path_obs)), +#' var = 'tos', +#' sdate = sdates, +#' time = 'all', +#' latitude = 'all', +#' longitude = 'all', +#' return_vars = list(latitude = 'dat', +#' longitude = 'dat', +#' time = 'sdate'), +#' retrieve = FALSE) +#' fun <- function(x) { +#' lat = attributes(x)$Variables$dat1$latitude +#' weight = sqrt(cos(lat * pi / 180)) +#' corrected = Apply(list(x), target_dims = "latitude", +#' fun = function(x) {x * weight}) +#' } +#' step <- Step(fun = fun, +#' target_dims = 'latitude', +#' output_dims = 'latitude', +#' use_libraries = c('multiApply'), +#' use_attributes = list(data = "Variables")) +#' wf <- AddStep(data, step) +#' +#'@export Step <- function(fun, target_dims, output_dims, use_libraries = NULL, use_attributes = NULL) { # Check fun diff --git a/R/Subset.R b/R/Subset.R deleted file mode 100644 index da35b5b8a0f35066c50ae3f3e7cc0996b0ecadba..0000000000000000000000000000000000000000 --- a/R/Subset.R +++ /dev/null @@ -1,97 +0,0 @@ -Subset <- function(x, along, indices, drop = FALSE) { - # Check x - if (!is.array(x)) { - stop("Input array 'x' must be a numeric array.") - } - - # Take the input array dimension names - dim_names <- attr(x, 'dimensions') - if (!is.character(dim_names)) { - dim_names <- names(dim(x)) - } - if (!is.character(dim_names)) { - if (any(sapply(along, is.character))) { - stop("The input array 'x' doesn't have labels for the dimensions but the parameter 'along' contains dimension names.") - } - } - - # Check along - if (any(sapply(along, function(x) !is.numeric(x) && !is.character(x)))) { - stop("All provided dimension indices in 'along' must be integers or character strings.") - } - if (any(sapply(along, is.character))) { - req_dimnames <- along[which(sapply(along, is.character))] - if (length(unique(req_dimnames)) < length(req_dimnames)) { - stop("The parameter 'along' must not contain repeated dimension names.") - } - along[which(sapply(along, is.character))] <- match(req_dimnames, dim_names) - if (any(is.na(along))) { - stop("Could not match all dimension names in 'indices' with dimension names in input array 'x'.") - } - along <- as.numeric(along) - } - - # Check indices - if (!is.list(indices)) { - indices <- list(indices) - } - - # Check parameter drop - dims_to_drop <- c() - if (is.character(drop)) { - if (drop == 'all') { - drop <- TRUE - } else if (any(drop %in% c('selected', 'non-selected', 'none'))) { - if (drop == 'selected') { - dims_to_drop <- along[which(sapply(indices, length) == 1)] - } else if (drop == 'non-selected') { - dims_to_drop <- dim(x) == 1 - dims_to_drop[along] <- FALSE - dims_to_drop <- which(dims_to_drop) - } - drop <- FALSE - } else { - stop("Parameter 'drop' must be one of TRUE, FALSE, 'all', 'selected', 'non-selected', 'none'.") - } - } else if (!is.logical(drop)) { - stop("Parameter 'drop' must be one of TRUE, FALSE, 'all', 'selected', 'non-selected', 'none'.") - } - - # Take the subset - nd <- length(dim(x)) - index <- as.list(rep(TRUE, nd)) - index[along] <- indices - subset <- eval(as.call(c(as.name("["), as.name("x"), index, drop = drop))) - # If dropped all dimensions, need to drop dimnames too - if (is.character(dim_names) && drop == TRUE) { - dim_names_to_remove <- unique(c(along[which(sapply(indices, length) == 1)], - which(dim(x) == 1))) - if (length(dim_names_to_remove) > 0) { - dim_names <- 
dim_names[-dim_names_to_remove] - } - } - - # Amend the final dimensions and put dimnames and attributes - metadata <- attributes(x) - metadata[['dim']] <- dim(subset) - if (length(dims_to_drop) > 0) { - metadata[['dim']] <- metadata[['dim']][-dims_to_drop] - if (is.character(dim_names)) { - names(metadata[['dim']]) <- dim_names[-dims_to_drop] - if ('dimensions' %in% names(attributes(x))) { - metadata[['dimensions']] <- dim_names[-dims_to_drop] - } - } - if (length(metadata[['dim']]) == 0) { - metadata['dim'] <- list(NULL) - metadata['dimensions'] <- list(NULL) - } - } else if (is.character(dim_names)) { - names(metadata[['dim']]) <- dim_names - if ('dimensions' %in% names(attributes(x))) { - metadata[['dimensions']] <- dim_names - } - } - attributes(subset) <- metadata - subset -} diff --git a/R/Utils.R b/R/Utils.R index 8d06343b3e689a96d3d81081e676409d6b949de4..a4255c1790057b2328dba51c076842064514883c 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -1,18 +1,6 @@ -indices <- function(x) { - attr(x, 'indices') <- TRUE - attr(x, 'values') <- FALSE - attr(x, 'chunk') <- c(chunk = 1, n_chunks = 1) - x -} - -values <- function(x) { - attr(x, 'indices') <- FALSE - attr(x, 'values') <- TRUE - attr(x, 'chunk') <- c(chunk = 1, n_chunks = 1) - x -} - -chunk <- function(chunk, n_chunks, selectors) { +#'@import abind +#'@importFrom ClimProjDiags Subset +.chunk <- function(chunk, n_chunks, selectors) { if (any(chunk > n_chunks)) { stop("Requested chunk index out of bounds.") } @@ -46,8 +34,8 @@ chunk <- function(chunk, n_chunks, selectors) { } old_indices <- attr(selectors, 'indices') old_values <- attr(selectors, 'values') - selectors <- Subset(selectors, 1:length(chunk), - lapply(1:length(chunk), + selectors <- ClimProjDiags::Subset(selectors, 1:length(chunk), + lapply(1:length(chunk), function(x) { n_indices <- dim(selectors)[x] chunk_sizes <- rep(floor(n_indices / n_chunks[x]), n_chunks[x]) @@ -725,7 +713,7 @@ chunk <- function(chunk, n_chunks, selectors) { } .MergeChunks <- function(shared_dir, suite_id, remove) { - MergeArrays <- startR:::.MergeArrays + MergeArrays <- .MergeArrays args <- NULL shared_dir <- paste0(shared_dir, '/STARTR_CHUNKING_', suite_id) @@ -826,7 +814,7 @@ chunk <- function(chunk, n_chunks, selectors) { } } - result[[component]] <- startR:::.MergeArrayOfArrays(array_of_chunks) + result[[component]] <- .MergeArrayOfArrays(array_of_chunks) rm(array_of_chunks) gc() } @@ -841,3 +829,11 @@ chunk <- function(chunk, n_chunks, selectors) { result } + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') +} diff --git a/R/indices.R b/R/indices.R new file mode 100644 index 0000000000000000000000000000000000000000..a7ad7a08123656c8b07fc10f46567ed40b59558a --- /dev/null +++ b/R/indices.R @@ -0,0 +1,34 @@ +#'Specify dimension selectors with indices +#' +#'This is a helper function used in a Start() call to define the desired range +#'of dimensions. It selects the indices of the coordinate variable from +#'original data. See details in the documentation of the parameter \code{\dots} +#''indices to take' of the function Start(). +#' +#'@param x A numeric vector or a list with two nemerics to take all the +#' elements between the two specified indices (both extremes inclusive). +#'@return Same as input, but with additional attribute 'indices', 'values', and +#' 'chunk'. 
+#'@examples +#' # Used in Start(): +#' data_path <- system.file('extdata', package = 'startR') +#' path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' sdates <- c('200011', '200012') +#' data <- Start(dat = list(list(path = path_obs)), +#' var = 'tos', +#' sdate = sdates, +#' time = 'all', +#' latitude = indices(1:2), +#' longitude = indices(list(2, 14)), +#' return_vars = list(latitude = 'dat', +#' longitude = 'dat', +#' time = 'sdate'), +#' retrieve = FALSE) +#'@seealso \code{\link{values}} +#'@export +indices <- function(x) { + attr(x, 'indices') <- TRUE + attr(x, 'values') <- FALSE + attr(x, 'chunk') <- c(chunk = 1, n_chunks = 1) + x +} diff --git a/R/values.R b/R/values.R new file mode 100644 index 0000000000000000000000000000000000000000..592aa1d15989a0f412338c18d731db2a4cd9927f --- /dev/null +++ b/R/values.R @@ -0,0 +1,35 @@ +#'Specify dimension selectors with actual values +#' +#'This is a helper function used in a Start() call to define the desired range +#'of dimensions. It specifies the actual value to be matched with the +#'coordinate variable. See details in the documentation of the parameter +#'\code{\dots} 'indices to take' of the function Start(). +#'@param x A numeric vector or a list with two nemerics to take all the element +#' between the two specified values (both extremes inclusive). +#'@return Same as input, but with additional attribute 'indices', 'values', and +#' 'chunk'. +#'@examples +#' # Used in Start(): +#' data_path <- system.file('extdata', package = 'startR') +#' path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' sdates <- c('200011', '200012') +#' data <- Start(dat = list(list(path = path_obs)), +#' var = 'tos', +#' sdate = sdates, +#' time = 'all', +#' latitude = values(seq(-80, 80, 20)), +#' latitude_reorder = Sort(), +#' longitude = values(list(10, 300)), +#' longitude_reorder = CircularSort(0, 360), +#' return_vars = list(latitude = 'dat', +#' longitude = 'dat', +#' time = 'sdate'), +#' retrieve = FALSE) +#'@seealso \code{\link{indices}} +#'@export +values <- function(x) { + attr(x, 'indices') <- FALSE + attr(x, 'values') <- TRUE + attr(x, 'chunk') <- c(chunk = 1, n_chunks = 1) + x +} diff --git a/inst/extdata/exp/monthly_mean/tos/tos_19851101.nc b/inst/extdata/exp/monthly_mean/tos/tos_19851101.nc new file mode 100644 index 0000000000000000000000000000000000000000..b33e028f9781f4521649ec817034f4984630ed11 Binary files /dev/null and b/inst/extdata/exp/monthly_mean/tos/tos_19851101.nc differ diff --git a/inst/extdata/exp/monthly_mean/tos/tos_19901101.nc b/inst/extdata/exp/monthly_mean/tos/tos_19901101.nc new file mode 100644 index 0000000000000000000000000000000000000000..02dbce274480da92d31fdb29d84e984c8b517e69 Binary files /dev/null and b/inst/extdata/exp/monthly_mean/tos/tos_19901101.nc differ diff --git a/inst/extdata/exp/monthly_mean/tos/tos_19951101.nc b/inst/extdata/exp/monthly_mean/tos/tos_19951101.nc new file mode 100644 index 0000000000000000000000000000000000000000..96b936163ce5c1c9dd3957f28c488a5d2338444d Binary files /dev/null and b/inst/extdata/exp/monthly_mean/tos/tos_19951101.nc differ diff --git a/inst/extdata/exp/monthly_mean/tos/tos_20001101.nc b/inst/extdata/exp/monthly_mean/tos/tos_20001101.nc new file mode 100644 index 0000000000000000000000000000000000000000..7e89318da4285ca33a6040e4d4e6b86bcde5e9e5 Binary files /dev/null and b/inst/extdata/exp/monthly_mean/tos/tos_20001101.nc differ diff --git a/inst/extdata/exp/monthly_mean/tos/tos_20051101.nc 
b/inst/extdata/exp/monthly_mean/tos/tos_20051101.nc new file mode 100644 index 0000000000000000000000000000000000000000..c9406385a9b5ee561dc44d13cd7876abcc65d607 Binary files /dev/null and b/inst/extdata/exp/monthly_mean/tos/tos_20051101.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198511.nc b/inst/extdata/obs/monthly_mean/tos/tos_198511.nc new file mode 100644 index 0000000000000000000000000000000000000000..e6da57456a8af5560ee4dd3f6332ae11d98a035c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198511.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198512.nc b/inst/extdata/obs/monthly_mean/tos/tos_198512.nc new file mode 100644 index 0000000000000000000000000000000000000000..76de8ba1d418b0235c66b9915f986aff3caa5609 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198512.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198601.nc b/inst/extdata/obs/monthly_mean/tos/tos_198601.nc new file mode 100644 index 0000000000000000000000000000000000000000..c9882ba1f1c0d330a3b564f032ce6933aad4c91b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198601.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198602.nc b/inst/extdata/obs/monthly_mean/tos/tos_198602.nc new file mode 100644 index 0000000000000000000000000000000000000000..28249da7d96a62159aa23fc6e1ac48b475567a2e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198602.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198603.nc b/inst/extdata/obs/monthly_mean/tos/tos_198603.nc new file mode 100644 index 0000000000000000000000000000000000000000..49ded88a4645dc746367fc4c99ba7aaff49a3f2d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198603.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198604.nc b/inst/extdata/obs/monthly_mean/tos/tos_198604.nc new file mode 100644 index 0000000000000000000000000000000000000000..82aaaf1e514e393131804456aa23ab4952538bdb Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198604.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198605.nc b/inst/extdata/obs/monthly_mean/tos/tos_198605.nc new file mode 100644 index 0000000000000000000000000000000000000000..ecd488715a1231fefddf9bfdf22ac2cb666677fb Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198605.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198606.nc b/inst/extdata/obs/monthly_mean/tos/tos_198606.nc new file mode 100644 index 0000000000000000000000000000000000000000..641325a609c4eff2ccc03006c3ba94b44298832e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198606.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198607.nc b/inst/extdata/obs/monthly_mean/tos/tos_198607.nc new file mode 100644 index 0000000000000000000000000000000000000000..666a46eaa69ef4b8667f446563daa7d327e6a27b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198607.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198608.nc b/inst/extdata/obs/monthly_mean/tos/tos_198608.nc new file mode 100644 index 0000000000000000000000000000000000000000..f9283acbae3416e854ad488a76add42da2620c64 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198608.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198609.nc b/inst/extdata/obs/monthly_mean/tos/tos_198609.nc new file mode 100644 index 0000000000000000000000000000000000000000..8a607590bd2c73f8420fb5e56038cf85813be743 Binary files /dev/null and 
b/inst/extdata/obs/monthly_mean/tos/tos_198609.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198610.nc b/inst/extdata/obs/monthly_mean/tos/tos_198610.nc new file mode 100644 index 0000000000000000000000000000000000000000..e45169dab65c5408cfbb29e762608a3ab8a8751a Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198610.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198611.nc b/inst/extdata/obs/monthly_mean/tos/tos_198611.nc new file mode 100644 index 0000000000000000000000000000000000000000..4db38d7b6cc9385a09c03067b860c201b3566514 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198611.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198612.nc b/inst/extdata/obs/monthly_mean/tos/tos_198612.nc new file mode 100644 index 0000000000000000000000000000000000000000..e3e17610f253f992e74393d886f704d5657f51dd Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198612.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198701.nc b/inst/extdata/obs/monthly_mean/tos/tos_198701.nc new file mode 100644 index 0000000000000000000000000000000000000000..a35b7107648ac08ea59f33112d4ff218cb1400c7 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198701.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198702.nc b/inst/extdata/obs/monthly_mean/tos/tos_198702.nc new file mode 100644 index 0000000000000000000000000000000000000000..3417ed85ee0af6875b2f86b5e1abdff7ed1ca9f3 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198702.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198703.nc b/inst/extdata/obs/monthly_mean/tos/tos_198703.nc new file mode 100644 index 0000000000000000000000000000000000000000..e8b326779fcc85bb4ab0998e974925925dbb1b4d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198703.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198704.nc b/inst/extdata/obs/monthly_mean/tos/tos_198704.nc new file mode 100644 index 0000000000000000000000000000000000000000..1350ac5523ef18ad442d0be883fde4baf32563db Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198704.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198705.nc b/inst/extdata/obs/monthly_mean/tos/tos_198705.nc new file mode 100644 index 0000000000000000000000000000000000000000..69f03bc768c505c8b6fbf5ec054222bf8aea73e7 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198705.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198706.nc b/inst/extdata/obs/monthly_mean/tos/tos_198706.nc new file mode 100644 index 0000000000000000000000000000000000000000..3f5de280dc5e39b9749a3500a0c1d9b818ea07cd Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198706.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198707.nc b/inst/extdata/obs/monthly_mean/tos/tos_198707.nc new file mode 100644 index 0000000000000000000000000000000000000000..6cdf8a3224c910d6c179f04274ec622f1144c4f3 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198707.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198708.nc b/inst/extdata/obs/monthly_mean/tos/tos_198708.nc new file mode 100644 index 0000000000000000000000000000000000000000..26eed7ee00fd4318221c94e0e3fdbc50a2ff75a8 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198708.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198709.nc b/inst/extdata/obs/monthly_mean/tos/tos_198709.nc new file mode 100644 index 
0000000000000000000000000000000000000000..ae48057db0ef574116ca25c7d37db0e80ca7db88 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198709.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198710.nc b/inst/extdata/obs/monthly_mean/tos/tos_198710.nc new file mode 100644 index 0000000000000000000000000000000000000000..bb0abfda43cbd65d2bc77f98da84153ae38c0690 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198710.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198711.nc b/inst/extdata/obs/monthly_mean/tos/tos_198711.nc new file mode 100644 index 0000000000000000000000000000000000000000..9380f4c85f2ba12f3c4b5de53100bd40e22d2368 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198711.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198712.nc b/inst/extdata/obs/monthly_mean/tos/tos_198712.nc new file mode 100644 index 0000000000000000000000000000000000000000..15d907deec341f54c8e971b70c5ca1534e5f03b4 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198712.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198801.nc b/inst/extdata/obs/monthly_mean/tos/tos_198801.nc new file mode 100644 index 0000000000000000000000000000000000000000..199cc711b929a3e53801059190b338a39447a934 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198801.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198802.nc b/inst/extdata/obs/monthly_mean/tos/tos_198802.nc new file mode 100644 index 0000000000000000000000000000000000000000..f2d9893e8b7708683a43a25a891531c5590c1f28 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198802.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198803.nc b/inst/extdata/obs/monthly_mean/tos/tos_198803.nc new file mode 100644 index 0000000000000000000000000000000000000000..a1f6aba8e3c0db429a4b39da4fcccfe478ce71ec Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198803.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198804.nc b/inst/extdata/obs/monthly_mean/tos/tos_198804.nc new file mode 100644 index 0000000000000000000000000000000000000000..98fc7e4fce903e07a7ad17bccea3246483071636 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198804.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198805.nc b/inst/extdata/obs/monthly_mean/tos/tos_198805.nc new file mode 100644 index 0000000000000000000000000000000000000000..31ac9c2480b6b76cb9caa382c7a43484c5ec5078 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198805.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198806.nc b/inst/extdata/obs/monthly_mean/tos/tos_198806.nc new file mode 100644 index 0000000000000000000000000000000000000000..ca06142f7d7dcadd3a547811e2f72ea2c15ec44e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198806.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198807.nc b/inst/extdata/obs/monthly_mean/tos/tos_198807.nc new file mode 100644 index 0000000000000000000000000000000000000000..0d09147d007e0d9b87c2d4a1d7e8d66ad0a950f0 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198807.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198808.nc b/inst/extdata/obs/monthly_mean/tos/tos_198808.nc new file mode 100644 index 0000000000000000000000000000000000000000..23e814d610f40483c1a6282fa3eb64251dbdc044 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198808.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_198809.nc b/inst/extdata/obs/monthly_mean/tos/tos_198809.nc new file mode 100644 index 0000000000000000000000000000000000000000..ba21b797682fe3c6dc438b97b03e53114c4539fc Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198809.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198810.nc b/inst/extdata/obs/monthly_mean/tos/tos_198810.nc new file mode 100644 index 0000000000000000000000000000000000000000..e1e2c42b7dc81431ae0f7ffc740d406a46aa6a7b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198810.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198811.nc b/inst/extdata/obs/monthly_mean/tos/tos_198811.nc new file mode 100644 index 0000000000000000000000000000000000000000..492254baa2786a59c090df6147d73dd543c3ff9d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198811.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198812.nc b/inst/extdata/obs/monthly_mean/tos/tos_198812.nc new file mode 100644 index 0000000000000000000000000000000000000000..a52a382483f49e975ff9a09e60d546692e35281c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198812.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198901.nc b/inst/extdata/obs/monthly_mean/tos/tos_198901.nc new file mode 100644 index 0000000000000000000000000000000000000000..0e6f43952e509dd58349efdd1f0ed3c1a612aa36 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198901.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198902.nc b/inst/extdata/obs/monthly_mean/tos/tos_198902.nc new file mode 100644 index 0000000000000000000000000000000000000000..0a627e00ed9f156af677d07a63ff4c6b9d8a9f67 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198902.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198903.nc b/inst/extdata/obs/monthly_mean/tos/tos_198903.nc new file mode 100644 index 0000000000000000000000000000000000000000..ea1ccb65cfa472611715d995e8cb336fae75d1ac Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198903.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198904.nc b/inst/extdata/obs/monthly_mean/tos/tos_198904.nc new file mode 100644 index 0000000000000000000000000000000000000000..526ff8be9677f321e2703a4297f17a8e5019910e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198904.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198905.nc b/inst/extdata/obs/monthly_mean/tos/tos_198905.nc new file mode 100644 index 0000000000000000000000000000000000000000..856b1f010ee9c800a71fcdec1fe5f487c01c6809 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198905.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198906.nc b/inst/extdata/obs/monthly_mean/tos/tos_198906.nc new file mode 100644 index 0000000000000000000000000000000000000000..d7d405d2c073320b48b778766ebbcd19f8f8a090 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198906.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198907.nc b/inst/extdata/obs/monthly_mean/tos/tos_198907.nc new file mode 100644 index 0000000000000000000000000000000000000000..c7ce3708cc7ff08db7940fd8f0f6939f9b6972a1 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198907.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198908.nc b/inst/extdata/obs/monthly_mean/tos/tos_198908.nc new file mode 100644 index 
0000000000000000000000000000000000000000..12473a600848372c65bbefd696b89a83a81d583c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198908.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198909.nc b/inst/extdata/obs/monthly_mean/tos/tos_198909.nc new file mode 100644 index 0000000000000000000000000000000000000000..726319166e6ba58e1d6b905b90a5fd029a6c25d1 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198909.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198910.nc b/inst/extdata/obs/monthly_mean/tos/tos_198910.nc new file mode 100644 index 0000000000000000000000000000000000000000..37c8f2be634449f786d644d0ed722896d9258cbf Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198910.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198911.nc b/inst/extdata/obs/monthly_mean/tos/tos_198911.nc new file mode 100644 index 0000000000000000000000000000000000000000..bc830d23b9d23c9aba2a35dd13fdda2554fcb8c0 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198911.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_198912.nc b/inst/extdata/obs/monthly_mean/tos/tos_198912.nc new file mode 100644 index 0000000000000000000000000000000000000000..0d976ff9072ffef4e3f37ac19299ac1007289f03 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_198912.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199001.nc b/inst/extdata/obs/monthly_mean/tos/tos_199001.nc new file mode 100644 index 0000000000000000000000000000000000000000..3de3c0e1e0a0dd9bd6720a1efec53f794a41036f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199001.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199002.nc b/inst/extdata/obs/monthly_mean/tos/tos_199002.nc new file mode 100644 index 0000000000000000000000000000000000000000..67e83255caddeef2c91a35d493c88f10e3208a1a Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199002.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199003.nc b/inst/extdata/obs/monthly_mean/tos/tos_199003.nc new file mode 100644 index 0000000000000000000000000000000000000000..c7aceb1386659e99fd3401f1cc4f71d46de02c4e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199003.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199004.nc b/inst/extdata/obs/monthly_mean/tos/tos_199004.nc new file mode 100644 index 0000000000000000000000000000000000000000..9478dcc90050ee38325457acd6d1dac8f2a18e79 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199004.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199005.nc b/inst/extdata/obs/monthly_mean/tos/tos_199005.nc new file mode 100644 index 0000000000000000000000000000000000000000..9c6a14f2f16910882faa02373445e609802cb995 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199005.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199006.nc b/inst/extdata/obs/monthly_mean/tos/tos_199006.nc new file mode 100644 index 0000000000000000000000000000000000000000..9845437870880dde73a334f82c73835bce56c491 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199006.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199007.nc b/inst/extdata/obs/monthly_mean/tos/tos_199007.nc new file mode 100644 index 0000000000000000000000000000000000000000..b9b4e28fca8708eaaa07fce76b124fcecc73cfc1 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199007.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_199008.nc b/inst/extdata/obs/monthly_mean/tos/tos_199008.nc new file mode 100644 index 0000000000000000000000000000000000000000..c6c51bce676895d4be49ab56aa3c3aa073347fe8 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199008.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199009.nc b/inst/extdata/obs/monthly_mean/tos/tos_199009.nc new file mode 100644 index 0000000000000000000000000000000000000000..3876010a8465b22a2122cb20ebfef0f97d5b9eb1 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199009.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199010.nc b/inst/extdata/obs/monthly_mean/tos/tos_199010.nc new file mode 100644 index 0000000000000000000000000000000000000000..7afecc1b06a2ac1d4fe075613238f3d12bda6648 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199010.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199011.nc b/inst/extdata/obs/monthly_mean/tos/tos_199011.nc new file mode 100644 index 0000000000000000000000000000000000000000..3b8a1e2904cd48984afe35172435f186f9cb5e5f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199011.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199012.nc b/inst/extdata/obs/monthly_mean/tos/tos_199012.nc new file mode 100644 index 0000000000000000000000000000000000000000..713991f30b92689ee501632d62b0f35fcb76dc24 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199012.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199101.nc b/inst/extdata/obs/monthly_mean/tos/tos_199101.nc new file mode 100644 index 0000000000000000000000000000000000000000..baab0d4e3b2377332f202f8c27d31b2065aecdab Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199101.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199102.nc b/inst/extdata/obs/monthly_mean/tos/tos_199102.nc new file mode 100644 index 0000000000000000000000000000000000000000..531db5629ce8113d21cffd930713012f60ebb0cc Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199102.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199103.nc b/inst/extdata/obs/monthly_mean/tos/tos_199103.nc new file mode 100644 index 0000000000000000000000000000000000000000..20ca5a54f4e3b5eab5a9fb9eb37f2c0df8140f8d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199103.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199104.nc b/inst/extdata/obs/monthly_mean/tos/tos_199104.nc new file mode 100644 index 0000000000000000000000000000000000000000..62fc8584e18e29b458262ce15695bad0d6cbcf3e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199104.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199105.nc b/inst/extdata/obs/monthly_mean/tos/tos_199105.nc new file mode 100644 index 0000000000000000000000000000000000000000..bf19f6f11cfff03601a0f07976a3ee8e4db41038 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199105.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199106.nc b/inst/extdata/obs/monthly_mean/tos/tos_199106.nc new file mode 100644 index 0000000000000000000000000000000000000000..3673714620de75a9c6d17ecceb6bdfe73ee24393 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199106.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199107.nc b/inst/extdata/obs/monthly_mean/tos/tos_199107.nc new file mode 100644 index 
0000000000000000000000000000000000000000..5c75d94cd3be48ddded1ddf30c9996e271baf9ad Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199107.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199108.nc b/inst/extdata/obs/monthly_mean/tos/tos_199108.nc new file mode 100644 index 0000000000000000000000000000000000000000..75579d134a854c2f29a96949ed3a602e6421d23c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199108.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199109.nc b/inst/extdata/obs/monthly_mean/tos/tos_199109.nc new file mode 100644 index 0000000000000000000000000000000000000000..7dfdab5097981a8f775d0041ae9248a2745a6525 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199109.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199110.nc b/inst/extdata/obs/monthly_mean/tos/tos_199110.nc new file mode 100644 index 0000000000000000000000000000000000000000..c252045cbf91864dfa17c4a9dece2a450b8ef726 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199110.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199111.nc b/inst/extdata/obs/monthly_mean/tos/tos_199111.nc new file mode 100644 index 0000000000000000000000000000000000000000..0a793ab022820c72f6247480d784ae7769602917 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199111.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199112.nc b/inst/extdata/obs/monthly_mean/tos/tos_199112.nc new file mode 100644 index 0000000000000000000000000000000000000000..cc561d40324b436e76c9afe5b269eca203b12692 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199112.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199201.nc b/inst/extdata/obs/monthly_mean/tos/tos_199201.nc new file mode 100644 index 0000000000000000000000000000000000000000..862670b5bf4032897471b6e80f23f0c5eb574c83 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199201.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199202.nc b/inst/extdata/obs/monthly_mean/tos/tos_199202.nc new file mode 100644 index 0000000000000000000000000000000000000000..4e8e696cc6d73b473f0c7305089ca087efaaa5f0 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199202.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199203.nc b/inst/extdata/obs/monthly_mean/tos/tos_199203.nc new file mode 100644 index 0000000000000000000000000000000000000000..660c648cf0a266765ca55f54bddf93580736ff9c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199203.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199204.nc b/inst/extdata/obs/monthly_mean/tos/tos_199204.nc new file mode 100644 index 0000000000000000000000000000000000000000..163125ca72d1db522d17ea1a7b44d90ed2c1f4f2 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199204.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199205.nc b/inst/extdata/obs/monthly_mean/tos/tos_199205.nc new file mode 100644 index 0000000000000000000000000000000000000000..c2f0b7484727707f2d5af55c303b863147518e00 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199205.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199206.nc b/inst/extdata/obs/monthly_mean/tos/tos_199206.nc new file mode 100644 index 0000000000000000000000000000000000000000..36bc94b327a8913d70f6e199ad5c6672c9cd5d9e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199206.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_199207.nc b/inst/extdata/obs/monthly_mean/tos/tos_199207.nc new file mode 100644 index 0000000000000000000000000000000000000000..52775d687f1858012fb8ad3213ac4be611c44237 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199207.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199208.nc b/inst/extdata/obs/monthly_mean/tos/tos_199208.nc new file mode 100644 index 0000000000000000000000000000000000000000..e171509cafda23324a3a351ae9d28132444b5453 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199208.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199209.nc b/inst/extdata/obs/monthly_mean/tos/tos_199209.nc new file mode 100644 index 0000000000000000000000000000000000000000..fab6f90bccd8b6fdd1686e05ec695caa7e6a4cb0 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199209.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199210.nc b/inst/extdata/obs/monthly_mean/tos/tos_199210.nc new file mode 100644 index 0000000000000000000000000000000000000000..430d17adb31979f44e753e1a1d5f24460d67c9d3 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199210.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199211.nc b/inst/extdata/obs/monthly_mean/tos/tos_199211.nc new file mode 100644 index 0000000000000000000000000000000000000000..abb419ff004dc5e54d80f9cec91d69724d3dade8 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199211.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199212.nc b/inst/extdata/obs/monthly_mean/tos/tos_199212.nc new file mode 100644 index 0000000000000000000000000000000000000000..ef63154cbab4c96fab80dc3d1a4246a1a1baaa2f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199212.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199301.nc b/inst/extdata/obs/monthly_mean/tos/tos_199301.nc new file mode 100644 index 0000000000000000000000000000000000000000..f626e7757f10585e95eb1bcd12e319559226fe0e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199301.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199302.nc b/inst/extdata/obs/monthly_mean/tos/tos_199302.nc new file mode 100644 index 0000000000000000000000000000000000000000..0d1ec9a79178355c3f1a31ce32d236afc027eef3 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199302.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199303.nc b/inst/extdata/obs/monthly_mean/tos/tos_199303.nc new file mode 100644 index 0000000000000000000000000000000000000000..9219ec2dc59ae7a69b5f4aba0a9d27166a23c23e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199303.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199304.nc b/inst/extdata/obs/monthly_mean/tos/tos_199304.nc new file mode 100644 index 0000000000000000000000000000000000000000..badd5d25b1878efef1ec56af2f6e5b1601296c66 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199304.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199305.nc b/inst/extdata/obs/monthly_mean/tos/tos_199305.nc new file mode 100644 index 0000000000000000000000000000000000000000..705a116ed88b32fd00a8ee78c782069f2e13b20b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199305.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199306.nc b/inst/extdata/obs/monthly_mean/tos/tos_199306.nc new file mode 100644 index 
0000000000000000000000000000000000000000..a0da9f884f770d4eb7b5fedbac9f937cdd5c64b5 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199306.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199307.nc b/inst/extdata/obs/monthly_mean/tos/tos_199307.nc new file mode 100644 index 0000000000000000000000000000000000000000..ccb8bfd38cf5e15dd35051e57877d73d003508c2 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199307.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199308.nc b/inst/extdata/obs/monthly_mean/tos/tos_199308.nc new file mode 100644 index 0000000000000000000000000000000000000000..7fb6d7659879b5439f72bf037830a6c9f1270f47 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199308.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199309.nc b/inst/extdata/obs/monthly_mean/tos/tos_199309.nc new file mode 100644 index 0000000000000000000000000000000000000000..29cfbd04f3aacd4df13deaa26ed417981845ab5d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199309.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199310.nc b/inst/extdata/obs/monthly_mean/tos/tos_199310.nc new file mode 100644 index 0000000000000000000000000000000000000000..276cae67d9684e0c9ca116ff92b73701bc48d697 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199310.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199311.nc b/inst/extdata/obs/monthly_mean/tos/tos_199311.nc new file mode 100644 index 0000000000000000000000000000000000000000..e09dca1f9d226fa461ea6b9214b4d6a046290e5a Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199311.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199312.nc b/inst/extdata/obs/monthly_mean/tos/tos_199312.nc new file mode 100644 index 0000000000000000000000000000000000000000..7971119407327e5e03a3aa27a6c3b92d4e71bd46 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199312.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199401.nc b/inst/extdata/obs/monthly_mean/tos/tos_199401.nc new file mode 100644 index 0000000000000000000000000000000000000000..13a8f0f8f1c44a140067ffb57d1df6a33fc10fd4 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199401.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199402.nc b/inst/extdata/obs/monthly_mean/tos/tos_199402.nc new file mode 100644 index 0000000000000000000000000000000000000000..df97759534c7aef56e46851f0fe9b2aedbae616f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199402.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199403.nc b/inst/extdata/obs/monthly_mean/tos/tos_199403.nc new file mode 100644 index 0000000000000000000000000000000000000000..663d1db00c2eff717eef425d6a2af5ec93d8121c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199403.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199404.nc b/inst/extdata/obs/monthly_mean/tos/tos_199404.nc new file mode 100644 index 0000000000000000000000000000000000000000..43b1efad45f925ba1308398fb05940f24d7a650e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199404.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199405.nc b/inst/extdata/obs/monthly_mean/tos/tos_199405.nc new file mode 100644 index 0000000000000000000000000000000000000000..6d67749c277d63a3a2b9c3dc0bb6b65fbbc4bbf6 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199405.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_199406.nc b/inst/extdata/obs/monthly_mean/tos/tos_199406.nc new file mode 100644 index 0000000000000000000000000000000000000000..7403604ba89425bfa2b662206921879b9ebcd3bc Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199406.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199407.nc b/inst/extdata/obs/monthly_mean/tos/tos_199407.nc new file mode 100644 index 0000000000000000000000000000000000000000..c1f930e9da32a49b926fd675d40d3c5cbed467a4 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199407.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199408.nc b/inst/extdata/obs/monthly_mean/tos/tos_199408.nc new file mode 100644 index 0000000000000000000000000000000000000000..556cbb5087340d7459f5f1f0264ce64f5907b8e4 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199408.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199409.nc b/inst/extdata/obs/monthly_mean/tos/tos_199409.nc new file mode 100644 index 0000000000000000000000000000000000000000..2138f105ed5bc5b7b785f1d200e0421fb3e0c67b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199409.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199410.nc b/inst/extdata/obs/monthly_mean/tos/tos_199410.nc new file mode 100644 index 0000000000000000000000000000000000000000..dd05f395983011be0ac3b2cd908de9c2de342390 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199410.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199411.nc b/inst/extdata/obs/monthly_mean/tos/tos_199411.nc new file mode 100644 index 0000000000000000000000000000000000000000..fec2f500965ff1feae08240f584ec100bed22a27 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199411.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199412.nc b/inst/extdata/obs/monthly_mean/tos/tos_199412.nc new file mode 100644 index 0000000000000000000000000000000000000000..96b8d65b77966891ef5066dc724cd67e6eb9c611 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199412.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199501.nc b/inst/extdata/obs/monthly_mean/tos/tos_199501.nc new file mode 100644 index 0000000000000000000000000000000000000000..1df698f356bb97e046322e82f10c6d8c0763c40c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199501.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199502.nc b/inst/extdata/obs/monthly_mean/tos/tos_199502.nc new file mode 100644 index 0000000000000000000000000000000000000000..f1a3ffee97fe7102d55837f5552f2d4f8310b085 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199502.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199503.nc b/inst/extdata/obs/monthly_mean/tos/tos_199503.nc new file mode 100644 index 0000000000000000000000000000000000000000..af092db4eb97c58e7b6ef61c7707668f51d722f6 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199503.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199504.nc b/inst/extdata/obs/monthly_mean/tos/tos_199504.nc new file mode 100644 index 0000000000000000000000000000000000000000..59d6698bfe5e98026440f4e64a46199720e02f1e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199504.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199505.nc b/inst/extdata/obs/monthly_mean/tos/tos_199505.nc new file mode 100644 index 
0000000000000000000000000000000000000000..e6715046dda95f9b8bedf7172d2dc4d791f3becd Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199505.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199506.nc b/inst/extdata/obs/monthly_mean/tos/tos_199506.nc new file mode 100644 index 0000000000000000000000000000000000000000..3db86fea49924a69ca4e45805146c264992fb9ab Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199506.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199507.nc b/inst/extdata/obs/monthly_mean/tos/tos_199507.nc new file mode 100644 index 0000000000000000000000000000000000000000..2c6858670c71a14a62dd3c807506fe27421b8673 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199507.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199508.nc b/inst/extdata/obs/monthly_mean/tos/tos_199508.nc new file mode 100644 index 0000000000000000000000000000000000000000..cbec13158a824025fc883b6d8027a3338f26a783 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199508.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199509.nc b/inst/extdata/obs/monthly_mean/tos/tos_199509.nc new file mode 100644 index 0000000000000000000000000000000000000000..b4061e9023d0a8fdce05a0e2156d23290a5b81b1 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199509.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199510.nc b/inst/extdata/obs/monthly_mean/tos/tos_199510.nc new file mode 100644 index 0000000000000000000000000000000000000000..2092f6b576a7c8875bb9d21441b1c3e9da2e3d7b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199510.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199511.nc b/inst/extdata/obs/monthly_mean/tos/tos_199511.nc new file mode 100644 index 0000000000000000000000000000000000000000..dcd524a91c9c100356234e877a71c68133051386 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199511.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199512.nc b/inst/extdata/obs/monthly_mean/tos/tos_199512.nc new file mode 100644 index 0000000000000000000000000000000000000000..9eba32163611155934896387b7224468e6745c6c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199512.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199601.nc b/inst/extdata/obs/monthly_mean/tos/tos_199601.nc new file mode 100644 index 0000000000000000000000000000000000000000..612124d3bd6b3fb053a45b4a22b357c22bb3a458 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199601.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199602.nc b/inst/extdata/obs/monthly_mean/tos/tos_199602.nc new file mode 100644 index 0000000000000000000000000000000000000000..74ef862e1ee62a9539364bbb0da2c121b0ff2941 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199602.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199603.nc b/inst/extdata/obs/monthly_mean/tos/tos_199603.nc new file mode 100644 index 0000000000000000000000000000000000000000..3add04541705c40f13c8224d8aecf29590608c7d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199603.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199604.nc b/inst/extdata/obs/monthly_mean/tos/tos_199604.nc new file mode 100644 index 0000000000000000000000000000000000000000..4e167e86833d23ee115e4fe2f102ae180ed90bc9 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199604.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_199605.nc b/inst/extdata/obs/monthly_mean/tos/tos_199605.nc new file mode 100644 index 0000000000000000000000000000000000000000..ab7cee08e887439ff5535deeb9869c2c96a9d81f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199605.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199606.nc b/inst/extdata/obs/monthly_mean/tos/tos_199606.nc new file mode 100644 index 0000000000000000000000000000000000000000..cf8b1a0509439dd885c60839ae3f009ac3220668 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199606.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199607.nc b/inst/extdata/obs/monthly_mean/tos/tos_199607.nc new file mode 100644 index 0000000000000000000000000000000000000000..147823e1d95afa4ad2d2fefcb85ffe3e0e954a28 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199607.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199608.nc b/inst/extdata/obs/monthly_mean/tos/tos_199608.nc new file mode 100644 index 0000000000000000000000000000000000000000..2ece9ccec87b42278b193cafe64ed93df318ae8e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199608.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199609.nc b/inst/extdata/obs/monthly_mean/tos/tos_199609.nc new file mode 100644 index 0000000000000000000000000000000000000000..bff945e152ecc82fed9c01e1be42ea4918039633 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199609.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199610.nc b/inst/extdata/obs/monthly_mean/tos/tos_199610.nc new file mode 100644 index 0000000000000000000000000000000000000000..eb04a3d39e81db03186090f5df8beb2db448275a Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199610.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199611.nc b/inst/extdata/obs/monthly_mean/tos/tos_199611.nc new file mode 100644 index 0000000000000000000000000000000000000000..d93e1108f1257208f961ab77a88bf269d22a24f5 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199611.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199612.nc b/inst/extdata/obs/monthly_mean/tos/tos_199612.nc new file mode 100644 index 0000000000000000000000000000000000000000..9f7e29810564479c0149e65279adc7cf8b2bb637 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199612.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199701.nc b/inst/extdata/obs/monthly_mean/tos/tos_199701.nc new file mode 100644 index 0000000000000000000000000000000000000000..73019e3a0096c10afe16a45f6a5d71b8c11fc3f7 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199701.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199702.nc b/inst/extdata/obs/monthly_mean/tos/tos_199702.nc new file mode 100644 index 0000000000000000000000000000000000000000..12ffb9d33cdc46ffabd8be7d35cbf5d413b4a91f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199702.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199703.nc b/inst/extdata/obs/monthly_mean/tos/tos_199703.nc new file mode 100644 index 0000000000000000000000000000000000000000..00e18accc10dbddf2ba4a950a96cf18e4087591d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199703.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199704.nc b/inst/extdata/obs/monthly_mean/tos/tos_199704.nc new file mode 100644 index 
0000000000000000000000000000000000000000..81ddba985f9db8e0796e0f5bd5ca8326294f6b95 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199704.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199705.nc b/inst/extdata/obs/monthly_mean/tos/tos_199705.nc new file mode 100644 index 0000000000000000000000000000000000000000..60eb4ac01c99bfcf2d0f7c90a7573f6d91bf759a Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199705.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199706.nc b/inst/extdata/obs/monthly_mean/tos/tos_199706.nc new file mode 100644 index 0000000000000000000000000000000000000000..7b3ea3119a3a3f4c43eb08846784eabfb49b68e7 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199706.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199707.nc b/inst/extdata/obs/monthly_mean/tos/tos_199707.nc new file mode 100644 index 0000000000000000000000000000000000000000..ef972a3b521439f024646263bbd9cc6d475a15b6 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199707.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199708.nc b/inst/extdata/obs/monthly_mean/tos/tos_199708.nc new file mode 100644 index 0000000000000000000000000000000000000000..3b0069bf8b6cbe2b2b8aaf8b3258cbee2a724e5f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199708.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199709.nc b/inst/extdata/obs/monthly_mean/tos/tos_199709.nc new file mode 100644 index 0000000000000000000000000000000000000000..e41df3ae7f50abd2bd2c5c01287a44fe6da5b877 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199709.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199710.nc b/inst/extdata/obs/monthly_mean/tos/tos_199710.nc new file mode 100644 index 0000000000000000000000000000000000000000..2201735309e44f4f646c30e695ecc632424e45cf Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199710.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199711.nc b/inst/extdata/obs/monthly_mean/tos/tos_199711.nc new file mode 100644 index 0000000000000000000000000000000000000000..52e1c7e74ff68701c6ceb6fff4b4191085d921d4 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199711.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199712.nc b/inst/extdata/obs/monthly_mean/tos/tos_199712.nc new file mode 100644 index 0000000000000000000000000000000000000000..681b5adcb140d6d6ff34e2ddd22c42d0998c277f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199712.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199801.nc b/inst/extdata/obs/monthly_mean/tos/tos_199801.nc new file mode 100644 index 0000000000000000000000000000000000000000..f25abd59f417e45487f0c1fab830178a514ba946 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199801.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199802.nc b/inst/extdata/obs/monthly_mean/tos/tos_199802.nc new file mode 100644 index 0000000000000000000000000000000000000000..b6b9a331299710879605902c02303a0ba2dda34e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199802.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199803.nc b/inst/extdata/obs/monthly_mean/tos/tos_199803.nc new file mode 100644 index 0000000000000000000000000000000000000000..ef5515f5c258cceeeea1a18ac2c8ac8dd040ffb8 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199803.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_199804.nc b/inst/extdata/obs/monthly_mean/tos/tos_199804.nc new file mode 100644 index 0000000000000000000000000000000000000000..2c3c42e0aa28f188eb340ba72cabb8c20e1d3a53 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199804.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199805.nc b/inst/extdata/obs/monthly_mean/tos/tos_199805.nc new file mode 100644 index 0000000000000000000000000000000000000000..ac8846973093f358b93d41afa0eb5e1e2099ed39 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199805.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199806.nc b/inst/extdata/obs/monthly_mean/tos/tos_199806.nc new file mode 100644 index 0000000000000000000000000000000000000000..2cede3d18227573808ecc237eeb792ee3a36414a Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199806.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199807.nc b/inst/extdata/obs/monthly_mean/tos/tos_199807.nc new file mode 100644 index 0000000000000000000000000000000000000000..b515fee87de6be1e9926a92126e93aa2b8330752 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199807.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199808.nc b/inst/extdata/obs/monthly_mean/tos/tos_199808.nc new file mode 100644 index 0000000000000000000000000000000000000000..ffaadc6a668dfa2f427db849a188b4339d12865d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199808.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199809.nc b/inst/extdata/obs/monthly_mean/tos/tos_199809.nc new file mode 100644 index 0000000000000000000000000000000000000000..cb0d7c474960dc9c715b8c3ccd9d63be27cf9dcc Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199809.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199810.nc b/inst/extdata/obs/monthly_mean/tos/tos_199810.nc new file mode 100644 index 0000000000000000000000000000000000000000..07a9276c4dcfb5a8b3c65ab799be618f384f3145 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199810.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199811.nc b/inst/extdata/obs/monthly_mean/tos/tos_199811.nc new file mode 100644 index 0000000000000000000000000000000000000000..fafb84d13c5a71d37b88562ade7a3bf7bd9a3cb5 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199811.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199812.nc b/inst/extdata/obs/monthly_mean/tos/tos_199812.nc new file mode 100644 index 0000000000000000000000000000000000000000..5aff386a57dbb1aefa21b63ccc060e167acfdc62 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199812.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199901.nc b/inst/extdata/obs/monthly_mean/tos/tos_199901.nc new file mode 100644 index 0000000000000000000000000000000000000000..aaa526296a8961d25cd1a22094511a958fce1bc4 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199901.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199902.nc b/inst/extdata/obs/monthly_mean/tos/tos_199902.nc new file mode 100644 index 0000000000000000000000000000000000000000..f43ca1b856658aae3670bfe3981f9d8b8f79e18b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199902.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199903.nc b/inst/extdata/obs/monthly_mean/tos/tos_199903.nc new file mode 100644 index 
0000000000000000000000000000000000000000..ee2b83b7f1d8e379a520713bbf9f04ac0a570253 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199903.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199904.nc b/inst/extdata/obs/monthly_mean/tos/tos_199904.nc new file mode 100644 index 0000000000000000000000000000000000000000..85daf9ab4e182e26dbf1b2fccd6c3d61b8f2c7b8 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199904.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199905.nc b/inst/extdata/obs/monthly_mean/tos/tos_199905.nc new file mode 100644 index 0000000000000000000000000000000000000000..4d189711c4600033df9d7bc84225234513542ec3 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199905.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199906.nc b/inst/extdata/obs/monthly_mean/tos/tos_199906.nc new file mode 100644 index 0000000000000000000000000000000000000000..b446928cbca3e572b72586794c97f053482aef57 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199906.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199907.nc b/inst/extdata/obs/monthly_mean/tos/tos_199907.nc new file mode 100644 index 0000000000000000000000000000000000000000..eac0b8d9ba142051068dd83f8dda7baf8c327a2d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199907.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199908.nc b/inst/extdata/obs/monthly_mean/tos/tos_199908.nc new file mode 100644 index 0000000000000000000000000000000000000000..ee1502a4c73aa5371cab52fe942c5d2a5bfdeddd Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199908.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199909.nc b/inst/extdata/obs/monthly_mean/tos/tos_199909.nc new file mode 100644 index 0000000000000000000000000000000000000000..f11aeeeaeaa22ace6f69e9a017119c16d7c68fad Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199909.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199910.nc b/inst/extdata/obs/monthly_mean/tos/tos_199910.nc new file mode 100644 index 0000000000000000000000000000000000000000..ea6d31726b0fed2a06139cf85b65f2dd0d33187c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199910.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199911.nc b/inst/extdata/obs/monthly_mean/tos/tos_199911.nc new file mode 100644 index 0000000000000000000000000000000000000000..c8e5287c14f288cd6f52b49e18d0c01497dccebb Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199911.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_199912.nc b/inst/extdata/obs/monthly_mean/tos/tos_199912.nc new file mode 100644 index 0000000000000000000000000000000000000000..78b56020e90717f9dd2ac16bbc4d6a265f3d6ada Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_199912.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200001.nc b/inst/extdata/obs/monthly_mean/tos/tos_200001.nc new file mode 100644 index 0000000000000000000000000000000000000000..d2acb942be711e25edc0908134a28bb75f3b3ad7 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200001.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200002.nc b/inst/extdata/obs/monthly_mean/tos/tos_200002.nc new file mode 100644 index 0000000000000000000000000000000000000000..33d019b269dc75e8babb42e3f5877ea1e29989e2 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200002.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_200003.nc b/inst/extdata/obs/monthly_mean/tos/tos_200003.nc new file mode 100644 index 0000000000000000000000000000000000000000..3564cb7bb9e16658fb2cf16ee0b51ed0cf022ef6 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200003.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200004.nc b/inst/extdata/obs/monthly_mean/tos/tos_200004.nc new file mode 100644 index 0000000000000000000000000000000000000000..e394f2efc6de18bec28f10386fb94b8a6c706902 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200004.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200005.nc b/inst/extdata/obs/monthly_mean/tos/tos_200005.nc new file mode 100644 index 0000000000000000000000000000000000000000..dd2571d314084edfd4531ddbfa824f14a71a40d6 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200005.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200006.nc b/inst/extdata/obs/monthly_mean/tos/tos_200006.nc new file mode 100644 index 0000000000000000000000000000000000000000..48b108459302cd9ef1ccda2bc9602bdebbb04ab1 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200006.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200007.nc b/inst/extdata/obs/monthly_mean/tos/tos_200007.nc new file mode 100644 index 0000000000000000000000000000000000000000..2cfd2e96b9c0532b19ff587e34c1a250df427c07 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200007.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200008.nc b/inst/extdata/obs/monthly_mean/tos/tos_200008.nc new file mode 100644 index 0000000000000000000000000000000000000000..322c7383871d58951b7630595917436a2306a0e6 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200008.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200009.nc b/inst/extdata/obs/monthly_mean/tos/tos_200009.nc new file mode 100644 index 0000000000000000000000000000000000000000..0236c68f2e8d7c22eda60544c6f9a19c1f02de57 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200009.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200010.nc b/inst/extdata/obs/monthly_mean/tos/tos_200010.nc new file mode 100644 index 0000000000000000000000000000000000000000..197bdf5e332b949c685202da1bd34b61c9b59d55 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200010.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200011.nc b/inst/extdata/obs/monthly_mean/tos/tos_200011.nc new file mode 100644 index 0000000000000000000000000000000000000000..bc91e1e1af4cab3abd3a7d3c09be76c12ca133d8 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200011.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200012.nc b/inst/extdata/obs/monthly_mean/tos/tos_200012.nc new file mode 100644 index 0000000000000000000000000000000000000000..142fa338035979a083b301771aa6784ca9afe558 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200012.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200101.nc b/inst/extdata/obs/monthly_mean/tos/tos_200101.nc new file mode 100644 index 0000000000000000000000000000000000000000..dcfba0c515edf5dddc7fc4c9c144797ba919bc03 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200101.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200102.nc b/inst/extdata/obs/monthly_mean/tos/tos_200102.nc new file mode 100644 index 
0000000000000000000000000000000000000000..9e4963bda38c33223caa994fc2e2b35ddec6fcc6 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200102.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200103.nc b/inst/extdata/obs/monthly_mean/tos/tos_200103.nc new file mode 100644 index 0000000000000000000000000000000000000000..a973bec2e145d399efdaba4ad972bcf8ea00b866 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200103.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200104.nc b/inst/extdata/obs/monthly_mean/tos/tos_200104.nc new file mode 100644 index 0000000000000000000000000000000000000000..7c81cea66d445e0d33cef0c5707aec356f079016 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200104.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200105.nc b/inst/extdata/obs/monthly_mean/tos/tos_200105.nc new file mode 100644 index 0000000000000000000000000000000000000000..dd6c5418bd7d74c634cd73376b82064a2bf7cbdf Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200105.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200106.nc b/inst/extdata/obs/monthly_mean/tos/tos_200106.nc new file mode 100644 index 0000000000000000000000000000000000000000..bdfffc2c56eeeba5dcfad45bddf2b9497ee25feb Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200106.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200107.nc b/inst/extdata/obs/monthly_mean/tos/tos_200107.nc new file mode 100644 index 0000000000000000000000000000000000000000..4146a3460e0d758c0a5dd02bee13c54ee09731eb Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200107.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200108.nc b/inst/extdata/obs/monthly_mean/tos/tos_200108.nc new file mode 100644 index 0000000000000000000000000000000000000000..fee622d6f63d5a8ed4581c150691d493fd4a152e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200108.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200109.nc b/inst/extdata/obs/monthly_mean/tos/tos_200109.nc new file mode 100644 index 0000000000000000000000000000000000000000..aab27ba0ef059eb21b58ed5ddce933c411327d35 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200109.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200110.nc b/inst/extdata/obs/monthly_mean/tos/tos_200110.nc new file mode 100644 index 0000000000000000000000000000000000000000..297d086266f71e483c2c9892dde9706e1754b80d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200110.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200111.nc b/inst/extdata/obs/monthly_mean/tos/tos_200111.nc new file mode 100644 index 0000000000000000000000000000000000000000..af8d100d9a323b1a9fc3cf6d5676304be1d92b3c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200111.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200112.nc b/inst/extdata/obs/monthly_mean/tos/tos_200112.nc new file mode 100644 index 0000000000000000000000000000000000000000..93ad6208f8a90ebee002f6f0aaf3a30dfc48d21d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200112.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200201.nc b/inst/extdata/obs/monthly_mean/tos/tos_200201.nc new file mode 100644 index 0000000000000000000000000000000000000000..070be2057a269faa43f968428a995be1f7df7674 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200201.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_200202.nc b/inst/extdata/obs/monthly_mean/tos/tos_200202.nc new file mode 100644 index 0000000000000000000000000000000000000000..ec19f5101e50727f08e7e4af2bebcafb1fed3bad Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200202.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200203.nc b/inst/extdata/obs/monthly_mean/tos/tos_200203.nc new file mode 100644 index 0000000000000000000000000000000000000000..d3f4b81a2583a2916fcac50e5e7737c83bcb8a90 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200203.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200204.nc b/inst/extdata/obs/monthly_mean/tos/tos_200204.nc new file mode 100644 index 0000000000000000000000000000000000000000..74b188832d54231765a3ea35d89a7532ba21813c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200204.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200205.nc b/inst/extdata/obs/monthly_mean/tos/tos_200205.nc new file mode 100644 index 0000000000000000000000000000000000000000..d5de66c4f563af7031d1cb246fb3c18340cb94d2 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200205.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200206.nc b/inst/extdata/obs/monthly_mean/tos/tos_200206.nc new file mode 100644 index 0000000000000000000000000000000000000000..fe941f0e5d74094f07679ee5099573e2e235357e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200206.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200207.nc b/inst/extdata/obs/monthly_mean/tos/tos_200207.nc new file mode 100644 index 0000000000000000000000000000000000000000..dfa814d2bcc659d7670d3731cd506891e0800d14 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200207.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200208.nc b/inst/extdata/obs/monthly_mean/tos/tos_200208.nc new file mode 100644 index 0000000000000000000000000000000000000000..fc9762f27da5d8a5fa0ec1e8c5f158f50cf5d0d0 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200208.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200209.nc b/inst/extdata/obs/monthly_mean/tos/tos_200209.nc new file mode 100644 index 0000000000000000000000000000000000000000..498b7dfb24087a8c6779f58f6bb0a8b62b008a85 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200209.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200210.nc b/inst/extdata/obs/monthly_mean/tos/tos_200210.nc new file mode 100644 index 0000000000000000000000000000000000000000..726ee66d1dc689f79e1fba87d7de58deafd3eb04 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200210.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200211.nc b/inst/extdata/obs/monthly_mean/tos/tos_200211.nc new file mode 100644 index 0000000000000000000000000000000000000000..8ebaea5787c682657daf8010ef07901b8d9157ed Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200211.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200212.nc b/inst/extdata/obs/monthly_mean/tos/tos_200212.nc new file mode 100644 index 0000000000000000000000000000000000000000..e07be3bbc0dcc5bdf696fea07f1e7c985baa74aa Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200212.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200301.nc b/inst/extdata/obs/monthly_mean/tos/tos_200301.nc new file mode 100644 index 
0000000000000000000000000000000000000000..730df0ae3a9bd466c3ae933892c4854a207762f6 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200301.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200302.nc b/inst/extdata/obs/monthly_mean/tos/tos_200302.nc new file mode 100644 index 0000000000000000000000000000000000000000..5d85213005e85302b0a42da035118e3f004f2d34 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200302.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200303.nc b/inst/extdata/obs/monthly_mean/tos/tos_200303.nc new file mode 100644 index 0000000000000000000000000000000000000000..52e116345f4deeaf41b2f98984e263dd68845d3d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200303.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200304.nc b/inst/extdata/obs/monthly_mean/tos/tos_200304.nc new file mode 100644 index 0000000000000000000000000000000000000000..0f120e6bd077819844a30f49187b63d6b0090911 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200304.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200305.nc b/inst/extdata/obs/monthly_mean/tos/tos_200305.nc new file mode 100644 index 0000000000000000000000000000000000000000..e176413c879892d6a448fd3e1216ee7bf7c98574 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200305.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200306.nc b/inst/extdata/obs/monthly_mean/tos/tos_200306.nc new file mode 100644 index 0000000000000000000000000000000000000000..21386d7242e8eb92cf4bc3ed3e6d4a1149417a98 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200306.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200307.nc b/inst/extdata/obs/monthly_mean/tos/tos_200307.nc new file mode 100644 index 0000000000000000000000000000000000000000..37afa6d2ce18378595f4164bf9024f2b8dbb845d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200307.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200308.nc b/inst/extdata/obs/monthly_mean/tos/tos_200308.nc new file mode 100644 index 0000000000000000000000000000000000000000..16aa75095044eea01c723083357ab9e923a43c69 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200308.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200309.nc b/inst/extdata/obs/monthly_mean/tos/tos_200309.nc new file mode 100644 index 0000000000000000000000000000000000000000..81a6d6e231cc6e51757e3c4a70d0e310db4871cf Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200309.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200310.nc b/inst/extdata/obs/monthly_mean/tos/tos_200310.nc new file mode 100644 index 0000000000000000000000000000000000000000..21517396e889508f617c63fcf43b69ab20f0e43e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200310.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200311.nc b/inst/extdata/obs/monthly_mean/tos/tos_200311.nc new file mode 100644 index 0000000000000000000000000000000000000000..a5559270db94c189d8966699a315a3287ac6f736 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200311.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200312.nc b/inst/extdata/obs/monthly_mean/tos/tos_200312.nc new file mode 100644 index 0000000000000000000000000000000000000000..1f8145ad9531b242a328baa3c238d5534575a235 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200312.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_200401.nc b/inst/extdata/obs/monthly_mean/tos/tos_200401.nc new file mode 100644 index 0000000000000000000000000000000000000000..f2f9acc272626f7e6fe0d15559ea855fd91574f1 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200401.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200402.nc b/inst/extdata/obs/monthly_mean/tos/tos_200402.nc new file mode 100644 index 0000000000000000000000000000000000000000..4395e11672f8b5268cd907dd7f110242994e4f8f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200402.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200403.nc b/inst/extdata/obs/monthly_mean/tos/tos_200403.nc new file mode 100644 index 0000000000000000000000000000000000000000..202a6a5a17e17a8172a6faac9d10a03da0209d27 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200403.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200404.nc b/inst/extdata/obs/monthly_mean/tos/tos_200404.nc new file mode 100644 index 0000000000000000000000000000000000000000..09ab557b3625a3feef45dd5eed5f20c8ff4d47c0 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200404.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200405.nc b/inst/extdata/obs/monthly_mean/tos/tos_200405.nc new file mode 100644 index 0000000000000000000000000000000000000000..7ac5268d6eb5b08ca8472e60a8020d39cd5ede91 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200405.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200406.nc b/inst/extdata/obs/monthly_mean/tos/tos_200406.nc new file mode 100644 index 0000000000000000000000000000000000000000..21c33dfa53214a85bba8547b8bc13978425a25d2 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200406.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200407.nc b/inst/extdata/obs/monthly_mean/tos/tos_200407.nc new file mode 100644 index 0000000000000000000000000000000000000000..2106a921e5202317e81e31e2e1823103e3c3bc85 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200407.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200408.nc b/inst/extdata/obs/monthly_mean/tos/tos_200408.nc new file mode 100644 index 0000000000000000000000000000000000000000..c91e9f7110dc8966ff2b349ac1b3789c4b270822 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200408.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200409.nc b/inst/extdata/obs/monthly_mean/tos/tos_200409.nc new file mode 100644 index 0000000000000000000000000000000000000000..ed6ad1ccee2f4bfdbd6320e108b184a789b74a18 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200409.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200410.nc b/inst/extdata/obs/monthly_mean/tos/tos_200410.nc new file mode 100644 index 0000000000000000000000000000000000000000..fc4b5bb0273f79d6191253e5a0e329f6a711a5d7 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200410.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200411.nc b/inst/extdata/obs/monthly_mean/tos/tos_200411.nc new file mode 100644 index 0000000000000000000000000000000000000000..1f8741ae7a2778d86497cb1edebf7caa4b7ba46c Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200411.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200412.nc b/inst/extdata/obs/monthly_mean/tos/tos_200412.nc new file mode 100644 index 
0000000000000000000000000000000000000000..5c45205754561bb0d0cc79dc9a620a99dce16588 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200412.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200501.nc b/inst/extdata/obs/monthly_mean/tos/tos_200501.nc new file mode 100644 index 0000000000000000000000000000000000000000..704dbdfd378c98a34a0560591cd7ddbac86936b2 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200501.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200502.nc b/inst/extdata/obs/monthly_mean/tos/tos_200502.nc new file mode 100644 index 0000000000000000000000000000000000000000..95e09ab6cc03282326c98b5c782cf807c4001662 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200502.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200503.nc b/inst/extdata/obs/monthly_mean/tos/tos_200503.nc new file mode 100644 index 0000000000000000000000000000000000000000..24c6ec5b12cf4e730c854388e9d46fd6ae5b0267 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200503.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200504.nc b/inst/extdata/obs/monthly_mean/tos/tos_200504.nc new file mode 100644 index 0000000000000000000000000000000000000000..326f497d27a7f0ea3aef5054bf0d83ddd6fe75b5 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200504.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200505.nc b/inst/extdata/obs/monthly_mean/tos/tos_200505.nc new file mode 100644 index 0000000000000000000000000000000000000000..a4dc200fb2e9e2ff1ba65d575795baac68019ea2 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200505.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200506.nc b/inst/extdata/obs/monthly_mean/tos/tos_200506.nc new file mode 100644 index 0000000000000000000000000000000000000000..7bd3a6e063d54ade3f36cf32e2e869e04ebb839b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200506.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200507.nc b/inst/extdata/obs/monthly_mean/tos/tos_200507.nc new file mode 100644 index 0000000000000000000000000000000000000000..209272c85ce8ade790b4cb80404aae3e96b14063 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200507.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200508.nc b/inst/extdata/obs/monthly_mean/tos/tos_200508.nc new file mode 100644 index 0000000000000000000000000000000000000000..2a587050bbc5082941217ac286f3d94783f9eb05 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200508.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200509.nc b/inst/extdata/obs/monthly_mean/tos/tos_200509.nc new file mode 100644 index 0000000000000000000000000000000000000000..68759f0e647ae2eceb9e2553e429a31431fce97b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200509.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200510.nc b/inst/extdata/obs/monthly_mean/tos/tos_200510.nc new file mode 100644 index 0000000000000000000000000000000000000000..031d923c711aaaa82410ae9f24fd4c837039ddbf Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200510.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200511.nc b/inst/extdata/obs/monthly_mean/tos/tos_200511.nc new file mode 100644 index 0000000000000000000000000000000000000000..31d3b2b9ff11feffa08bd6de058c9af6ff0eb131 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200511.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_200512.nc b/inst/extdata/obs/monthly_mean/tos/tos_200512.nc new file mode 100644 index 0000000000000000000000000000000000000000..9d6eeade8a598e2d4390ab042b6ba3317a0ccabe Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200512.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200601.nc b/inst/extdata/obs/monthly_mean/tos/tos_200601.nc new file mode 100644 index 0000000000000000000000000000000000000000..9c3b3d748c168f309b587b99d675ce9a273c0a2b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200601.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200602.nc b/inst/extdata/obs/monthly_mean/tos/tos_200602.nc new file mode 100644 index 0000000000000000000000000000000000000000..e251df8e1e660cf45b4062eccc698663c328ac6d Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200602.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200603.nc b/inst/extdata/obs/monthly_mean/tos/tos_200603.nc new file mode 100644 index 0000000000000000000000000000000000000000..0b0878ddaa55980ff326cdce9f57a6dc9ae21f56 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200603.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200604.nc b/inst/extdata/obs/monthly_mean/tos/tos_200604.nc new file mode 100644 index 0000000000000000000000000000000000000000..144e92022586eae5c99dae55e89d39c24e2674e7 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200604.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200605.nc b/inst/extdata/obs/monthly_mean/tos/tos_200605.nc new file mode 100644 index 0000000000000000000000000000000000000000..a476658f253ce3014b31201083934d0778512cac Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200605.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200606.nc b/inst/extdata/obs/monthly_mean/tos/tos_200606.nc new file mode 100644 index 0000000000000000000000000000000000000000..ce636b51b74052f6f0f112da283189701f133a30 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200606.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200607.nc b/inst/extdata/obs/monthly_mean/tos/tos_200607.nc new file mode 100644 index 0000000000000000000000000000000000000000..293a0dc77900dfba2dc80b9381240b0e4d462194 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200607.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200608.nc b/inst/extdata/obs/monthly_mean/tos/tos_200608.nc new file mode 100644 index 0000000000000000000000000000000000000000..a17bbc0da025fd8bae224ccbc2b4dfd39ae34c32 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200608.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200609.nc b/inst/extdata/obs/monthly_mean/tos/tos_200609.nc new file mode 100644 index 0000000000000000000000000000000000000000..ee3c8a918e5d4e550c25a6740c54bedd85761c34 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200609.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200610.nc b/inst/extdata/obs/monthly_mean/tos/tos_200610.nc new file mode 100644 index 0000000000000000000000000000000000000000..a4c51408b780512bda30945d11385894387a2e99 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200610.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200611.nc b/inst/extdata/obs/monthly_mean/tos/tos_200611.nc new file mode 100644 index 
0000000000000000000000000000000000000000..8e43c2f72a426dba265e72de810bf62c9c8b0871 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200611.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200612.nc b/inst/extdata/obs/monthly_mean/tos/tos_200612.nc new file mode 100644 index 0000000000000000000000000000000000000000..734558a384bf11a2f85a4176c8b77dec9321d7ee Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200612.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200701.nc b/inst/extdata/obs/monthly_mean/tos/tos_200701.nc new file mode 100644 index 0000000000000000000000000000000000000000..456ea858f73abeab15252f40c284750a1958042a Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200701.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200702.nc b/inst/extdata/obs/monthly_mean/tos/tos_200702.nc new file mode 100644 index 0000000000000000000000000000000000000000..94513d437a9f4927b81c0a06ce40906f7d188c00 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200702.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200703.nc b/inst/extdata/obs/monthly_mean/tos/tos_200703.nc new file mode 100644 index 0000000000000000000000000000000000000000..5d6bd30005e5b3a353cd14aeab7ef9360c0d864a Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200703.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200704.nc b/inst/extdata/obs/monthly_mean/tos/tos_200704.nc new file mode 100644 index 0000000000000000000000000000000000000000..e0afded2983c33abe4415c5a3ea487311ef1ce34 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200704.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200705.nc b/inst/extdata/obs/monthly_mean/tos/tos_200705.nc new file mode 100644 index 0000000000000000000000000000000000000000..181eac6c38a44d9d7413db27ee7e91aad871ca84 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200705.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200706.nc b/inst/extdata/obs/monthly_mean/tos/tos_200706.nc new file mode 100644 index 0000000000000000000000000000000000000000..08407f7dccee1ec081df84982954ea0ddd5f0c7b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200706.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200707.nc b/inst/extdata/obs/monthly_mean/tos/tos_200707.nc new file mode 100644 index 0000000000000000000000000000000000000000..431dfd35254dfada86f69d179b4c4c56651f62e3 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200707.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200708.nc b/inst/extdata/obs/monthly_mean/tos/tos_200708.nc new file mode 100644 index 0000000000000000000000000000000000000000..f1f21804df0ec2758f56dace909b87323bce9ccc Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200708.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200709.nc b/inst/extdata/obs/monthly_mean/tos/tos_200709.nc new file mode 100644 index 0000000000000000000000000000000000000000..b20d987d972fd76f7221d3517437c4788d4679c2 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200709.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200710.nc b/inst/extdata/obs/monthly_mean/tos/tos_200710.nc new file mode 100644 index 0000000000000000000000000000000000000000..cd6c40e921c3bf3ba221936bf6db762e03405f79 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200710.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_200711.nc b/inst/extdata/obs/monthly_mean/tos/tos_200711.nc new file mode 100644 index 0000000000000000000000000000000000000000..830dc21c333158c66d2143071b23c5e12d9fe454 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200711.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200712.nc b/inst/extdata/obs/monthly_mean/tos/tos_200712.nc new file mode 100644 index 0000000000000000000000000000000000000000..33ddb0da183a22c47fb431104f33f5b001db832b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200712.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200801.nc b/inst/extdata/obs/monthly_mean/tos/tos_200801.nc new file mode 100644 index 0000000000000000000000000000000000000000..06f5c41f69c233e56698ed37b98b91a4dcf5b4b5 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200801.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200802.nc b/inst/extdata/obs/monthly_mean/tos/tos_200802.nc new file mode 100644 index 0000000000000000000000000000000000000000..452d6bb4bf0278fbcf00d1b3ba20cfdf36ae4c38 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200802.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200803.nc b/inst/extdata/obs/monthly_mean/tos/tos_200803.nc new file mode 100644 index 0000000000000000000000000000000000000000..d4e51a236173047ee1bfe65a999ed46c5a4407d6 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200803.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200804.nc b/inst/extdata/obs/monthly_mean/tos/tos_200804.nc new file mode 100644 index 0000000000000000000000000000000000000000..ef8cce0db7f20e6f1b7eb825afd83047b2738cb1 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200804.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200805.nc b/inst/extdata/obs/monthly_mean/tos/tos_200805.nc new file mode 100644 index 0000000000000000000000000000000000000000..34a747468356b11c08fde5ee38dfe6ebb7a54908 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200805.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200806.nc b/inst/extdata/obs/monthly_mean/tos/tos_200806.nc new file mode 100644 index 0000000000000000000000000000000000000000..8947a8f188bacbe4f6554342adad0f42066d461b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200806.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200807.nc b/inst/extdata/obs/monthly_mean/tos/tos_200807.nc new file mode 100644 index 0000000000000000000000000000000000000000..aebe52c70de021ea76f69207a2ce9c5d3aa47445 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200807.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200808.nc b/inst/extdata/obs/monthly_mean/tos/tos_200808.nc new file mode 100644 index 0000000000000000000000000000000000000000..b7c20a6a8cab1d2702620cb0e55153b0686d1f6f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200808.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200809.nc b/inst/extdata/obs/monthly_mean/tos/tos_200809.nc new file mode 100644 index 0000000000000000000000000000000000000000..bbd013f8f7ebd015cea73fd999225d6e9aa2553b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200809.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200810.nc b/inst/extdata/obs/monthly_mean/tos/tos_200810.nc new file mode 100644 index 
0000000000000000000000000000000000000000..0b7e606f4011fa6d53ac752c64b26f61a04ee759 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200810.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200811.nc b/inst/extdata/obs/monthly_mean/tos/tos_200811.nc new file mode 100644 index 0000000000000000000000000000000000000000..db9783cdbe09aa11611f33ca44af84143a3fdb3f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200811.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200812.nc b/inst/extdata/obs/monthly_mean/tos/tos_200812.nc new file mode 100644 index 0000000000000000000000000000000000000000..0dbfec7a147f6e2bfb66348d5a8c71759e1c64f3 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200812.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200901.nc b/inst/extdata/obs/monthly_mean/tos/tos_200901.nc new file mode 100644 index 0000000000000000000000000000000000000000..45abfca2c62c6410604bef82673e41ab61a8bdcb Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200901.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200902.nc b/inst/extdata/obs/monthly_mean/tos/tos_200902.nc new file mode 100644 index 0000000000000000000000000000000000000000..6f37e162873fdeacc70c01a9b1c8171d4a7b64bd Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200902.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200903.nc b/inst/extdata/obs/monthly_mean/tos/tos_200903.nc new file mode 100644 index 0000000000000000000000000000000000000000..cea6b302495fa60a9484ab2cbb3639dcf8400a18 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200903.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200904.nc b/inst/extdata/obs/monthly_mean/tos/tos_200904.nc new file mode 100644 index 0000000000000000000000000000000000000000..c730802c68a8f92bb10e4fb0e6ae8b15f14a1a22 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200904.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200905.nc b/inst/extdata/obs/monthly_mean/tos/tos_200905.nc new file mode 100644 index 0000000000000000000000000000000000000000..fcc821b0a866ef7beb6b2d352f0659a00d5590ca Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200905.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200906.nc b/inst/extdata/obs/monthly_mean/tos/tos_200906.nc new file mode 100644 index 0000000000000000000000000000000000000000..dd7723fb9a18dc5bb3af383366fce0648ccc5172 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200906.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200907.nc b/inst/extdata/obs/monthly_mean/tos/tos_200907.nc new file mode 100644 index 0000000000000000000000000000000000000000..8349007a8efec2c10cb2c78181105fc3c3c38c37 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200907.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200908.nc b/inst/extdata/obs/monthly_mean/tos/tos_200908.nc new file mode 100644 index 0000000000000000000000000000000000000000..31d64e8143435f46d32478b32a5f67e38236c41e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200908.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200909.nc b/inst/extdata/obs/monthly_mean/tos/tos_200909.nc new file mode 100644 index 0000000000000000000000000000000000000000..a1fc3ab903697425f8e5d8838250fa0047520d81 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200909.nc differ diff --git 
a/inst/extdata/obs/monthly_mean/tos/tos_200910.nc b/inst/extdata/obs/monthly_mean/tos/tos_200910.nc new file mode 100644 index 0000000000000000000000000000000000000000..a30a6afc40c3729a008347fce6c2feca44a37b51 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200910.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200911.nc b/inst/extdata/obs/monthly_mean/tos/tos_200911.nc new file mode 100644 index 0000000000000000000000000000000000000000..7d25af1fb183f0e03483821d113f8dd0b1096f3e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200911.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_200912.nc b/inst/extdata/obs/monthly_mean/tos/tos_200912.nc new file mode 100644 index 0000000000000000000000000000000000000000..1631e19408ec710909ebc15797e8cde0d8322a9f Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_200912.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_201001.nc b/inst/extdata/obs/monthly_mean/tos/tos_201001.nc new file mode 100644 index 0000000000000000000000000000000000000000..631bc006923db1f7a5888e748a95966772a1e7d2 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_201001.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_201002.nc b/inst/extdata/obs/monthly_mean/tos/tos_201002.nc new file mode 100644 index 0000000000000000000000000000000000000000..4a5d7fccb14ab4f338205dfe1674864a1ff1ed19 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_201002.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_201003.nc b/inst/extdata/obs/monthly_mean/tos/tos_201003.nc new file mode 100644 index 0000000000000000000000000000000000000000..d250cd2e16b1b4e3ef39f2976c4e423fa10e9090 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_201003.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_201004.nc b/inst/extdata/obs/monthly_mean/tos/tos_201004.nc new file mode 100644 index 0000000000000000000000000000000000000000..ca48037fc74385e3f8ffdc4c5a8bdd09c85e47c9 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_201004.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_201005.nc b/inst/extdata/obs/monthly_mean/tos/tos_201005.nc new file mode 100644 index 0000000000000000000000000000000000000000..290868bb0325cbe6653b9dcf93ec2fdc745c1f9e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_201005.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_201006.nc b/inst/extdata/obs/monthly_mean/tos/tos_201006.nc new file mode 100644 index 0000000000000000000000000000000000000000..782a4d5a6b4c399dafcda5f5ae2a121deadddc7b Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_201006.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_201007.nc b/inst/extdata/obs/monthly_mean/tos/tos_201007.nc new file mode 100644 index 0000000000000000000000000000000000000000..34cf2a2a63821f31f447d61af7224dc3d3fa31bd Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_201007.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_201008.nc b/inst/extdata/obs/monthly_mean/tos/tos_201008.nc new file mode 100644 index 0000000000000000000000000000000000000000..929cea19b92f74933767d4568b035213cd96f0c2 Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_201008.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_201009.nc b/inst/extdata/obs/monthly_mean/tos/tos_201009.nc new file mode 100644 index 
0000000000000000000000000000000000000000..ec27a27f80326ec0ca2df144d84dac4c4df9253e Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_201009.nc differ diff --git a/inst/extdata/obs/monthly_mean/tos/tos_201010.nc b/inst/extdata/obs/monthly_mean/tos/tos_201010.nc new file mode 100644 index 0000000000000000000000000000000000000000..a441285dee40cd00f490760a003264feab6cedfc Binary files /dev/null and b/inst/extdata/obs/monthly_mean/tos/tos_201010.nc differ diff --git a/man/AddStep.Rd b/man/AddStep.Rd new file mode 100644 index 0000000000000000000000000000000000000000..3eece051f27640e918c98484ea0d66964a7b2304 --- /dev/null +++ b/man/AddStep.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AddStep.R +\name{AddStep} +\alias{AddStep} +\title{Create the workflow with the previous defined operation and data.} +\usage{ +AddStep(inputs, step_fun, ...) +} +\arguments{ +\item{inputs}{One or a list of objects of the class 'startR_cube' returned by +Start(), indicating the data to be processed.} + +\item{step_fun}{A startR step function as returned by Step().} + +\item{\dots}{Additional parameters for the inputs of function defined in +'step_fun' by Step().} +} +\value{ +A list of the class 'startR_workflow' containing all the objects +needed for the data operation. +} +\description{ +The step that combines the previous declared data and operation together to +create the complete workflow. It is the final step before data processing. +} +\examples{ + data_path <- system.file('extdata', package = 'startR') + path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') + sdates <- c('200011', '200012') + data <- Start(dat = list(list(path = path_obs)), + var = 'tos', + sdate = sdates, + time = 'all', + latitude = 'all', + longitude = 'all', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = FALSE) + pi_short <- 3.14 + fun <- function(x, pi_val) { + lat = attributes(x)$Variables$dat1$latitude + weight = sqrt(cos(lat * pi_val / 180)) + corrected = Apply(list(x), target_dims = "latitude", + fun = function(x) {x * weight}) + } + + + step <- Step(fun = fun, + target_dims = 'latitude', + output_dims = 'latitude', + use_libraries = c('multiApply'), + use_attributes = list(data = "Variables")) + wf <- AddStep(data, step, pi_val = pi_short) + +} + diff --git a/man/CDORemapper.Rd b/man/CDORemapper.Rd index 5ec1329533a777ae1c40395c39cc3fd526b48cca..4f56baaa4dda00233912c1c2c1cccb5b5945b3aa 100644 --- a/man/CDORemapper.Rd +++ b/man/CDORemapper.Rd @@ -1,41 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CDORemapper.R \name{CDORemapper} \alias{CDORemapper} \title{CDO Remap Data Transformation for 'startR'} -\description{ -This is a transform function that uses CDO software to remap longitude-latitude data subsets onto a specified target grid, intended for use as parameter \code{transform} in a call to the function \code{Start()} in the package 'startR'. This function complies with the input/output interface required by \code{Start()} defined in the documentation for the parameter \code{transform} of that function. -\cr\cr -This function uses the function \code{CDORemap()} in the package 's2dverification' to perform the interpolations and hence requires having CDO installed in the machine. -} \usage{ -CDORemapper(data_array, variables, file_selectors, ...) +CDORemapper(data_array, variables, file_selectors = NULL, ...) 
} \arguments{ - \item{data_array}{ -Input data array to be transformed. See details in the documentation of the parameter \code{transform} of the function \code{Start()}. - } - \item{variables}{ -Auxiliary variables required for the transformation, automatically provided by \code{Start()}. See details in the documentation of the parameter \code{transform} of the function \code{Start()}. - } - \item{file_selectors}{ -Information on the path of the file the input data array comes from. See details in the documentation of the parameter \code{transform} of the function \code{Start()}. - } - \item{\dots}{ -Additional parameters to adjust the transform process, as provided in the parameter \code{transform_params} in a call to the function \code{Start()}. See details in the documentation of the parameter \code{transform} of the function \code{Start()}. - } +\item{data_array}{A data array to be transformed. See details in the +documentation of the parameter 'transform' of the function Start().} + +\item{variables}{A list of auxiliary variables required for the transformation, +automatically provided by Start(). See details in the documentation of the +parameter 'transform' of the function Start().} + +\item{file_selectors}{A character vector indicating the information of the path of +the file that the parameter 'data_array' comes from. See details in the documentation of +the parameter 'transform' of the function Start(). The default value is NULL.} + +\item{\dots}{A list of additional parameters to adjust the transform process, +as provided in the parameter 'transform_params' in a Start() call. See details +in the documentation of the parameter 'transform' of the function Start().} } \value{ -An array with the same amount of dimensions as the input data array, potentially with different sizes, and potentially with the attribute 'variables' with additional auxiliary data. See details in the documentation of the parameter \code{transform} of the function \code{Start()}. +An array with the same number of dimensions as the input data array, + potentially with different sizes, and potentially with the attribute + 'variables' with additional auxiliary data. See details in the documentation + of the parameter 'transform' of the function Start(). +} +\description{ +This is a transform function that uses CDO software to remap longitude-latitude +data subsets onto a specified target grid, intended for use as parameter +'transform' in a Start() call. This function complies with the input/output +interface required by Start() defined in the documentation for the parameter +'transform' of function Start().\cr\cr +This function uses the function CDORemap() in the package 's2dverification' to +perform the interpolation, hence CDO is required to be installed.
+} +\examples{ +# Used in Start(): + data_path <- system.file('extdata', package = 'startR') + path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') + sdates <- c('200011') + \donttest{ + data <- Start(dat = list(list(path = path_obs)), + var = 'tos', + sdate = sdates, + time = 'all', + latitude = values(list(-60, 60)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(-120, 120)), + longitude_reorder = CircularSort(-180, 180), + transform = CDORemapper, + transform_params = list(grid = 'r360x181', + method = 'conservative', + crop = c(-120, 120, -60, 60)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = FALSE) } -\author{ -History:\cr -0.0 - 2017-04 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code } \seealso{ \code{\link[s2dverification]{CDORemap}} } -\examples{ -## Check https://earth.bsc.es/gitlab/es/startR for step-by-step examples -## of Start() that use CDORemapper(). -} -\keyword{array} -\keyword{manip} + diff --git a/man/CircularSort.Rd b/man/CircularSort.Rd deleted file mode 100644 index e0a7a5cf8c6cca26aa485171d418223568d54154..0000000000000000000000000000000000000000 --- a/man/CircularSort.Rd +++ /dev/null @@ -1,38 +0,0 @@ -\name{CircularSort} -\alias{CircularSort} -\title{Circular Sort Dimension Reorder for 'startR'} -\description{ -This is a function that generates a reorder function intended for use as parameter \code{_reorder} in a call to the function \code{Start()} in the package 'startR'. This function complies with the input/output interface required by \code{Start()} defined in the documentation for the parameter \dots of that function. -\cr\cr -The applied reordering consists of a circular sort of the coordinate variable values, where any values beyond the limits specified in the parameters \code{start} and \code{end} is applied a modulus to fall in the specified range. This is useful for circular dimensions such as the Earth longitudes. -} -\usage{ -CircularSort(start, end, ...) -} -\arguments{ - \item{start}{ -Numeric lower bound of the circular range. - } - \item{end}{ -Numeric upper bound of the circular range. - } - \item{\dots}{ -Additional parameters to adjust the reorderig (sent internally to the function \code{sort()}). - } -} -\value{ -List with the reordered values in the component \code{$x} and the permutation indices in the component \code{$ix}. See details in the documentation of the parameter \dots of the function \code{Start()}. -} -\author{ -History:\cr -0.0 - 2017-04 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code -} -\seealso{ -\code{\link[startR]{Sort}} -} -\examples{ -## Check https://earth.bsc.es/gitlab/es/startR for step-by-step examples -## of Start() that use CircularSort(). -} -\keyword{array} -\keyword{manip} diff --git a/man/Collect.Rd b/man/Collect.Rd new file mode 100644 index 0000000000000000000000000000000000000000..44a7dee1d3c613d0a0c40663c8b87870b33eed65 --- /dev/null +++ b/man/Collect.Rd @@ -0,0 +1,86 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Collect.R +\name{Collect} +\alias{Collect} +\title{Collect and merge the computation results} +\usage{ +Collect(startr_exec, wait = TRUE, remove = TRUE) +} +\arguments{ +\item{startr_exec}{An R object returned by Compute() when the parameter 'wait' +of Compute() is FALSE. 
It can come directly from a Compute() call or be read from +an RDS file.} + +\item{wait}{A logical value deciding whether the R session waits for the +Collect() call to finish (TRUE) or not (FALSE). If TRUE, it will be a +blocking call, in which Collect() will retrieve information from the HPC, +including signals and outputs, every 'polling_period' seconds. The status +can be monitored on the EC-Flow GUI. Collect() will not return until the +results of all chunks have been received. If FALSE, Collect() will crash with +an error if the execution has not finished yet, otherwise it will return the +merged array. The default value is TRUE.} + +\item{remove}{A logical value deciding whether to remove all data results +received from the HPC (and stored under 'ecflow_suite_dir', the parameter in +Compute()) after being collected. To preserve the data and call Collect() on it as +many times as desired, set remove to FALSE. The default value is TRUE.} +} +\value{ +A list of merged data arrays. +} +\description{ +The final step of the startR workflow after the data operation. It is used when +the parameter 'wait' of Compute() is FALSE, and the functionality includes +updating the job status shown on the EC-Flow GUI and collecting all the chunks +of results as one data array when the execution is done. See more details on +\href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab}. +} +\examples{ + data_path <- system.file('extdata', package = 'startR') + path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') + sdates <- c('200011', '200012') + data <- Start(dat = list(list(path = path_obs)), + var = 'tos', + sdate = sdates, + time = 'all', + latitude = 'all', + longitude = 'all', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = FALSE) + fun <- function(x) { + lat = attributes(x)$Variables$dat1$latitude + weight = sqrt(cos(lat * pi / 180)) + corrected = Apply(list(x), target_dims = "latitude", + fun = function(x) {x * weight}) + } + step <- Step(fun = fun, + target_dims = 'latitude', + output_dims = 'latitude', + use_libraries = c('multiApply'), + use_attributes = list(data = "Variables")) + wf <- AddStep(data, step) + \dontrun{ + res <- Compute(wf, chunks = list(longitude = 2, sdate = 2), + threads_load = 1, + threads_compute = 4, + cluster = list(queue_host = 'nord3', + queue_type = 'lsf', + temp_dir = '/on_hpc/tmp_dir/', + cores_per_job = 2, + job_wallclock = '05:00', + max_jobs = 4, + extra_queue_params = list('#BSUB -q bsc_es'), + bidirectional = FALSE, + polling_period = 10 + ), + ecflow_suite_dir = '/on_local_machine/username/ecflow_dir/', + wait = FALSE) + saveRDS(res, file = 'test_collect.Rds') + collect_info <- readRDS('test_collect.Rds') + result <- Collect(collect_info, wait = TRUE) + } + +} + diff --git a/man/Compute.Rd b/man/Compute.Rd new file mode 100644 index 0000000000000000000000000000000000000000..e07106a84abe08b157ca74f94966c589f23c6784 --- /dev/null +++ b/man/Compute.Rd @@ -0,0 +1,107 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Compute.R +\name{Compute} +\alias{Compute} +\title{Specify the execution parameters and trigger the execution} +\usage{ +Compute(workflow, chunks = "auto", threads_load = 1, threads_compute = 1, + cluster = NULL, ecflow_suite_dir = NULL, ecflow_server = NULL, + silent = FALSE, debug = FALSE, wait = TRUE) +} +\arguments{ +\item{workflow}{A list of the class 'startR_workflow' returned by function +AddStep() or of class 'startR_cube' returned by function Start(). It +contains all the objects needed for the execution.} + +\item{chunks}{A named list of the dimensions along which to split the data, and +the number of chunks to make for each. The chunked dimensions can only be +those not required as target dimensions in function Step(). The default +value is 'auto', which lists all the non-target dimensions and each one has +one chunk.} + +\item{threads_load}{An integer indicating the number of execution threads to +use for the data retrieval stage. The default value is 1.} + +\item{threads_compute}{An integer indicating the number of execution threads +to use for the computation. The default value is 1.} + +\item{cluster}{A list of components that define the configuration of the +machine to be run on. The components vary across different machines. +Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more +details and examples. Only needed when the computation is not run locally. +The default value is NULL.} + +\item{ecflow_suite_dir}{A character string indicating the path to a folder in +the local workstation in which to store temporary files generated for the +automatic management of the workflow. Only needed when the execution is run +remotely. The default value is NULL.} + +\item{ecflow_server}{A named vector indicating the host and port of the +EC-Flow server. The vector form should be +\code{c(host = 'hostname', port = port_number)}. Only needed when the +execution is run remotely. The default value is NULL.} + +\item{silent}{A logical value deciding whether to print the computation +progress (FALSE) on the R session or not (TRUE). It only works when the +execution runs locally or the parameter 'wait' is TRUE. The default value +is FALSE.} + +\item{debug}{A logical value deciding whether to return detailed messages on +the progress and operations in a Compute() call (TRUE) or not (FALSE). +Automatically changed to FALSE if parameter 'silent' is TRUE. The default +value is FALSE.} + +\item{wait}{A logical value deciding whether the R session waits for the +Compute() call to finish (TRUE) or not (FALSE). If FALSE, it will return an +object with all the information of the startR execution that can be stored +on your disk. After that, the R session can be closed and the results can +be collected later with the Collect() function. The default value is TRUE.} +} +\value{ +A list of data arrays for the output returned by the last step in the + specified workflow (wait = TRUE), or an object with information about the + startR execution (wait = FALSE). The configuration details and profiling + information are attached as attributes to the returned list of arrays. +} +\description{ +The step of the startR workflow after the complete workflow is defined by +AddStep(). This function specifies the execution parameters and triggers the +execution. The execution can be operated locally or on a remote machine. In +the latter case, the configuration of the machine needs to be +specified in the function, and the EC-Flow server is required to be +installed.\cr\cr +The execution can be operated in chunks to avoid overloading the RAM. +After all the chunks are finished, Compute() will gather and merge them, and +return a single data object, including one or multiple multidimensional data +arrays and additional metadata.
+} +\examples{ + data_path <- system.file('extdata', package = 'startR') + path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') + sdates <- c('200011', '200012') + data <- Start(dat = list(list(path = path_obs)), + var = 'tos', + sdate = sdates, + time = 'all', + latitude = 'all', + longitude = 'all', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = FALSE) + fun <- function(x) { + lat = attributes(x)$Variables$dat1$latitude + weight = sqrt(cos(lat * pi / 180)) + corrected = Apply(list(x), target_dims = "latitude", + fun = function(x) {x * weight}) + } + step <- Step(fun = fun, + target_dims = 'latitude', + output_dims = 'latitude', + use_libraries = c('multiApply'), + use_attributes = list(data = "Variables")) + wf <- AddStep(data, step) + res <- Compute(wf, chunks = list(longitude = 4, sdate = 2)) + +} + diff --git a/man/NcCloser.Rd b/man/NcCloser.Rd index f8ca5997778547bd31dc6e61521b3de5b53b93c1..65beab8542da4d3770fe362902ac5efe92b69722 100644 --- a/man/NcCloser.Rd +++ b/man/NcCloser.Rd @@ -1,33 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/NcCloser.R \name{NcCloser} \alias{NcCloser} -\title{NetCDF File Closer for 'startR'} -\description{ -This is a file closer function for NetCDF files, intended for use as parameter \code{file_closer} in a call to the function \code{Start()} in the package 'startR'. This function complies with the input/output interface required by \code{Start()} defined in the documentation for the parameter \code{file_closer} of that function. -\cr\cr -This function uses the function \code{NcClose()} in the package 'easyNCDF', which in turn uses \code{nc_close()} in the package 'ncdf4'. -} +\title{NetCDF file closer for 'startR'} \usage{ NcCloser(file_object) } \arguments{ - \item{file_object}{ -Open connection to a NetCDF file, optionally with additional header information. See details in the documentation of the parameter \code{file_closer} of the function \code{Start()}. - } +\item{file_object}{An open connection to a NetCDF file, optionally with +additional header information. See details in the documentation of the +parameter 'file_closer' of the function Start().} } \value{ -This function returns \code{NULL}. +This function returns NULL. } -\author{ -History:\cr -0.0 - 2017-04 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code -} -\seealso{ -\code{\link{NcOpener}}, \code{\link{NcDimReader}}, \code{\link{NcDataReader}}, \code{\link{NcVarReader}} +\description{ +This is a file closer function for NetCDF files, intended for use as +parameter 'file_closer' in a Start() call. This function complies with the +input/output interface required by Start() defined in the documentation for +the parameter 'file_closer'.\cr\cr +This function uses the function NcClose() in the package 'easyNCDF', +which in turn uses nc_close() in the package 'ncdf4'. } \examples{ -## Check https://earth.bsc.es/gitlab/es/startR for step-by-step examples -## of Start() that use NcCloser(). 
+data_path <- system.file('extdata', package = 'startR') +path_obs <- file.path(data_path, 'obs/monthly_mean/tos/tos_200011.nc') +connection <- NcOpener(path_obs) +NcCloser(connection) +} +\seealso{ +\code{\link{NcOpener}} \code{\link{NcDataReader}} + \code{\link{NcDimReader}} \code{\link{NcVarReader}} } -\keyword{IO} -\keyword{array} -\keyword{manip} + diff --git a/man/NcDataReader.Rd b/man/NcDataReader.Rd index 8bd68afac16c2f3a51216412c4a7425718185b20..a6d32c7b2235fa615a621f0c9f9cd4af13364349 100644 --- a/man/NcDataReader.Rd +++ b/man/NcDataReader.Rd @@ -1,46 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/NcDataReader.R \name{NcDataReader} \alias{NcDataReader} -\title{NetCDF File Data Reader for 'startR'} -\description{ -This is a data reader function for NetCDF files, intended for use as parameter \code{file_data_reader} in a call to the function \code{Start()} in the package 'startR'. This function complies with the input/output interface required by \code{Start()} defined in the documentation for the parameter \code{file_data_reader} of that function. -\cr\cr -This function uses the function \code{NcToArray()} in the package 'easyNCDF', which in turn uses \code{nc_var_get()} in the package 'ncdf4'. -} +\title{NetCDF file data reader for 'startR'} \usage{ -NcDataReader(file_path = NULL, file_object = NULL, file_selectors = NULL, - inner_indices = NULL, synonims) +NcDataReader(file_path = NULL, file_object = NULL, file_selectors = NULL, + inner_indices = NULL, synonims) } \arguments{ - \item{file_path}{ -Character string with the path to the data file to read. See details in the documentation of the parameter \code{file_data_reader} of the function \code{Start()}. - } - \item{file_object}{ -Open connection to a NetCDF file, optionally with additional header information. See details in the documentation of the parameter \code{file_data_reader} of the function \code{Start()}. - } - \item{file_selectors}{ -Information on the path of the file to read data from. See details in the documentation of the parameter \code{file_data_reader} of the function \code{Start()}. - } - \item{inner_indices}{ -Named list with the numeric indices to take from each of the inner dimensions in the requested file. See details in the documentation of the parameter \code{file_data_reader} of the function \code{Start()}. - } - \item{synonims}{ -Named list with synonims for the dimension names to look for in the requested file, exactly as provided in the parameter \code{synonims} in a call to the function \code{Start()}. See details in the documentation of the parameter \code{file_data_reader} of the function \code{Start()}. - } +\item{file_path}{A character string indicating the path to the data file to +read. See details in the documentation of the parameter 'file_data_reader' +of the function Start(). The default value is NULL.} + +\item{file_object}{An open connection to a NetCDF file, optionally with +additional header information. See details in the documentation of the +parameter 'file_data_reader' of the function Start(). The default value is +NULL.} + +\item{file_selectors}{A named list containing the information of the path of +the file to read data from. It is automatically provided by Start(). See +details in the documentation of the parameter 'file_data_reader' of the +function Start(). The default value is NULL.} + +\item{inner_indices}{A named list of numeric vectors indicating the indices +to take from each of the inner dimensions in the requested file. 
It is +automatically provided by Start(). See details in the documentation of the +parameter 'file_data_reader' of the function Start(). The default value is +NULL.} + +\item{synonims}{A named list indicating the synonims for the dimension names +to look for in the requested file, exactly as provided in the parameter +'synonims' in a Start() call. See details in the documentation of the +parameter 'file_data_reader' of the function Start().} } \value{ -A multidimensional data array with the named dimensions and indices requested in \code{inner_indices}, potentially with the attribute 'variables' with additional auxiliary data. See details in the documentation of the parameter \code{file_data_reader} of the function \code{Start()}. +A multidimensional data array with the named dimensions and indices + requested in 'inner_indices', potentially with the attribute 'variables' + with additional auxiliary data. See details in the documentation of the + parameter 'file_data_reader' of the function Start(). } -\author{ -History:\cr -0.0 - 2017-04 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code -} -\seealso{ -\code{\link{NcOpener}}, \code{\link{NcCloser}}, \code{\link{NcDimReader}}, \code{\link{NcVarReader}} +\description{ +This is a data reader function for NetCDF files, intended for use as parameter +file_data_reader in a Start() call. This function complies with the +input/output interface required by Start() defined in the documentation for +the parameter 'file_data_reader'.\cr\cr +This function uses the function NcToArray() in the package 'easyNCDF', which +in turn uses nc_var_get() in the package 'ncdf4'. } \examples{ -## Check https://earth.bsc.es/gitlab/es/startR for step-by-step examples -## of Start() that use NcDataReader(). + data_path <- system.file('extdata', package = 'startR', mustWork = TRUE) + file_to_open <- file.path(data_path, 'obs/monthly_mean/tos/tos_200011.nc') + file_selectors <- c(dat = 'dat1', var = 'tos', sdate = '200011') + first_round_indices <- list(time = 1, latitude = 1:8, longitude = 1:16) + synonims <- list(dat = 'dat', var = 'var', sdate = 'sdate', time = 'time', + latitude = 'latitude', longitude = 'longitude') + sub_array <- NcDataReader(file_to_open, NULL, file_selectors, + first_round_indices, synonims) +} +\seealso{ +\code{\link{NcOpener}} \code{\link{NcDimReader}} + \code{\link{NcCloser}} \code{\link{NcVarReader}} } -\keyword{IO} -\keyword{array} -\keyword{manip} + diff --git a/man/NcDimReader.Rd b/man/NcDimReader.Rd index 9a0a1044120127d85b0f1fdb7ad5c53e0c642647..d539ffdc9eac1869c0e9e60e623ae9688b204e35 100644 --- a/man/NcDimReader.Rd +++ b/man/NcDimReader.Rd @@ -1,46 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/NcDimReader.R \name{NcDimReader} \alias{NcDimReader} -\title{NetCDF Dimension Reader for 'startR'} -\description{ -This is a dimension reader function for NetCDF files, intended for use as parameter \code{file_dim_reader} in a call to the function \code{Start()} in the package 'startR'. This function complies with the input/output interface required by \code{Start()} defined in the documentation for the parameter \code{file_dim_reader} of that function. -\cr\cr -This function uses the function \code{NcReadDims()} in the package 'easyNCDF'. 
-} +\title{NetCDF dimension reader for 'startR'} \usage{ -NcDimReader(file_path = NULL, file_object = NULL, file_selectors = NULL, - inner_indices = NULL, synonims) +NcDimReader(file_path = NULL, file_object = NULL, file_selectors = NULL, + inner_indices = NULL, synonims) } \arguments{ - \item{file_path}{ -Character string with the path to the data file to read the dimensions for. See details in the documentation of the parameter \code{file_dim_reader} of the function \code{Start()}. - } - \item{file_object}{ -Open connection to a NetCDF file, optionally with additional header information. See details in the documentation of the parameter \code{file_dim_reader} of the function \code{Start()}. - } - \item{file_selectors}{ -Information on the path of the file to read data from. See details in the documentation of the parameter \code{file_dim_reader} of the function \code{Start()}. - } - \item{inner_indices}{ -Named list with the numeric indices to take from each of the inner dimensions in the requested file. Used only in advanced configurations. See details in the documentation of the parameter \code{file_dim_reader} of the function \code{Start()}. - } - \item{synonims}{ -Named list with synonims for the dimension names to look for in the requested file, exactly as provided in the parameter \code{synonims} in a call to the function \code{Start()}. See details in the documentation of the parameter \code{file_dim_reader} of the function \code{Start()}. - } +\item{file_path}{A character string indicating the path to the data file to +read. See details in the documentation of the parameter 'file_dim_reader' +of the function Start(). The default value is NULL.} + +\item{file_object}{An open connection to a NetCDF file, optionally with +additional header information. See details in the documentation of the +parameter 'file_dim_reader' of the function Start(). The default value is +NULL.} + +\item{file_selectors}{A named list containing the information of the path of +the file to read data from. It is automatically provided by Start(). See +details in the documentation of the parameter 'file_dim_reader' of the +function Start(). The default value is NULL.} + +\item{inner_indices}{A named list of numeric vectors indicating the indices +to take from each of the inner dimensions in the requested file. It is +automatically provided by Start(). See details in the documentation of the +parameter 'file_dim_reader' of the function Start(). The default value is +NULL.} + +\item{synonims}{A named list indicating the synonims for the dimension names +to look for in the requested file, exactly as provided in the parameter +'synonims' in a Start() call. See details in the documentation of the +parameter 'file_dim_reader' of the function Start().} } \value{ -Named numeric vector with the names and sizes of the dimensions of the requested file. +A named numeric vector with the names and sizes of the dimensions of + the requested file. } -\author{ -History:\cr -0.0 - 2017-04 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code -} -\seealso{ -\code{\link{NcOpener}}, \code{\link{NcCloser}}, \code{\link{NcDataReader}}, \code{\link{NcVarReader}} +\description{ +A dimension reader function for NetCDF files, intended for use as parameter +'file_dim_reader' in a Start() call. It complies with the input/output +interface required by Start() defined in the documentation for the parameter +'file_dim_reader' of that function.\cr\cr +This function uses the function NcReadDims() in the package 'easyNCDF'. 
} \examples{ -## Check https://earth.bsc.es/gitlab/es/startR for step-by-step examples -## of Start() that use NcDimReader(). + data_path <- system.file('extdata', package = 'startR') + file_to_open <- file.path(data_path, 'obs/monthly_mean/tos/tos_200011.nc') + file_selectors <- c(dat = 'dat1', var = 'tos', sdate = '200011') + first_round_indices <- list(time = 1, latitude = 1:8, longitude = 1:16) + synonims <- list(dat = 'dat', var = 'var', sdate = 'sdate', time = 'time', + latitude = 'latitude', longitude = 'longitude') + dim_of_file <- NcDimReader(file_to_open, NULL, file_selectors, + first_round_indices, synonims) +} +\seealso{ +\code{\link{NcOpener}} \code{\link{NcDataReader}} + \code{\link{NcCloser}} \code{\link{NcVarReader}} } -\keyword{IO} -\keyword{array} -\keyword{manip} + diff --git a/man/NcOpener.Rd b/man/NcOpener.Rd index 3c0f8756d4da1d5526ebd5fb08af1766e0fe3160..e46384ca2a1b4b7ce281c4b74fc2a29e5ebf52f4 100644 --- a/man/NcOpener.Rd +++ b/man/NcOpener.Rd @@ -1,33 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/NcOpener.R \name{NcOpener} \alias{NcOpener} -\title{NetCDF File Opener for 'startR'} -\description{ -This is a file closer function for NetCDF files, intended for use as parameter \code{file_opener} in a call to the function \code{Start()} in the package 'startR'. This function complies with the input/output interface required by \code{Start()} defined in the documentation for the parameter \code{file_opener} of that function. -\cr\cr -This function uses the function \code{NcOpen()} in the package 'easyNCDF', which in turn uses \code{nc_open()} in the package 'ncdf4'. -} +\title{NetCDF file opener for 'startR'} \usage{ NcOpener(file_path) } \arguments{ - \item{file_path}{ -Character string with the path to the data file to read. See details in the documentation of the parameter \code{file_opener} of the function \code{Start()}. - } +\item{file_path}{A character string indicating the path to the data file to +read. See details in the documentation of the parameter 'file_opener' of the +function Start().} } \value{ -An open connection to a NetCDF file, with additional header information, as returned by \code{nc_open} in the package 'ncdf4'. See details in the documentation of the parameter \code{file_opener} of the function \code{Start()}. +An open connection to a NetCDF file with additional header + information as returned by nc_open() in the package 'ncdf4'. See details in + the documentation of the parameter 'file_opener' of the function Start(). } -\author{ -History:\cr -0.0 - 2017-04 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code -} -\seealso{ -\code{\link{NcCloser}}, \code{\link{NcDataReader}}, \code{\link{NcDimReader}}, \code{\link{NcVarReader}} +\description{ +This is a file opener function for NetCDF files, intended for use as parameter +'file_opener' in a Start() call. This function complies with the input/output +interface required by Start() defined in the documentation for the parameter +'file_opener'.\cr\cr +This function uses the function NcOpen() in the package 'easyNCDF', which in +turn uses nc_open() in the package 'ncdf4'. } \examples{ -## Check https://earth.bsc.es/gitlab/es/startR for step-by-step examples -## of Start() that use NcOpener(). 
+data_path <- system.file('extdata', package = 'startR') +path_obs <- file.path(data_path, 'obs/monthly_mean/tos/tos_200011.nc') +connection <- NcOpener(path_obs) +NcCloser(connection) +} +\seealso{ +\code{\link{NcDimReader}} \code{\link{NcDataReader}} + \code{\link{NcCloser}} \code{\link{NcVarReader}} } -\keyword{IO} -\keyword{array} -\keyword{manip} + diff --git a/man/NcVarReader.Rd b/man/NcVarReader.Rd index c8efec392996fb4061825308793efa17c4d6fb0d..c601907304c7c17639742da81750015804c2ebbd 100644 --- a/man/NcVarReader.Rd +++ b/man/NcVarReader.Rd @@ -1,46 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/NcVarReader.R \name{NcVarReader} \alias{NcVarReader} -\title{NetCDF Variable Reader for 'startR'} -\description{ -This is an auxiliary variable reader function for NetCDF files, intended for use as parameter \code{file_var_reader} in a call to the function \code{Start()} in the package 'startR'. This function complies with the input/output interface required by \code{Start()} defined in the documentation for the parameter \code{file_var_reader} of that function. -\cr\cr -This function uses the function \code{NcDataReader()} in the package 'startR', which in turn uses \code{NcToArray()} in the package 'easyNCDF', which in turn uses \code{nc_var_get()} in the package 'ncdf4'. -} +\title{NetCDF variable reader for 'startR'} \usage{ -NcVarReader(file_path = NULL, file_object = NULL, file_selectors = NULL, - var_name = NULL, synonims) +NcVarReader(file_path = NULL, file_object = NULL, file_selectors = NULL, + var_name = NULL, synonims) } \arguments{ - \item{file_path}{ -Character string with the path to the data file to read the variable from. See details in the documentation of the parameter \code{file_var_reader} of the function \code{Start()}. - } - \item{file_object}{ -Open connection to a NetCDF file, optionally with additional header information. See details in the documentation of the parameter \code{file_var_reader} of the function \code{Start()}. - } - \item{file_selectors}{ -Information on the path of the file to read data from. See details in the documentation of the parameter \code{file_var_reader} of the function \code{Start()}. - } - \item{var_name}{ -Character string with the name of the variable to be read. - } - \item{synonims}{ -Named list with synonims for the variable names to look for in the requested file, exactly as provided in the parameter \code{synonims} in a call to the function \code{Start()}. See details in the documentation of the parameter \code{file_var_reader} of the function \code{Start()}. - } +\item{file_path}{A character string indicating the path to the data file to +read the variable from. See details in the documentation of the parameter +'file_var_reader' of the function Start(). The default value is NULL.} + +\item{file_object}{An open connection to a NetCDF file, optionally with +additional header information. See details in the documentation of the +parameter 'file_var_reader' of the function Start(). The default value is +NULL.} + +\item{file_selectors}{A named list containing the information of the path of +the file to read data from. It is automatically provided by Start(). See +details in the documentation of the parameter 'file_var_reader' of the +function Start(). The default value is NULL.} + +\item{var_name}{A character string with the name of the variable to be read. 
+The default value is NULL.} + +\item{synonims}{A named list indicating the synonims for the dimension names +to look for in the requested file, exactly as provided in the parameter +'synonims' in a Start() call. See details in the documentation of the +parameter 'file_var_reader' of the function Start().} } \value{ -A multidimensional data array with the named dimensions, potentially with the attribute 'variables' with additional auxiliary data. See details in the documentation of the parameter \code{file_var_reader} of the function \code{Start()}. +A multidimensional data array with the named dimensions, potentially + with the attribute 'variables' with additional auxiliary data. See details + in the documentation of the parameter 'file_var_reader' of the function + Start(). } -\author{ -History:\cr -0.0 - 2017-04 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code -} -\seealso{ -\code{\link{NcOpener}}, \code{\link{NcCloser}}, \code{\link{NcDataReader}}, \code{\link{NcDimReader}} +\description{ +This is an auxiliary variable reader function for NetCDF files, intended for +use as parameter 'file_var_reader' in a Start() call. It complies with the +input/output interface required by Start() defined in the documentation for +the parameter 'file_var_reader' of that function.\cr\cr +This function uses the function NcDataReader() in the package 'startR', +which in turn uses NcToArray() in the package 'easyNCDF', which in turn uses +nc_var_get() in the package 'ncdf4'. } \examples{ -## Check https://earth.bsc.es/gitlab/es/startR for step-by-step examples -## of Start() that use NcVarReader(). + data_path <- system.file('extdata', package = 'startR') + file_to_open <- file.path(data_path, 'obs/monthly_mean/tos/tos_200011.nc') + file_selectors <- c(dat = 'dat1', var = 'tos', sdate = '200011') + synonims <- list(dat = 'dat', var = 'var', sdate = 'sdate', time = 'time', + latitude = 'latitude', longitude = 'longitude') + var <- NcVarReader(file_to_open, NULL, file_selectors, + 'tos', synonims) +} +\seealso{ +\code{\link{NcOpener}} \code{\link{NcDataReader}} + \code{\link{NcCloser}} \code{\link{NcDimReader}} } -\keyword{IO} -\keyword{array} -\keyword{manip} + diff --git a/man/SelectorChecker.Rd b/man/SelectorChecker.Rd index 270078295df0f8ec6d69518d3c54c32108561ff4..ef83575bab49fd5bca62d7a86d7df9f5840dae3c 100644 --- a/man/SelectorChecker.Rd +++ b/man/SelectorChecker.Rd @@ -1,37 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SelectorChecker.R \name{SelectorChecker} \alias{SelectorChecker} -\title{Default Selector Checker for 'startR'} -\description{ -This is a selector checker function intended for use as parameter \code{selector_checker} in a call to the function \code{Start()} in the package 'startR'. This function complies with the input/output interface required by \code{Start()} defined in the documentation for the parameter \code{selector_checker} of that function. -} +\title{Translate a set of selectors into a set of numeric indices} \usage{ -SelectorChecker(selectors, var = NULL, return_indices = TRUE, tolerance = NULL) +SelectorChecker(selectors, var = NULL, return_indices = TRUE, + tolerance = NULL) } \arguments{ - \item{selectors}{ -Numeric indices or variable values to be retrieved for a dimension, automatically provided by \code{Start()}. See details in the documentation of the parameters \code{selector_checker} and \dots of the function \code{Start()}. 
The indices or values can be provided in the form of a vector or in the form of a list with two elements. - } - \item{var}{ -Vector of values of a coordinate variable where to search matches with the provided indices or values in the parameter \code{selectors}, automatically provided by \code{Start()}. See details in the documentation of the parameters \code{selector_checker} and \dots of the function \code{Start()}. The parameter \code{var} is optional. When not specified, \code{SelectorChecker} simply returns the input indices. - } - \item{return_indices}{ -Boolean flag, automatically configured by \code{Start()}, telling whether to return numeric indices or coordinate variable values after doing the matching. - } - \item{tolerance}{ -Numeric value with a tolerance value to be used in the matching of the \code{selectors} and \code{var}. See documentation on \code{_tolerance} in \dots, in the documentation of the function \code{Start()}. - } +\item{selectors}{A vector or a list of two of numeric indices or variable +values to be retrieved for a dimension, automatically provided by Start(). +See details in the documentation of the parameters 'selector_checker' and +'\dots' of the function Start().} + +\item{var}{A vector of values of a coordinate variable for which to search +matches with the provided indices or values in the parameter 'selectors', +automatically provided by Start(). See details in the documentation of the +parameters 'selector_checker' and '\dots' of the function Start(). The +default value is NULL. When not specified, SelectorChecker() simply returns +the input indices.} + +\item{return_indices}{A logical value automatically configured by Start(), +telling whether to return the numeric indices or coordinate variable values +after the matching. The default value is TRUE.} + +\item{tolerance}{A numeric value indicating a tolerance value to be used in +the matching of 'selectors' and 'var'. See documentation on +'_tolerance' in \code{\dots} in the documentation of the function +Start(). The default value is NULL.} } \value{ -A vector of either the indices of the matching values (if \code{return_indices = TRUE}) or the matching values themselves (if \code{return_indices = FALSE}). +A vector of either the indices of the matching values (if + return_indices = TRUE) or the matching values themselves (if return_indices + = FALSE). } -\author{ -History:\cr -0.0 - 2017-04 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code +\description{ +This is a selector checker function intended for use as parameter +'selector_checker' in a Start() call. It translates a set of selectors which +is the value for one dimension into a set of numeric indices corresponding to +the coordinate variable. The function complies with the input/output interface +required by Start() defined in the documentation for the parameter +'selector_checker' of Start(). } \examples{ -## Check https://earth.bsc.es/gitlab/es/startR for step-by-step examples -## of Start() that use SelectorChecker(). 
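+# When no coordinate variable is provided through 'var', numeric selectors are
+# simply returned as they are (the default behaviour described for parameter
+# 'var'); a minimal sketch:
+SelectorChecker(1:3)
+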
+# Get the latitudes from 10 to 20 degrees +sub_array_of_selectors <- list(10, 20) +# The latitude values from the original file +sub_array_of_values <- seq(90, -90, length.out = 258)[2:257] +SelectorChecker(sub_array_of_selectors, sub_array_of_values) + } -\keyword{IO} -\keyword{array} -\keyword{manip} + diff --git a/man/Sort.Rd b/man/Sort.Rd index c32a6e2a1a27cce6205b19c4cf85da859053ece0..9ab516ed187f4bdb853eba58effd4049346a0d95 100644 --- a/man/Sort.Rd +++ b/man/Sort.Rd @@ -1,33 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Sort.R \name{Sort} +\alias{CircularSort} \alias{Sort} -\title{Sort Dimension Reorder for 'startR'} -\description{ -This is a reorder function intended for use as parameter \code{_reorder} in a call to the function \code{Start()} in the package 'startR'. This function complies with the input/output interface required by \code{Start()} defined in the documentation for the parameter \dots of that function. -\cr\cr -The applied reordering consists of an increasing sort of the coordinate variable values. -} +\title{Sort the coordinate variable values in a Start() call} \usage{ Sort(...) + +CircularSort(start, end, ...) } \arguments{ - \item{\dots}{ -Additional parameters to adjust the reorderig (sent internally to the function \code{sort()}). - } +\item{start}{A numeric indicating the lower bound of the circular range.} + +\item{end}{A numeric indicating the upper bound of the circular range.} + +\item{\dots}{Additional parameters to adjust the reordering. See the function +sort() for more details.} } \value{ -List with the reordered values in the component \code{$x} and the permutation indices in the component \code{$ix}. See details in the documentation of the parameter \dots of the function \code{Start()}. +A list of 2 containing: +\item{$x}{ + The reordered values. } -\author{ -History:\cr -0.0 - 2017-04 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code +\item{$ix}{ + The permutation indices of $x in the original coordinate. } -\seealso{ -\code{\link[startR]{CircularSort}} +} +\description{ +A reorder function intended for use as parameter '_reorder' +in a call to the function Start(). This function complies with the +input/output interface required by Start() defined in the documentation +for the parameter \code{\dots} of that function.\cr\cr +The reordering applied by Sort() consists of an increasing or decreasing +sort of the values. It is useful for adjusting the latitude order.\cr\cr +The reordering applied by CircularSort() consists of a circular sort of the +values: any value beyond the limits specified in the parameters +'start' and 'end' is wrapped by a modulus to fall within the specified +range. This is useful for circular coordinates such as the Earth longitudes. } \examples{ -## Check https://earth.bsc.es/gitlab/es/startR for step-by-step examples -## of Start() that use Sort(). 
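+# Sort() and CircularSort() are not applied to the data directly; their output
+# is passed to Start() through reorder parameters such as 'latitude_reorder'
+# and 'longitude_reorder', as in the call below. As a rough sketch only (not
+# the actual implementation), the wrapping that CircularSort(-180, 180)
+# describes can be mimicked with a base-R modulus:
+lons <- c(350, 10, 170, 190)
+((lons + 180) %% 360) - 180    # gives -10 10 170 -170
+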
+# Used in Start(): + data_path <- system.file('extdata', package = 'startR') + path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') + sdates <- c('200011', '200012') + data <- Start(dat = list(list(path = path_obs)), + var = 'tos', + sdate = sdates, + time = 'all', + latitude = values(list(-60, 60)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(-120, 120)), + longitude_reorder = CircularSort(-180, 180), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = FALSE) + } -\keyword{IO} -\keyword{array} -\keyword{manip} + diff --git a/man/Start.Rd b/man/Start.Rd index 52317d3c99aac2c968f005e165ca26edd92b19ff..d69562c018d32f8404794d14f9ecd518d75c45c8 100644 --- a/man/Start.Rd +++ b/man/Start.Rd @@ -1,68 +1,388 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Start.R \name{Start} \alias{Start} -\title{Declare, Discover, Subset and Retrieve Multidimensional Distributed Data Sets} -\description{ -See the \href{https://earth.bsc.es/gitlab/es/startR}{\code{startR} documentation and tutorial} for a step-by-step explanation on how to use \code{Start()}. -\cr\cr -Nowadays in the era of Big Data, large multidimensional data sets from diverse sources need to be combined and processed. Analysis of Big Data in any field is often highly complex and time-consuming. Taking subsets of these datasets (Divide) and processing them efficiently (and Conquer) becomes an indispensable practice. This technique is also known as Domain Decomposition, Map Reduce or, more commonly, 'chunking'. -\cr\cr -\code{startR} (Subset, TrAnsform, ReTrieve, arrange and process large multidimensional data sets in R) is an R project started at BSC with the aim to develop a tool that allows the user to automatically process large multidimensional distributed data sets. It is an open source project that is open to external collaboration and funding, and will continuously evolve to support as many data set formats as possible while maximizing its efficiency. -\cr\cr -\code{startR} provides a framework under which a data set (collection of one or multiple data files, potentially distributed over various remote servers) are perceived as if they all were part of a single large multidimensional array. Once such multidimensional array is declared, any user-defined function can be applied to the data in a \code{apply}-like fashion, where \code{startR} transparently implements the Map Reduce paradigm. 
The steps to follow in order to process a collection of Big Data sets are as follows: -\cr\cr +\title{Declare, discover, subset and retrieve multidimensional distributed data sets} +\usage{ +Start(..., return_vars = NULL, synonims = NULL, file_opener = NcOpener, + file_var_reader = NcVarReader, file_dim_reader = NcDimReader, + file_data_reader = NcDataReader, file_closer = NcCloser, + transform = NULL, transform_params = NULL, transform_vars = NULL, + transform_extra_cells = 2, apply_indices_after_transform = FALSE, + pattern_dims = NULL, metadata_dims = NULL, + selector_checker = SelectorChecker, merge_across_dims = FALSE, + merge_across_dims_narm = FALSE, split_multiselected_dims = FALSE, + path_glob_permissive = FALSE, retrieve = FALSE, num_procs = 1, + silent = FALSE, debug = FALSE) +} +\arguments{ +\item{return_vars}{A named list where the names are the names of the +variables to be fetched in the files, and the values are vectors of +character strings with the names of the file dimension which to retrieve each +variable for, or NULL if the variable has to be retrieved only once +from any (the first) of the involved files.\cr\cr +Apart from retrieving a multidimensional data array, retrieving auxiliary +variables inside the files can also be needed. The parameter +'return_vars' allows for requesting such variables, as long as a +'file_var_reader' function is also specified in the call to +Start() (see documentation on the corresponding parameter). +\cr\cr +In the case of the the item sales example (see documentation on parameter +\code{\dots)}, the store location variable is requested with the parameter\cr +\code{return_vars = list(store_location = NULL)}.\cr This will cause +Start() to fetch once the variable 'store_location' and return it in +the component\cr \code{$Variables$common$store_location},\cr and will be an +array of character strings with the location names, with the dimensions +\code{c('store' = 100)}. Although useless in this example, we could ask +Start() to fetch and return such variable for each file along the +items dimension as follows: \cr +\code{return_vars = list(store_location = c('item'))}.\cr In that case, the +variable will be fetched once from a file of each of the items, and will be +returned as an array with the dimensions \code{c('item' = 3, 'store' = 100)}. +\cr\cr +If a variable is requested along a file dimension that contains path pattern +specifications ('source' in the example), the fetched variable values will be +returned in the component\cr \code{$Variables$$}.\cr +For example: \cr -\itemize{ - \item{ -Declaring the data set, i.e. declaring the distribution of the data files involved, the dimensions and shape of the multidimensional array, and the boundaries of the tatget data. Numeric indices or coordinate values can be used when fixing the boundaries. Once a data set is declared, a list of involved files, dimension lengths, memory size and other metadata is made available. Optionally, the data set can be retrieved and loaded onto the current R session if it is small enough. This step can be performed with the \code{Start()} function. - } - \item{ -Declaring the workflow of operations to perform on the involved data set(s). This step can be performed with the \code{Step()} and \code{AddStep()} functions. - } - \item{ -Defining the computation settings. 
The mandatory settings include a) how many subsets to divide the data sets into and along which dimensions; b) which platform to perform the workflow of operations on (local machine, remote machine, remote HPC?), how to communicate with it (unidirectional or bidirectional connection? shared or separate file systems?), which queuing system it uses (slurm, PBS, LSF, none?); and c) how many parallel jobs and execution threads per job to use when running the calculations. This step can be performed when building up the call to the \code{Compute()} function. - } - \item{ -Running the computation. startR transparently implements the Map Reduce paradigm, according to the settings in the previous steps. The progress can optionally be monitored with the EC-Flow workflow management tool. When the computation ends, a report of performance timings is displayed. This step can be triggered with the \code{Compute()} function. - } +\command{ +\cr # data <- Start(source = list( +\cr # list(name = 'sourceA', +\cr # path = paste0('/sourceA/$variable$/', +\cr # '$section$/$item$.data')), +\cr # list(name = 'sourceB', +\cr # path = paste0('/sourceB/$section$/', +\cr # '$variable$/$item$.data')) +\cr # ), +\cr # variable = 'sales', +\cr # section = 'first', +\cr # item = indices(c(1, 3)), +\cr # item_depends = 'section', +\cr # store = 'Barcelona', +\cr # store_var = 'store_location', +\cr # month = 'all', +\cr # return_vars = list(store_location = c('source', +\cr # 'item'))) +\cr # # Checking the structure of the returned variables +\cr # str(found_data$Variables) +\cr # Named list +\cr # ..$common: NULL +\cr # ..$sourceA: Named list +\cr # .. ..$store_location: char[1:18(3d)] 'Barcelona' 'Barcelona' ... +\cr # ..$sourceB: Named list +\cr # .. ..$store_location: char[1:18(3d)] 'Barcelona' 'Barcelona' ... +\cr # # Checking the dimensions of the returned variable +\cr # # for the source A +\cr # dim(found_data$Variables$sourceA) +\cr # item store +\cr # 3 3 } -\code{startR} is not bound to a specific file format. Interface functions to custom file formats can be provided for \code{Start()} to read them. As of April 2017 \code{startR} includes interface functions to the following file formats: \cr\cr +The names of the requested variables do not necessarily have to match the +actual variable names inside the files. A list of alternative names to be +seeked can be specified via the parameter 'synonims'.} + +\item{synonims}{A named list where the names are the requested variable or +dimension names, and the values are vectors of character strings with +alternative names to seek for such dimension or variable.\cr\cr +In some requests, data from different sources may follow different naming +conventions for the dimensions or variables, or even files in the same source +could have varying names. This parameter is in order for Start() to +properly identify the dimensions or variables with different names. +\cr\cr +In the example used in parameter 'return_vars', it may be the case that +the two involved data sources follow slightly different naming conventions. +For example, source A uses 'sect' as name for the sections dimension, whereas +source B uses 'section'; source A uses 'store_loc' as variable name for the +store locations, whereas source B uses 'store_location'. 
This can be taken +into account as follows: \cr -\itemize{ - \item{ -NetCDF - } -} -} -\usage{ -Start(..., - return_vars = NULL, - synonims = NULL, - file_opener = NcOpener, - file_var_reader = NcVarReader, - file_dim_reader = NcDimReader, - file_data_reader = NcDataReader, - file_closer = NcCloser, - transform = NULL, - transform_params = NULL, - transform_vars = NULL, - transform_extra_cells = 0, - apply_indices_after_transform = FALSE, - pattern_dims = NULL, - metadata_dims = NULL, - selector_checker = SelectorChecker, - merge_across_dims = FALSE, - split_multiselected_dims = FALSE, - path_glob_permissive = FALSE, - retrieve = FALSE, - num_procs = 1, - silent = FALSE, - debug = FALSE) +\command{ +\cr # data <- Start(source = list( +\cr # list(name = 'sourceA', +\cr # path = paste0('/sourceA/$variable$/', +\cr # '$section$/$item$.data')), +\cr # list(name = 'sourceB', +\cr # path = paste0('/sourceB/$section$/', +\cr # '$variable$/$item$.data')) +\cr # ), +\cr # variable = 'sales', +\cr # section = 'first', +\cr # item = indices(c(1, 3)), +\cr # item_depends = 'section', +\cr # store = 'Barcelona', +\cr # store_var = 'store_location', +\cr # month = 'all', +\cr # return_vars = list(store_location = c('source', +\cr # 'item')), +\cr # synonims = list( +\cr # section = c('sec', 'section'), +\cr # store_location = c('store_loc', +\cr # 'store_location') +\cr # )) } -\arguments{ - \item{\dots}{ -When willing to retrieve data from one or a collection of data sets, the involved data can be perceived as belonging to a large multi-dimensional array. For instance, let us consider an example case. We want to retrieve data from a source, which contains data for the number of monthly sales of various items, and also for their retail price each month. The data on source is stored as follows: -\cr +\cr} + +\item{file_opener}{A function that receives as a single parameter + 'file_path' a character string with the path to a file to be opened, + and returns an object with an open connection to the file (optionally with + header information) on success, or returns NULL on failure. +\cr\cr +This parameter takes by default NcOpener() (an opener function for NetCDF +files). +\cr\cr +See NcOpener() for a template to build a file opener for your own file +format.} + +\item{file_var_reader}{A function with the header \code{file_path = NULL}, + \code{file_object = NULL}, \code{file_selectors = NULL}, \code{var_name}, + \code{synonims} that returns an array with auxiliary data (i.e. data from a + variable) inside a file. Start() will provide automatically either a + 'file_path' or a 'file_object' to the 'file_var_reader' + function (the function has to be ready to work whichever of these two is + provided). The parameter 'file_selectors' will also be provided + automatically to the variable reader, containing a named list where the + names are the names of the file dimensions of the queried data set (see + documentation on \code{\dots}) and the values are single character strings + with the components used to build the path to the file being read (the one + provided in 'file_path' or 'file_object'). The parameter 'var_name' + will be filled in automatically by Start() also, with the name of one + of the variales to be read. The parameter 'synonims' will be filled in + with exactly the same value as provided in the parameter 'synonims' in + the call to Start(), and has to be used in the code of the variable + reader to check for alternative variable names inside the target file. 
The + 'file_var_reader' must + return a (multi)dimensional array with named + dimensions, and optionally with the attribute 'variables' with other + additional metadata on the retrieved variable. +\cr\cr +Usually, the 'file_var_reader' should be a degenerate case of the +'file_data_reader' (see documentation on the corresponding parameter), +so it is recommended to code the 'file_data_reader' first. +\cr\cr +This parameter takes by default NcVarReader() (a variable reader function +for NetCDF files). +\cr\cr +See NcVarReader() for a template to build a variable reader for your own +file format.} + +\item{file_dim_reader}{A function with the header \code{file_path = NULL}, + \code{file_object = NULL}, \code{file_selectors = NULL}, \code{synonims} + that returns a named numeric vector where the names are the names of the + dimensions of the multidimensional data array in the file and the values are + the sizes of such dimensions. Start() will provide automatically + either a 'file_path' or a 'file_object' to the + 'file_dim_reader' function (the function has to be ready to work + whichever of these two is provided). The parameter 'file_selectors' + will also be provided automatically to the dimension reader, containing a + named list where the names are the names of the file dimensions of the + queried data set (see documentation on \code{\dots}) and the values are + single character strings with the components used to build the path to the + file being read (the one provided in 'file_path' or 'file_object'). + The parameter 'synonims' will be filled in with exactly the same value + as provided in the parameter 'synonims' in the call to Start(), + and can optionally be used in advanced configurations. +\cr\cr +This parameter takes by default NcDimReader() (a dimension reader +function for NetCDF files). +\cr\cr +See NcDimReader() for an (advanced) template to build a dimension reader +for your own file format.} + +\item{file_data_reader}{A function with the header \code{file_path = NULL}, + \code{file_object = NULL}, \code{file_selectors = NULL}, + \code{inner_indices = NULL}, \code{synonims} that returns a subset of the + multidimensional data array inside a file (even if internally it is not an + array). Start() will provide automatically either a 'file_path' + or a 'file_object' to the 'file_data_reader' function (the + function has to be ready to work whichever of these two is provided). The + parameter 'file_selectors' will also be provided automatically to the + data reader, containing a named list where the names are the names of the + file dimensions of the queried data set (see documentation on \code{\dots}) + and the values are single character strings with the components used to + build the path to the file being read (the one provided in 'file_path' or + 'file_object'). The parameter 'inner_indices' will be filled in + automatically by Start() also, with a named list of numeric vectors, + where the names are the names of all the expected inner dimensions in a file + to be read, and the numeric vectors are the indices to be taken from the + corresponding dimension (the indices may not be consecutive nor in order). + The parameter 'synonims' will be filled in with exactly the same value + as provided in the parameter 'synonims' in the call to Start(), + and has to be used in the code of the data reader to check for alternative + dimension names inside the target file. 
The 'file_data_reader' must + return a (multi)dimensional array with named dimensions, and optionally with + the attribute 'variables' with other additional metadata on the retrieved + data. +\cr\cr +Usually, 'file_data_reader' should use 'file_dim_reader' +(see documentation on the corresponding parameter), so it is recommended to +code 'file_dim_reader' first. +\cr\cr +This parameter takes by default NcDataReader() (a data reader function +for NetCDF files). +\cr\cr +See NcDataReader() for a template to build a data reader for your own +file format.} + +\item{file_closer}{A function that receives as a single parameter + 'file_object' an open connection (as returned by 'file_opener') + to one of the files to be read, optionally with header information, and + closes the open connection. Always returns NULL. +\cr\cr +This parameter takes by default NcCloser() (a closer function for NetCDF +files). +\cr\cr +See NcCloser() for a template to build a file closer for your own file +format.} + +\item{transform}{A function with the header \code{data_array}, +\code{variables}, \code{file_selectors = NULL}, \code{\dots}. It receives as +input, through the parameter \code{data_array}, a subset of a +multidimensional array (as returned by 'file_data_reader'), applies a +transformation to it and returns it, preserving the number of dimensions but +potentially modifying their size. This transformation may require data from +other auxiliary variables, automatically provided to 'transform' +through the parameter 'variables', in the form of a named list where +the names are the variable names and the values are (multi)dimensional +arrays. Which variables need to be sent to 'transform' can be specified +with the parameter 'transform_vars' in Start(). The parameter +'file_selectors' will also be provided automatically to +'transform', containing a named list where the names are the names of +the file dimensions of the queried data set (see documentation on +\code{\dots}) and the values are single character strings with the +components used to build the path to the file the subset being processed +belongs to. The parameter \code{\dots} will be filled in with other +additional parameters to adjust the transformation, exactly as provided in +the call to Start() via the parameter 'transform_params'.} + +\item{transform_params}{A named list with additional parameters to be sent to +the 'transform' function (if specified). See documentation on parameter +'transform' for details.} + +\item{transform_vars}{A vector of character strings with the names of +auxiliary variables to be sent to the 'transform' function (if +specified). All the variables to be sent to 'transform' must also +have been requested as return variables in the parameter 'return_vars' +of Start().} + +\item{transform_extra_cells}{An integer indicating the number of extra indices to retrieve from the +data set, beyond the requested indices in \code{\dots}, in order for +'transform' to have additional information available to properly apply +whichever transformation is needed. As many as +'transform_extra_cells' will be retrieved beyond each of the limits for +each of those inner dimensions associated to a coordinate variable and sent +to 'transform' (i.e. present in 'transform_vars'). After +'transform' has finished, Start() will take a +subset of the result again, so that the returned data fall within the specified +bounds in \code{\dots}. 
The default value is 2.} + +\item{apply_indices_after_transform}{A logical value indicating, when a +'transform' is specified in Start() and numeric indices are +provided for any of the inner dimensions that depend on coordinate variables, +whether these numeric indices should be made effective (retrieved) before or after applying the +transformation. This flag allows adjusting that behaviour. +It takes FALSE by default (numeric indices are applied before sending +data to 'transform').} + +\item{pattern_dims}{A character string indicating the name of the dimension +with path pattern specifications (see \code{\dots} for details). If not +specified, Start() assumes the first provided dimension is the pattern +dimension, with a warning.} + +\item{metadata_dims}{A vector of character strings with the names of the file +dimensions for which to return metadata. As noted in 'file_data_reader', +the data reader can optionally return auxiliary data via the attribute +'variables' of the returned array. Start() by default returns the +auxiliary data read for only the first file of each source (or data set) in +the pattern dimension (see \code{\dots} for info on what the pattern +dimension is). However it can be configured to return the metadata for all +the files along any set of file dimensions. The parameter 'metadata_dims' + allows configuring this level of granularity of the returned metadata.} + +\item{selector_checker}{A function used internally by Start() to +translate a set of selectors (values for a dimension associated to a +coordinate variable) into a set of numeric indices. It takes by default +SelectorChecker() and, in principle, it should not be required to +change it for customized file formats. The option to replace it is left open +for more versatility. See the code of SelectorChecker() for details on +the inputs, functioning and outputs of a selector checker.} + +\item{merge_across_dims}{A logical value indicating whether to merge +dimensions across which another dimension extends (according to the +'_across' parameters). Takes the value FALSE by default. For +example, if the dimension 'time' extends across the dimension 'chunk' and +\code{merge_across_dims = TRUE}, the resulting data array will contain +only the dimension 'time', with a length equal to all the chunks together.} + +\item{merge_across_dims_narm}{A logical value indicating whether to remove +the additional NAs from data when parameter 'merge_across_dims' is TRUE. +It is helpful when the length of the to-be-merged dimension is different +across another dimension. For example, if the dimension 'time' extends +across the dimension 'chunk', and the time length is 2 along the first chunk +but 10 along the second chunk, setting this parameter to TRUE +removes the additional 8 NAs at positions 3 to 10. The default value is FALSE.} + +\item{split_multiselected_dims}{A logical value indicating whether to split a +dimension that has been selected with a multidimensional array of selectors +into as many dimensions as present in the selector array. The default value +is FALSE.} + +\item{path_glob_permissive}{A logical value or an integer specifying how many + folder levels in the path pattern, beginning from the end, the shell glob + expressions must be preserved and worked out for each file. The default + value is FALSE, which is equivalent to 0. TRUE is equivalent to 1.\cr\cr +When specifying a path pattern for a dataset, it might contain shell glob +expressions. 
For each dataset, the first file matching the path pattern is +found, and the found file is used to work out fixed values for the glob +expressions that will be used for all the files of the dataset. However in +some cases the values of the shell glob expressions may not be constant for +all files in a dataset, and they need to be worked out for each file +involved.\cr\cr +For example, a path pattern could be as follows: \cr +\code{'/path/to/dataset/$var$_*/$date$_*_foo.nc'}. \cr Leaving +\code{path_glob_permissive = FALSE} will trigger automatic seek of the + contents to replace the asterisks (e.g. the first asterisk matches with + \code{'bar'} and the second with \code{'baz'}. The found contents will be + used for all files in the dataset (in the example, the path pattern will be + fixed to\cr \code{'/path/to/dataset/$var$_bar/$date$_baz_foo.nc'}. However, if + any of the files in the dataset have other contents in the position of the + asterisks, Start() will not find them (in the example, a file like \cr + \code{'/path/to/dataset/precipitation_bar/19901101_bin_foo.nc'} would not be + found). Setting \code{path_glob_permissive = 1} would preserve global + expressions in the latest level (in the example, the fixed path pattern + would be\cr \code{'/path/to/dataset/$var$_bar/$date$_*_foo.nc'}, and the + problematic file mentioned before would be found), but of course this would + slow down the Start() call if the dataset involves a large number of + files. Setting \code{path_glob_permissive = 2} would leave the original path + pattern with the original glob expressions in the 1st and 2nd levels (in the + example, both asterisks would be preserved, thus would allow Start() + to recognize files such as \cr + \code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'}).} + +\item{retrieve}{A logical value indicating whether to retrieve the data +defined in the Start() call or to explore only its dimension lengths +and names, and the values for the file and inner dimensions. The default +value is FALSE.} + +\item{num_procs}{An integer of number of processes to be created for the +parallel execution of the retrieval/transformation/arrangement of the +multiple involved files in a call to Start(). If set to NULL, +takes the number of available cores (as detected by detectCores() in +the package 'future'). The default value is 1 (no parallel execution).} + +\item{silent}{A logical value of whether to display progress messages (FALSE) +or not (TRUE). The default value is FALSE.} + +\item{debug}{A logical value of whether to return detailed messages on the +progress and operations in a Start() call (TRUE) or not (FALSE). The +default value is FALSE.} + +\item{\dots}{A selection of custemized parameters depending on the data +format. When we retrieve data from one or a collection of data sets, +the involved data can be perceived as belonging to a large multi-dimensional +array. For instance, let us consider an example case. We want to retrieve data +from a source, which contains data for the number of monthly sales of various +items, and also for their retail price each month. The data on source is +stored as follows:\cr\cr \command{ \cr # /data/ \cr # |-> sales/ @@ -83,41 +403,55 @@ When willing to retrieve data from one or a collection of data sets, the involve \cr # |-> item_d.data \cr # |-> item_e.data \cr # |-> item_f.data -} -\cr\cr -Each item file contains data, stored in whichever format, for the sales or prices over a time period, e.g. 
for the past 24 months, registered at 100 different stores over the world. Whichever the format it is stored in, each file can be perceived as a container of a data array of 2 dimensions, time and store. Let us assume the '.data' format allows to keep a name for each of these dimensions, and the actual names are 'time' and 'store'. -\cr\cr -The different item files for sales or prices can be perceived as belonging to an 'item' dimension of length 3, and the two groups of three items to a 'section' dimension of length 2, and the two groups of two sections (one with the sales and the other with the prices) can be perceived as belonging also to another dimension 'variable' of length 2. Even the source can be perceived as belonging to a dimension 'source' of length 1. -\cr\cr -All in all, in this example, the whole data could be perceived as belonging to a multidimensional 'large array' of dimensions -\cr +}\cr\cr +Each item file contains data, stored in whichever format, for the sales or +prices over a time period, e.g. for the past 24 months, registered at 100 +different stores over the world. Whichever the format it is stored in, each +file can be perceived as a container of a data array of 2 dimensions, time and +store. Let us assume the '.data' format allows to keep a name for each of +these dimensions, and the actual names are 'time' and 'store'.\cr\cr +The different item files for sales or prices can be perceived as belonging to +an 'item' dimension of length 3, and the two groups of three items to a +'section' dimension of length 2, and the two groups of two sections (one with +the sales and the other with the prices) can be perceived as belonging also to +another dimension 'variable' of length 2. Even the source can be perceived as +belonging to a dimension 'source' of length 1.\cr\cr +All in all, in this example, the whole data could be perceived as belonging to +a multidimensional 'large array' of dimensions\cr \command{ \cr # source variable section item store month \cr # 1 2 2 3 100 24 } \cr\cr -The dimensions of this 'large array' can be classified in two types. The ones that group actual files (the file dimensions) and the ones that group data values inside the files (the inner dimensions). In the example, the file dimensions are 'source', 'variable', 'section' and 'item', whereas the inner dimensions are 'store' and 'month'. -\cr\cr -Having the dimensions of our target sources in mind, the parameter \dots expects to receive information on: - \itemize{ - \item{ -The names of the expected dimensions of the 'large dataset' we want to retrieve data from - } - \item{ +The dimensions of this 'large array' can be classified in two types. The ones +that group actual files (the file dimensions) and the ones that group data +values inside the files (the inner dimensions). In the example, the file +dimensions are 'source', 'variable', 'section' and 'item', whereas the inner +dimensions are 'store' and 'month'. 
+\cr\cr +Having the dimensions of our target sources in mind, the parameter \code{\dots} +expects to receive information on: + \itemize{ + \item{ +The names of the expected dimensions of the 'large dataset' we want to +retrieve data from + } + \item{ The indices to take from each dimension (and other constraints) - } - \item{ + } + \item{ How to reorder the dimension if needed - } - \item{ + } + \item{ The location and organization of the files of the data sets - } - } -For each dimension, the 3 first information items can be specified with a set of parameters to be provided through \dots. For a given dimension 'dimname', six parameters can be specified: -\cr + } + } +For each dimension, the 3 first information items can be specified with a set +of parameters to be provided through \code{\dots}. For a given dimension +'dimname', six parameters can be specified:\cr \command{ \cr # dimname = , # 'all' / 'first' / 'last' / -\cr # # indices(c(1, 10, 20)) / +\cr # # indices(c(1, 10, 20)) / \cr # # indices(c(1:20)) / \cr # # indices(list(1, 20)) / \cr # # c(1, 10, 20) / c(1:20) / @@ -129,23 +463,93 @@ For each dimension, the 3 first information items can be specified with a set of \cr # dimname_across = } \cr\cr -The \bold{indices to take} can be specified in three possible formats (see code comments above for examples). The first format consists in using character tags, such as 'all' (take all the indices available for that dimension), 'first' (take only the first) and 'last' (only the last). The second format consists in using numeric indices, which have to be wrapped in a call to the \code{indices()} helper function. For the second format, either a vector of numeric indices can be provided, or a list with two numeric indices can be provided to take all the indices in the range between the two specified indices (both extremes inclusive). The third format consists in providing a vector character strings (for file dimensions) or of values of whichever type (for inner dimensions). For the file dimensions, the provided character strings in the third format will be used as components to build up the final path to the files (read further). For inner dimensions, the provided values in the third format will be compared to the values of an associated coordinate variable (must be specified in \code{dimname_reorder}, read further), and the indices of the closest values will be retrieved. When using the third format, a list with two values can also be provided to take all the indices of the values within the specified range. -\cr\cr -The \bold{name of the associated coordinate variable} must be a character string with the name of an associated coordinate variable to be found in the data files (in all* of them). For this to work, a \code{file_var_reader} function must be specified when calling \code{Start()} (see parameter 'file_var_reader'). The coordinate variable must also be requested in the parameter \code{return_vars} (see its section for details). This feature only works for inner dimensions. -\cr\cr -The \bold{tolerance value} is useful when indices for an inner dimension are specified in the third format (values of whichever type). In that case, the indices of the closest values in the coordinate variable are seeked. However the closest value might be too distant and we would want to consider no real match exists for such provided value. This is possible via the tolerance. which allows to specify a threshold beyond which not to seek for matching values and mark that index as missing value. 
-\cr\cr -The \bold{reorder_function} is useful when indices for an inner dimension are specified in the third fromat, and the retrieved indices need to be reordered in function of their provided associated variable values. A function can be provided, which receives as input a vector of values, and returns as outputs a list with the components \code{x} with the reordered values, and \code{ix} with the permutation indices. Two reordering functions are included in \code{startR}, the \code{Sort()} and the \code{CircularSort()}. -\cr\cr -The \bold{name of another dimension} to be specified in \code{dimname_depends}, only available for file dimensions, must be a character string with the name of another requested \bold{file dimension} in \dots, and will make \code{Start()} aware that the path components of a file dimension can vary in function of the path component of another file dimension. For instance, in the example above, specifying \code{item_depends = 'section'} will make \code{Start()} aware that the item names vary in function of the section, i.e. section 'electronics' has items 'a', 'b' and 'c' but section 'clothing' has items 'd', 'e', 'f'. Otherwise \code{Start()} would expect to find the same item names in all the sections. -\cr\cr -The \bold{name of another dimension} to be specified in \code{dimname_across}, only available for inner dimensions, must be a character string with the name of another requested \bold{inner dimension} in \dots, and will make \code{Start()} aware that an inner dimension extends along multiple files. For instance, let us imagine that in the example above, the records for each item are so large that it becomes necessary to split them in multiple files each one containing the registers for a different period of time, e.g. in 10 files with 100 months each ('item_a_period1.data', 'item_a_period2.data', and so on). In that case, the data can be perceived as having an extra file dimension, the 'period' dimension. The inner dimension 'month' would extend across multiple files, and providing the parameter \code{month = indices(1, 300)} would make \code{Start()} crash because it would perceive we have made a request out of bounds (each file contains 100 'month' indices, but we requested 1 to 300). This can be solved by specifying the parameter \code{month_across = period} (along with the full specification of the dimension 'period'). +The \bold{indices to take} can be specified in three possible formats (see +code comments above for examples). The first format consists in using +character tags, such as 'all' (take all the indices available for that +dimension), 'first' (take only the first) and 'last' (only the last). The +second format consists in using numeric indices, which have to be wrapped in a +call to the indices() helper function. For the second format, either a +vector of numeric indices can be provided, or a list with two numeric indices +can be provided to take all the indices in the range between the two specified +indices (both extremes inclusive). The third format consists in providing a +vector character strings (for file dimensions) or of values of whichever type +(for inner dimensions). For the file dimensions, the provided character +strings in the third format will be used as components to build up the final +path to the files (read further). 
For inner dimensions, the provided values in +the third format will be compared to the values of an associated coordinate +variable (must be specified in '_reorder', read further), and the +indices of the closest values will be retrieved. When using the third format, +a list with two values can also be provided to take all the indices of the +values within the specified range. +\cr\cr +The \bold{name of the associated coordinate variable} must be a character +string with the name of an associated coordinate variable to be found in the +data files (in all* of them). For this to work, a 'file_var_reader' +function must be specified when calling Start() (see parameter +'file_var_reader'). The coordinate variable must also be requested in the +parameter 'return_vars' (see its section for details). This feature only +works for inner dimensions. +\cr\cr +The \bold{tolerance value} is useful when indices for an inner dimension are +specified in the third format (values of whichever type). In that case, the +indices of the closest values in the coordinate variable are sought. However, +the closest value might be too distant, and we may want to consider that no real +match exists for the provided value. This is possible via the tolerance, +which allows specifying a threshold beyond which matching values are not +sought and that index is marked as a missing value. +\cr\cr +The \bold{reorder_function} is useful when indices for an inner dimension are +specified in the third format, and the retrieved indices need to be reordered +as a function of their provided associated variable values. A function can be +provided, which receives as input a vector of values, and returns as outputs a +list with the components \code{$x} with the reordered values, and \code{$ix} +with the permutation indices. Two reordering functions are included in +startR, the Sort() and the CircularSort(). +\cr\cr +The \bold{name of another dimension} to be specified in _depends, +only available for file dimensions, must be a character string with the name +of another requested \bold{file dimension} in \code{\dots}, and will make +Start() aware that the path components of a file dimension can vary as a +function of the path component of another file dimension. For instance, in the +example above, specifying \code{item_depends = 'section'} will make +Start() aware that the item names vary as a function of the section, i.e. +section 'electronics' has items 'a', 'b' and 'c' but section 'clothing' has +items 'd', 'e', 'f'. Otherwise Start() would expect to find the same +item names in all the sections. +\cr\cr +The \bold{name of another dimension} to be specified in '_across', +only available for inner dimensions, must be a character string with the name +of another requested \bold{inner dimension} in \code{\dots}, and will make +Start() aware that an inner dimension extends along multiple files. For +instance, let us imagine that in the example above, the records for each item +are so large that it becomes necessary to split them into multiple files, each +one containing the records for a different period of time, e.g. in 10 files +with 100 months each ('item_a_period1.data', 'item_a_period2.data', and so on). +In that case, the data can be perceived as having an extra file dimension, the +'period' dimension. 
The inner dimension 'month' would extend across multiple +files, and providing the parameter \code{month = indices(list(1, 300))} would make +Start() crash because it would perceive we have made a request out of +bounds (each file contains 100 'month' indices, but we requested 1 to 300). +This can be solved by specifying the parameter \code{month_across = 'period'} (along +with the full specification of the dimension 'period'). \cr\cr \bold{Defining the path pattern} \cr -As mentioned above, the parameter \dots also expects to receive information with the location of the data files. In order to do this, a special dimension must be defined. In that special dimension, in place of specifying indices to take, a path pattern must be provided. The path pattern is a character string that encodes the way the files are organized in their source. It must be a path to one of the data set files in an accessible local or remote file system, or a URL to one of the files provided by a local or remote server. The regions of this path that vary across files (along the file dimensions) must be replaced by wildcards. The wildcards must match any of the defined file dimensions in the call to \code{Start()} and must be delimited with heading and trailing '$'. Shell globbing expressions can be used in the path pattern. See the next code snippet for an example of a path pattern. -\cr\cr -All in all, the call to \code{Start()} to load the entire data set in the example of store item sales, would look as follows: +As mentioned above, the parameter \dots also expects to receive information +with the location of the data files. In order to do this, a special dimension +must be defined. In that special dimension, in place of specifying indices to +take, a path pattern must be provided. The path pattern is a character string +that encodes the way the files are organized in their source. It must be a +path to one of the data set files in an accessible local or remote file system, +or a URL to one of the files provided by a local or remote server. The regions +of this path that vary across files (along the file dimensions) must be +replaced by wildcards. The wildcards must match any of the defined file +dimensions in the call to Start() and must be delimited with heading +and trailing '$'. Shell globbing expressions can be used in the path pattern. +See the next code snippet for an example of a path pattern. +\cr\cr +All in all, the call to Start() to load the entire data set in the +example of store item sales would look as follows: \cr \command{ \cr # data <- Start(source = paste0('/data/$variable$/', @@ -158,16 +562,22 @@ All in all, the call to \code{Start()} to load the entire data set in the exampl \cr # month = 'all') } \cr\cr -Note that in this example it would still be pending to properly define the parameters \code{file_opener}, \code{file_closer}, \code{file_dim_reader}, \code{file_var_reader} and \code{file_data_reader} for the '.data' file format (see the corresponding sections). +Note that in this example it would still be pending to properly define the +parameters 'file_opener', 'file_closer', 'file_dim_reader', +'file_var_reader' and 'file_data_reader' for the '.data' file format +(see the corresponding sections). 
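+\cr\cr
+As an orientation only, once such functions existed (the names below are
+hypothetical, and each one would have to comply with the interface described
+in the documentation of the corresponding parameter), they would be plugged
+into the same call through these parameters:
+\cr
+\command{
+\cr # data <- Start(source = paste0('/data/$variable$/',
+\cr #                               '$section$/$item$.data'),
+\cr #               ..., # same dimension selectors as in the call above
+\cr #               file_opener = dataOpener,
+\cr #               file_dim_reader = dataDimReader,
+\cr #               file_var_reader = dataVarReader,
+\cr #               file_data_reader = dataDataReader,
+\cr #               file_closer = dataCloser)
+}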
\cr\cr -The call to \code{Start()} will return a multidimensional R array with the following dimensions: +The call to Start() will return a multidimensional R array with the +following dimensions: \cr \command{ \cr # source variable section item store month \cr # 1 2 2 3 100 24 } \cr -The dimension specifications in the \dots do not have to follow any particular order. The returned array will have the dimensions in the same order as they have been specified in the call. For example, the following call: +The dimension specifications in the \code{\dots} do not have to follow any +particular order. The returned array will have the dimensions in the same order +as they have been specified in the call. For example, the following call: \cr \command{ \cr # data <- Start(source = paste0('/data/$variable$/', @@ -187,7 +597,11 @@ would return an array with the following dimensions: \cr # 1 24 100 3 2 2 } \cr\cr -Next, a more advanced example to retrieve data for only the sales records, for the first section ('electronics'), for the 1st and 3rd items and for the stores located in Barcelona (assuming the files contain the variable 'store_location' with the name of the city each of the 100 stores are located at): +Next, a more advanced example to retrieve data for only the sales records, for +the first section ('electronics'), for the 1st and 3rd items and for the +stores located in Barcelona (assuming the files contain the variable +'store_location' with the name of the city each of the 100 stores are located +at): \cr \command{ \cr # data <- Start(source = paste0('/data/$variable$/', @@ -202,16 +616,22 @@ Next, a more advanced example to retrieve data for only the sales records, for t \cr # return_vars = list(store_location = NULL)) } \cr\cr -The defined names for the dimensions do not necessarily have to match the names of the dimensions inside the file. Lists of alternative names to be seeked can be defined in the parameter \code{synonims}. +The defined names for the dimensions do not necessarily have to match the +names of the dimensions inside the file. Lists of alternative names to be +seeked can be defined in the parameter 'synonims'. \cr\cr -If data from multiple sources (not necessarily following the same structure) has to be retrieved, it can be done by providing a vector of character strings with path pattern specifications, or, in the extended form, by providing a list of lists with the components 'name' and 'path', and the name of the dataset and path pattern as values, respectively. For example: +If data from multiple sources (not necessarily following the same structure) +has to be retrieved, it can be done by providing a vector of character strings +with path pattern specifications, or, in the extended form, by providing a +list of lists with the components 'name' and 'path', and the name of the +dataset and path pattern as values, respectively. 
For example: \cr \command{ \cr # data <- Start(source = list( -\cr # list(name = 'sourceA', +\cr # list(name = 'sourceA', \cr # path = paste0('/sourceA/$variable$/', \cr # '$section$/$item$.data')), -\cr # list(name = 'sourceB', +\cr # list(name = 'sourceB', \cr # path = paste0('/sourceB/$section$/', \cr # '$variable$/$item$.data')) \cr # ), @@ -224,211 +644,160 @@ If data from multiple sources (not necessarily following the same structure) has \cr # month = 'all', \cr # return_vars = list(store_location = NULL)) } -\cr - } - \item{return_vars}{ -Apart from retrieving a multidimensional data array, retrieving auxiliary variables inside the files can also be needed. The parameter \code{return_vars} allows for requesting such variables, as long as a \code{file_var_reader} function is also specified in the call to \code{Start()} (see documentation on the corresponding parameter). -\cr\cr -This parameter expects to receive a named list where the names are the names of the variables to be fetched in the files, and the values are vectors of character strings with the names of the file dimension which to retrieve each variable for, or \code{NULL} if the variable has to be retrieved only once from any (the first) of the involved files. In the case of the the item sales example (see documentation on parameter \dots), the store location variable is requested with the parameter \cr \code{return_vars = list(store_location = NULL)}. This will cause \code{Start()} to fetch once the variable 'store_location' and return it in the component \cr \code{$Variables$common$store_location}, and will be an array of character strings with the location names, with the dimensions \code{c('store' = 100)}. Although useless in this example, we could ask \code{Start()} to fetch and return such variable for each file along the items dimension as follows: \cr \code{return_vars = list(store_location = c('item'))}. In that case, the variable will be fetched once from a file of each of the items, and will be returned as an array with the dimensions \code{c('item' = 3, 'store' = 100)}. -\cr\cr -If a variable is requested along a file dimension that contains path pattern specifications ('source' in the example), the fetched variable values will be returned in the component \code{$Variables$$}. For example: -\cr -\command{ -\cr # data <- Start(source = list( -\cr # list(name = 'sourceA', -\cr # path = paste0('/sourceA/$variable$/', -\cr # '$section$/$item$.data')), -\cr # list(name = 'sourceB', -\cr # path = paste0('/sourceB/$section$/', -\cr # '$variable$/$item$.data')) -\cr # ), -\cr # variable = 'sales', -\cr # section = 'first', -\cr # item = indices(c(1, 3)), -\cr # item_depends = 'section', -\cr # store = 'Barcelona', -\cr # store_var = 'store_location', -\cr # month = 'all', -\cr # return_vars = list(store_location = c('source', -\cr # 'item'))) -\cr # # Checking the structure of the returned variables -\cr # str(found_data$Variables) -\cr # Named list -\cr # ..$common: NULL -\cr # ..$sourceA: Named list -\cr # .. ..$store_location: char[1:18(3d)] 'Barcelona' 'Barcelona' ... -\cr # ..$sourceB: Named list -\cr # .. ..$store_location: char[1:18(3d)] 'Barcelona' 'Barcelona' ... -\cr # # Checking the dimensions of the returned variable -\cr # # for the source A -\cr # dim(found_data$Variables$sourceA) -\cr # item store -\cr # 3 3 -} -\cr\cr -The names of the requested variables do not necessarily have to match the actual variable names inside the files. 
A list of alternative names to be seeked can be specified via the parameter \code{synonims}. - } - \item{synonims}{ -In some requests, data from different sources may follow different naming conventions for the dimensions or variables, or even files in the same source could have varying names. In order for \code{Start()} to properly identify the dimensions or variables with different names, the parameter \code{synonims} can be specified as a named list where the names are requested variable or dimension names, and the values are vectors of character strings with alternative names to seek for such dimension or variable. -\cr\cr -In the example used in parameter \code{return_vars}, it may be the case that the two involved data sources follow slightly different naming conventions. For example, source A uses 'sect' as name for the sections dimension, whereas source B uses 'section'; source A uses 'store_loc' as variable name for the store locations, whereas source B uses 'store_location'. This can be taken into account as follows: -\cr -\command{ -\cr # data <- Start(source = list( -\cr # list(name = 'sourceA', -\cr # path = paste0('/sourceA/$variable$/', -\cr # '$section$/$item$.data')), -\cr # list(name = 'sourceB', -\cr # path = paste0('/sourceB/$section$/', -\cr # '$variable$/$item$.data')) -\cr # ), -\cr # variable = 'sales', -\cr # section = 'first', -\cr # item = indices(c(1, 3)), -\cr # item_depends = 'section', -\cr # store = 'Barcelona', -\cr # store_var = 'store_location', -\cr # month = 'all', -\cr # return_vars = list(store_location = c('source', -\cr # 'item')), -\cr # synonims = list( -\cr # section = c('sec', 'section'), -\cr # store_location = c('store_loc', -\cr # 'store_location') -\cr # )) +\cr} } -\cr - } - \item{file_opener}{ -A function that receives as a single parameter (\code{file_path}) a character string with the path to a file to be opened, and returns an object with an open connection to the file (optionally with header information) on success, or returns \code{NULL} on failure. -\cr\cr -This parameter takes by default \code{NcOpener} (an opener function for NetCDF files). -\cr\cr -See \code{NcOpener} for a template to build a file opener for your own file format. - } - \item{file_var_reader}{ -A function with the header \code{file_path = NULL}, \code{file_object = NULL}, \code{file_selectors = NULL}, \code{var_name}, \code{synonims} that returns an array with auxiliary data (i.e. data from a variable) inside a file. \code{Start()} will provide automatically either a \code{file_path} or a \code{file_object} to the \code{file_var_reader} function (the function has to be ready to work whichever of these two is provided). The parameter \code{file_selectors} will also be provided automatically to the variable reader, containing a named list where the names are the names of the file dimensions of the queried data set (see documentation on \dots) and the values are single character strings with the components used to build the path to the file being read (the one provided in \code{file_path} or \code{file_object}). The parameter \code{var_name} will be filled in automatically by \code{Start()} also, with the name of one of the variales to be read. The parameter \code{synonims} will be filled in with exactly the same value as provided in the parameter \code{synonims} in the call to \code{Start()}, and has to be used in the code of the variable reader to check for alternative variable names inside the target file. 
The \code{file_var_reader} must return a (multi)dimensional array with named dimensions, and optionally with the attribute 'variales' with other additional metadata on the retrieved variable. -\cr\cr -Usually, the \code{file_var_reader} should be a degenerate case of the \code{file_data_reader} (see documentation on the corresponding parameter), so it is recommended to code the \code{file_data_reder} in first place. -\cr\cr -This parameter takes by default \code{NcVarReader} (a variable reader function for NetCDF files). -\cr\cr -See \code{NcVarReader} for a template to build a variale reader for your own file format. - } - \item{file_dim_reader}{ -A function with the header \code{file_path = NULL}, \code{file_object = NULL}, \code{file_selectors = NULL}, \code{synonims} that returns a named numeric vector where the names are the names of the dimensions of the multidimensional data array in the file and the values are the sizes of such dimensions. \code{Start()} will provide automatically either a \code{file_path} or a \code{file_object} to the \code{file_dim_reader} function (the function has to be ready to work whichever of these two is provided). The parameter \code{file_selectors} will also be provided automatically to the dimension reader, containing a named list where the names are the names of the file dimensions of the queried data set (see documentation on \dots) and the values are single character strings with the components used to build the path to the file being read (the one provided in \code{file_path} or \code{file_object}). The parameter \code{synonims} will be filled in with exactly the same value as provided in the parameter \code{synonims} in the call to \code{Start()}, and can optionally be used in advanced configurations. -\cr\cr -This parameter takes by default \code{NcDimReader} (a dimension reader function for NetCDF files). -\cr\cr -See \code{NcDimReader} for a(n advanced) template to build a dimension reader for your own file format. - } - \item{file_data_reader}{ -A function with the header \code{file_path = NULL}, \code{file_object = NULL}, \code{file_selectors = NULL}, \code{inner_indices = NULL}, \code{synonims} that returns a subset of the multidimensional data array inside a file (even if internally it is not an array). \code{Start()} will provide automatically either a \code{file_path} or a \code{file_object} to the \code{file_data_reader} function (the function has to be ready to work whichever of these two is provided). The parameter \code{file_selectors} will also be provided automatically to the data reader, containing a named list where the names are the names of the file dimensions of the queried data set (see documentation on \dots) and the values are single character strings with the components used to build the path to the file being read (the one provided in \code{file_path} or \code{file_object}). The parameter \code{inner_indices} will be filled in automatically by \code{Start()} also, with a named list of numeric vectors, where the names are the names of all the expected inner dimensions in a file to be read, and the numeric vectors are the indices to be taken from the corresponding dimension (the indices may not be consecutive nor in order). The parameter \code{synonims} will be filled in with exactly the same value as provided in the parameter \code{synonims} in the call to \code{Start()}, and has to be used in the code of the data reader to check for alternative dimension names inside the target file. 
The \code{file_data_reader} must return a (multi)dimensional array with named dimensions, and optionally with the attribute 'variales' with other additional metadata on the retrieved data. -\cr\cr -Usually, the \code{file_data_reader} should use the \code{file_dim_reader} (see documentation on the corresponding parameter), so it is recommended to code the \code{file_dim_reder} in first place. -\cr\cr -This parameter takes by default \code{NcDataReader} (a data reader function for NetCDF files). -\cr\cr -See \code{NcDataReader} for a template to build a data reader for your own file format. - } - \item{file_closer}{ -A function that receives as a single parameter (\code{file_object}) an open connection (as returned by \code{file_opener}) to one of the files to be read, optionally with header information, and closes the open connection. Always returns \code{NULL}. -\cr\cr -This parameter takes by default \code{NcCloser} (a closer function for NetCDF files). -\cr\cr -See \code{NcCloser} for a template to build a file closer for your own file format. - } - \item{transform}{ -A function with the header \code{dara_array}, \code{variables}, \code{file_selectors = NULL}, \code{\dots}. It receives as input, through the parameter \code{data_array}, a subset of a multidimensional array (as returned by \code{file_data_reader}), applies a transformation to it and returns it, preserving the amount of dimensions but potentially modifying their size. This transformation may require data from other auxiliary variables, automatically provided to \code{transform} through the parameter \code{variables}, in the form of a named list where the names are the variable names and the values are (multi)dimensional arrays. Which variables need to be sent to \code{transform} can be specified with the parameter \code{transform_vars} in \code{Start()}. The parameter \code{file_selectors} will also be provided automatically to \code{transform}, containing a named list where the names are the names of the file dimensions of the queried data set (see documentation on \dots) and the values are single character strings with the components used to build the path to the file the subset being processed belongs to. The parameter \dots will be filled in with other additional parameters to adjust the transformation, exactly as provided in the call to \code{Start()} via the parameter \code{transform_params}. - } - \item{transform_params}{ -Named list with additional parameters to be sent to the \code{transform} function (if specified). See documentation on \code{transform} for details. - } - \item{transform_vars}{ -Vector of character strings with the names of auxiliary variables to be sent to the \code{transform} function (if specified). All the variables to be sent to \code{transform} must also have been requested as return variables in the parameter \code{return_vars} of \code{Start()}. - } - \item{transform_extra_cells}{ -Number of extra indices to retrieve from the data set, beyond the requested indices in \dots, in order for \code{transform} to dispose of additional information to properly apply whichever transformation (if needed). As many as \code{transform_extra_cells} will be retrieved beyond each of the limits for each of those inner dimensions associated to a coordinate variable and sent to \code{transform} (i.e. present in \code{transform_vars}). After \code{transform} has finished, \code{Start()} will take again and return a subset of the result, for the returned data to fall within the specified bounds in \dots. 
- } - \item{apply_indices_after_transform}{ -When a \code{transform} is specified in \code{Start()} and numeric indices are provided for any of the inner dimensions that depend on coordinate variables, these numeric indices can be made effective (retrieved) before applying the transformation or after. The boolean flab \code{apply_indices_after_transform} allows to adjust this behaviour. It takes \code{FALSE} by default (numeric indices are applied before sending data to \code{transform}). - } - \item{pattern_dims}{ -Name of the dimension with path pattern specifications (see \dots for details). If not specified, \code{Start()} assumes the first provided dimension is the pattern dimension, with a warning. - } - \item{metadata_dims}{ -It expects to receive a vector of character strings with the names of the file dimensions which to return metadata for. As noted in \code{file_data_reader}, the data reader can optionally return auxiliary data via the attribute 'variables' of the returned array. \code{Start()} by default returns the auxiliary data read for only the first file of each source (or data set) in the pattern dimension (see \dots for info on what the pattern dimension is). However it can be configured to return the metadata for all the files along any set of file dimensions. The parameter \code{metadata_dims} allows to configure this level of granularity of the returned metadata. - } - \item{selector_checker}{ -Function used internaly by \code{Start()} to translate a set of selectors (values for a dimension associated to a coordinate variable) into a set of numeric indices. It takes by default \code{SelectorChecker} and, in principle, it should not be required to change it for customized file formats. The option to replace it is left open for more versatility. See the code of \code{SelectorChecker} for details on the inputs, functioning and outputs of a selector checker. - } - \item{merge_across_dims}{ -Whether to merge dimensions across which another dimension extends (according to the \code{*_across} parameters). Takes the value \code{FALSE} by default. For example, if the dimension 'time' extends across the dimension 'chunk' and \code{merge_across_dims = TRUE}, the resulting data array will only contain only the dimension 'time' as long as all the chunks together. - } - \item{split_multiselected_dims}{ -Whether to split a dimension that has been selected with a multidimensional array of selectors into as many dimensions as present in the selector array. Takes the value \code{FALSE} by default. - } - \item{path_glob_permissive}{ -When specifying a path pattern for a dataset, it might contain shell glob experissions. For each dataset, the first file matching the path pattern is found, and the found file is used to work out fixed values for the glob expressions that will be used for all the files of the dataset. However in some cases the values of the shell glob expressions may not be constant for all files in a dataset, and they need to be worked out for each file involved. In this situation, the \code{path_glob_permissive} can be set to an integer value specifying for how many folder levels in the path pattern, beginning from the end, the shell glob expressions mut be preserved and worked out for each file.\cr The default value is \code{FALSE}, which is equivalent to \code{0}. Setting \code{TRUE} is equivalent to \code{1}.\cr For example, a path pattern could be as follows: \code{'/path/to/dataset/$var$_*/$date$_*_foo.nc'}. 
Leaving \code{path_glob_permissive = FALSE} will trigger automatic seek of the contents to replace the asterisks (e.g. the first asterisk matches with \code{'bar'} and the second with \code{'baz'}. The found contents will be used for all files in the dataset (in the example, the path pattern will be fixed to \code{'/path/to/dataset/$var$_bar/$date$_baz_foo.nc'}. However, if any of the files in the dataset have other contents in the position of the asterisks, \code{Start()} will not find them (in the example, a file like \code{'/path/to/dataset/precipitation_bar/19901101_bin_foo.nc'} would not be found). Setting \code{path_glob_permissive = 1} would preserve global expressions in the latest level (in the example, the fixed path pattern would be \code{'/path/to/dataset/$var$_bar/$date$_*_foo.nc'}, and the problematic file mentioned before would be found), but of course this would slow down the \code{Start()} call if the dataset involves a large number of files. Setting \code{path_glob_permissive = 2} would leave the original path pattern with the original glob expressions in the 1st and 2nd levels (in the example, both asterisks would be preserved, thus would allow \code{Start()} to recognize files such as \code{'/path/to/dataset/precipitation_zzz/19901101_yyy_foo.nc'}). - } - \item{retrieve}{ -Logical value telling whether to retrieve the data defined in the \code{Start} call or to explore only its dimension lengths and names, and the values for the file and inner dimensions. Takes FALSE by default. - } - \item{num_procs}{ -Number of processes to be created for the parallel execution of the retrieval / transformation / arrangement of the multiple involved files in a call to \code{Start()}. If set to \code{NULL}, takes the number of available cores (as detected by \code{detectCores()} in the package 'future'). Takes 1 by default (no parallel execution). - } - \item{silent}{ -Boolean flag, whether to display progress messages (FALSE; default) or not (TRUE). - } - \item{debug}{ -Whether to return detailed messages on the progress and operations in a \code{Start} call (TRUE) or not (FALSE; default). - } +\value{ +If \code{retrieve = TRUE} the involved data is loaded into RAM memory + and an object of the class 'startR_cube' with the following components is + returned:\cr + \item{Data}{ + Multidimensional data array with named dimensions, with the data values + requested via \code{\dots} and other parameters. This array can potentially + contain metadata in the attribute 'variables'. + } + \item{Variables}{ + Named list of 1 + N components, containing lists of retrieved variables (as + requested in 'return_vars') common to all the data sources (in the 1st + component, \code{$common}), and for each of the N data sources (named after + the source name, as specified in \dots, or, if not specified, \code{$dat1}, + \code{$dat2}, ..., \code{$datN}). Each of the variables is contained in a + multidimensional array with named dimensions, and potentially with the + attribute 'variables' with additional auxiliary data. + } + \item{Files}{ + Multidimensional character string array with named dimensions. Its dimensions + are the file dimensions (as requested in \code{\dots}). Each cell in this + array contains a path to a retrieved file, or NULL if the corresponding + file was not found.
+ } + \item{NotFoundFiles}{ + Array with the same shape as \code{$Files} but with NULL in the + positions for which the corresponding file was found, and a path to the + expected file in the positions for which the corresponding file was not + found. + } + \item{FileSelectors}{ + Multidimensional character string array with named dimensions, with the same + shape as \code{$Files} and \code{$NotFoundFiles}, which contains the + components used to build up the paths to each of the files in the data + sources. + } +If \code{retrieve = FALSE} the involved data is not loaded into RAM memory and +an object of the class 'startR_header' with the following components is +returned:\cr + \item{Dimensions}{ + Named vector with the dimension lengths and names of the data involved in + the Start() call. + } + \item{Variables}{ + Named list of 1 + N components, containing lists of retrieved variables (as + requested in 'return_vars') common to all the data sources (in the 1st + component, \code{$common}), and for each of the N data sources (named after + the source name, as specified in \dots, or, if not specified, \code{$dat1}, + \code{$dat2}, ..., \code{$datN}). Each of the variables is contained in a + multidimensional array with named dimensions, and potentially with the + attribute 'variables' with additional auxiliary data. + } + \item{Files}{ + Multidimensional character string array with named dimensions. Its dimensions + are the file dimensions (as requested in \dots). Each cell in this array + contains a path to a file to be retrieved (which may exist or not). + } + \item{FileSelectors}{ + Multidimensional character string array with named dimensions, with the same + shape as \code{$Files} and \code{$NotFoundFiles}, which contains the + components used to build up the paths to each of the files in the data + sources. + } + \item{StartRCall}{ + List of parameters sent to the Start() call, with the parameter + 'retrieve' set to TRUE. Intended to be used to retrieve the associated + data a posteriori through a call to do.call(), as sketched below. + } } -\details{ -Check \href{https://earth.bsc.es/gitlab/es/startR}{the startR website} for more information. +\description{ +See the \href{https://earth.bsc.es/gitlab/es/startR}{startR documentation and +tutorial} for a step-by-step explanation on how to use Start().\cr\cr +Nowadays in the era of big data, large multidimensional data sets from +diverse sources need to be combined and processed. Analysis of big data in any +field is often highly complex and time-consuming. Taking subsets of these data +sets and processing them efficiently becomes an indispensable practice. This +technique is also known as Domain Decomposition, Map Reduce or, more commonly, +'chunking'.\cr\cr +startR (Subset, TrAnsform, ReTrieve, arrange and process large +multidimensional data sets in R) is an R project started at BSC with the aim +of developing a tool that allows the user to automatically process large +multidimensional distributed data sets. It is an open source project that is +open to external collaboration and funding, and will continuously evolve to +support as many data set formats as possible while maximizing its efficiency.\cr\cr +startR provides a framework under which a data set (a collection of one +or multiple data files, potentially distributed over various remote servers) +is perceived as if all its files were part of a single large multidimensional +array.
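A minimal sketch of the two retrieval modes documented in the \value section above. It reuses the sample files shipped with the package and assumes, as the component list suggests, that the 'StartRCall' component of the returned header can be extracted with '$' (depending on the implementation it may instead be stored as an attribute):

library(startR)
data_path <- system.file('extdata', package = 'startR')
path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc')
# Declare only (retrieve = FALSE): a 'startR_header' is returned and no data
# is loaded into memory yet.
hdr <- Start(dat = list(list(path = path_obs)),
             var = 'tos', sdate = '200011',
             time = 'all', latitude = 'all', longitude = 'all',
             retrieve = FALSE)
# Load the data a posteriori by replaying the stored call; 'StartRCall' is
# assumed here to carry the same parameters with retrieve = TRUE, as
# documented above.
cube <- do.call(Start, hdr$StartRCall)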
Once such a multidimensional array is declared, any user-defined function +can be applied to the data in an \code{apply}-like fashion, where startR +transparently implements the Map Reduce paradigm. The steps to process a +collection of big data sets are as follows:\cr +\itemize{ + \item{ +Declaring the data set, i.e. declaring the distribution of the data files +involved, the dimensions and shape of the multidimensional array, and the +boundaries of the target data. This step can be performed with the +Start() function. Numeric indices or coordinate values can be used when +fixing the boundaries. It is common to need to apply transformations, +pre-processing or reordering to the data. Start() accepts user-defined +transformation or reordering functions to be applied for such purposes. Once a +data set is declared, a list of involved files, dimension lengths, memory size +and other metadata is made available. Optionally, the data set can be +retrieved and loaded onto the current R session if it is small enough. + } + \item{ +Declaring the workflow of operations to perform on the involved data set(s). +This step can be performed with the Step() and AddStep() functions. + } + \item{ +Defining the computation settings. The mandatory settings include a) how many +subsets to divide the data sets into and along which dimensions; b) which +platform to perform the workflow of operations on (local machine or remote +machine/HPC?), how to communicate with it (unidirectional or bidirectional +connection? shared or separate file systems?), which queuing system it uses +(slurm, PBS, LSF, none?); and c) how many parallel jobs and execution threads +per job to use when running the calculations. This step can be performed when +building up the call to the Compute() function. + } + \item{ +Running the computation. startR transparently implements the Map Reduce +paradigm, according to the settings in the previous steps. The progress can +optionally be monitored with the EC-Flow workflow management tool. When the +computation ends, a report of performance timings is displayed. This step can +be triggered with the Compute() function. A minimal end-to-end sketch of these +four steps is given just before the examples below. + } } -\value{ -If \code{retrieve = TRUE} the involved data is loaded into RAM memory and an object of the class 'startR_cube' with the following components is returned:\cr - \item{Data}{ -Multidimensional data array with named dimensions, with the data values requested via \dots and other parameters. This array can potentially contain metadata in the attribute 'variables'. - } - \item{Variales}{ -Named list of 1 + N components, containing lists of retrieved variables (as requested in \code{return_vars}) common to all the data sources (in the 1st component, \code{$common}), and for each of the N dara sources (named after the source name, as specified in \dots, or, if not specified, \code{$dat1}, \code{$dat2}, ..., \code{$datN}). Each of the variables are contained in a multidimensional array with named dimensions, and potentially with the attribute 'variables' with additional auxiliary data. - } - \item{Files}{ -Multidimensonal character string array with named dimensions. Its dimensions are the file dimensions (as requested in \dots). Each cell in this array contains a path to a retrieved file, or \code{NULL} if the corresponding file was not found.
- } - \item{NotFoundFiles}{ -Array with the same shape as \code{$Files} but with \code{NULL} in the positions for which the corresponding file was found, and a path to the expected file in the positions for which the corresponding file was not found. - } - \item{FileSelectors}{ -Multidimensional character string array with named dimensions, with the same shape as \code{$Files} and \code{$NotFoundFiles}, which contains the components used to build up the paths to each of the files in the data sources. - } -If \code{retrieve = FALSE} the involved data is not loaded into RAM memory and an object of the class 'startR_header' with the following components is returned:\cr - \item{Dimensions}{ -Named vector with the dimension lengths and names of the data involved in the \code{Start} call. - } - \item{Variales}{ -Named list of 1 + N components, containing lists of retrieved variables (as requested in \code{return_vars}) common to all the data sources (in the 1st component, \code{$common}), and for each of the N dara sources (named after the source name, as specified in \dots, or, if not specified, \code{$dat1}, \code{$dat2}, ..., \code{$datN}). Each of the variables are contained in a multidimensional array with named dimensions, and potentially with the attribute 'variables' with additional auxiliary data. - } - \item{Files}{ -Multidimensonal character string array with named dimensions. Its dimensions are the file dimensions (as requested in \dots). Each cell in this array contains a path to a file to be retrieved (which may exist or not). - } - \item{FileSelectors}{ -Multidimensional character string array with named dimensions, with the same shape as \code{$Files} and \code{$NotFoundFiles}, which contains the components used to build up the paths to each of the files in the data sources. - } - \item{StartRCall}{ -List of parameters sent to the \code{Start} call, with the parameter \code{retrieve} set to \code{TRUE}. Intended for calling in order to retrieve the associated data a posteriori with a call to \code{do.call}. - } +startR is not bound to a specific file format. Interface functions to +custom file formats can be provided for Start() to read them. As of this +version, startR includes interface functions to the following file formats: +\itemize{ + \item{ +NetCDF + } } -\author{ -History:\cr -0.0 - 2017-04 (N. Manubens, \email{nicolau.manubens at bsc.es}) - Original code +Metadata and auxiliary data are also preserved and arranged by Start() +to the extent that they are retrieved by the interface functions for a specific +file format. } \examples{ -## Check https://earth.bsc.es/gitlab/es/startR for step-by-step examples -## of Start().
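Before the package example that follows, a minimal end-to-end sketch of the four workflow steps described above, run locally on the sample files shipped with the package; the anomaly operation, the chunking choice and the thread numbers are illustrative only:

library(startR)
# 1. Declare the data set (no data is loaded at this point).
data_path <- system.file('extdata', package = 'startR')
path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc')
data <- Start(dat = list(list(path = path_obs)),
              var = 'tos',
              sdate = c('200011', '200012'),
              time = 'all',
              latitude = 'all',
              longitude = 'all',
              return_vars = list(latitude = 'dat',
                                 longitude = 'dat',
                                 time = 'sdate'),
              retrieve = FALSE)
# 2. Declare the operation: here, anomalies along the 'time' dimension.
anomaly <- function(x) {
  x - mean(x)
}
step <- Step(fun = anomaly,
             target_dims = 'time',
             output_dims = 'time')
# 3. Build the workflow.
wf <- AddStep(data, step)
# 4. Run the computation on the local machine, chunking along longitude.
res <- Compute(workflow = wf,
               chunks = list(longitude = 2),
               threads_load = 1,
               threads_compute = 2)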
+ data_path <- system.file('extdata', package = 'startR') + path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') + sdates <- c('200011', '200012') + data <- Start(dat = list(list(path = path_obs)), + var = 'tos', + sdate = sdates, + time = 'all', + latitude = 'all', + longitude = 'all', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = FALSE) + } -\keyword{IO} -\keyword{array} -\keyword{manip} + diff --git a/man/Step.Rd b/man/Step.Rd new file mode 100644 index 0000000000000000000000000000000000000000..65f0c727eef1d8b9ccf24b6f992304bd1a99e63a --- /dev/null +++ b/man/Step.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Step.R +\name{Step} +\alias{Step} +\title{Define the operation applied on declared data.} +\usage{ +Step(fun, target_dims, output_dims, use_libraries = NULL, + use_attributes = NULL) +} +\arguments{ +\item{fun}{A function in R format defining the operation to be applied to the +data declared by a Start() call. It should only work on the essential +dimensions rather than all the data dimensions. Since the function will be +called numerous times through all the non-essential dimensions, it is +recommended to keep it as light as possible.} + +\item{target_dims}{A vector for a single input array, or a list of vectors +for multiple input arrays, indicating the names of the dimensions along which +'fun' is to be applied.} + +\item{output_dims}{A vector for a single returned array, or a list of vectors +for multiple returned arrays, indicating the dimension names of the function +output.} + +\item{use_libraries}{A vector of character strings indicating the R library +names to be used in 'fun'. The default value is NULL.} + +\item{use_attributes}{One or more lists of vectors of character strings +indicating the data attributes to be used in 'fun'. The list name should be +consistent with the list name of 'data' in AddStep(). The default value is +NULL.} +} +\value{ +A closure that contains all the objects assigned. It serves as the + input of AddStep(). +} +\description{ +The step of the startR workflow after declaring data with a Start() call. It +identifies the operation (i.e., the function) and the target and output +dimensions of the data array for that function. Ideally, it expects the +dimension names to be in the same order as the ones requested in the Start() +call. If a different order is specified, startR will reorder the subset +dimensions to the order expected by this function.
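The arguments above also allow for several input arrays: in that case 'target_dims' is given as a named list whose names match the named list of inputs later passed to AddStep(). A minimal sketch under that reading of the arguments, where 'exp' and 'obs' are assumed to be two objects declared with Start() that share a 'time' dimension, and the bias operation is illustrative only:

# Hypothetical two-input step: the bias of 'exp' with respect to the
# time mean of 'obs', computed along the shared 'time' dimension.
bias <- function(exp, obs) {
  exp - mean(obs)
}
step <- Step(fun = bias,
             target_dims = list(exp = 'time', obs = 'time'),
             output_dims = 'time')
wf <- AddStep(list(exp = exp, obs = obs), step)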
+} +\examples{ + data_path <- system.file('extdata', package = 'startR') + path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') + sdates <- c('200011', '200012') + data <- Start(dat = list(list(path = path_obs)), + var = 'tos', + sdate = sdates, + time = 'all', + latitude = 'all', + longitude = 'all', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = FALSE) + fun <- function(x) { + lat = attributes(x)$Variables$dat1$latitude + weight = sqrt(cos(lat * pi / 180)) + corrected = Apply(list(x), target_dims = "latitude", + fun = function(x) {x * weight}) + } + step <- Step(fun = fun, + target_dims = 'latitude', + output_dims = 'latitude', + use_libraries = c('multiApply'), + use_attributes = list(data = "Variables")) + wf <- AddStep(data, step) + +} + diff --git a/man/Subset.Rd b/man/Subset.Rd deleted file mode 100644 index b90def7d63abc7ed78e7135920747be4d94203b0..0000000000000000000000000000000000000000 --- a/man/Subset.Rd +++ /dev/null @@ -1,51 +0,0 @@ -\name{Subset} -\alias{Subset} -\title{Subset a Data Array} -\description{ -This function allows to subset (i.e. slice, take a chunk of) an array, in a -similar way as done in the function \code{take()} in the package plyr. There -are two main inprovements:\cr\cr -The input array can have dimension names, either -in \code{names(dim(x))} or in the attribute 'dimensions', and the dimensions to -subset along can be specified via the parameter \code{along} either with -integer indices or either by their name.\cr\cr -There are additional ways to adjust which dimensions are dropped in the -resulting array: either to drop all, to drop none, to drop only the ones that -have been sliced or to drop only the ones that have not been sliced.\cr\cr -If an array is provided without dimension names, dimension names taken from -the parameter \code{dim_names} will be added to the array. -} -\usage{ -Subset(x, along, indices, drop = FALSE) -} -\arguments{ - \item{x}{ -A multidimensional array to be sliced. It can have dimension names either -in \code{names(dim(x))} or either in the attribute 'dimensions'. - } - \item{along}{ -Vector with references to the dimensions to take the subset from: either -integers or dimension names. - } - \item{indices}{ -List of indices to take from each dimension specified in 'along'. If a single -dimension is specified in 'along' the indices can be directly provided as a -single integer or as a vector. - } - \item{drop}{ -Whether to drop all the dimensions of length 1 in the resulting array, none, -only those that are specified in 'along', or only those that are not specified -in 'along'. The possible values are, respectively: 'all' or TRUE, 'none' or -FALSE, 'selected', and 'non-selected'. - } -} -\examples{ -sample_array <- array(1:24, dim = c(dataset = 1, sdate = 2, member = 3, ftime = 4)) -subset <- Subset(sample_array, c('dataset', 'sdate', 'member'), - list(1, 1, 1), drop = 'selected') -} -\author{ -History:\cr -0.0 - 2016-06 (N. 
Manubens, \email{nicolau.manubens at bsc.es}) - Original code -} -\keyword{datagen} diff --git a/man/indices.Rd b/man/indices.Rd index e5b086f50ad6bb311a3299e1c4acb46e20a1e35c..a3d85ea1e9c0c6f6b1168c532872a769f85cc15e 100644 --- a/man/indices.Rd +++ b/man/indices.Rd @@ -1,27 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/indices.R \name{indices} \alias{indices} -\title{Mark Dimension Selectors as Indices} -\description{ -Helper function intended for use in a call to the function \code{Start} in the package 'startR', to explicitly mark that a set of provided indices to subset one of the requested dimensions are actually indices and not values to be matched against a coordinate variable. See details in the documentation of the parameter \dots of the function \code{Start()}. -} +\title{Specify dimension selectors with indices} \usage{ indices(x) } \arguments{ - \item{x}{ -Numeric vector or list with two numeric elements. - } +\item{x}{A numeric vector or a list with two numerics to take all the +elements between the two specified indices (both extremes inclusive).} } \value{ -The same as the input, but with an additional attribute 'indices' with the value TRUE, marking the indices as numeric indices. +Same as the input, but with the additional attributes 'indices', 'values', and + 'chunk'. } -\author{ -See details in the documentation of the parameter \code{transform} of the function \code{Start()}. +\description{ +This is a helper function used in a Start() call to define the desired range +of dimensions. It marks the provided numbers as positions (indices) along the +dimension in the original data, rather than as values to be matched against +the coordinate variable. See details in the documentation of the parameter +\code{\dots} 'indices to take' of the function Start(). } \examples{ -## Check https://earth.bsc.es/gitlab/es/startR for step-by-step examples -## of Start() that use indices(). +# Used in Start(): + data_path <- system.file('extdata', package = 'startR') + path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') + sdates <- c('200011', '200012') + data <- Start(dat = list(list(path = path_obs)), + var = 'tos', + sdate = sdates, + time = 'all', + latitude = indices(1:2), + longitude = indices(list(2, 14)), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = FALSE) +} +\seealso{ +\code{\link{values}} } -\keyword{IO} -\keyword{array} -\keyword{manip} + diff --git a/man/values.Rd b/man/values.Rd new file mode 100644 index 0000000000000000000000000000000000000000..3300f19dc730494b23723c734d4372f13366e7b1 --- /dev/null +++ b/man/values.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/values.R +\name{values} +\alias{values} +\title{Specify dimension selectors with actual values} +\usage{ +values(x) +} +\arguments{ +\item{x}{A numeric vector or a list with two numerics to take all the elements +between the two specified values (both extremes inclusive).} +} +\value{ +Same as the input, but with the additional attributes 'indices', 'values', and + 'chunk'. } +\description{ +This is a helper function used in a Start() call to define the desired range +of dimensions. It specifies the actual values to be matched against the +coordinate variable. See details in the documentation of the parameter +\code{\dots} 'indices to take' of the function Start().
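To make the contrast with indices() explicit, a short sketch of the selector forms used in the examples of both help pages; the ranges themselves are only illustrative:

library(startR)
# indices(): positions along the dimension as stored in the file,
# regardless of the values of the coordinate variable.
indices(1:10)         # the 1st to the 10th grid point
indices(list(2, 14))  # every grid point from the 2nd to the 14th index
# values(): matched against the coordinate variable itself.
values(seq(-80, 80, 20))  # these exact coordinate values
values(list(-10, 10))     # all coordinate values between -10 and 10,
                          # both extremes included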
+} +\examples{ +# Used in Start(): + data_path <- system.file('extdata', package = 'startR') + path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') + sdates <- c('200011', '200012') + data <- Start(dat = list(list(path = path_obs)), + var = 'tos', + sdate = sdates, + time = 'all', + latitude = values(seq(-80, 80, 20)), + latitude_reorder = Sort(), + longitude = values(list(10, 300)), + longitude_reorder = CircularSort(0, 360), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = FALSE) +} +\seealso{ +\code{\link{indices}} +} + diff --git a/startR-manual.pdf b/startR-manual.pdf index dac7899e68eedb61e1475b499a208d22950b4714..3417b13eaf55bc8c3e82ee68bf29c1c3900d2704 100644 Binary files a/startR-manual.pdf and b/startR-manual.pdf differ diff --git a/tests/testthat/test-AddStep-DimNames.R b/tests/testthat/test-AddStep-DimNames.R index 03e1d01f9aed4ad367b7ac0e348989348c7d5f50..5577ff5dd68e4e6b78f3e4af0431eaf0ece0a7ed 100644 --- a/tests/testthat/test-AddStep-DimNames.R +++ b/tests/testthat/test-AddStep-DimNames.R @@ -1,6 +1,8 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Error with bad dimensions tests.") test_that("Single File - Local execution", { - +skip_on_cran() data <- Start(dataset = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc', var = 'tas', sdate = '2000', diff --git a/tests/testthat/test-Compute-NumChunks.R b/tests/testthat/test-Compute-NumChunks.R index dbfcbd99de638ff7c70c9fa68c254cd1391db177..319a18b65c6f8afda3c124e2908a98d413d2384b 100644 --- a/tests/testthat/test-Compute-NumChunks.R +++ b/tests/testthat/test-Compute-NumChunks.R @@ -1,6 +1,8 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Number of chunks tests.") test_that("Single File - Local execution", { - +skip_on_cran() data <- Start(dataset = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc', var = 'tas', sdate = '2000', @@ -27,13 +29,13 @@ wf = AddStep(inputs = data, expect_equal(Compute(workflow = wf, chunks = list(lat = 2, lon = 2), - threads_load = 2, - threads_compute = 4), + threads_load = 1, + threads_compute = 2), Compute(workflow = wf, chunks = list(lat = 3, lon = 3), - threads_load = 2, - threads_compute = 4), + threads_load = 1, + threads_compute = 2), check.attributes = FALSE) }) diff --git a/tests/testthat/test-Start-global-lon-across_meridian.R b/tests/testthat/test-Start-global-lon-across_meridian.R index aa1dc18f254a03a6e81cced65409123620758b42..16507033c30bb95c0009b92b99794dcb6adbad91 100644 --- a/tests/testthat/test-Start-global-lon-across_meridian.R +++ b/tests/testthat/test-Start-global-lon-across_meridian.R @@ -1,7 +1,9 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() across_meridia global lon length check") test_that("first test", { - +skip_on_cran() repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc" var <- 'tas' diff --git a/tests/testthat/test-Start-line_order-consistency.R b/tests/testthat/test-Start-line_order-consistency.R index 1fe98ea1c394994372428c580c5ac7df8ffe5594..74ffae2ad7cb4abd91710d9e6be0a8b69bc97fa9 100644 --- a/tests/testthat/test-Start-line_order-consistency.R +++ b/tests/testthat/test-Start-line_order-consistency.R @@ -1,3 +1,5 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() line order consistency check") variable <- "tas" @@ -10,7 +12,7 @@ context("Start() line order 
consistency check") lons.max <- 360 test_that("1. lon and lat order", { - +skip_on_cran() dat1 <- Start(dat = obs.path, var = variable, file_date = dates_file, @@ -61,7 +63,7 @@ test_that("1. lon and lat order", { test_that("2. dim length check: with/out reorder", { - +skip_on_cran() dat1 <- Start(dat = obs.path, var = variable, diff --git a/tests/testthat/test-Start-multiple-sdates.R b/tests/testthat/test-Start-multiple-sdates.R index cd5bac8f86128d86334c985fabfc389c893b1a62..832205ae2e8bfd9e8abc4ef776f840b0564dda05 100644 --- a/tests/testthat/test-Start-multiple-sdates.R +++ b/tests/testthat/test-Start-multiple-sdates.R @@ -1,3 +1,5 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() multiple sdate with split + merge dim") # When certain values in one observation file are required more than once, @@ -14,6 +16,7 @@ var100_name <- 'windagl100' sdates.seq <- c("20161222","20161229","20170105","20170112") test_that("1. ", { +skip_on_cran() hcst<-Start(dat = ecmwf_path_hc, var = var_name, sdate = sdates.seq, @@ -85,6 +88,7 @@ obs <- Start(dat = obs_path, }) test_that("2. change the file_date order", { +skip_on_cran() hcst<-Start(dat = ecmwf_path_hc, var = var_name, sdate = sdates.seq, diff --git a/tests/testthat/test-Start-reorder-lat.R b/tests/testthat/test-Start-reorder-lat.R index f53a9ec12cfcc1ed39df7791330fd95fca79c2b8..9c2729a930041831e13c707f75dfe41d5bea77da 100644 --- a/tests/testthat/test-Start-reorder-lat.R +++ b/tests/testthat/test-Start-reorder-lat.R @@ -1,3 +1,5 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() lat Reorder test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -16,6 +18,7 @@ path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$. 
############################################## test_that("1-1-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -78,6 +81,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-2-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 20 @@ -121,6 +125,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-3-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- -10 @@ -162,6 +167,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- -20 @@ -204,6 +210,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("2-1-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -266,6 +273,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("2-2-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 20 @@ -307,6 +315,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("2-3-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- -10 @@ -347,6 +356,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("2-4-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- -20 @@ -387,6 +397,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-1-2-3-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -428,6 +439,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("2-1-2-3-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -469,6 +481,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## ############################################## test_that("1-1-2-2-2-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -530,6 +543,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-2-2-2-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 20 @@ -571,6 +585,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-1-2-2-2-3-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -612,6 +627,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("2-1-2-2-2-3-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -666,6 +682,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-1-2-2-1-1-2-3", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -714,6 +731,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-1-2-2-3-1-2-3", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -762,6 +780,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-1-2-2-3-2-2-3", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -811,6 +830,7 @@ res <- Start(dat = 
list(list(path=path_exp)), }) ############################################## test_that("1-1-2-2-3-1-2-1", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 diff --git a/tests/testthat/test-Start-reorder-latCoarse.R b/tests/testthat/test-Start-reorder-latCoarse.R index bc6f480a8af702e354584345e34cc9ff3acc771e..4229b06a09944b87d06fcf2060360fefd39ecf1c 100644 --- a/tests/testthat/test-Start-reorder-latCoarse.R +++ b/tests/testthat/test-Start-reorder-latCoarse.R @@ -1,3 +1,5 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() lat Reorder test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix @@ -18,6 +20,7 @@ path_exp <- '/esarchive/exp/ncar/cesm-dple/monthly_mean/$var$/$var$_$sdate$.nc' ## latitude: -90 o 90 {-90, -89.05759 ...} #192 values ############################################## test_that("1-1-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -80,6 +83,7 @@ res <- Start(dat = list(list(path = path_exp)), ############################################## test_that("1-2-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 20 @@ -123,6 +127,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-3-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- -10 @@ -164,6 +169,7 @@ res <- Start(dat = list(list(path = path_exp)), }) ############################################## test_that("1-4-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- -20 @@ -206,6 +212,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("2-1-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -268,6 +275,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("2-2-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 20 @@ -309,6 +317,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("2-3-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- -10 @@ -349,6 +358,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("2-4-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- -20 @@ -389,6 +399,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-1-4-3-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -430,6 +441,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("2-1-4-3-1-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -471,6 +483,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## ############################################## test_that("1-1-4-2-2-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -532,6 +545,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-4-2-2-1-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 20 @@ -573,6 +587,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-1-4-2-2-3-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -614,6 +629,7 @@ res <- Start(dat = 
list(list(path=path_exp)), }) ############################################## test_that("2-1-4-2-2-3-1-x", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -668,6 +684,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-1-4-2-1-1-2-3", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -716,6 +733,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-1-4-2-3-1-2-3", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -764,6 +782,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-1-4-2-3-2-2-3", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 @@ -813,6 +832,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-1-4-2-3-1-2-1", { +skip_on_cran() lons.min <- 40 lons.max <- 45 lats.min <- 10 diff --git a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R index 1b8a96b44f3ae49c4b32a3d53f4975e9e271cc62..efbe178401b929b11df534a8dcf16f2da6b72cbb 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R @@ -1,3 +1,5 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() lon Reorder transform -180to180 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[170, 190] @@ -17,6 +19,7 @@ sdate <- '199212' ############################################## test_that("1-1-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -59,6 +62,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-2-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -97,6 +101,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-5-2-2-1-1-2-4", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -135,6 +140,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-6-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 10 lons.max <- -10 lats.min <- 10 @@ -172,6 +178,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 170 lons.max <- 190 lats.min <- 10 @@ -211,6 +218,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## ############################################## test_that("1-1-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -257,6 +265,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -304,6 +313,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-3-2-2-2-2-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -351,6 +361,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-2-2-2-2-2-3", { +skip_on_cran() lons.min <- -20 lons.max <- 
-10 lats.min <- 10 @@ -392,6 +403,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-5-2-2-2-2-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -439,6 +451,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-6-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- -10 lats.min <- 10 @@ -481,6 +494,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-7-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -522,6 +536,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 @@ -571,6 +586,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## ############################################## test_that("1-1-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -616,6 +632,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -662,6 +679,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-3-2-2-2-3-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -708,6 +726,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-2-2-2-3-2-3", { +skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -750,6 +769,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-5-2-2-2-3-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -791,6 +811,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-6-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- -10 lats.min <- 10 @@ -837,6 +858,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-7-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -878,6 +900,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360.R b/tests/testthat/test-Start-reorder-lon-transform_0to360.R index 5ef55769fd4ce52f0b1a06a450547910be785664..0a973bcfcd04eaad0e634fda06dff7da9ed57f10 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360.R @@ -1,3 +1,5 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() lon Reorder transform 0to360 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] @@ -17,6 +19,7 @@ sdate <- '19821201' ############################################## test_that("1-1-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -61,6 +64,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## 
test_that("1-2-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -100,6 +104,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-5-2-2-1-1-2-4", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -140,6 +145,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-6-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 10 lons.max <- -10 lats.min <- 10 @@ -179,6 +185,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-7-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -218,6 +225,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 @@ -259,6 +267,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## ############################################## test_that("1-1-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -307,6 +316,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -356,6 +366,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-3-2-2-2-2-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -405,6 +416,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-2-2-2-2-2-3", { +skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -448,6 +460,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-5-2-2-2-2-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -497,6 +510,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-6-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- -10 lats.min <- 10 @@ -541,6 +555,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-7-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -584,6 +599,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 @@ -635,6 +651,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## ############################################## test_that("1-1-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -682,6 +699,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -730,6 +748,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-3-2-2-2-3-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -778,6 +797,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-2-2-2-3-2-3", { +skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -822,6 +842,7 @@ res <- 
Start(dat = list(list(path=path_exp)), ############################################## test_that("1-5-2-2-2-3-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -865,6 +886,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-6-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- -10 lats.min <- 10 @@ -913,6 +935,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-7-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -956,6 +979,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R index 262673bd1cf7f11961d37c584b143b960dfbcb2e..01b5d68919b98c0d7f9886064e7680d18c459dd9 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R @@ -1,3 +1,5 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() lon Reorder transform 0to360 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] @@ -21,6 +23,7 @@ sdate <- '20001101' ############################################## test_that("1-1-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -65,6 +68,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-2-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -104,6 +108,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-5-2-2-1-1-2-4", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -144,6 +149,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-6-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 10 lons.max <- -10 lats.min <- 10 @@ -183,6 +189,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-7-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -222,6 +229,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-2-2-1-1-2-4", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 @@ -263,6 +271,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## ############################################## test_that("1-1-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -311,6 +320,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -360,6 +370,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-3-2-2-2-2-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -409,6 +420,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-2-2-2-2-2-3", { 
+skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -452,6 +464,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-5-2-2-2-2-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -501,6 +514,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-6-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- -10 lats.min <- 10 @@ -545,6 +559,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-7-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -588,6 +603,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-2-2-2-2-2-3", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 @@ -639,6 +655,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## ############################################## test_that("1-1-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -686,6 +703,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -734,6 +752,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-3-2-2-2-3-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -782,6 +801,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-2-2-2-3-2-3", { +skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -826,6 +846,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-5-2-2-2-3-2-3", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -869,6 +890,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-6-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 20 lons.max <- -10 lats.min <- 10 @@ -917,6 +939,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-7-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -960,6 +983,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-2-2-2-3-2-3", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 diff --git a/tests/testthat/test-Start-reorder-lon0to360Coarse.R b/tests/testthat/test-Start-reorder-lon0to360Coarse.R index 461042a83acf74b77bab87d9d3e3519f224fd32e..cb7649a82b11e0fa797bb002e2a47f94526ee44e 100644 --- a/tests/testthat/test-Start-reorder-lon0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon0to360Coarse.R @@ -1,3 +1,5 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() lon Reorder non-transform 0to360 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] @@ -16,6 +18,7 @@ path_exp <- '/esarchive/exp/ncar/cesm-dple/monthly_mean/$var$/$var$_$sdate$.nc' ## latitude: -90 o 90 {-90, -89.05759 ...} #192 values ############################################## test_that("1-1-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 10 lons.max 
<- 20 lats.min <- 10 @@ -57,6 +60,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-2-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -87,6 +91,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-5-4-2-1-1-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -119,6 +124,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-6-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 10 lons.max <- -10 lats.min <- 10 @@ -150,6 +156,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-7-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -180,6 +187,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-4-2-1-1-1-x", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 @@ -212,6 +220,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## ############################################## test_that("1-1-4-2-2-2-1-x", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -252,6 +261,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-1-4-2-2-3-1-x", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -288,6 +298,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-4-2-2-2-1-x", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -324,6 +335,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-4-2-2-3-1-x", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -361,6 +373,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-3-4-2-2-2-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -398,6 +411,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-3-4-2-2-3-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -430,6 +444,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-4-2-2-2-1-x", { +skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -462,6 +477,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-4-2-2-3-1-x", { +skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -495,6 +511,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-5-4-2-2-2-1-x", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -523,6 +540,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-5-4-2-2-3-1-x", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -556,6 +574,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-6-4-2-2-2-1-x", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 @@ -589,6 +608,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-6-4-2-2-3-1-x", { +skip_on_cran() 
lons.min <- 350 lons.max <- 370 lats.min <- 10 diff --git a/tests/testthat/test-Start-reorder-lon_-180to180.R b/tests/testthat/test-Start-reorder-lon_-180to180.R index 38cd6b8efbe7c3ec9442e637ed4597ffab6ed390..bdfc1da5343c95597e6ef76224d7ae781c0744be 100644 --- a/tests/testthat/test-Start-reorder-lon_-180to180.R +++ b/tests/testthat/test-Start-reorder-lon_-180to180.R @@ -1,3 +1,5 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() lon Reorder non-transform -180to180 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[170, 190] @@ -18,6 +20,7 @@ sdate <- '199212' ############################################## test_that("1-1-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -57,6 +60,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-2-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -86,6 +90,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-3-2-2-1-1-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -116,6 +121,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-4-2-2-1-1-1-x", { +skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -146,6 +152,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-5-2-2-1-1-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -176,6 +183,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-6-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 10 lons.max <- -10 lats.min <- 10 @@ -205,6 +213,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-8-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 170 lons.max <- 190 lats.min <- 10 @@ -235,6 +244,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## ############################################## test_that("1-1-2-2-2-2-1-x", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -273,6 +283,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-1-2-2-2-3-1-x", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -307,6 +318,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-2-2-2-2-1-x", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -341,6 +353,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-2-2-2-3-1-x", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -376,6 +389,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-3-2-2-2-2-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -411,6 +425,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-3-2-2-2-3-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -441,6 +456,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## 
test_that("1-4-2-2-2-2-1-x", { +skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -471,6 +487,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-2-2-2-3-1-x", { +skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -501,6 +518,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-5-2-2-2-2-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -541,6 +559,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-5-2-2-2-3-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -571,6 +590,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-6-2-2-2-2-1-x", { +skip_on_cran() lons.min <- 20 lons.max <- -10 lats.min <- 10 @@ -606,6 +626,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-6-2-2-2-3-1-x", { +skip_on_cran() lons.min <- 20 lons.max <- -10 lats.min <- 10 @@ -645,6 +666,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-7-2-2-2-2-1-x", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -676,6 +698,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-7-2-2-2-3-1-x", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -712,6 +735,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-8-2-2-2-2-1-x", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 @@ -743,6 +767,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-2-2-2-3-1-x", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 diff --git a/tests/testthat/test-Start-reorder-lon_0to360.R b/tests/testthat/test-Start-reorder-lon_0to360.R index 67d0e908612336b09ddff32add7f6a4a0d381230..5faf713a849260765a1cc52adb14bd13b967eddc 100644 --- a/tests/testthat/test-Start-reorder-lon_0to360.R +++ b/tests/testthat/test-Start-reorder-lon_0to360.R @@ -1,3 +1,5 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() lon Reorder non-transform 0to360 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] @@ -16,6 +18,7 @@ path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$. 
############################################## test_that("1-1-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -57,6 +60,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-2-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -87,6 +91,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-5-2-2-1-1-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -119,6 +124,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-6-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 10 lons.max <- -10 lats.min <- 10 @@ -150,6 +156,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-7-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -180,6 +187,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-8-2-2-1-1-1-x", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 @@ -212,6 +220,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## ############################################## test_that("1-1-2-2-2-2-1-x", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -252,6 +261,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-1-2-2-2-3-1-x", { +skip_on_cran() lons.min <- 10 lons.max <- 20 lats.min <- 10 @@ -288,6 +298,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-2-2-2-2-1-x", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -324,6 +335,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-2-2-2-2-3-1-x", { +skip_on_cran() lons.min <- 20 lons.max <- 10 lats.min <- 10 @@ -361,6 +373,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-3-2-2-2-2-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -398,6 +411,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-3-2-2-2-3-1-x", { +skip_on_cran() lons.min <- -10 lons.max <- -20 lats.min <- 10 @@ -430,6 +444,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-2-2-2-2-1-x", { +skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -462,6 +477,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-4-2-2-2-3-1-x", { +skip_on_cran() lons.min <- -20 lons.max <- -10 lats.min <- 10 @@ -495,6 +511,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-5-2-2-2-2-1-x", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -523,6 +540,7 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## test_that("1-5-2-2-2-3-1-x", { +skip_on_cran() lons.min <- 330 lons.max <- 350 lats.min <- 10 @@ -556,6 +574,7 @@ res <- Start(dat = list(list(path=path_exp)), ############################################## test_that("1-6-2-2-2-2-1-x", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 @@ -589,6 +608,7 @@ res <- Start(dat = 
list(list(path=path_exp)), }) ############################################## test_that("1-6-2-2-2-3-1-x", { +skip_on_cran() lons.min <- 350 lons.max <- 370 lats.min <- 10 diff --git a/tests/testthat/test-Start-reorder-retrieve.R b/tests/testthat/test-Start-reorder-retrieve.R index 47412f59279905449384e00b96836fad422e7950..cb6cfc6929ff778623d7285a40843a1c0979ba41 100644 --- a/tests/testthat/test-Start-reorder-retrieve.R +++ b/tests/testthat/test-Start-reorder-retrieve.R @@ -1,9 +1,11 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() lon Reorder non-transform retrieve test") ############################################## test_that("original range 0to360", { - +skip_on_cran() ## Origin longitude in file: [0:359.722222222222] path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' @@ -83,7 +85,7 @@ res2 <- Start(dat = path_exp, ############################################## test_that("original range -180to180", { - +skip_on_cran() ## Origin longitude in file: [0:359.722222222222] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' variable <- 'tas' diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index a88be8ba073eb06adfc8793b13f3638ae2bf71cc..e998844b3489d728a86f56d7c32af0a13144d05f 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -1,3 +1,5 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() split + merge dim and value check") var_name <- 'sfcWind' @@ -25,7 +27,7 @@ path.obs <- '/esarchive/recon/ecmwf/era5/1hourly/$var$/$var$_$file_date$.nc' test_that("1. split + merge + narm", { - +skip_on_cran() obs <- Start(dat = path.obs, var = var_name, file_date = file_date, # a vector @@ -49,12 +51,12 @@ obs <- Start(dat = path.obs, ) expect_equal( obs[1, 1, 1, 1, 2, 2, 1:3], - c(6.802123, 7.110264, 7.584915), + c(6.968349, 6.604249, 7.563990), tolerance = 0.0001 ) expect_equal( mean(obs), - 5.751444, + 5.726787, tolerance = 0.0001 ) expect_equal( @@ -65,7 +67,7 @@ obs <- Start(dat = path.obs, test_that("2. no split + merge + narm", { - +skip_on_cran() obs <- Start(dat = path.obs, var = var_name, file_date = file_date, # a vector @@ -89,12 +91,12 @@ obs <- Start(dat = path.obs, ) expect_equal( obs[1, 1, 1, 1, 10:13], - c(4.076760, 7.644944, 4.589063, 4.402223), + c(3.777488, 7.563611, 3.737108, 4.478187), tolerance = 0.0001 ) expect_equal( mean(obs), - 5.751444, + 5.726787, tolerance = 0.0001 ) expect_equal( @@ -105,7 +107,7 @@ obs <- Start(dat = path.obs, test_that("3. 
no split + merge + no narm", { - +skip_on_cran() obs <- Start(dat = path.obs, var = var_name, file_date = file_date, # a vector @@ -129,12 +131,12 @@ obs <- Start(dat = path.obs, ) expect_equal( obs[1, 1, 1, 1, 10:13], - c(4.076760, 7.644944, 4.589063, 4.402223), + c(3.777488, 7.563611, 3.737108, 4.478187), tolerance = 0.0001 ) expect_equal( mean(obs, na.rm = T), - 5.751444, + 5.726787, tolerance = 0.0001 ) expect_equal( @@ -142,3 +144,4 @@ obs <- Start(dat = path.obs, 3600 ) }) + diff --git a/tests/testthat/test-Start-transform-lon-across_meridian.R b/tests/testthat/test-Start-transform-lon-across_meridian.R index 96fc5503065d03ef19dc4b5bc67ab8fedcef4373..9ac0b8de89bb130a6a316e17ea27f2e4e37e3e03 100644 --- a/tests/testthat/test-Start-transform-lon-across_meridian.R +++ b/tests/testthat/test-Start-transform-lon-across_meridian.R @@ -1,7 +1,9 @@ +#if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') + context("Start() transform across_meridian lon order check") test_that("first test", { - +skip_on_cran() repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc" var <- 'tas'
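# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the reorder/transform tests in
# the files above exercise Start() longitude selection when the requested
# range does not follow the file's longitude convention (0:360 vs -180:180).
# A minimal, hypothetical call of that kind; the path template, variable and
# selector values are placeholders, not taken from the real test data.
library(startR)

res <- Start(dat = '/some/archive/$var$/$var$_$sdate$.nc',  # hypothetical template
             var = 'tas',
             sdate = '19821201',
             lat = values(list(10, 20)),          # lats.min, lats.max
             lon = values(list(-10, 10)),         # lons.min, lons.max crossing 0 deg
             lon_reorder = CircularSort(0, 360),  # map selectors onto a 0:360 axis
             lat_reorder = Sort(),
             time = 'all',
             retrieve = FALSE)                    # build metadata only, no data read
# ---------------------------------------------------------------------------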