diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 1508742096218a5ca6ee05840b347855afbaaf03..ffe91a514552f734178f68a46e173becbd0604e4 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -1001,13 +1001,10 @@ See [How-to-21](#21-retrieve-the-complete-data-when-the-dimension-length-varies- In the self-defined function in startR workflow, the dimensions required for the computations are used as target dimensions, and the rest can be used to chunk the data in pieces. There is one situation that some information of one dimension is needed in the function but it is not depended by the computation. In this case, we may be able to chunk through this dimension while using it in the function still. It is a saver if you have a complex case with no margin dimension left (see [How-to-25](#25-what-to-do-if-your-function-has-too-many-target-dimensions).) -You just need to define a parameter in your function 'nchunks = chunk_indices' and use it in the function. -The use case [RainFARM precipitation downscaling](https://earth.bsc.es/gitlab/es/startR/-/blob/develop-RainFARMCase/inst/doc/usecase/ex2_5_rainFARM.R) demonstrates an example that the start date dimension is used as chunking dimension, -but we use its chunk number to know the start date value of each chunk. -The first part of the function performs downscaling method, which requres longitude and latitude dimensions, so these two dimensions must be the target dimensions in the workflow. -After that, the results are saved as netCDF file following esarchive convention. We need start date value here to decide the file name. -As you can see, the sdate dimension is not required for the computation, so it is not necessary to be the target dimension. We can just use 'chunk_indices' to get the chunk number therefore get the corresponding start date value for the file name. 
+We have two examples: (1) [ex2_5_RainFARM precipitation downscaling](inst/doc/usecase/ex2_5_rainFARM.R) +shows how to get the start date of each chunk using the chunk number; (2) [ex2_14](inst/doc/usecase/ex2_14_margin_dim_indices.R) shows how to distinguish the variable in each chunk since "variable" is one of the chunking dimensions +(__NOTE: In this case, it is easier to simply use attributes to find which variable it is. Check the use case for more details.__) There are many other possible applications of this parameter. Please share with us other uses cases you may create. diff --git a/inst/doc/usecase/ex2_14_margin_dim_indices.R b/inst/doc/usecase/ex2_14_margin_dim_indices.R index 747a4e31db4edd7b5d429b023e5ac964ec31815f..4ce1caf8c9a70b4d3f71444b5b7acff36cbd23c4 100644 --- a/inst/doc/usecase/ex2_14_margin_dim_indices.R +++ b/inst/doc/usecase/ex2_14_margin_dim_indices.R @@ -4,6 +4,10 @@ # This usecase shows you how to know the margin dimension indices in the self-defined function. # In this example, we chunk the data along dimensions 'var' and 'sdate'. We can get the indices # of each chunck, and when dimension 'var' is 2 (i.e., 'tas'), we convert unit from K to degC. +# +# [UPDATE_12072023] This case can be handled much more easily: simply use attributes to +# identify which variable is in each chunk, because attributes are also +# chunked along with the data. 
#------------------------------------------------------------------ library(startR) @@ -19,7 +23,19 @@ return_vars = list(time = 'sdate', lon = NULL, lat = NULL), retrieve = FALSE) +#---------------- METHOD 1 (RECOMMENDED) ----------------- + func <- function(x) { + # x: [lat, lon] + attrs_names <- names(attr(x, 'Variables')$common) + if ('tas' %in% attrs_names) x <- x - 273.15 + + res <- ClimProjDiags::WeightedMean(x, lat = c(attr(x, 'Variables')$common$lat), lon = c(attr(x, 'Variables')$common$lon)) + + return(res) + } + +#---------------- METHOD 2 ----------------- #NOTE: 'chunk_indices', 'chunks', and 'start_call' are the variables from startR:::ByChunks func <- function(x) { # x: [lat, lon] @@ -70,6 +86,7 @@ return(res) } +#-------------------------------------------------------- step <- Step(func, target_dims = c('lat', 'lon'), output_dims = NULL, use_attributes = list("Variables")) diff --git a/inst/doc/usecase/ex2_5_rainFARM.R b/inst/doc/usecase/ex2_5_rainFARM.R index 8d315a03c1f47588f881b39513dbf6b76cf43b7e..8b58901bcdf9c5a791fc7be8d4f9510604364033 100644 --- a/inst/doc/usecase/ex2_5_rainFARM.R +++ b/inst/doc/usecase/ex2_5_rainFARM.R @@ -1,6 +1,19 @@ # ------------------------------------------------------------------------------ # Downscaling precipitation using RainFARM # ------------------------------------------------------------------------------ +# This usecase demonstrates how the start date dimension can be used as the +# chunking dimension while the chunk number is used to find the start date value +# of each chunk. +# The first part of the function performs the downscaling method, which requires +# the longitude and latitude dimensions, so these two dimensions must be the target +# dimensions in the workflow. +# After that, the results are saved as a netCDF file following the esarchive convention. +# We need the start date value here to decide the file name. 
+# As you can see, the sdate dimension is not required for the computation, so it +# does not need to be a target dimension. We can just use 'chunk_indices' to +# get the chunk number and, from it, the corresponding start date value for the +# file name. +# ------------------------------------------------------------------------------ # Note 1: The data could be first transformed with QuantileMapping from CSTools # Note 2: Extra parameters could be used to downscale the data: weights, slope... # See more information in: @@ -74,15 +87,20 @@ step <- Step(Chunk_RF, use_libraries = c('CSTools', 'ncdf4'), use_attributes = list(data = "Variables")) -workflow <- AddStep(data, step, nf = 4, - destination = "/esarchive/scratch/nperez/git/Flor/cstools/test_RF_start/", +workflow <- AddStep(list(data = data), step, nf = 4, + destination = "./test_RF_start/", startdates = as.Date(sdates, format = "%Y%m%d")) + +#========= OPTION 1: Compute locally ============ res <- Compute(workflow, chunks = list(sdate = 4), threads_load = 2, threads_compute = 4) + +#========= OPTION 2: Compute ON NORD3 ============ + #-----------modify according to your personal info--------- queue_host = 'nord3' # your own host name for nord3v2 temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/'