diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 80614d01c8420629dad7c930eadd1406f32fd698..47f807e67d88fd07e126988212944133d605996e 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -106,8 +106,11 @@ Find more explanation of this use case in FAQ [How-to-27](inst/doc/faq.md#27-uti This script shows how to load irregular grid data by Start(), then regrid it by s2dv::CDORemap in the workflow. It is a solution before Start() can deal with irregular regridding directly. + 14. [Get margin dimension indices in startR workflow](inst/doc/usecase/ex2_14_margin_dim_indices.R) + This usecase shows you how to know the margin dimension indices in the self-defined function. + 3. **Verification workflows** - 1. [Weekly ECV Subseasonal Hindcast Verification](inst/doc/usecase/ex3_1_SubseasonalECVHindcast.md) + 1. [Weekly ECV Subseasonal Hindcast Verification](inst/doc/usecase/ex3_1_SubseasonalECVHindcast.md) This is a practical case to compute monthly skill scores for the ECMWF/S2S-ENSForhc subseasonal hindcast using as a reference dataset ERA5. The ECV is air temperature at surface level (tas). Note that since this case is practical, it is heavy and takes much time to finish running. diff --git a/inst/doc/usecase/ex2_14_margin_dim_indices.R b/inst/doc/usecase/ex2_14_margin_dim_indices.R new file mode 100644 index 0000000000000000000000000000000000000000..747a4e31db4edd7b5d429b023e5ac964ec31815f --- /dev/null +++ b/inst/doc/usecase/ex2_14_margin_dim_indices.R @@ -0,0 +1,87 @@ +# Author: An-Chi Ho +# Date: 4th July 2023 +# ------------------------------------------------------------------ +# This usecase shows you how to know the margin dimension indices in the self-defined function. +# In this example, we chunk the data along dimensions 'var' and 'sdate'. We can get the indices +# of each chunck, and when dimension 'var' is 2 (i.e., 'tas'), we convert unit from K to degC. +#------------------------------------------------------------------ + + library(startR) + + data <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc", + var = c('psl', 'tas'), + sdate = paste0(2011:2018, '0501'), + ensemble = 'all', + time = indices(1:3), + lat = values(list(20, 80)), lat_reorder = Sort(), + lon = values(list(-80, 40)), lon_reorder = CircularSort(-180, 180), + synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', lon = NULL, lat = NULL), + retrieve = FALSE) + + + #NOTE: 'chunk_indices', 'chunks', and 'start_call' are the variables from startR:::ByChunks + func <- function(x) { + # x: [lat, lon] + + #----- Get margin dim indices --------- + # code modified from startR Util.R .chunk() + dim_indices <- lapply(names(chunks), + function(x) { + n_indices <- attr(start_call, 'Dimensions')[[x]] + chunk_sizes <- rep(floor(n_indices / chunks[[x]]), chunks[[x]]) + chunks_to_extend <- n_indices - chunk_sizes[1] * chunks[[x]] + if (chunks_to_extend > 0) { + chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1 + } + chunk_size <- chunk_sizes[chunk_indices[x]] + offset <- 0 + if (chunk_indices[x] > 1) { + offset <- sum(chunk_sizes[1:(chunk_indices[x] - 1)]) + } + 1:chunk_sizes[chunk_indices[x]] + offset + }) + names(dim_indices) <- names(chunks) + + # The first chunk [var = 1, sdate = 1] + #str(dim_indices) + #List of 5 + # $ dat : num 1 + # $ var : num 1 + # $ sdate : num [1:4] 1 2 3 4 + # $ ensemble: num [1:25] 1 2 3 4 5 6 7 8 9 10 ... + # $ time : num [1:3] 1 2 3 + + # The fourth chunk [var = 2, sdate = 2] + #str(dim_indices) + #List of 5 + # $ dat : num 1 + # $ var : num 2 + # $ sdate : num [1:4] 5 6 7 8 + # $ ensemble: num [1:25] 1 2 3 4 5 6 7 8 9 10 ... + # $ time : num [1:3] 1 2 3 + + if (dim_indices$var == 2) { # tas + x <- x - 273.15 + } + + res <- ClimProjDiags::WeightedMean(x, lat = c(attr(x, 'Variables')$common$lat), lon = c(attr(x, 'Variables')$common$lon)) + + return(res) + } + + + step <- Step(func, target_dims = c('lat', 'lon'), output_dims = NULL, + use_attributes = list("Variables")) + wf <- AddStep(data, step) + + res <- Compute(wf, chunks = list(var = 2, sdate = 2)) + + +# Check result: +summary(res$output1[1, 1, , , ]) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 101373 101525 101573 101570 101615 101749 +summary(res$output1[1, 2, , , ]) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 13.56 14.27 17.24 16.87 19.06 19.78