From 3753b14d7ef3fd909f0df4930bff2cddb0e355cf Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 6 Oct 2021 15:06:51 +0200 Subject: [PATCH 1/3] change s2dverification to s2dv --- DESCRIPTION | 2 +- NAMESPACE | 2 +- R/CDORemapper.R | 8 ++++---- inst/doc/faq.md | 14 +++++++------- inst/doc/practical_guide.md | 2 +- inst/doc/usecase.md | 4 ++-- inst/doc/usecase/ex1_1_tranform.R | 8 ++++---- inst/doc/usecase/ex2_4_two_func.R | 2 +- .../usecase/ex2_7_seasonal_forecast_verification.R | 2 +- man/CDORemapper.Rd | 4 ++-- tests/testthat/test-Compute-CDORemap.R | 4 ++-- 11 files changed, 26 insertions(+), 26 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f1a9778..62087d9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ Imports: multiApply (>= 2.1.1), parallel, easyNCDF, - s2dverification, + s2dv, ClimProjDiags, PCICt Suggests: diff --git a/NAMESPACE b/NAMESPACE index ccf783c..1375d83 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,7 +24,7 @@ import(future) import(multiApply) import(parallel) importFrom(ClimProjDiags,Subset) -importFrom(s2dverification,CDORemap) +importFrom(s2dv,CDORemap) importFrom(stats,na.omit) importFrom(stats,setNames) importFrom(utils,str) diff --git a/R/CDORemapper.R b/R/CDORemapper.R index 8aed954..8e2a468 100644 --- a/R/CDORemapper.R +++ b/R/CDORemapper.R @@ -5,7 +5,7 @@ #''transform' in a Start() call. This function complies with the input/output #'interface required by Start() defined in the documentation for the parameter #''transform' of function Start().\cr\cr -#'This function uses the function CDORemap() in the package 's2dverification' to +#'This function uses the function CDORemap() in the package 's2dv' to #'perform the interpolation, hence CDO is required to be installed. #' #'@param data_array A data array to be transformed. See details in the @@ -24,7 +24,7 @@ #' potentially with different sizes, and potentially with the attribute #' 'variables' with additional auxiliary data. 
See details in the documentation #' of the parameter 'transform' of the function Start(). -#'@seealso \code{\link[s2dverification]{CDORemap}} +#'@seealso \code{\link[s2dv]{CDORemap}} #' #'@examples #'# Used in Start(): @@ -50,7 +50,7 @@ #' time = 'sdate'), #' retrieve = FALSE) #' } -#'@importFrom s2dverification CDORemap +#'@importFrom s2dv CDORemap #'@export CDORemapper <- function(data_array, variables, file_selectors = NULL, ...) { file_dims <- names(file_selectors) @@ -93,7 +93,7 @@ CDORemapper <- function(data_array, variables, file_selectors = NULL, ...) { stop("Parameters 'grid', 'method' and 'crop' must be specified for the ", "CDORemapper, via the 'transform_params' argument.") } - result <- s2dverification::CDORemap(data_array, lons, lats, ...) + result <- s2dv::CDORemap(data_array, lons, lats, ...) return_variables <- list(result$lons, result$lats) names(return_variables) <- c(lon_name, lat_name) list(data_array = result$data_array, variables = return_variables) diff --git a/inst/doc/faq.md b/inst/doc/faq.md index fbf2efb..5753fa1 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -275,7 +275,7 @@ data <- Start(..., retrieve = FALSE) func <- function(x) { - y <- s2dverification::Season(x, posdim = 'time') #specify package name + y <- s2dv::Season(x, posdim = 'time') #specify package name return(y) } @@ -293,7 +293,7 @@ wf <- AddStep(data, step) cluster = list(queue_host = 'p1', #your alias for power9 queue_type = 'slurm', temp_dir = '/gpfs/scratch/bsc32/bsc32734/startR_hpc/', - lib_dir = '/gpfs/projects/bsc32/share/R_libs/3.5/', #s2dverification is involved here, so the machine can find Season() + lib_dir = '/gpfs/projects/bsc32/share/R_libs/3.5/', #s2dv is involved here, so the machine can find Season() r_module = 'startR/0.1.2-foss-2018b-R-3.5.0', job_wallclock = '00:10:00', cores_per_job = 4, @@ -311,7 +311,7 @@ wf <- AddStep(data, step) If you want to do the interpolation within Start(), you can use the following four parameters: -1. 
**`transform`**: Assign the interpolation function. It is recommended to use `startR::CDORemapper`, the wrapper function of s2dverification::CDORemap(). +1. **`transform`**: Assign the interpolation function. It is recommended to use `startR::CDORemapper`, the wrapper function of s2dv::CDORemap(). 2. **`transform_params`**: A list of the required inputs for `transform`. Take `transform = CDORemapper` as an example, the common items are: - `grid`: A character string specifying either a name of a target grid (recognized by CDO, e.g., 'r256x128', 't106grid') or a path to another NetCDF file with the target grid (a single grid must be defined in such file). - `method`: A character string specifying an interpolation method (recognized by CDO, e.g., 'con', 'bil', 'bic', 'dis'). The following long names are also supported: 'conservative', 'bilinear', 'bicubic', and 'distance-weighted'. @@ -321,10 +321,10 @@ If you want to do the interpolation within Start(), you can use the following fo The parameter ’crop’ also accepts a numeric vector of custom borders: c(western border, eastern border, southern border, northern border). 3. **`transform_vars`**: A character vector of the inner dimensions to be transformed. E.g., c('latitude', 'longitude'). -4. **`transform_extra_cells`**: A numeric indicating the number of grid cell to extend from the borders if the interpolating region is a subset of the whole region. 2 as default, which is consistent with the method in s2dverification::Load(). +4. **`transform_extra_cells`**: A numeric indicating the number of grid cell to extend from the borders if the interpolating region is a subset of the whole region. 2 as default, which is consistent with the method in s2dv::Load(). You can find an example script here [ex1_1_tranform.R](/inst/doc/usecase/ex1_1_tranform.R) -You can see more information in s2dverification::CDORemap documentation [here](https://earth.bsc.es/gitlab/es/s2dverification/blob/master/man/CDORemap.Rd). 
+You can see more information in s2dv::CDORemap documentation [here](https://earth.bsc.es/gitlab/es/s2dv/blob/master/man/CDORemap.Rd). ### 6. Get data attributes without retrieving data to workstation @@ -459,7 +459,7 @@ data <- Start(dat = repos, ### 9. Use CDORemap() in function -If you want to interpolate data by s2dverification::CDORemap in function, you need to tell the +If you want to interpolate data by s2dv::CDORemap in function, you need to tell the machine which CDO module to use. Therefore, `CDO_module = 'CDO/1.9.5-foss-2018b'` should be added in Compute() cluster list. See the example in usecase [ex2_3_cdo.R](inst/doc/usecase/ex2_3_cdo.R). @@ -1025,7 +1025,7 @@ some problem. ### 5. Errors related to wrong file formatting -Several errors could be return when the files are not correctly formatted. If you see one of this errors, review the coordinates in your files: +Several errors could be returned when the files are not correctly formatted. If you see one of these errors, review the coordinates in your files: ``` Error in Rsx_nc4_put_vara_double: NetCDF: Numeric conversion not representable diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index d476652..378f648 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -297,7 +297,7 @@ If you are interested in actually loading the entire data set in your machine yo - evaluating the object returned by `Start()`: `data_load <- eval(data)` See the section on "How to choose the number of chunks, jobs and cores" for indications on working out the maximum amount of data that can be retrieved with a `Start()` call on a specific machine. -You may realize that this functionality is similar to the `Load()` function in the s2dverification package. In fact, `Start()` is more advanced and flexible, although `Load()` is more mature and consistent for loading typical seasonal to decadal forecasting data. `Load()` will be adapted in the future to use `Start()` internally.
+You may realize that this functionality is similar to the `Load()` function in the s2dv package. In fact, `Start()` is more advanced and flexible, although `Load()` is more mature and consistent for loading typical seasonal to decadal forecasting data. `Load()` will be adapted in the future to use `Start()` internally. There are no constrains for the number or names of the outer or inner dimensions used in a `Start()` call. In other words, `Start()` will handle NetCDF files with any number of dimensions with any name, as well as files distributed across folders in complex ways, since you can use customized wildcards in the path pattern. diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 013b47a..e0cf5e1 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -70,12 +70,12 @@ this use case is for two file dimensions (i.e., the usage of *_depends). 2. [Function using attributes of the data](inst/doc/usecase/ex2_2_attr.R) Using attributes is only available in startR_v0.1.3 or above. 3. [Use function CDORemap for interpolation](inst/doc/usecase/ex2_3_cdo.R) - Using parameter `CDO_module` is only available in startR_v0.1.3 or above. Interpolate data by using `s2dverification::CDORemap` in the workflow. + Using parameter `CDO_module` is only available in startR_v0.1.3 or above. Interpolate data by using `s2dv::CDORemap` in the workflow. 4. [Use two functions in workflow](inst/doc/usecase/ex2_4_two_func.R) 5. 6. [Use external parameters in atomic function](inst/doc/usecase/ex2_6_ext_param_func.R) 7. [Calculate the ensemble-adjusted Continuous Ranked Probability Score (CRPS)](inst/doc/usecase/ex2_7_seasonal_forecast_crps.R) - Use `SpecsVerification::EnsCrps` to calculate the ensemble-adjusted Continuous Ranked Probability Score (CRPS) for ECWMF experimental data, and do ensemble mean. Use `s2dverification::PlotEquiMap` to plot the CRPS map. 
+ Use `SpecsVerification::EnsCrps` to calculate the ensemble-adjusted Continuous Ranked Probability Score (CRPS) for ECMWF experimental data, and do ensemble mean. Use `s2dv::PlotEquiMap` to plot the CRPS map. 8. [Use CSTools Calibration function](inst/doc/usecase/ex2_8_calibration.R) Use `CSTools:::.cal`, the interior function of `CSTools::CST_Calibration`, to do the bias adjustment for ECMWF experimental monthly mean data. 9. [Use a mask to apply different methods to different gridpoints](inst/doc/usecase/ex2_9_mask.R) diff --git a/inst/doc/usecase/ex1_1_tranform.R b/inst/doc/usecase/ex1_1_tranform.R index c805b45..a544f84 100644 --- a/inst/doc/usecase/ex1_1_tranform.R +++ b/inst/doc/usecase/ex1_1_tranform.R @@ -1,12 +1,12 @@ # ------------------------------------------------------------------ # This script shows you how to do the interpolation in Start(). It also -# uses s2dverification::Load to compare the results, which are identical +# uses s2dv::Load to compare the results, which are identical # (only tiny difference due to round-up). # # The parameters in Start() for interpolation include 'transform', # 'transform_extra_cells', 'transform_params', and 'transform_vars'. # 'transform' is the interpolation function. startR provides 'CDORemapper', -# which is the wrapper function of s2dverification::CDORemap. +# which is the wrapper function of s2dv::CDORemap. # 'transform_extra_cells' defines the extra grid points you want to use for # interpolation. The default value is 2. 'transform_params' is a list which # defines the arguments used in 'cdo'.
'transform_vars' is a vector indicating @@ -65,9 +65,9 @@ obs[1,1,1,1,1:3,1:2] #------------------------- -# s2dverification::Load() +# s2dv::Load() #------------------------- -library(s2dverification) +library(s2dv) pobs <- paste0('/esarchive/recon/ecmwf/era5/monthly_mean/', '$VAR_NAME$_f1h/$VAR_NAME$_$YEAR$$MONTH$.nc') diff --git a/inst/doc/usecase/ex2_4_two_func.R b/inst/doc/usecase/ex2_4_two_func.R index d4d1b5f..2e638bd 100644 --- a/inst/doc/usecase/ex2_4_two_func.R +++ b/inst/doc/usecase/ex2_4_two_func.R @@ -1,5 +1,5 @@ # -------------------------------------------------------------- -# Two functions (e.g.: s2dverification::CDORemap and Season) +# Two functions (e.g.: s2dv::CDORemap and Season) #--------------------------------------------------------------- library(startR) repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' diff --git a/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R b/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R index 8b74607..b28e6a6 100644 --- a/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R +++ b/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R @@ -104,7 +104,7 @@ # Plotting -library(s2dverification) +library(s2dv) lon <- seq(from = 0, to = 359, length.out = 512) lat <- seq(from = 90, to = -90, length.out = 256) diff --git a/man/CDORemapper.Rd b/man/CDORemapper.Rd index 763be77..3f4238a 100644 --- a/man/CDORemapper.Rd +++ b/man/CDORemapper.Rd @@ -34,7 +34,7 @@ data subsets onto a specified target grid, intended for use as parameter 'transform' in a Start() call. This function complies with the input/output interface required by Start() defined in the documentation for the parameter 'transform' of function Start().\cr\cr -This function uses the function CDORemap() in the package 's2dverification' to +This function uses the function CDORemap() in the package 's2dv' to perform the interpolation, hence CDO is required to be installed. 
} \examples{ @@ -63,5 +63,5 @@ perform the interpolation, hence CDO is required to be installed. } } \seealso{ -\code{\link[s2dverification]{CDORemap}} +\code{\link[s2dv]{CDORemap}} } diff --git a/tests/testthat/test-Compute-CDORemap.R b/tests/testthat/test-Compute-CDORemap.R index 991e7e1..28df234 100644 --- a/tests/testthat/test-Compute-CDORemap.R +++ b/tests/testthat/test-Compute-CDORemap.R @@ -19,8 +19,8 @@ suppressWarnings( fun <- function(x) { lons_data <- as.vector(attr(x, 'Variables')$dat1$longitude) lats_data <- as.vector(attr(x, 'Variables')$dat1$latitude) - r <- s2dverification::CDORemap(x, lons_data, lats_data, "r360x181", - 'bil', crop = FALSE, force_remap = TRUE)[[1]] + r <- s2dv::CDORemap(x, lons_data, lats_data, "r360x181", + 'bil', crop = FALSE, force_remap = TRUE)[[1]] return(r) } -- GitLab From 94db0497bf0c5b219722d0ea5ac7c07e29723487 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 8 Oct 2021 16:54:55 +0200 Subject: [PATCH 2/3] Add use case of irregular regridding using the workflow --- inst/doc/usecase/ex2_13_irregular_regrid.R | 68 ++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 inst/doc/usecase/ex2_13_irregular_regrid.R diff --git a/inst/doc/usecase/ex2_13_irregular_regrid.R b/inst/doc/usecase/ex2_13_irregular_regrid.R new file mode 100644 index 0000000..e39c0fb --- /dev/null +++ b/inst/doc/usecase/ex2_13_irregular_regrid.R @@ -0,0 +1,68 @@ +#---------------------------------------------------------------------------- +# Author: An-Chi Ho +# Date: 8th Oct 2021 +# +# This script shows how to load irregular grid data by Start(), then regrid it +# by s2dv::CDORemap in the workflow. It is a solution before Start() can deal +# with irregular regridding directly. 
+#---------------------------------------------------------------------------- + +library(startR) + +path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/cmcc-cm2-sr5/cmip6-dcppA-hindcast_i1p1/', + 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20210312/', + '$var$_*_s$sdate$-$member$_gn_$aux$.nc') + +data <- Start(dataset = path, + var = 'tos', + sdate = c('1960', '1961'), + aux = 'all', + aux_depends = 'sdate', + j = indices(2:361), # remove two indices to avoid white strips + i = indices(2:291), # remove two indices to avoid white strips + time = indices(1:12), + member = 'r1i1p1f1', + return_vars = list(j = NULL, i = NULL, + latitude = NULL, longitude = NULL), + retrieve = F) + +func_regrid <- function(data) { + lons <- attr(data, 'Variables')$common$longitude + lats <- attr(data, 'Variables')$common$latitude + data <- s2dv::CDORemap(data, lons, lats, grid = 'r360x180', + method = 'bil', crop = FALSE) + lons_reg <- data[['lons']] + lats_reg <- data[['lats']] + return(list(data = data[[1]], lats = lats_reg, lons = lons_reg)) +} + +step <- Step(fun = func_regrid, + target_dims = list(data = c('j', 'i')), + output_dims = list(data = c('lon', 'lat'), + lats = 'lat', lons = 'lon'), + use_attributes = list(data = "Variables")) +wf <- AddStep(data, step) + +res <- Compute(workflow = wf$data, + chunks = list(sdate = 2, time = 2)) + +names(res) +#[1] "data" "lats" "lons" +dim(res$data) +# lat lon dataset var sdate aux time member +# 180 360 1 1 2 1 12 1 +dim(res$lons) +# lon dataset var sdate aux time member +# 360 1 1 2 1 12 1 +dim(res$lats) +# lat dataset var sdate aux time member +# 180 1 1 2 1 12 1 + +s2dv::PlotEquiMap(drop(res$data)[ , , 1, 1], + lon = drop(res$lons)[, 1, 1], + lat = drop(res$lats)[, 1, 1]) + +# Plot Layout for sdate = 1 all the time steps +var <- s2dv:::.aperm2(drop(res$data)[, , 1, ], c(3, 1, 2)) +s2dv::PlotLayout(PlotEquiMap, c('lon', 'lat'), var = var, + lon = drop(res$lons)[, 1, 1], lat = drop(res$lats)[, 1, 1]) -- GitLab From 
b55786427abc03cbc5507692250395faeb408e55 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 23 Dec 2021 13:23:20 +0100 Subject: [PATCH 3/3] Check use case with new s2dv::CDOremap --- inst/doc/deployment.md | 4 ++-- inst/doc/faq.md | 4 ++-- inst/doc/usecase.md | 9 +++++++-- inst/doc/usecase/ex2_13_irregular_regrid.R | 16 ++++++++-------- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/inst/doc/deployment.md b/inst/doc/deployment.md index 47325db..1e098bb 100644 --- a/inst/doc/deployment.md +++ b/inst/doc/deployment.md @@ -17,7 +17,7 @@ devtools::install_git('https://earth.bsc.es/gitlab/es/startR') ``` - Among others, the bigmemory package will be installed. - If loading and processing NetCDF files (only file format supported by now), install the easyNCDF package. - - If planning to interpolate the data with CDO (either by using the `transform` parameter in `startR::Start`, or by using `s2dverification::CDORemap` in the workflow specified to `startR::Compute`), install s2dverification (>= 2.8.4) and CDO (version 1.6.3 tested). CDO is not available for Windows. + - If planning to interpolate the data with CDO (either by using the `transform` parameter in `startR::Start`, or by using `s2dv::CDORemap` in the workflow specified to `startR::Compute`), install s2dv and CDO (version 1.6.3 tested). CDO is not available for Windows. A local or remote file system or THREDDS/OPeNDAP server providing the data to be retrieved must be accessible. 
@@ -50,7 +50,7 @@ All machines must be UNIX-based, with the "hostname", "date", "touch" and "sed" - netCDF-4 is installed, if loading and processing NetCDF files (only supported format by now) - R (>= 2.14.1) is installed as a Linux Environment Module - the startR package is installed - - if using CDO interpolation, the s2dverification package and CDO 1.6.3 are installed + - if using CDO interpolation, the s2dv package and CDO 1.6.3 are installed - any other R packages required by the `startR::Compute` workflow are installed - any other Environment Modules used by the `startR::Compute` workflow are installed - a shared file system (with a unified access point) or THREDDS/OPeNDAP server is accessible across HPC nodes and HPC login node, where the necessary data can be uploaded from your workstation. A file system shared between your workstation and the HPC is also supported and advantageous. Use of a data transfer service between the workstation and the HPC is also supported under specific configurations. diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 51d58ff..ea84377 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -1045,7 +1045,7 @@ Error in dim(x$x) <- dim_bk : ``` ``` -Error in s2dverification::CDORemap(data_array, lons, lats, ...) : +Error in s2dv::CDORemap(data_array, lons, lats, ...) : Found invalid values in 'lons'. ``` @@ -1053,7 +1053,7 @@ Error in s2dverification::CDORemap(data_array, lons, lats, ...) : ERROR: invalid cell Aborting in file clipping.c, line 1295 ... -Error in s2dverification::CDORemap(data_array, lons, lats, ...) : +Error in s2dv::CDORemap(data_array, lons, lats, ...) : CDO remap failed. ``` diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index e0cf5e1..9d17139 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -87,5 +87,10 @@ If you need to create the mask file on your own, go to ex2_9_mask.R. 
This use case uses experimental and the corresponding observational data to calculate the temporal mean and spatial weighted mean. Notice that the spatial resolutions of the two datasets are different, but it still works because lat and lon are target dimensions. - - + 12. [Transform and chunk over spatial dimensions](inst/doc/usecase/ex2_12_transform_and_chunk.R) + This use case provides an example of transforming and chunking +latitude and longitude dimensions. If all other dimensions are used as target dimensions in the operation, +it would be good to have the option of chunking the spatial dimensions. + 13. [Interpolate irregular grid in the workflow](inst/doc/usecase/ex2_13_irregular_regrid.R) + This script shows how to load irregular grid data by Start(), then regrid it +by s2dv::CDORemap in the workflow. It is a solution before Start() can deal with irregular regridding directly. diff --git a/inst/doc/usecase/ex2_13_irregular_regrid.R b/inst/doc/usecase/ex2_13_irregular_regrid.R index e39c0fb..df5e21f 100644 --- a/inst/doc/usecase/ex2_13_irregular_regrid.R +++ b/inst/doc/usecase/ex2_13_irregular_regrid.R @@ -49,8 +49,8 @@ res <- Compute(workflow = wf$data, names(res) #[1] "data" "lats" "lons" dim(res$data) -# lat lon dataset var sdate aux time member -# 180 360 1 1 2 1 12 1 +# lon lat dataset var sdate aux time member +# 360 180 1 1 2 1 12 1 dim(res$lons) # lon dataset var sdate aux time member # 360 1 1 2 1 12 1 @@ -58,11 +58,11 @@ dim(res$lats) # lat dataset var sdate aux time member # 180 1 1 2 1 12 1 -s2dv::PlotEquiMap(drop(res$data)[ , , 1, 1], - lon = drop(res$lons)[, 1, 1], - lat = drop(res$lats)[, 1, 1]) +library(s2dv) +PlotEquiMap(drop(res$data)[ , , 1, 1], lon = drop(res$lons)[, 1, 1], + lat = drop(res$lats)[, 1, 1]) # Plot Layout for sdate = 1 all the time steps -var <- s2dv:::.aperm2(drop(res$data)[, , 1, ], c(3, 1, 2)) -s2dv::PlotLayout(PlotEquiMap, c('lon', 'lat'), var = var, - lon = drop(res$lons)[, 1, 1], lat = drop(res$lats)[, 1, 1]) +var <- 
Reorder(drop(res$data)[, , 1, ], c(3, 1, 2)) +PlotLayout(PlotEquiMap, c('lon', 'lat'), var = var, + lon = drop(res$lons)[, 1, 1], lat = drop(res$lats)[, 1, 1]) -- GitLab