From 98c9ce92c8a1ff0b7418e355fa7e02ccfe5a1786 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 9 Nov 2023 16:38:56 +0100 Subject: [PATCH 01/10] Add parallel development; few checks; few unit tests --- R/ShapeToMask.R | 244 ++++++++++++++++++------------ man/ShapeToMask.Rd | 38 +---- tests/testthat/test-ShapeToMask.R | 66 ++++++++ 3 files changed, 216 insertions(+), 132 deletions(-) create mode 100644 tests/testthat/test-ShapeToMask.R diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 7bf8de4..8efb570 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -42,7 +42,7 @@ #'@param lon_dim A character string indicating the longitudinal dimension. If it #' is NULL, the longitudinal name will be searched using an internal function #' with the following possible names: 'lon', 'longitude', 'x', 'i' and -#' 'nav_lon'. It is NULL by default. +#' 'nav_lon'. #'@param target_crs A character string indicating the target 'Coordinate #' Reference System'. #'@param region A logical value indicating if we want a dimension for the @@ -56,9 +56,6 @@ #'@param max_dist A numeric value indicating the maximum distance is accepted #' to the closest gridpoint when there is no intersection between the shapefile #' and the reference grid. -#'@param savefile A logical value indicating wether to save the mask array into -#' a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by -#' default. This functionality is not developed yet. #'@param ... Arguments passed on to 's2_options' in function 'st_intersection'. #' See 's2 package'. #' @@ -70,48 +67,65 @@ #'# Exmple (1): NUTS #'shp_file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', #' 'NUTS_RG_60M_2021_4326.shp') -#'# shp_file <- paste0('/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/', -#'# 'tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp') -#'# ref_grid <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/', -#'# 'tas_f6h/tas_20170201.nc') -#'# ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', -#'# 'tas_f6h/tas_20191212.nc') -#'ref_grid <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', -#' 'tas_f1h/tas_201006.nc') -#'# ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) -#' -#'NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) -#'NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) -#'mask1 <- ShapeToMask(shp_file, ref_grid, reg_ids = NUTS.id) -#'mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, -#' reg_names = NUTS.name) -#' -#'# Exmple (2): GADM -#'shp_file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" -#'ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', -#' 'tas_f6h/tas_20191212.nc') -#'GADM.id <- c("ESP", "ITA") -#'GADM.name <- c("Spain", "Italy") -#'mask1 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, -#' reg_ids = GADM.id, shp_system = "GADM") +#'ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) +#'NUTS_name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) #'mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, -#' reg_names = GADM.name, shp_system = "GADM") +#' reg_names = NUTS_name) #'@import easyNCDF #'@import sf #'@export -ShapeToMask <- function(shp_file, ref_grid = NULL, +ShapeToMask <- function(shp_file, ref_grid, shp_system = "NUTS", shp_system_name = NULL, reg_ids = NULL, reg_names = NULL, reg_level = 3, lat_dim = NULL, lon_dim = NULL, - savefile = FALSE, region = FALSE, target_crs = NULL, + region = FALSE, target_crs = NULL, check_valid = FALSE, find_min_dist = FALSE, - max_dist = 50, ...) { + max_dist = 50, ncores = NULL, ...) { - # NOTE: One region is one number; need to have the option to combine them? # TODO: Suppress warnings? # TODO: Substitute packages # TODO: Substitute loop for each region with multiApply # TODO: Add initial checks + # TODO: Add saving option? + + # Initial checks + # shp_file + if (is.null(shp_file)) { + stop("Parameter 'shp_file' cannot be NULL.") + } + if (!is.character(shp_file)) { + stop("Parameter 'shp_file' must be a character string.") + } + # ref_grid + if (is.null(ref_grid)) { + stop("Parameter 'ref_grid' cannot be NULL.") + } + if (!any(is.character(ref_grid) | is.list(ref_grid))) { + stop("Parameter 'ref_grid' must be a character string or a list.") + } + # shp_system + # shp_system_name + # reg_ids + # reg_names + # reg_level + # lat_dim + # lon_dim + # region + # target_crs + # check_valid + # find_min_dist + # max_dist + + # ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores)) { + stop("Parameter 'ncores' must be numeric.") + } + ncores <- round(ncores) + if (ncores < 2) { + ncores <- NULL + } + } # Step 1: Load the shapefile shp <- sf::st_read(shp_file) # class sf @@ -153,14 +167,14 @@ ShapeToMask <- function(shp_file, ref_grid = NULL, if (shp_system == "NUTS") { tmp <- subset(shp, CNTR_CODE == names(reg_names)[cntr_i]) tmp <- subset(tmp, NUTS_NAME %in% reg_names[[cntr_i]]) - shp_system_name <- NUTS_NAME + shp_system_name <- 'NUTS_NAME' } else if (shp_system == "ADM") { tmp <- subset(shp, ADM0_EN == names(reg_names)[cntr_i]) tmp <- subset(tmp, ADM1_EN %in% reg_names[[cntr_i]]) - shp_system_name <- ADM1_EN + shp_system_name <- 'ADM1_EN' } else if (shp_system == "GADM") { tmp <- subset(shp, Name %in% reg_names) - shp_system_name <- Name + shp_system_name <- 'Name' } if (cntr_i == 1) { shp_tmp <- tmp @@ -220,8 +234,8 @@ ShapeToMask <- function(shp_file, ref_grid = NULL, lat = rep(lat, each = length(lon))) coord <- as.matrix(data.frame(x = ref.df$lon, y = ref.df$lat)) - xy.sfg <- st_multipoint(coord) - xy.sfc <- st_sfc(xy.sfg) + xy.sfg <- sf::st_multipoint(coord) + xy.sfc <- sf::st_sfc(xy.sfg) # Assign crs of original shapefile if (!is.null(target_crs)) { st_crs(xy.sfc) <- sf::st_crs(target_crs) #initial_crs # asign crs of original shapefile @@ -231,13 +245,6 @@ ShapeToMask <- function(shp_file, ref_grid = NULL, } # Step 3: Create mask - ## Create mask array with 0; 1, 2, etc. will be filled in for each shp region - mask <- array(0, dim = c(length(lon), length(lat))) - names(dim(mask)) <- c(lon_dim, lat_dim) - if (region) { - mask <- array(0, dim = c(nrow(shp), length(lon), length(lat))) - names(dim(mask)) <- c('region', lon_dim, lat_dim) - } if (check_valid) { xy.sfc <- st_make_valid(xy.sfc) @@ -245,57 +252,43 @@ ShapeToMask <- function(shp_file, ref_grid = NULL, } ## Loop through each shp region - for (shp_i in 1:nrow(shp)) { - # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may - # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). - shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ]) - # shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ], ...) - tmp_coords <- st_coordinates(shp_pol)[, 1:2] - if (length(tmp_coords) == 0) { - dim(tmp_coords) <- NULL - } else if (is.null(dim(tmp_coords))) { - tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords))) - } - if (!is.null(dim(tmp_coords))) { - - # polygon_instersection - for (ii in 1:nrow(tmp_coords)) { - # pt_x <- which(lon == tmp_coords[ii, 1]) - # pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) - if (length(dim(mask)) == 2) { - # min(abs(lon - tmp_coords[ii, 1])) - # min(abs(lat - tmp_coords[ii, 2])) - mask[which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i - } else { - mask[shp_i, which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 - } - } - } else if (find_min_dist) { - x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,1] - y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,2] - dist <- sqrt((xy.sfg[,1] - x.centroid.shpi)**2 + (xy.sfg[,2] - y.centroid.shpi)**2) - tmp_coords <- array(xy.sfg[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) - colnames(tmp_coords) <- c('X', 'Y') - if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & - any(round(lon,2) == round(tmp_coords[1,1],2))) ) { - if (length(dim(mask)) == 2) { - mask[which.min(abs(lon - tmp_coords[, 1])), - which.min(abs(lat - tmp_coords[, 2]))] <- shp_i - } else { - mask[shp_i, which.min(abs(lon - tmp_coords[, 1])), - which.min(abs(lat - tmp_coords[, 2]))] <- 1 - } - warning(paste0('The reference grid has no intersection with region ', - ifelse(is.character(shp_system_name), shp[shp_i,][[shp_system_name]], paste0('n° ', shp_i)), - ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], - ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) + if (region) { + cfun <- function(a, b) { + if (length(dim(a)) == 2) { + dims <- c(dim(a), 2) } else { - warning(paste0('The reference grid has no intersection with region ', - ifelse(is.character(shp_system_name), shp[shp_i,][[shp_system_name]], paste0('n° ', shp_i)))) + dims <- c(dim(a)[1:2], dim(a)[length(dim(a))]+1) } + return(array(c(a,b), dim = dims)) } + } else { + cfun <- function(a, b) { + a[which(b != 0)] <- b[which(b != 0)] + return(a) + } + } + + if (is.null(ncores)) { + mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun') %do% + .shapetomask(shp = shp, xy.sfc = xy.sfc, + find_min_dist = find_min_dist, + shp_system_name = shp_system_name, + max_dist = max_dist, n = shp_i, + lon = lon, lat = lat, region = region) + } else { + registerDoParallel(ncores) + mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages='sf') %dopar% + .shapetomask(shp = shp, xy.sfc = xy.sfc, + find_min_dist = find_min_dist, + shp_system_name = shp_system_name, + max_dist = max_dist, n = shp_i, + lon = lon, lat = lat, region = region) + registerDoSEQ() + } + if (region) { + names(dim(mask)) <- c(lon_dim, lat_dim, 'region') + } else { + names(dim(mask)) <- c(lon_dim, lat_dim) } # Step 4: Add attributes @@ -313,12 +306,63 @@ ShapeToMask <- function(shp_file, ref_grid = NULL, # ## Return all the info from shp # attr(mask, "shapefile") <- attributes(shp) - # Step 5: Save the file or return the array - if (!savefile) { - return(mask) - } else { - warning("This functionality is not developed yet.") - # TODO - # ArrayToNc() + return(mask) +} + +.shapetomask <- function(shp, xy.sfc, find_min_dist = FALSE, + shp_system_name = NULL, max_dist = 50, n, lon, lat, + region = FALSE) { + mask <- array(0, dim = c(length(lon), length(lat))) + shpi <- shp[n, ] + # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may + # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). + shp_pol <- sf::st_intersection(xy.sfc, shpi) + # shp_pol <- sf::st_intersection(xy.sfc, shpi, ...) + tmp_coords <- sf::st_coordinates(shp_pol)[, 1:2] + if (length(tmp_coords) == 0) { + dim(tmp_coords) <- NULL + } else if (is.null(dim(tmp_coords))) { + tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords))) + } + if (!is.null(dim(tmp_coords))) { + + # polygon_instersection + for (ii in 1:nrow(tmp_coords)) { + # pt_x <- which(lon == tmp_coords[ii, 1]) + # pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) + if (!region) { + # min(abs(lon - tmp_coords[ii, 1])) + # min(abs(lat - tmp_coords[ii, 2])) + mask[which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- n + } else { + mask[which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 + } + } + } else if (find_min_dist) { + x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,1] + y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,2] + dist <- sqrt((xy.sfg[,1] - x.centroid.shpi)**2 + (xy.sfg[,2] - y.centroid.shpi)**2) + tmp_coords <- array(xy.sfg[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) + colnames(tmp_coords) <- c('X', 'Y') + if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & + any(round(lon,2) == round(tmp_coords[1,1],2))) ) { + if (length(dim(mask)) == 2) { + mask[which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- shp_i + } else { + mask[shp_i, which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- 1 + } + warning(paste0('The reference grid has no intersection with region ', + ifelse(is.character(shp_system_name), shpi[[shp_system_name]], paste0('n° ', shp_i)), + ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], + ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) + } else { + warning(paste0('The reference grid has no intersection with region ', + ifelse(is.character(shp_system_name), shpi[[shp_system_name]], paste0('n° ', shp_i)))) + } } + return(mask) } \ No newline at end of file diff --git a/man/ShapeToMask.Rd b/man/ShapeToMask.Rd index 87826d3..251f845 100644 --- a/man/ShapeToMask.Rd +++ b/man/ShapeToMask.Rd @@ -6,7 +6,7 @@ \usage{ ShapeToMask( shp_file, - ref_grid = NULL, + ref_grid, shp_system = "NUTS", shp_system_name = NULL, reg_ids = NULL, @@ -14,12 +14,12 @@ ShapeToMask( reg_level = 3, lat_dim = NULL, lon_dim = NULL, - savefile = FALSE, region = FALSE, target_crs = NULL, check_valid = FALSE, find_min_dist = FALSE, max_dist = 50, + ncores = NULL, ... ) } @@ -57,11 +57,7 @@ with the following possible names: 'lat', 'latitude', 'y', 'j' and \item{lon_dim}{A character string indicating the longitudinal dimension. If it is NULL, the longitudinal name will be searched using an internal function with the following possible names: 'lon', 'longitude', 'x', 'i' and -'nav_lon'. It is NULL by default.} - -\item{savefile}{A logical value indicating wether to save the mask array into -a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by -default. This functionality is not developed yet.} +'nav_lon'.} \item{region}{A logical value indicating if we want a dimension for the regions in the resulting mask array. It is FALSE by default.} @@ -112,30 +108,8 @@ Note: Modules GDAL, PROJ and GEOS are required. # Exmple (1): NUTS shp_file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', 'NUTS_RG_60M_2021_4326.shp') -# shp_file <- paste0('/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/', -# 'tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp') -# ref_grid <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/', -# 'tas_f6h/tas_20170201.nc') -# ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', -# 'tas_f6h/tas_20191212.nc') -ref_grid <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', - 'tas_f1h/tas_201006.nc') -# ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) - -NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) -NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) -mask1 <- ShapeToMask(shp_file, ref_grid, reg_ids = NUTS.id) -mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, - reg_names = NUTS.name) - -# Exmple (2): GADM -shp_file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" -ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', - 'tas_f6h/tas_20191212.nc') -GADM.id <- c("ESP", "ITA") -GADM.name <- c("Spain", "Italy") -mask1 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, - reg_ids = GADM.id, shp_system = "GADM") +ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) +NUTS_name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, - reg_names = GADM.name, shp_system = "GADM") + reg_names = NUTS_name) } diff --git a/tests/testthat/test-ShapeToMask.R b/tests/testthat/test-ShapeToMask.R new file mode 100644 index 0000000..09deae2 --- /dev/null +++ b/tests/testthat/test-ShapeToMask.R @@ -0,0 +1,66 @@ +############################################## + +# data1 +shp_file1 <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', + 'NUTS_RG_60M_2021_4326.shp') +ref_grid1 <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', + 'tas_f1h/tas_201006.nc') +NUTS_id1 <- paste0("FI1D", c(1:3, 5, 7:9)) +NUTS_name1 <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) + +# data2 +ref_grid2 <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) + +############################################## + +test_that("1. Input checks", { + expect_error( + ShapeToMask(NULL), + "Parameter 'shp_file' cannot be NULL." + ) + expect_error( + ShapeToMask(1), + "Parameter 'shp_file' must be a character string." + ) + expect_error( + ShapeToMask(shp_file1, ref_grid = NULL), + "Parameter 'ref_grid' cannot be NULL." + ) + expect_error( + ShapeToMask(shp_file1, ref_grid = 1), + "Parameter 'ref_grid' must be a character string or a list." + ) +}) + +############################################## + +test_that("2. Output", { + mask1 <- ShapeToMask(shp_file1, ref_grid1, reg_ids = NUTS_id1) + expect_equal( + dim(mask1), + c(lon = 3600, lat = 1801) + ) + expect_equal( + sum(mask1), + c(20899) + ) + mask2 <- ShapeToMask(shp_file1, ref_grid = ref_grid2, reg_ids = NUTS_id1) + expect_equal( + sum(mask2), + 830 + ) + expect_equal( + dim(mask2), + c(lon = 61, lat = 91) + ) + mask3 <- ShapeToMask(shp_file1, ref_grid = ref_grid2, reg_ids = NUTS_id1, + region = TRUE) + expect_equal( + dim(mask3), + c(lon = 61, lat = 91, region = 7) + ) + expect_equal( + sum(mask3), + 176 + ) +}) \ No newline at end of file -- GitLab From 73fa2956f1ebc056667e078fa53260db4dd98951 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 9 Nov 2023 16:54:25 +0100 Subject: [PATCH 02/10] Add foreach package and doParallel --- NAMESPACE | 2 ++ R/ShapeToMask.R | 2 ++ 2 files changed, 4 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index a5dbed7..9139e77 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(VizWeeklyClim) import(RColorBrewer) import(cowplot) import(easyNCDF) +import(foreach) import(ggplot2) import(graphics) import(mapproj) @@ -41,6 +42,7 @@ importFrom(RColorBrewer,brewer.pal) importFrom(data.table,CJ) importFrom(data.table,data.table) importFrom(data.table,setkey) +importFrom(doParallel,registerDoParallel) importFrom(dplyr,group_by) importFrom(dplyr,mutate) importFrom(dplyr,summarise) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 8efb570..f6a04e3 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -73,6 +73,8 @@ #' reg_names = NUTS_name) #'@import easyNCDF #'@import sf +#'@import foreach +#'@importFrom doParallel registerDoParallel #'@export ShapeToMask <- function(shp_file, ref_grid, shp_system = "NUTS", shp_system_name = NULL, -- GitLab From a05ff673f214a46d201f6ab1f41dbe260a5107f4 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 9 Nov 2023 16:58:07 +0100 Subject: [PATCH 03/10] Add foreach and doParallel in DESCRIPTION --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 88e38bd..09c1dbc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,9 @@ Imports: reshape2, scales, stats, - utils + utils, + foreach, + doParallel Suggests: testthat License: GPL-3 -- GitLab From 568c4d5f154c20da9b0ed2f079c64649a16b370d Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 9 Nov 2023 17:12:05 +0100 Subject: [PATCH 04/10] Add ncores parameter and assign some vairbales to NULL --- R/ShapeToMask.R | 27 +++++++++++++++------------ man/ShapeToMask.Rd | 3 +++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index f6a04e3..f476e6d 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -56,6 +56,8 @@ #'@param max_dist A numeric value indicating the maximum distance is accepted #' to the closest gridpoint when there is no intersection between the shapefile #' and the reference grid. +#'@param ncores The number of parallel processes to spawn for the use for +#' parallel computation in multiple cores. #'@param ... Arguments passed on to 's2_options' in function 'st_intersection'. #' See 's2 package'. #' @@ -135,7 +137,10 @@ ShapeToMask <- function(shp_file, ref_grid, transformed_shapefile <- st_transform(shp, crs = target_crs) shp <- transformed_shapefile } - + + NUTS_ID <- ADM1_PCODE <- ISO <- NULL + CNTR_CODE <- NUTS_NAME <- ADM0_EN <- ADM1_EN <- Name <- NULL + if (all(is.null(reg_ids), is.null(reg_names))) { stop("Either provide parameter 'reg_ids' or 'reg_names'.") } else if (!is.null(reg_ids)) { @@ -272,19 +277,17 @@ ShapeToMask <- function(shp_file, ref_grid, if (is.null(ncores)) { mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun') %do% - .shapetomask(shp = shp, xy.sfc = xy.sfc, - find_min_dist = find_min_dist, + .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, + xy.sfc = xy.sfc, find_min_dist = find_min_dist, shp_system_name = shp_system_name, - max_dist = max_dist, n = shp_i, - lon = lon, lat = lat, region = region) + max_dist = max_dist, region = region) } else { registerDoParallel(ncores) mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages='sf') %dopar% - .shapetomask(shp = shp, xy.sfc = xy.sfc, - find_min_dist = find_min_dist, + .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, + xy.sfc = xy.sfc, find_min_dist = find_min_dist, shp_system_name = shp_system_name, - max_dist = max_dist, n = shp_i, - lon = lon, lat = lat, region = region) + max_dist = max_dist, region = region) registerDoSEQ() } if (region) { @@ -311,14 +314,14 @@ ShapeToMask <- function(shp_file, ref_grid, return(mask) } -.shapetomask <- function(shp, xy.sfc, find_min_dist = FALSE, - shp_system_name = NULL, max_dist = 50, n, lon, lat, +.shapetomask <- function(shp, n, lon, lat, xy.sfc, find_min_dist = FALSE, + shp_system_name = NULL, max_dist = 50, region = FALSE) { mask <- array(0, dim = c(length(lon), length(lat))) shpi <- shp[n, ] # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). - shp_pol <- sf::st_intersection(xy.sfc, shpi) + shp_pol <- sf::st_intersection(x = xy.sfc, y = shpi) # shp_pol <- sf::st_intersection(xy.sfc, shpi, ...) tmp_coords <- sf::st_coordinates(shp_pol)[, 1:2] if (length(tmp_coords) == 0) { diff --git a/man/ShapeToMask.Rd b/man/ShapeToMask.Rd index 251f845..5f8dfc0 100644 --- a/man/ShapeToMask.Rd +++ b/man/ShapeToMask.Rd @@ -77,6 +77,9 @@ FALSE by default.} to the closest gridpoint when there is no intersection between the shapefile and the reference grid.} +\item{ncores}{The number of parallel processes to spawn for the use for +parallel computation in multiple cores.} + \item{...}{Arguments passed on to 's2_options' in function 'st_intersection'. See 's2 package'.} } -- GitLab From aeefc4e2c3c699eb4fb8d6dd0e42c0b6498a9e30 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 9 Nov 2023 17:18:34 +0100 Subject: [PATCH 05/10] Correct error: No visible binding for variables --- R/ShapeToMask.R | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index f476e6d..986c251 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -139,8 +139,8 @@ ShapeToMask <- function(shp_file, ref_grid, } NUTS_ID <- ADM1_PCODE <- ISO <- NULL - CNTR_CODE <- NUTS_NAME <- ADM0_EN <- ADM1_EN <- Name <- NULL - + CNTR_CODE <- NUTS_NAME <- ADM0_EN <- ADM1_EN <- Name <- LEVL_CODE <- NULL + if (all(is.null(reg_ids), is.null(reg_names))) { stop("Either provide parameter 'reg_ids' or 'reg_names'.") } else if (!is.null(reg_ids)) { @@ -274,7 +274,7 @@ ShapeToMask <- function(shp_file, ref_grid, return(a) } } - + shp_i <- NULL if (is.null(ncores)) { mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun') %do% .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, @@ -355,18 +355,18 @@ ShapeToMask <- function(shp_file, ref_grid, any(round(lon,2) == round(tmp_coords[1,1],2))) ) { if (length(dim(mask)) == 2) { mask[which.min(abs(lon - tmp_coords[, 1])), - which.min(abs(lat - tmp_coords[, 2]))] <- shp_i + which.min(abs(lat - tmp_coords[, 2]))] <- n } else { - mask[shp_i, which.min(abs(lon - tmp_coords[, 1])), + mask[which.min(abs(lon - tmp_coords[, 1])), which.min(abs(lat - tmp_coords[, 2]))] <- 1 } - warning(paste0('The reference grid has no intersection with region ', - ifelse(is.character(shp_system_name), shpi[[shp_system_name]], paste0('n° ', shp_i)), - ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], - ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) + # warning(paste0('The reference grid has no intersection with region ', + # ifelse(is.character(shp_system_name), shpi[[shp_system_name]], paste0('n° ', n)), + # ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], + # ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) } else { - warning(paste0('The reference grid has no intersection with region ', - ifelse(is.character(shp_system_name), shpi[[shp_system_name]], paste0('n° ', shp_i)))) + # warning(paste0('The reference grid has no intersection with region ', + # ifelse(is.character(shp_system_name), shpi[[shp_system_name]], paste0('n° ', n)))) } } return(mask) -- GitLab From b8d0d84248acd61a79a96fe0fe1996a5ae84292b Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 9 Nov 2023 17:25:20 +0100 Subject: [PATCH 06/10] Correct variable error --- R/ShapeToMask.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 986c251..7e53a51 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -348,8 +348,8 @@ ShapeToMask <- function(shp_file, ref_grid, } else if (find_min_dist) { x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,1] y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,2] - dist <- sqrt((xy.sfg[,1] - x.centroid.shpi)**2 + (xy.sfg[,2] - y.centroid.shpi)**2) - tmp_coords <- array(xy.sfg[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) + dist <- sqrt((xy.sfc[,1] - x.centroid.shpi)**2 + (xy.sfc[,2] - y.centroid.shpi)**2) + tmp_coords <- array(xy.sfc[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) colnames(tmp_coords) <- c('X', 'Y') if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & any(round(lon,2) == round(tmp_coords[1,1],2))) ) { -- GitLab From 1be6432066f0e51dc6a96b42bf22c7dce793fa63 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 10 Nov 2023 14:14:24 +0100 Subject: [PATCH 07/10] Fix pipeline --- R/Viz2VarsVsLTime.R | 4 ++-- R/VizLayout.R | 2 +- R/VizRobinson.R | 2 +- R/VizSection.R | 1 - R/VizVsLTime.R | 4 ++-- R/VizWeeklyClim.R | 4 ++-- man/Viz2VarsVsLTime.Rd | 4 ++-- man/VizLayout.Rd | 2 +- man/VizRobinson.Rd | 2 +- man/VizSection.Rd | 1 - man/VizVsLTime.Rd | 4 ++-- 11 files changed, 14 insertions(+), 16 deletions(-) diff --git a/R/Viz2VarsVsLTime.R b/R/Viz2VarsVsLTime.R index 9a61df6..47aae43 100644 --- a/R/Viz2VarsVsLTime.R +++ b/R/Viz2VarsVsLTime.R @@ -47,8 +47,8 @@ #'@examples #'clim <- s2dv::Clim(ts_temp$exp, ts_temp$obs, time_dim = "sdate", #' dat_dim = c("dat", "member")) -#'ano_exp <- Ano(ts_temp$exp, clim$clim_exp) -#'ano_obs <- Ano(ts_temp$obs, clim$clim_obs) +#'ano_exp <- s2dv::Ano(ts_temp$exp, clim$clim_exp) +#'ano_obs <- s2dv::Ano(ts_temp$obs, clim$clim_obs) #'corr_ano <- s2dv::Corr(s2dv::MeanDims(ano_exp, 'member'), ano_obs, #' time_dim = 'sdate', dat_dim = 'dat') #'input_cor <- array(dim = c(dat = 1, 3, time = 5)) diff --git a/R/VizLayout.R b/R/VizLayout.R index 4cf0a54..c9984f7 100644 --- a/R/VizLayout.R +++ b/R/VizLayout.R @@ -185,7 +185,7 @@ #'lats <- attr(map_temp$exp, "Variables")$common$lat #'lons <- attr(map_temp$exp, "Variables")$common$lon #' -#'VizLayout(fun = VizquiMap, plot_dims = c('lat', 'lon'), +#'VizLayout(fun = VizEquiMap, plot_dims = c('lat', 'lon'), #' var = var[, 1, 1, 1, , ], lon = lons, lat = lats, #' filled.continents = FALSE, #' toptitle = 'Near-surface temperature Nov.', diff --git a/R/VizRobinson.R b/R/VizRobinson.R index 285e8b4..91b4de5 100644 --- a/R/VizRobinson.R +++ b/R/VizRobinson.R @@ -106,7 +106,7 @@ #' toptitle = 'synthetic example', vertical = F, #' caption = 'Robinson Projection', #' bar_extra_margin = c(0, 1, 0, 1), width = 8, height = 6) -#'Vizobinson(data, lon = 0:359, lat = -90:90, mask = dots, legend = 'ggplot2', +#'VizRobinson(data, lon = 0:359, lat = -90:90, mask = dots, legend = 'ggplot2', #' target_proj = "+proj=moll", brks = seq(-10, 10, length.out = 11), #' color_fun = ClimPalette("purpleorange"), colNA = 'green', #' toptitle = 'synthetic example', caption = 'Mollweide Projection', diff --git a/R/VizSection.R b/R/VizSection.R index 06a2724..b375ea7 100644 --- a/R/VizSection.R +++ b/R/VizSection.R @@ -41,7 +41,6 @@ #'data <- array(rep(seq(25, 10, length.out = 7), each = 21) - rnorm(147), #' dim = c(lat = 21, depth = 7)) #'VizSection(data, horiz = 0:20, depth = seq(0, 300, length.out = 7), -#' intydep = 50, intxhoriz = 5, brks = 11, #' toptitle = 'Temperature cross-section', units = "degC") #'@importFrom grDevices dev.cur dev.new dev.off rainbow #'@export diff --git a/R/VizVsLTime.R b/R/VizVsLTime.R index 7dfdae9..cff8550 100644 --- a/R/VizVsLTime.R +++ b/R/VizVsLTime.R @@ -68,8 +68,8 @@ #'@examples #'clim <- s2dv::Clim(ts_temp$exp, ts_temp$obs, time_dim = "sdate", #' dat_dim = c("dat", "member")) -#'ano_exp <- Ano(ts_temp$exp, clim$clim_exp) -#'ano_obs <- Ano(ts_temp$obs, clim$clim_obs) +#'ano_exp <- s2dv::Ano(ts_temp$exp, clim$clim_exp) +#'ano_obs <- s2dv::Ano(ts_temp$obs, clim$clim_obs) #'corr_ano <- s2dv::Corr(s2dv::MeanDims(ano_exp, 'member'), ano_obs, #' time_dim = 'sdate', dat_dim = 'dat') #'input_cor <- array(dim = c(dat = 1, 4, time = 5)) diff --git a/R/VizWeeklyClim.R b/R/VizWeeklyClim.R index 3fbe62b..01f71d8 100644 --- a/R/VizWeeklyClim.R +++ b/R/VizWeeklyClim.R @@ -287,9 +287,9 @@ VizWeeklyClim <- function(data, first_date, ref_period, last_date = NULL, minor_breaks = NULL, expand = c(0.03, 0.03), labels = scales::date_format("%d %b %Y")) + theme(axis.text.x = element_text(angle = 45, hjust = 1), - panel.grid.major = element_line(linewidth = 0.5, linetype = 'solid', + panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "gray92"), - panel.grid.minor = element_line(linewidth = 0.25, linetype = 'solid', + panel.grid.minor = element_line(size = 0.25, linetype = 'solid', colour = "gray92"), legend.spacing = unit(-0.2, "cm")) + scale_fill_manual(name = NULL, diff --git a/man/Viz2VarsVsLTime.Rd b/man/Viz2VarsVsLTime.Rd index 1e7d388..c74534b 100644 --- a/man/Viz2VarsVsLTime.Rd +++ b/man/Viz2VarsVsLTime.Rd @@ -99,8 +99,8 @@ The input variables should have dimensions (nexp/nmod, nltime). \examples{ clim <- s2dv::Clim(ts_temp$exp, ts_temp$obs, time_dim = "sdate", dat_dim = c("dat", "member")) -ano_exp <- Ano(ts_temp$exp, clim$clim_exp) -ano_obs <- Ano(ts_temp$obs, clim$clim_obs) +ano_exp <- s2dv::Ano(ts_temp$exp, clim$clim_exp) +ano_obs <- s2dv::Ano(ts_temp$obs, clim$clim_obs) corr_ano <- s2dv::Corr(s2dv::MeanDims(ano_exp, 'member'), ano_obs, time_dim = 'sdate', dat_dim = 'dat') input_cor <- array(dim = c(dat = 1, 3, time = 5)) diff --git a/man/VizLayout.Rd b/man/VizLayout.Rd index b025a19..525b11e 100644 --- a/man/VizLayout.Rd +++ b/man/VizLayout.Rd @@ -271,7 +271,7 @@ var <- s2dv::MeanDims(ano$exp, "member") lats <- attr(map_temp$exp, "Variables")$common$lat lons <- attr(map_temp$exp, "Variables")$common$lon -VizLayout(fun = VizquiMap, plot_dims = c('lat', 'lon'), +VizLayout(fun = VizEquiMap, plot_dims = c('lat', 'lon'), var = var[, 1, 1, 1, , ], lon = lons, lat = lats, filled.continents = FALSE, toptitle = 'Near-surface temperature Nov.', diff --git a/man/VizRobinson.Rd b/man/VizRobinson.Rd index 7f2ee85..e34faab 100644 --- a/man/VizRobinson.Rd +++ b/man/VizRobinson.Rd @@ -177,7 +177,7 @@ VizRobinson(data, lon = 0:359, lat = -90:90, dots = dots, toptitle = 'synthetic example', vertical = F, caption = 'Robinson Projection', bar_extra_margin = c(0, 1, 0, 1), width = 8, height = 6) -Vizobinson(data, lon = 0:359, lat = -90:90, mask = dots, legend = 'ggplot2', +VizRobinson(data, lon = 0:359, lat = -90:90, mask = dots, legend = 'ggplot2', target_proj = "+proj=moll", brks = seq(-10, 10, length.out = 11), color_fun = ClimPalette("purpleorange"), colNA = 'green', toptitle = 'synthetic example', caption = 'Mollweide Projection', diff --git a/man/VizSection.Rd b/man/VizSection.Rd index f60d0aa..c11f2c9 100644 --- a/man/VizSection.Rd +++ b/man/VizSection.Rd @@ -85,6 +85,5 @@ Plot a (longitude,depth) or (latitude,depth) section. data <- array(rep(seq(25, 10, length.out = 7), each = 21) - rnorm(147), dim = c(lat = 21, depth = 7)) VizSection(data, horiz = 0:20, depth = seq(0, 300, length.out = 7), - intydep = 50, intxhoriz = 5, brks = 11, toptitle = 'Temperature cross-section', units = "degC") } diff --git a/man/VizVsLTime.Rd b/man/VizVsLTime.Rd index 52f79ba..f890517 100644 --- a/man/VizVsLTime.Rd +++ b/man/VizVsLTime.Rd @@ -118,8 +118,8 @@ of the forecast time. \examples{ clim <- s2dv::Clim(ts_temp$exp, ts_temp$obs, time_dim = "sdate", dat_dim = c("dat", "member")) -ano_exp <- Ano(ts_temp$exp, clim$clim_exp) -ano_obs <- Ano(ts_temp$obs, clim$clim_obs) +ano_exp <- s2dv::Ano(ts_temp$exp, clim$clim_exp) +ano_obs <- s2dv::Ano(ts_temp$obs, clim$clim_obs) corr_ano <- s2dv::Corr(s2dv::MeanDims(ano_exp, 'member'), ano_obs, time_dim = 'sdate', dat_dim = 'dat') input_cor <- array(dim = c(dat = 1, 4, time = 5)) -- GitLab From 4b9496f84f0bed5220a898c784b5251595c11876 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 10 Nov 2023 14:33:56 +0100 Subject: [PATCH 08/10] Fix pipeline: try 2 --- R/VizRobinson.R | 2 +- man/VizRobinson.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/VizRobinson.R b/R/VizRobinson.R index 91b4de5..048677a 100644 --- a/R/VizRobinson.R +++ b/R/VizRobinson.R @@ -103,7 +103,7 @@ #'dots[which(dots != 0)] <- 1 #'VizRobinson(data, lon = 0:359, lat = -90:90, dots = dots, #' brks = seq(-10, 10, length.out = 11), -#' toptitle = 'synthetic example', vertical = F, +#' toptitle = 'synthetic example', vertical = FALSE, #' caption = 'Robinson Projection', #' bar_extra_margin = c(0, 1, 0, 1), width = 8, height = 6) #'VizRobinson(data, lon = 0:359, lat = -90:90, mask = dots, legend = 'ggplot2', diff --git a/man/VizRobinson.Rd b/man/VizRobinson.Rd index e34faab..fd0dd94 100644 --- a/man/VizRobinson.Rd +++ b/man/VizRobinson.Rd @@ -174,7 +174,7 @@ dots[which(dots < 4 & dots > -4)] <- 0 dots[which(dots != 0)] <- 1 VizRobinson(data, lon = 0:359, lat = -90:90, dots = dots, brks = seq(-10, 10, length.out = 11), - toptitle = 'synthetic example', vertical = F, + toptitle = 'synthetic example', vertical = FALSE, caption = 'Robinson Projection', bar_extra_margin = c(0, 1, 0, 1), width = 8, height = 6) VizRobinson(data, lon = 0:359, lat = -90:90, mask = dots, legend = 'ggplot2', -- GitLab From afb0c07db2b9f386408c16f6419b21c591414d2d Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 10 Nov 2023 15:39:33 +0100 Subject: [PATCH 09/10] Add unit tests --- R/ShapeToMask.R | 183 ++++++++++++++++++------------ tests/testthat/test-ShapeToMask.R | 57 +++++++++- 2 files changed, 162 insertions(+), 78 deletions(-) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 7e53a51..248775e 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -5,7 +5,7 @@ #'output based on requested region names or IDs. The accepted shapefile #'databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. #'However, the function can use other shapefiles databases with specifying the -#'categories names with the parameter 'shp_system_name'. +#'categories names with the parameter 'shp_col_name_ids'. #' #'To ensure accurate comparison with the shapefile, the function loads a #'reference dataset that provides longitude and latitude information. By @@ -24,7 +24,7 @@ #'@param shp_system A character string containing the Shapefile System Database #' name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to #' 'NUTS' by default. -#'@param shp_system_name A character string indicating the column name of the +#'@param shp_col_name_ids A character string indicating the column name of the #' column in where the specified 'reg_ids' will be taken. #'@param reg_ids A character string indicating the unique ID in shapefile. #' It is NULL by default. @@ -79,17 +79,14 @@ #'@importFrom doParallel registerDoParallel #'@export ShapeToMask <- function(shp_file, ref_grid, - shp_system = "NUTS", shp_system_name = NULL, - reg_ids = NULL, reg_names = NULL, + shp_system = "NUTS", reg_names = NULL, + reg_ids = NULL, shp_col_name_ids = NULL, reg_level = 3, lat_dim = NULL, lon_dim = NULL, region = FALSE, target_crs = NULL, check_valid = FALSE, find_min_dist = FALSE, max_dist = 50, ncores = NULL, ...) { # TODO: Suppress warnings? - # TODO: Substitute packages - # TODO: Substitute loop for each region with multiApply - # TODO: Add initial checks # TODO: Add saving option? # Initial checks @@ -100,26 +97,97 @@ ShapeToMask <- function(shp_file, ref_grid, if (!is.character(shp_file)) { stop("Parameter 'shp_file' must be a character string.") } + # lon_dim, lat_dim + if (!is.null(lon_dim)) { + if (!is.character(lon_dim)) { + stop("Parameter 'lon_dim' must be a character string.") + } + } + if (!is.null(lat_dim)) { + if (!is.character(lat_dim)) { + stop("Parameter 'lat_dim' must be a character string.") + } + } # ref_grid if (is.null(ref_grid)) { stop("Parameter 'ref_grid' cannot be NULL.") } if (!any(is.character(ref_grid) | is.list(ref_grid))) { - stop("Parameter 'ref_grid' must be a character string or a list.") + stop("Parameter 'ref_grid' must be either a netCDF file or a list of lon and lat.") + } + if (is.character(ref_grid)) { + if (!file.exists(ref_grid)) { + stop("ref_grid file does not exist.") + } + } + if (is.list(ref_grid)) { + if (length(ref_grid) != 2) { + stop("If 'ref_grid' is a list, it must have two items for longitude and latitude.") + } + if (is.null(lat_dim) | is.null(lon_dim)) { + lon_known_names <- c(.KnownLonNames(), 'lons') + lat_known_names <- c(.KnownLatNames(), 'lats') + lon_dim <- lon_known_names[which(lon_known_names %in% names(ref_grid))] + lat_dim <- lat_known_names[which(lat_known_names %in% names(ref_grid))] + + if (identical(lon_dim, character(0)) | identical(lat_dim, character(0))) { + stop("longitude and latitude names are not recognized in 'ref_grid'. ", + "Please specify 'lon_dim' and 'lat_dim'.") + } + } + } + + # shp_system, reg_ids, reg_names, shp_col_name_ids + if (!is.null(shp_system)) { + if (!is.character(shp_system)) { + stop("Parameter 'shp_system' must be a character strinig.") + } + if (all(is.null(reg_ids), is.null(reg_names))) { + stop("Either provide parameter 'reg_ids' or 'reg_names'.") + } + } else if (is.null(shp_col_name_ids)) { + stop("Either provide parameter 'shp_system' or 'shp_col_name_ids'.") + } else { + if (is.null(reg_ids)) { + stop("If 'shp_col_name_ids' is used, parameter 'reg_ids' must be provided.") + } + if (!is.character(shp_col_name_ids)) { + stop("Parameter 'shp_col_name_ids' must be a character strinig.") + } + } + if (all(!is.null(reg_ids), !is.null(reg_names))) { + warning("Only use 'reg_ids' to get the shape region. 'reg_names' is not used.") } - # shp_system - # shp_system_name - # reg_ids - # reg_names # reg_level - # lat_dim - # lon_dim + if (!is.null(reg_level)) { + if (!is.numeric(reg_level)) { + stop("Parameter 'reg_level' must be numeric.") + } + } # region + if (!is.logical(region)) { + stop("Parameter 'region' must be a logical value.") + } # target_crs + if (!is.null(target_crs)) { + if (!is.character(target_crs)) { + stop("Parameter 'target_crs' must be a character string.") + } + } # check_valid + if (!is.logical(check_valid)) { + stop("Parameter 'check_valid' must be a logical value.") + } # find_min_dist + if (!is.logical(find_min_dist)) { + stop("Parameter 'find_min_dist' must be a logical value.") + } # max_dist - + if (!is.null(max_dist)) { + if (!is.numeric(max_dist)) { + stop("Parameter 'max_dist' must be a numeric.") + } + } # ncores if (!is.null(ncores)) { if (!is.numeric(ncores)) { @@ -141,47 +209,42 @@ ShapeToMask <- function(shp_file, ref_grid, NUTS_ID <- ADM1_PCODE <- ISO <- NULL CNTR_CODE <- NUTS_NAME <- ADM0_EN <- ADM1_EN <- Name <- LEVL_CODE <- NULL - if (all(is.null(reg_ids), is.null(reg_names))) { - stop("Either provide parameter 'reg_ids' or 'reg_names'.") - } else if (!is.null(reg_ids)) { + if (!is.null(reg_ids)) { ## Method 1: Directly use IDs - if (!is.null(shp_system_name)) { - if (shp_system_name %in% names(shp)) { - shp <- subset(shp, get(shp_system_name) %in% reg_ids) + if (!is.null(shp_col_name_ids)) { + if (shp_col_name_ids %in% names(shp)) { + shp <- subset(shp, get(shp_col_name_ids) %in% reg_ids) } else { stop("Shape system name not found in shapefile names.") } } else if (shp_system == "NUTS") { shp <- subset(shp, NUTS_ID %in% reg_ids) - shp_system_name <- "NUTS_ID" + shp_col_name_ids <- "NUTS_ID" } else if (shp_system == "ADM") { shp <- subset(shp, ADM1_PCODE %in% reg_ids) - shp_system_name <- "ADM1_PCODE" + shp_col_name_ids <- "ADM1_PCODE" } else if (shp_system == "GADM") { shp <- subset(shp, ISO %in% reg_ids) - shp_system_name <- "ISO" + shp_col_name_ids <- "ISO" } else { stop("shp_system ", shp_system, " is not defined yet.") } - if (!is.null(reg_names)) { - warning("Only use 'reg_ids' to get the shape region. 'reg_names' is not used.") - } } else if (!is.null(reg_names)) { - shp_system_name <- NULL + shp_col_name_ids <- NULL ## Method 2: Use country code & region name for (cntr_i in 1:length(reg_names)) { if (shp_system == "NUTS") { tmp <- subset(shp, CNTR_CODE == names(reg_names)[cntr_i]) tmp <- subset(tmp, NUTS_NAME %in% reg_names[[cntr_i]]) - shp_system_name <- 'NUTS_NAME' + shp_col_name_ids <- 'NUTS_NAME' } else if (shp_system == "ADM") { tmp <- subset(shp, ADM0_EN == names(reg_names)[cntr_i]) tmp <- subset(tmp, ADM1_EN %in% reg_names[[cntr_i]]) - shp_system_name <- 'ADM1_EN' + shp_col_name_ids <- 'ADM1_EN' } else if (shp_system == "GADM") { tmp <- subset(shp, Name %in% reg_names) - shp_system_name <- 'Name' + shp_col_name_ids <- 'Name' } if (cntr_i == 1) { shp_tmp <- tmp @@ -197,42 +260,20 @@ ShapeToMask <- function(shp_file, ref_grid, } # Step 2: Use the reference file to get lat and lon - if (all(tools::file_ext(ref_grid) == 'nc')) { - if (!file.exists(ref_grid)) { - stop("ref_grid file does not exist.") - } else { - - ## Method 1: ref_grid is a netCDF file - if (is.null(lat_dim) | is.null(lon_dim)) { - var_names <- easyNCDF::NcReadVarNames(ref_grid) - lat_dim <- var_names[which(var_names %in% .KnownLatNames())] - lon_dim <- var_names[which(var_names %in% .KnownLonNames())] - } - latlon <- NcToArray(ref_grid, vars_to_read = c(lat_dim, lon_dim)) - lat <- NcToArray(ref_grid, vars_to_read = lat_dim)[1, ] - lon <- NcToArray(ref_grid, vars_to_read = lon_dim)[1, ] + ## Method 1: ref_grid is a netCDF file + if (is.null(lat_dim) | is.null(lon_dim)) { + var_names <- easyNCDF::NcReadVarNames(ref_grid) + lat_dim <- var_names[which(var_names %in% .KnownLatNames())] + lon_dim <- var_names[which(var_names %in% .KnownLonNames())] } + latlon <- easyNCDF::NcToArray(ref_grid, vars_to_read = c(lat_dim, lon_dim)) + lat <- easyNCDF::NcToArray(ref_grid, vars_to_read = lat_dim)[1, ] + lon <- easyNCDF::NcToArray(ref_grid, vars_to_read = lon_dim)[1, ] } else if (is.list(ref_grid)) { ## Method 2: ref_grid is a list of lon and lat - if (length(ref_grid) != 2) { - stop("If 'ref_grid' is a list, it must have two items for longitude and latitude.") - } - if (is.null(lat_dim) | is.null(lon_dim)) { - lon_known_names <- c(.KnownLonNames(), 'lons') - lat_known_names <- c(.KnownLatNames(), 'lats') - lon_dim <- lon_known_names[which(lon_known_names %in% names(ref_grid))] - lat_dim <- lat_known_names[which(lat_known_names %in% names(ref_grid))] - - if (identical(lon_dim, character(0)) | identical(lat_dim, character(0))) { - stop("longitude and latitude names are not recognized in 'ref_grid'. Please specify 'lon_dim' and 'lat_dim'.") - } - } lat <- ref_grid[[lat_dim]] lon <- ref_grid[[lon_dim]] - - } else { - stop("Parameter 'ref_grid' must be either a netCDF file or a list of lon and lat.") } ## Create data frame & sp class for ref grid @@ -252,7 +293,6 @@ ShapeToMask <- function(shp_file, ref_grid, } # Step 3: Create mask - if (check_valid) { xy.sfc <- st_make_valid(xy.sfc) shp <- st_make_valid(shp) @@ -279,15 +319,15 @@ ShapeToMask <- function(shp_file, ref_grid, mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun') %do% .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, xy.sfc = xy.sfc, find_min_dist = find_min_dist, - shp_system_name = shp_system_name, - max_dist = max_dist, region = region) + shp_col_name_ids = shp_col_name_ids, + max_dist = max_dist, region = region, ...) } else { registerDoParallel(ncores) mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages='sf') %dopar% .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, xy.sfc = xy.sfc, find_min_dist = find_min_dist, - shp_system_name = shp_system_name, - max_dist = max_dist, region = region) + shp_col_name_ids = shp_col_name_ids, + max_dist = max_dist, region = region, ...) registerDoSEQ() } if (region) { @@ -315,14 +355,13 @@ ShapeToMask <- function(shp_file, ref_grid, } .shapetomask <- function(shp, n, lon, lat, xy.sfc, find_min_dist = FALSE, - shp_system_name = NULL, max_dist = 50, - region = FALSE) { + shp_col_name_ids = NULL, max_dist = 50, + region = FALSE, ...) { mask <- array(0, dim = c(length(lon), length(lat))) shpi <- shp[n, ] # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). - shp_pol <- sf::st_intersection(x = xy.sfc, y = shpi) - # shp_pol <- sf::st_intersection(xy.sfc, shpi, ...) + shp_pol <- sf::st_intersection(x = xy.sfc, y = shpi, ...) tmp_coords <- sf::st_coordinates(shp_pol)[, 1:2] if (length(tmp_coords) == 0) { dim(tmp_coords) <- NULL @@ -361,12 +400,12 @@ ShapeToMask <- function(shp_file, ref_grid, which.min(abs(lat - tmp_coords[, 2]))] <- 1 } # warning(paste0('The reference grid has no intersection with region ', - # ifelse(is.character(shp_system_name), shpi[[shp_system_name]], paste0('n° ', n)), + # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)), # ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], # ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) } else { # warning(paste0('The reference grid has no intersection with region ', - # ifelse(is.character(shp_system_name), shpi[[shp_system_name]], paste0('n° ', n)))) + # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)))) } } return(mask) diff --git a/tests/testthat/test-ShapeToMask.R b/tests/testthat/test-ShapeToMask.R index 09deae2..0217da5 100644 --- a/tests/testthat/test-ShapeToMask.R +++ b/tests/testthat/test-ShapeToMask.R @@ -5,15 +5,21 @@ shp_file1 <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', 'NUTS_RG_60M_2021_4326.shp') ref_grid1 <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', 'tas_f1h/tas_201006.nc') +ref_grid1_1 <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) NUTS_id1 <- paste0("FI1D", c(1:3, 5, 7:9)) NUTS_name1 <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) # data2 -ref_grid2 <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) +shp_file2 <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" +ref_grid2 <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', + 'tas_f6h/tas_20191212.nc') +GADM_id2 <- c("ESP", "ITA") +GADM_name2 <- c("Spain", "Italy") ############################################## test_that("1. Input checks", { + # shp_file expect_error( ShapeToMask(NULL), "Parameter 'shp_file' cannot be NULL." @@ -22,14 +28,32 @@ test_that("1. Input checks", { ShapeToMask(1), "Parameter 'shp_file' must be a character string." ) + # ref_grid expect_error( ShapeToMask(shp_file1, ref_grid = NULL), "Parameter 'ref_grid' cannot be NULL." ) expect_error( ShapeToMask(shp_file1, ref_grid = 1), - "Parameter 'ref_grid' must be a character string or a list." + "Parameter 'ref_grid' must be either a netCDF file or a list of lon and lat." ) + # lon_dim, lat_dim + expect_error( + ShapeToMask(shp_file1, lon_dim = 1, lat_dim = 'lat'), + "Parameter 'lon_dim' must be a character string." + ) + expect_error( + ShapeToMask(shp_file1, lat_dim = 1, lon_dim = 'lat'), + "Parameter 'lat_dim' must be a character string." + ) + # shp_system, reg_ids, reg_names, shp_col_name_ids + # reg_level + # region + # target_crs + # check_valid + # find_min_dist + # max_dist + # ncores }) ############################################## @@ -44,7 +68,7 @@ test_that("2. Output", { sum(mask1), c(20899) ) - mask2 <- ShapeToMask(shp_file1, ref_grid = ref_grid2, reg_ids = NUTS_id1) + mask2 <- ShapeToMask(shp_file1, ref_grid = ref_grid1_1, reg_ids = NUTS_id1) expect_equal( sum(mask2), 830 @@ -53,14 +77,35 @@ test_that("2. Output", { dim(mask2), c(lon = 61, lat = 91) ) - mask3 <- ShapeToMask(shp_file1, ref_grid = ref_grid2, reg_ids = NUTS_id1, + mask3 <- ShapeToMask(shp_file1, ref_grid = ref_grid1_1, reg_names = NUTS_name1, region = TRUE) expect_equal( dim(mask3), - c(lon = 61, lat = 91, region = 7) + c(lon = 61, lat = 91, region = 4) ) expect_equal( sum(mask3), - 176 + 105 + ) + mask4 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, + reg_ids = GADM_id2, shp_system = "GADM") + expect_equal( + dim(mask4), + c(longitude = 240, latitude = 121) + ) + expect_equal( + sum(mask4), + 48 + ) + mask5 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, + reg_names = GADM_name2, shp_system = "GADM", + region = TRUE) + expect_equal( + dim(mask5), + c(longitude = 240, latitude = 121, region = 4) + ) + expect_equal( + sum(mask5), + 56 ) }) \ No newline at end of file -- GitLab From 6b28e5a37f6c0c1ec270fb080f8c5706cfe6a16b Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 10 Nov 2023 17:14:14 +0100 Subject: [PATCH 10/10] Update documentation --- man/ShapeToMask.Rd | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/man/ShapeToMask.Rd b/man/ShapeToMask.Rd index 5f8dfc0..f5beede 100644 --- a/man/ShapeToMask.Rd +++ b/man/ShapeToMask.Rd @@ -8,9 +8,9 @@ ShapeToMask( shp_file, ref_grid, shp_system = "NUTS", - shp_system_name = NULL, - reg_ids = NULL, reg_names = NULL, + reg_ids = NULL, + shp_col_name_ids = NULL, reg_level = 3, lat_dim = NULL, lon_dim = NULL, @@ -34,17 +34,17 @@ reference grid points. It is NULL by default.} name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to 'NUTS' by default.} -\item{shp_system_name}{A character string indicating the column name of the -column in where the specified 'reg_ids' will be taken.} - -\item{reg_ids}{A character string indicating the unique ID in shapefile. -It is NULL by default.} - \item{reg_names}{A named list of character string vectors indicating the country and the region name. The name of the list stands for the country name code and the vector character strings indicate the region name for each country. It is NULL by default.} +\item{reg_ids}{A character string indicating the unique ID in shapefile. +It is NULL by default.} + +\item{shp_col_name_ids}{A character string indicating the column name of the +column in where the specified 'reg_ids' will be taken.} + \item{reg_level}{An integer number from 1 to 3 indicating the 'NUTS' dataset level. For other datasets this parameter is not used. One mask can only have a unique level. It is set to 3 by default.} @@ -94,7 +94,7 @@ regions. It then transfers the shapefile data into an array and subsets the output based on requested region names or IDs. The accepted shapefile databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. However, the function can use other shapefiles databases with specifying the -categories names with the parameter 'shp_system_name'. +categories names with the parameter 'shp_col_name_ids'. } \details{ To ensure accurate comparison with the shapefile, the function loads a -- GitLab