diff --git a/R/shp_mask.R b/R/shp_mask.R index dcd52d42893c198ab781cda7ca8ba2dac64f9f42..969f879f1e390ac8f7dabafba9f2eab8aa1edd27 100644 --- a/R/shp_mask.R +++ b/R/shp_mask.R @@ -3,7 +3,10 @@ #'This function reads a shapefile (.shp) containing information about polygonal #'regions. It then transfers the shapefile data into an array and subsets the #'output based on requested region names or IDs. The accepted shapefile -#'databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. +#'databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. +#'However, the function can use other shapefiles databases with specifying the +#'categories names with the parameter 'shp.system.name'. +#' #'To ensure accurate comparison with the shapefile, the function loads a #'reference dataset that provides longitude and latitude information. By #'intersecting each subset of the shapefile with the reference coordinates, the @@ -21,6 +24,8 @@ #'@param shp.system A character string containing the Shapefile System Database #' name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to #' 'NUTS' by default. +#'@param shp.system.name A character string indicating the column name of the +#' column in where the specified 'reg.ids' will be taken. #'@param reg.ids A character string indicating the unique ID in shapefile. #' It is NULL by default. #'@param reg.names A named list of character string vectors indicating the @@ -38,9 +43,24 @@ #' is NULL, the longitudinal name will be searched using an internal function #' with the following possible names: 'lon', 'longitude', 'x', 'i' and #' 'nav_lon'. It is NULL by default. +#'@param target_crs A character string indicating the target 'Coordinate +#' Reference System'. +#'@param region A logical value indicating if we want a dimension for the +#' regions in the resulting mask array. It is FALSE by default. +#'@param check_valid A logical value that when it is TRUE it uses the function +#' 'sf::st_make_valid' applied to the shapefile and to the coordinates. +#'@param max_dist A numeric value indicating the maximum distance is accepted +#' to the closest gridpoint when there is no intersection between the shapefile +#' and the reference grid. #'@param savefile A logical value indicating wether to save the mask array into #' a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by -#' default. +#' default. This functionality is not developed yet. +#'@param ... Arguments passed on to 's2_options' in function 'st_intersection'. +#' See 's2 package'. +#' +#'@return A multidimensional array containing a mask array with longitude and +#'latitude dimensions. If 'region' is TRUE, there will be a dimension for +#'the region. #' #'@examples #'# Exmple (1): NUTS @@ -76,9 +96,11 @@ #'@import sf #'@export shp_mask <- function(shp.file, ref.grid = NULL, - shp.system = "NUTS", reg.ids = NULL, reg.names = NULL, + shp.system = "NUTS", shp.system.name = NULL, + reg.ids = NULL, reg.names = NULL, reg.level = 3, lat_dim = NULL, lon_dim = NULL, - savefile = FALSE) { + savefile = FALSE, region = FALSE, target_crs = NULL, + check_valid = FALSE, max_dist = 99999999, ...) { # NOTE: One region is one number; need to have the option to combine them? # TODO: Suppress warnings? @@ -86,18 +108,30 @@ shp_mask <- function(shp.file, ref.grid = NULL, # Step 1: Load the shapefile shp <- sf::st_read(shp.file) # class sf + if (!is.null(target_crs)) { + transformed_shapefile <- st_transform(shp, crs = target_crs) + shp <- transformed_shapefile + } if (all(is.null(reg.ids), is.null(reg.names))) { stop("Either provide parameter 'reg.ids' or 'reg.names'.") - } else if (!is.null(reg.ids)) { ## Method 1: Directly use IDs - if (shp.system == "NUTS") { + if (!is.null(shp.system.name)) { + if (shp.system.name %in% names(shp)) { + shp <- subset(shp, get(shp.system.name) %in% reg.ids) + } else { + stop("Shape system name not found in shapefile names.") + } + } else if (shp.system == "NUTS") { shp <- subset(shp, NUTS_ID %in% reg.ids) + shp.system.name <- NUTS_ID } else if (shp.system == "ADM") { shp <- subset(shp, ADM1_PCODE %in% reg.ids) + shp.system.name <- ADM1_PCODE } else if (shp.system == "GADM") { shp <- subset(shp, ISO %in% reg.ids) + shp.system.name <- ISO } else { stop("shp.system ", shp.system, " is not defined yet.") } @@ -106,16 +140,20 @@ shp_mask <- function(shp.file, ref.grid = NULL, } } else if (!is.null(reg.names)) { + shp.system.name <- NULL ## Method 2: Use country code & region name for (cntr_i in 1:length(reg.names)) { if (shp.system == "NUTS") { tmp <- subset(shp, CNTR_CODE == names(reg.names)[cntr_i]) tmp <- subset(tmp, NUTS_NAME %in% reg.names[[cntr_i]]) + shp.system.name <- NUTS_NAME } else if (shp.system == "ADM") { tmp <- subset(shp, ADM0_EN == names(reg.names)[cntr_i]) tmp <- subset(tmp, ADM1_EN %in% reg.names[[cntr_i]]) + shp.system.name <- ADM1_EN } else if (shp.system == "GADM") { tmp <- subset(shp, Name %in% reg.names) + shp.system.name <- Name } if (cntr_i == 1) { shp_tmp <- tmp @@ -178,18 +216,34 @@ shp_mask <- function(shp.file, ref.grid = NULL, xy.sfg <- st_multipoint(coord) xy.sfc <- st_sfc(xy.sfg) - st_crs(xy.sfc) <- sf::st_crs(shp) + # Assign crs of original shapefile + if (!is.null(target_crs)) { + st_crs(xy.sfc) <- sf::st_crs(target_crs) #initial_crs # asign crs of original shapefile + xy.sfc <- sf::st_transform(xy.sfc, st_crs(shp)) + } else { + st_crs(xy.sfc) <- sf::st_crs(shp) + } # Step 3: Create mask ## Create mask array with 0; 1, 2, etc. will be filled in for each shp region mask <- array(0, dim = c(length(lon), length(lat))) names(dim(mask)) <- c(lon_dim, lat_dim) + if (region) { + mask <- array(0, dim = c(nrow(shp), length(lon), length(lat))) + names(dim(mask)) <- c('region', lon_dim, lat_dim) + } + if (check_valid) { + xy.sfc <- st_make_valid(xy.sfc) + shp <- st_make_valid(shp) + } + ## Loop through each shp region for (shp_i in 1:nrow(shp)) { # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may - # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). + # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ]) + # shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ], ...) tmp_coords <- st_coordinates(shp_pol)[, 1:2] if (length(tmp_coords) == 0) { dim(tmp_coords) <- NULL @@ -197,41 +251,61 @@ shp_mask <- function(shp.file, ref.grid = NULL, tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords))) } if (!is.null(dim(tmp_coords))) { + # polygon_instersection for (ii in 1:nrow(tmp_coords)) { # pt_x <- which(lon == tmp_coords[ii, 1]) # pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) - mask[which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i - # NOTE: This line can't catch correct point when the NOTE above happens. - # mask[which(lon == tmp_coords[ii, 1]), which(lat == tmp_coords[ii, 2])] <- shp_i + if (length(dim(mask)) == 2) { + # min(abs(lon - tmp_coords[ii, 1])) + # min(abs(lat - tmp_coords[ii, 2])) + mask[which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i + } else { + mask[shp_i, which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 + } } } else { - if (shp.system == "NUTS") { - tmp <- shp$NUTS_ID[shp_i] - } else if (shp.system == "ADM") { - tmp <- shp$ADM1_PCODE[shp_i] - } else if (shp.system == "GADM") { - tmp <- shp$ISO[shp_i] + x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,1] + y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,2] + dist <- sqrt((xy.sfg[,1] - x.centroid.shpi)**2 + (xy.sfg[,2] - y.centroid.shpi)**2) + tmp_coords <- array(xy.sfg[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) + colnames(tmp_coords) <- c('X', 'Y') + if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & + any(round(lon,2) == round(tmp_coords[1,1],2))) ) { + if (length(dim(mask)) == 2) { + mask[which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- shp_i + } else { + mask[shp_i, which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- 1 + } + warning(paste0('The reference grid has no intersection with region ', + ifelse(is.character(shp.system.name), shp[shp_i,][[shp.system.name]], paste0('n° ', shp_i)), + ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], + ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) + } else { + warning(paste0('The reference grid has no intersection with region ', + ifelse(is.character(shp.system.name), shp[shp_i,][[shp.system.name]], paste0('n° ', shp_i)))) } - warning("shp ID '", tmp, "' cannot be identified in this reference grid.") } } # Step 4: Add attributes - attr(mask, lon_dim) <- lon - attr(mask, lat_dim) <- lat - if (shp.system == "NUTS") { - attr(mask, "index") <- as.list(shp$NUTS_ID) - } else if (shp.system == "ADM") { - attr(mask, "index") <- as.list(shp$ADM1_PCODE) - } else if (shp.system == "GADM") { - attr(mask, "index") <- as.list(shp$ISO) - } - names(attr(mask, "index")) <- 1:nrow(shp) + # attr(mask, lon_dim) <- lon + # attr(mask, lat_dim) <- lat + # if (shp.system == "NUTS") { + # attr(mask, "index") <- as.list(shp$NUTS_ID) + # } else if (shp.system == "ADM") { + # attr(mask, "index") <- as.list(shp$ADM1_PCODE) + # } else if (shp.system == "GADM") { + # attr(mask, "index") <- as.list(shp$ISO) + # } + # names(attr(mask, "index")) <- 1:nrow(shp) - ## Return all the info from shp - attr(mask, "shapefile") <- attributes(shp) + # ## Return all the info from shp + # attr(mask, "shapefile") <- attributes(shp) # Step 5: Save the file or return the array if (!savefile) { @@ -241,5 +315,4 @@ shp_mask <- function(shp.file, ref.grid = NULL, # TODO # ArrayToNc() } -} - +} \ No newline at end of file diff --git a/man/shp_mask.Rd b/man/shp_mask.Rd index ad018c4132c5eab94063a88035b14f052b4fca00..e5621351a372a070f765c02a68cbb451921f3de4 100644 --- a/man/shp_mask.Rd +++ b/man/shp_mask.Rd @@ -8,12 +8,18 @@ shp_mask( shp.file, ref.grid = NULL, shp.system = "NUTS", + shp.system.name = NULL, reg.ids = NULL, reg.names = NULL, reg.level = 3, lat_dim = NULL, lon_dim = NULL, - savefile = FALSE + savefile = FALSE, + region = FALSE, + target_crs = NULL, + check_valid = FALSE, + max_dist = 99999999, + ... ) } \arguments{ @@ -27,6 +33,9 @@ reference grid points. It is NULL by default.} name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to 'NUTS' by default.} +\item{shp.system.name}{A character string indicating the column name of the +column in where the specified 'reg.ids' will be taken.} + \item{reg.ids}{A character string indicating the unique ID in shapefile. It is NULL by default.} @@ -51,13 +60,38 @@ with the following possible names: 'lon', 'longitude', 'x', 'i' and \item{savefile}{A logical value indicating wether to save the mask array into a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by -default.} +default. This functionality is not developed yet.} + +\item{region}{A logical value indicating if we want a dimension for the +regions in the resulting mask array. It is FALSE by default.} + +\item{target_crs}{A character string indicating the target 'Coordinate +Reference System'.} + +\item{check_valid}{A logical value that when it is TRUE it uses the function +'sf::st_make_valid' applied to the shapefile and to the coordinates.} + +\item{max_dist}{A numeric value indicating the maximum distance is accepted +to the closest gridpoint when there is no intersection between the shapefile +and the reference grid.} + +\item{...}{Arguments passed on to 's2_options' in function 'st_intersection'. +See 's2 package'.} +} +\value{ +A multidimensional array containing a mask array with longitude and +latitude dimensions. If 'region' is TRUE, there will be a dimension for +the region. } \description{ This function reads a shapefile (.shp) containing information about polygonal regions. It then transfers the shapefile data into an array and subsets the output based on requested region names or IDs. The accepted shapefile -databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. +databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. +However, the function can use other shapefiles databases with specifying the +categories names with the parameter 'shp.system.name'. +} +\details{ To ensure accurate comparison with the shapefile, the function loads a reference dataset that provides longitude and latitude information. By intersecting each subset of the shapefile with the reference coordinates, the @@ -65,8 +99,7 @@ function selects only the desired regions. The final step involves creating a mask array. Depending on the chosen option, the mask array is either returned as the function's output or saved into a NetCDF format in the specified directory. -} -\details{ + Note: Modules GDAL, PROJ and GEOS are required. } \examples{