Newer
Older
#'Convert Shapefile to Mask Array
#'
#'This function reads a shapefile (.shp) containing information about polygonal
#'regions. It then transfers the shapefile data into an array and subsets the
#'output based on requested region names or IDs. The accepted shapefile
#'databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format.
#'However, the function can use other shapefiles databases with specifying the
#'categories names with the parameter 'shp_system_name'.
#'
#'To ensure accurate comparison with the shapefile, the function loads a
#'reference dataset that provides longitude and latitude information. By
#'intersecting each subset of the shapefile with the reference coordinates, the
#'function selects only the desired regions. The final step involves creating a
#'mask array. Depending on the chosen option, the mask array is either returned
#'as the function's output or saved into a NetCDF format in the specified
#'directory.
#'
#'Note: Modules GDAL, PROJ and GEOS are required.
#'
#'@param shp_file A character string indicating the shp file path.
#'@param ref_grid A character string indicating the path to the reference
#' data. Either (1) a netCDF file or (2) a list of lon and lat to provide the
#' reference grid points. It is NULL by default.
#'@param shp_system A character string containing the Shapefile System Database
#' name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to
#' 'NUTS' by default.
#'@param shp_system_name A character string indicating the column name of the
#' column in where the specified 'reg_ids' will be taken.
#'@param reg_ids A character string indicating the unique ID in shapefile.
#'@param reg_names A named list of character string vectors indicating the
#' country and the region name. The name of the list stands for the country
#' name code and the vector character strings indicate the region name for
#' each country. It is NULL by default.
#'@param reg_level An integer number from 1 to 3 indicating the 'NUTS' dataset
#' level. For other datasets this parameter is not used. One mask can only have
#' a unique level. It is set to 3 by default.
#'@param lat_dim A character string indicating the latitudinal dimension. If it
#' is NULL, the latitudinal name will be searched using an internal function
#' with the following possible names: 'lat', 'latitude', 'y', 'j' and
#' 'nav_lat'. It is NULL by default.
#'@param lon_dim A character string indicating the longitudinal dimension. If it
#' is NULL, the longitudinal name will be searched using an internal function
#' with the following possible names: 'lon', 'longitude', 'x', 'i' and
#' 'nav_lon'. It is NULL by default.
#'@param target_crs A character string indicating the target 'Coordinate
#' Reference System'.
#'@param region A logical value indicating if we want a dimension for the
#' regions in the resulting mask array. It is FALSE by default.
#'@param check_valid A logical value that when it is TRUE it uses the function
#' 'sf::st_make_valid' applied to the shapefile and to the coordinates.
#'@param find_min_dist A logical value indicating if we want to look for the
#' nearest coordinate between the shapefile region and the reference grid when
#' there is no intersection between the shapefile and the reference grid. It is
#' FALSE by default.
#'@param max_dist A numeric value indicating the maximum distance is accepted
#' to the closest gridpoint when there is no intersection between the shapefile
#' and the reference grid.
#'@param savefile A logical value indicating wether to save the mask array into
#' a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by
#' default. This functionality is not developed yet.
#'@param ... Arguments passed on to 's2_options' in function 'st_intersection'.
#' See 's2 package'.
#'
#'@return A multidimensional array containing a mask array with longitude and
#'latitude dimensions. If 'region' is TRUE, there will be a dimension for
#'the region.
#'
#'@examples
#'# Exmple (1): NUTS
#'shp_file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/',
#'# shp_file <- paste0('/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/',
#'# 'tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp')
#'# ref_grid <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/',
#'# ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/',
#'ref_grid <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/',
#'# ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5))
#'
#'NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9))
#'NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska'))
#'mask1 <- ShapeToMask(shp_file, ref_grid, reg_ids = NUTS.id)
#'mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid,
#' reg_names = NUTS.name)
#'shp_file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp"
#'ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/',
#' 'tas_f6h/tas_20191212.nc')
#'GADM.id <- c("ESP", "ITA")
#'GADM.name <- c("Spain", "Italy")
#'mask1 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid,
#' reg_ids = GADM.id, shp_system = "GADM")
#'mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid,
#' reg_names = GADM.name, shp_system = "GADM")
ShapeToMask <- function(shp_file, ref_grid = NULL,
shp_system = "NUTS", shp_system_name = NULL,
reg_ids = NULL, reg_names = NULL,
reg_level = 3, lat_dim = NULL, lon_dim = NULL,
savefile = FALSE, region = FALSE, target_crs = NULL,
check_valid = FALSE, find_min_dist = FALSE,
max_dist = 50, ...) {
# NOTE: One region is one number; need to have the option to combine them?
# TODO: Suppress warnings?
# TODO: Substitute packages
# TODO: Substitute loop for each region with multiApply
# TODO: Add initial checks
shp <- sf::st_read(shp_file) # class sf
if (!is.null(target_crs)) {
transformed_shapefile <- st_transform(shp, crs = target_crs)
shp <- transformed_shapefile
}
if (all(is.null(reg_ids), is.null(reg_names))) {
stop("Either provide parameter 'reg_ids' or 'reg_names'.")
} else if (!is.null(reg_ids)) {
if (!is.null(shp_system_name)) {
if (shp_system_name %in% names(shp)) {
shp <- subset(shp, get(shp_system_name) %in% reg_ids)
} else {
stop("Shape system name not found in shapefile names.")
}
} else if (shp_system == "NUTS") {
shp <- subset(shp, NUTS_ID %in% reg_ids)
shp_system_name <- "NUTS_ID"
} else if (shp_system == "ADM") {
shp <- subset(shp, ADM1_PCODE %in% reg_ids)
shp_system_name <- "ADM1_PCODE"
} else if (shp_system == "GADM") {
shp <- subset(shp, ISO %in% reg_ids)
shp_system_name <- "ISO"
stop("shp_system ", shp_system, " is not defined yet.")
if (!is.null(reg_names)) {
warning("Only use 'reg_ids' to get the shape region. 'reg_names' is not used.")
} else if (!is.null(reg_names)) {
shp_system_name <- NULL
for (cntr_i in 1:length(reg_names)) {
if (shp_system == "NUTS") {
tmp <- subset(shp, CNTR_CODE == names(reg_names)[cntr_i])
tmp <- subset(tmp, NUTS_NAME %in% reg_names[[cntr_i]])
shp_system_name <- NUTS_NAME
} else if (shp_system == "ADM") {
tmp <- subset(shp, ADM0_EN == names(reg_names)[cntr_i])
tmp <- subset(tmp, ADM1_EN %in% reg_names[[cntr_i]])
shp_system_name <- ADM1_EN
} else if (shp_system == "GADM") {
tmp <- subset(shp, Name %in% reg_names)
shp_system_name <- Name
}
if (cntr_i == 1) {
shp_tmp <- tmp
} else {
shp_tmp <- rbind(shp_tmp, tmp)
}
}
if (shp_system == "NUTS") {
shp <- subset(shp_tmp, LEVL_CODE == reg_level)
} else if (shp_system == "ADM" | shp_system == "GADM") {
shp <- shp_tmp
}
}
# Step 2: Use the reference file to get lat and lon
if (all(tools::file_ext(ref_grid) == 'nc')) {
if (!file.exists(ref_grid)) {
stop("ref_grid file does not exist.")
## Method 1: ref_grid is a netCDF file
var_names <- easyNCDF::NcReadVarNames(ref_grid)
Eva Rifà
committed
lat_dim <- var_names[which(var_names %in% .KnownLatNames())]
lon_dim <- var_names[which(var_names %in% .KnownLonNames())]
latlon <- NcToArray(ref_grid, vars_to_read = c(lat_dim, lon_dim))
lat <- NcToArray(ref_grid, vars_to_read = lat_dim)[1, ]
lon <- NcToArray(ref_grid, vars_to_read = lon_dim)[1, ]
} else if (is.list(ref_grid)) {
## Method 2: ref_grid is a list of lon and lat
if (length(ref_grid) != 2) {
stop("If 'ref_grid' is a list, it must have two items for longitude and latitude.")
Eva Rifà
committed
lon_known_names <- c(.KnownLonNames(), 'lons')
lat_known_names <- c(.KnownLatNames(), 'lats')
lon_dim <- lon_known_names[which(lon_known_names %in% names(ref_grid))]
lat_dim <- lat_known_names[which(lat_known_names %in% names(ref_grid))]
if (identical(lon_dim, character(0)) | identical(lat_dim, character(0))) {
stop("longitude and latitude names are not recognized in 'ref_grid'. Please specify 'lon_dim' and 'lat_dim'.")
lat <- ref_grid[[lat_dim]]
lon <- ref_grid[[lon_dim]]
stop("Parameter 'ref_grid' must be either a netCDF file or a list of lon and lat.")
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
}
## Create data frame & sp class for ref grid
ref.df <- data.frame(data = 0,
lon = rep(lon, times = length(lat)),
lat = rep(lat, each = length(lon)))
coord <- as.matrix(data.frame(x = ref.df$lon, y = ref.df$lat))
xy.sfg <- st_multipoint(coord)
xy.sfc <- st_sfc(xy.sfg)
# Assign crs of original shapefile
if (!is.null(target_crs)) {
st_crs(xy.sfc) <- sf::st_crs(target_crs) #initial_crs # asign crs of original shapefile
xy.sfc <- sf::st_transform(xy.sfc, st_crs(shp))
} else {
st_crs(xy.sfc) <- sf::st_crs(shp)
}
# Step 3: Create mask
## Create mask array with 0; 1, 2, etc. will be filled in for each shp region
mask <- array(0, dim = c(length(lon), length(lat)))
names(dim(mask)) <- c(lon_dim, lat_dim)
if (region) {
mask <- array(0, dim = c(nrow(shp), length(lon), length(lat)))
names(dim(mask)) <- c('region', lon_dim, lat_dim)
}
if (check_valid) {
xy.sfc <- st_make_valid(xy.sfc)
shp <- st_make_valid(shp)
}
## Loop through each shp region
for (shp_i in 1:nrow(shp)) {
# NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may
# not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65).
shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ])
# shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ], ...)
tmp_coords <- st_coordinates(shp_pol)[, 1:2]
if (length(tmp_coords) == 0) {
dim(tmp_coords) <- NULL
} else if (is.null(dim(tmp_coords))) {
tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords)))
}
if (!is.null(dim(tmp_coords))) {
# polygon_instersection
for (ii in 1:nrow(tmp_coords)) {
# pt_x <- which(lon == tmp_coords[ii, 1])
# pt_y <- which.min(abs(lat - tmp_coords[ii, 2]))
if (length(dim(mask)) == 2) {
# min(abs(lon - tmp_coords[ii, 1]))
# min(abs(lat - tmp_coords[ii, 2]))
mask[which.min(abs(lon - tmp_coords[ii, 1])),
which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i
} else {
mask[shp_i, which.min(abs(lon - tmp_coords[ii, 1])),
which.min(abs(lat - tmp_coords[ii, 2]))] <- 1
}
}
} else if (find_min_dist) {
x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,1]
y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,2]
dist <- sqrt((xy.sfg[,1] - x.centroid.shpi)**2 + (xy.sfg[,2] - y.centroid.shpi)**2)
tmp_coords <- array(xy.sfg[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2))
colnames(tmp_coords) <- c('X', 'Y')
if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) &
any(round(lon,2) == round(tmp_coords[1,1],2))) ) {
if (length(dim(mask)) == 2) {
mask[which.min(abs(lon - tmp_coords[, 1])),
which.min(abs(lat - tmp_coords[, 2]))] <- shp_i
} else {
mask[shp_i, which.min(abs(lon - tmp_coords[, 1])),
which.min(abs(lat - tmp_coords[, 2]))] <- 1
}
warning(paste0('The reference grid has no intersection with region ',
ifelse(is.character(shp_system_name), shp[shp_i,][[shp_system_name]], paste0('n° ', shp_i)),
' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))],
' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).'))
} else {
warning(paste0('The reference grid has no intersection with region ',
ifelse(is.character(shp_system_name), shp[shp_i,][[shp_system_name]], paste0('n° ', shp_i))))
}
}
}
# Step 4: Add attributes
# attr(mask, lon_dim) <- lon
# attr(mask, lat_dim) <- lat
# if (shp_system == "NUTS") {
# } else if (shp_system == "ADM") {
# } else if (shp_system == "GADM") {
# attr(mask, "index") <- as.list(shp$ISO)
# }
# names(attr(mask, "index")) <- 1:nrow(shp)
# ## Return all the info from shp
# attr(mask, "shapefile") <- attributes(shp)
# Step 5: Save the file or return the array
if (!savefile) {
return(mask)
} else {
warning("This functionality is not developed yet.")
# TODO
# ArrayToNc()
}
}