From e120b6522968b7501a4e67c9844c263386a3b5fe Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 23 May 2023 17:21:03 +0200 Subject: [PATCH 01/11] Add function to create a mask array from shapefile created by An-Chi --- R/shp_mask.R | 219 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 R/shp_mask.R diff --git a/R/shp_mask.R b/R/shp_mask.R new file mode 100644 index 0000000..ba41471 --- /dev/null +++ b/R/shp_mask.R @@ -0,0 +1,219 @@ +#'create the mask array from a shapefile (.shp) and a dataset, and save to netCDF file (optionally) +#' +#library(easyNCDF) +#library(sf) + +######################## +# module load GDAL PROJ GEOS +######################## +# shp.file: The shp file +# ref.grid: Either (1) a netCDF file or (2) a list of lon and lat to provide the reference grid points +# reg.ids: The unique ID in shapefile +# reg.names: A list of country and the region name +# reg.level: Can only have the same level in one mask; only for NUTS +# lat_dim: 'latitude' for example +# lon_dim: 'longitude' for example +# savefile: If NULL, return an array + +### Some example inputs ### +## Exmple (1): NUTS +#-------shp.file---------- +#shp.file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/NUTS_RG_60M_2021_4326.shp') +#shp.file <- '/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp' +#-------ref.grid---------- +#ref.grid <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20170201.nc' +#ref.grid <- '/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/tas_f6h/tas_20191212.nc' +#ref.grid <- '/esarchive/recon/ecmwf/era5land/monthly_mean/tas_f1h/tas_201006.nc' +#ref.grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) +#-------reg.ids----------- +#NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) +#ADM.id <- +#-------reg.names----------- +#NUTS.name <- list(FI = c('Lappi', 'Kainuu')) #NOTE: NUTS.name may be repetitive; use level to filter +#NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) +#--------------------------- +# mask1 <- shp_mask(shp.file, ref.grid, reg.ids = NUTS.id) +# mask2 <- shp_mask(shp.file, ref.grid, reg.names = NUTS.name) + +## Exmple (2): GADM +#-------shp.file---------- +# shp.file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" +#-------ref.grid---------- +# ref.grid <- '/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/tas_f6h/tas_20191212.nc' +#-------reg.ids----------- +# GADM.id <- c("ESP", "ITA") +#-------reg.names----------- +# GADM.name <- c("Spain", "Italy") +#--------------------------- +# mask1 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, +# reg.ids = GADM.id, shp.system = "GADM") +# mask2 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, +# reg.names = GADM.name, shp.system = "GADM") + + +#NOTE: One region is one number; need to have the option to combine them? +#TODO: Suppress warnings? +#TODO: Substitute packages +shp_mask <- function(shp.file, ref.grid = NULL, + shp.system = "NUTS", reg.ids = NULL, reg.names = NULL, + reg.level = 3, lat_dim = NULL, lon_dim = NULL, savefile = NULL) { + +#################################################### + # Step 1: Load the shapefile + shp <- sf::st_read(shp.file) # class sf + + if (all(is.null(reg.ids), is.null(reg.names))) { + stop("Either provide parameter 'reg.ids' or 'reg.names'.") + + } else if (!is.null(reg.ids)) { + ## Method 1: Directly use IDs + if (shp.system == "NUTS") { + shp <- subset(shp, NUTS_ID %in% reg.ids) + } else if (shp.system == "ADM") { + shp <- subset(shp, ADM1_PCODE %in% reg.ids) + } else if (shp.system == "GADM") { + shp <- subset(shp, ISO %in% reg.ids) + } else { + stop("shp.system ", shp.system, " is not defined yet.") + } + if (!is.null(reg.names)) { + warning("Only use 'reg.ids' to get the shape region. 'reg.names' is not used.") + } + + } else if (!is.null(reg.names)) { + ## Method 2: Use country code & region name + for (cntr_i in 1:length(reg.names)) { + if (shp.system == "NUTS") { + tmp <- subset(shp, CNTR_CODE == names(reg.names)[cntr_i]) + tmp <- subset(tmp, NUTS_NAME %in% reg.names[[cntr_i]]) + } else if (shp.system == "ADM") { + tmp <- subset(shp, ADM0_EN == names(reg.names)[cntr_i]) + tmp <- subset(tmp, ADM1_EN %in% reg.names[[cntr_i]]) + } else if (shp.system == "GADM") { + tmp <- subset(shp, Name %in% reg.names) + } + if (cntr_i == 1) { + shp_tmp <- tmp + } else { + shp_tmp <- rbind(shp_tmp, tmp) + } + } + if (shp.system == "NUTS") { + shp <- subset(shp_tmp, LEVL_CODE == reg.level) + } else if (shp.system == "ADM" | shp.system == "GADM") { + shp <- shp_tmp + } + } + # plot(shp) + +#################################################### + # Step 2: Use the reference file to get lat and lon + + if (all(tools::file_ext(ref.grid) == 'nc')) { + if (!file.exists(ref.grid)) { + stop("ref.grid file does not exist.") + } else { + + ## Method 1: ref.grid is a netCDF file + if (is.null(lat_dim) | is.null(lon_dim)) { + var_names <- easyNCDF::NcReadVarNames(ref.grid) + lat_dim <- var_names[which(var_names %in% s2dv:::.KnownLatNames())] + lon_dim <- var_names[which(var_names %in% s2dv:::.KnownLonNames())] + } + latlon <- NcToArray(ref.grid, vars_to_read = c(lat_dim, lon_dim)) + lat <- NcToArray(ref.grid, vars_to_read = lat_dim)[1, ] + lon <- NcToArray(ref.grid, vars_to_read = lon_dim)[1, ] + } + } else if (is.list(ref.grid)) { + ## Method 2: ref.grid is a list of lon and lat + if (length(ref.grid) != 2) { + stop("If 'ref.grid' is a list, it must have two items for longitude and latitude.") + } + if (is.null(lat_dim) | is.null(lon_dim)) { + #NOTE: the names come from s2dv:::.KnownLonNames and .KnownLatNames + lon_known_names <- c(s2dv:::.KnownLonNames(), 'lons') + lat_known_names <- c(s2dv:::.KnownLatNames(), 'lats') + lon_dim <- lon_known_names[which(lon_known_names %in% names(ref.grid))] + lat_dim <- lat_known_names[which(lat_known_names %in% names(ref.grid))] + + if (identical(lon_dim, character(0)) | identical(lat_dim, character(0))) { + stop("longitude and latitude names are not recognized in 'ref.grid'. Please specify 'lon_dim' and 'lat_dim'.") + } + } + lat <- ref.grid[[lat_dim]] + lon <- ref.grid[[lon_dim]] + + } else { + stop("Parameter 'ref.grid' must be either a netCDF file or a list of lon and lat.") + } + + ## Create data frame & sp class for ref grid + ref.df <- data.frame(data = 0, + lon = rep(lon, times = length(lat)), + lat = rep(lat, each = length(lon))) + coord <- as.matrix(data.frame(x = ref.df$lon, y = ref.df$lat)) + + xy.sfg <- st_multipoint(coord) + xy.sfc <- st_sfc(xy.sfg) + st_crs(xy.sfc) <- sf::st_crs(shp) + +#################################################### + # Step 3: Create mask + ## Create mask array with 0; 1, 2, etc. will be filled in for each shp region + mask <- array(0, dim = c(length(lon), length(lat))) + names(dim(mask)) <- c(lon_dim, lat_dim) + + ## Loop through each shp region + for (shp_i in 1:nrow(shp)) { + #NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may + # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). + shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ]) + tmp_coords <- st_coordinates(shp_pol)[, 1:2] + if (nrow(tmp_coords) != 0) { + for (ii in 1:nrow(tmp_coords)) { + pt_x <- which(lon == tmp_coords[ii, 1]) + pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) + mask[which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i + #NOTE: This line can't catch correct point when the NOTE above happens. + #mask[which(lon == tmp_coords[ii, 1]), which(lat == tmp_coords[ii, 2])] <- shp_i + } + } else { + if (shp.system == "NUTS") { + tmp <- shp$NUTS_ID[shp_i] + } else if (shp.system == "ADM") { + tmp <- shp$ADM1_PCODE[shp_i] + } else if (shp.system == "GADM") { + tmp <- shp$ISO[shp_i] + } + warning("shp ID '", tmp, "' cannot be identified in this reference grid.") + } + } + +#################################################### + # Step 4: Add attributes + attr(mask, lon_dim) <- lon + attr(mask, lat_dim) <- lat + if (shp.system == "NUTS") { + attr(mask, "index") <- as.list(shp$NUTS_ID) + } else if (shp.system == "ADM") { + attr(mask, "index") <- as.list(shp$ADM1_PCODE) + } else if (shp.system == "GADM") { + attr(mask, "index") <- as.list(shp$ISO) + } + names(attr(mask, "index")) <- 1:nrow(shp) + + ## Return all the info from shp + attr(mask, "shapefile") <- attributes(shp) + +#################################################### + # Step 5: Save the file or return the array + if (is.null(savefile)) { + return(mask) + } else { + #TODO + ArrayToNc() + } + +} + -- GitLab From 949120d5bc4d2beb37ca9cdfc678306c235a9a37 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 30 May 2023 17:48:56 +0200 Subject: [PATCH 02/11] Add documentation to function 'shp_mask' --- R/shp_mask.R | 142 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 87 insertions(+), 55 deletions(-) diff --git a/R/shp_mask.R b/R/shp_mask.R index ba41471..6491ad7 100644 --- a/R/shp_mask.R +++ b/R/shp_mask.R @@ -1,63 +1,95 @@ -#'create the mask array from a shapefile (.shp) and a dataset, and save to netCDF file (optionally) +#'Create mask array from a shapefile (.shp) and a dataset, and save to netCDF +#'file (optionally) #' -#library(easyNCDF) -#library(sf) - -######################## -# module load GDAL PROJ GEOS -######################## -# shp.file: The shp file -# ref.grid: Either (1) a netCDF file or (2) a list of lon and lat to provide the reference grid points -# reg.ids: The unique ID in shapefile -# reg.names: A list of country and the region name -# reg.level: Can only have the same level in one mask; only for NUTS -# lat_dim: 'latitude' for example -# lon_dim: 'longitude' for example -# savefile: If NULL, return an array - -### Some example inputs ### -## Exmple (1): NUTS -#-------shp.file---------- -#shp.file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/NUTS_RG_60M_2021_4326.shp') -#shp.file <- '/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp' -#-------ref.grid---------- -#ref.grid <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20170201.nc' -#ref.grid <- '/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/tas_f6h/tas_20191212.nc' -#ref.grid <- '/esarchive/recon/ecmwf/era5land/monthly_mean/tas_f1h/tas_201006.nc' -#ref.grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) -#-------reg.ids----------- -#NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) -#ADM.id <- -#-------reg.names----------- -#NUTS.name <- list(FI = c('Lappi', 'Kainuu')) #NOTE: NUTS.name may be repetitive; use level to filter -#NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) -#--------------------------- -# mask1 <- shp_mask(shp.file, ref.grid, reg.ids = NUTS.id) -# mask2 <- shp_mask(shp.file, ref.grid, reg.names = NUTS.name) +#'A shapefile is the file type saving the information of polygonal regions. +#'First the function reads the .shp file and transfers it to an array. Then, +#'we subset the output for the requested region names or IDs. This information +#'is different from each shapefile format. The accepted shapefiles databases +#'names are: 'NUTS', 'LAU' and 'GADM'. After this step, we load the reference +#'dataset to access the longitude and latitude in order to compare the shapefile +#'and the coordinates. After loading the coordinates we intersect each subset +#'of the shapefile with them in order to select only the desired regions. Then, +#'the function creates a mask array. In the last step, the mask is either +#'returned or saved into NetCDF format in the desired directory. +#' +#'Modules GDAL, PROJ and GEOS are required. +#' +#'@param shp.file A character string indicating the shp file path. +#'@param ref.grid A character string indicating the path to the reference +#' data. Either (1) a netCDF file or (2) a list of lon and lat to provide the +#' reference grid points. It is NULL by default. +#'@param shp.system It is set to 'NUTS' by default. +#'@param reg.ids A character string indicating The unique ID in shapefile. +#' It is NULL by default. +#'@param reg.names A named list of character string vectors indicating the +#' country and the region name. The name of the list stands for the country +#' name code and the vector character strings indicate the region name for +#' each country. It is NULL by default. +#'@param reg.level An integer number from 1 to 3 indicating the 'NUTS' dataset +#' level. For other datasets this parameter is not used. One mask can only have +#' a unique level. It is set to 3 by default. +#'@param lat_dim A character string indicating the latitudinal dimension. If it +#' is NULL, the latitudinal name will be searched using an internal function +#' with the following possible names: 'lat', 'latitude', 'y', 'j' and +#' 'nav_lat'. It is NULL by default. +#'@param lon_dim A character string indicating the longitudinal dimension. If it +#' is NULL, the longitudinal name will be searched using an internal function +#' with the following possible names: 'lon', 'longitude', 'x', 'i' and +#' 'nav_lon'. It is NULL by default. +#'@param savefile A logical value indicating wether to save the mask array into +#' a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by +#' default. +#' +#'@examples +#'# Exmple (1): NUTS +#'#-------shp.file---------- +#'shp.file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', +#' 'NUTS_RG_60M_2021_4326.shp') +#'shp.file <- paste0('/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/', +#' 'tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp') +#'#-------ref.grid---------- +#'ref.grid <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/', +#' 'tas_f6h/tas_20170201.nc') +#'ref.grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', +#' 'tas_f6h/tas_20191212.nc') +#'ref.grid <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', +#' 'tas_f1h/tas_201006.nc') +#'ref.grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) +#'#-------reg.ids----------- +#'NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) +#'# ADM.id <- +#'#-------reg.names----------- +#'# NOTE: NUTS.name may be repetitive; use level to filter +#'NUTS.name <- list(FI = c('Lappi', 'Kainuu')) +#'NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) +#'#--------------------------- +#'mask1 <- shp_mask(shp.file, ref.grid, reg.ids = NUTS.id) +#'mask2 <- shp_mask(shp.file, ref.grid, reg.names = NUTS.name) -## Exmple (2): GADM -#-------shp.file---------- -# shp.file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" -#-------ref.grid---------- -# ref.grid <- '/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/tas_f6h/tas_20191212.nc' +#'# Exmple (2): GADM +#'#-------shp.file---------- +#'shp.file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" +#'#-------ref.grid---------- +#'ref.grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', +#' 'tas_f6h/tas_20191212.nc') #-------reg.ids----------- -# GADM.id <- c("ESP", "ITA") +#'GADM.id <- c("ESP", "ITA") #-------reg.names----------- -# GADM.name <- c("Spain", "Italy") +#'GADM.name <- c("Spain", "Italy") #--------------------------- -# mask1 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, -# reg.ids = GADM.id, shp.system = "GADM") -# mask2 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, -# reg.names = GADM.name, shp.system = "GADM") - - +#'mask1 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, +#' reg.ids = GADM.id, shp.system = "GADM") +#'mask2 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, +#' reg.names = GADM.name, shp.system = "GADM") +#'@import easyNCDF +#'@import sf +#'@export +shp_mask <- function(shp.file, ref.grid = NULL, + shp.system = "NUTS", reg.ids = NULL, reg.names = NULL, + reg.level = 3, lat_dim = NULL, lon_dim = NULL, savefile = FALSE) { #NOTE: One region is one number; need to have the option to combine them? #TODO: Suppress warnings? #TODO: Substitute packages -shp_mask <- function(shp.file, ref.grid = NULL, - shp.system = "NUTS", reg.ids = NULL, reg.names = NULL, - reg.level = 3, lat_dim = NULL, lon_dim = NULL, savefile = NULL) { - #################################################### # Step 1: Load the shapefile shp <- sf::st_read(shp.file) # class sf @@ -208,12 +240,12 @@ shp_mask <- function(shp.file, ref.grid = NULL, #################################################### # Step 5: Save the file or return the array - if (is.null(savefile)) { + if (!savefile) { return(mask) } else { + warning("This functionality is not developed yet.") #TODO - ArrayToNc() + # ArrayToNc() } - } -- GitLab From c7dadd7dfd7feba020655f31fd07c560ddb614d7 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 30 May 2023 17:52:58 +0200 Subject: [PATCH 03/11] Add documentation to function 'shp_mask' --- R/shp_mask.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/shp_mask.R b/R/shp_mask.R index 6491ad7..c6274ed 100644 --- a/R/shp_mask.R +++ b/R/shp_mask.R @@ -244,7 +244,7 @@ shp_mask <- function(shp.file, ref.grid = NULL, return(mask) } else { warning("This functionality is not developed yet.") - #TODO + # TODO # ArrayToNc() } } -- GitLab From 4b7dfc259923e3fc8df4b2118812ffffaffdbffb Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 31 May 2023 13:18:36 +0200 Subject: [PATCH 04/11] Improve documentation: function description and example --- NAMESPACE | 3 ++ R/shp_mask.R | 92 +++++++++++++++++++------------------------ man/shp_mask.Rd | 102 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 52 deletions(-) create mode 100644 man/shp_mask.Rd diff --git a/NAMESPACE b/NAMESPACE index d74cc97..5ed122b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -83,9 +83,11 @@ export(Trend) export(UltimateBrier) export(clim.colors) export(clim.palette) +export(shp_mask) import(NbClust) import(SpecsVerification) import(bigmemory) +import(easyNCDF) import(graphics) import(mapproj) import(maps) @@ -94,6 +96,7 @@ import(multiApply) import(ncdf4) import(parallel) import(plyr) +import(sf) import(stats) importFrom(ClimProjDiags,CombineIndices) importFrom(ClimProjDiags,Subset) diff --git a/R/shp_mask.R b/R/shp_mask.R index c6274ed..b3ed114 100644 --- a/R/shp_mask.R +++ b/R/shp_mask.R @@ -1,25 +1,27 @@ -#'Create mask array from a shapefile (.shp) and a dataset, and save to netCDF -#'file (optionally) +#'Convert Shapefile to Mask Array #' -#'A shapefile is the file type saving the information of polygonal regions. -#'First the function reads the .shp file and transfers it to an array. Then, -#'we subset the output for the requested region names or IDs. This information -#'is different from each shapefile format. The accepted shapefiles databases -#'names are: 'NUTS', 'LAU' and 'GADM'. After this step, we load the reference -#'dataset to access the longitude and latitude in order to compare the shapefile -#'and the coordinates. After loading the coordinates we intersect each subset -#'of the shapefile with them in order to select only the desired regions. Then, -#'the function creates a mask array. In the last step, the mask is either -#'returned or saved into NetCDF format in the desired directory. +#'This function reads a shapefile (.shp) containing information about polygonal +#'regions. It then transfers the shapefile data into an array and subsets the +#'output based on requested region names or IDs. The accepted shapefile +#'databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. +#'To ensure accurate comparison with the shapefile, the function loads a +#'reference dataset that provides longitude and latitude information. By +#'intersecting each subset of the shapefile with the reference coordinates, the +#'function selects only the desired regions. The final step involves creating a +#'mask array. Depending on the chosen option, the mask array is either returned +#'as the function's output or saved into a NetCDF format in the specified +#'directory. #' -#'Modules GDAL, PROJ and GEOS are required. +#'Note: Modules GDAL, PROJ and GEOS are required. #' #'@param shp.file A character string indicating the shp file path. #'@param ref.grid A character string indicating the path to the reference #' data. Either (1) a netCDF file or (2) a list of lon and lat to provide the #' reference grid points. It is NULL by default. -#'@param shp.system It is set to 'NUTS' by default. -#'@param reg.ids A character string indicating The unique ID in shapefile. +#'@param shp.system A character string containing the Shapefile System Database +#' name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to +#' 'NUTS' by default. +#'@param reg.ids A character string indicating the unique ID in shapefile. #' It is NULL by default. #'@param reg.names A named list of character string vectors indicating the #' country and the region name. The name of the list stands for the country @@ -42,41 +44,30 @@ #' #'@examples #'# Exmple (1): NUTS -#'#-------shp.file---------- #'shp.file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', #' 'NUTS_RG_60M_2021_4326.shp') -#'shp.file <- paste0('/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/', -#' 'tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp') -#'#-------ref.grid---------- -#'ref.grid <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/', -#' 'tas_f6h/tas_20170201.nc') -#'ref.grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', -#' 'tas_f6h/tas_20191212.nc') +#'# shp.file <- paste0('/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/', +#'# 'tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp') +#'# ref.grid <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/', +#'# 'tas_f6h/tas_20170201.nc') +#'# ref.grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', +#'# 'tas_f6h/tas_20191212.nc') #'ref.grid <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', #' 'tas_f1h/tas_201006.nc') -#'ref.grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) -#'#-------reg.ids----------- -#'NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) -#'# ADM.id <- -#'#-------reg.names----------- -#'# NOTE: NUTS.name may be repetitive; use level to filter -#'NUTS.name <- list(FI = c('Lappi', 'Kainuu')) +#'# ref.grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) +#' +#'NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) #'NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) -#'#--------------------------- #'mask1 <- shp_mask(shp.file, ref.grid, reg.ids = NUTS.id) -#'mask2 <- shp_mask(shp.file, ref.grid, reg.names = NUTS.name) - +#'mask2 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, +#' reg.names = NUTS.name) +#' #'# Exmple (2): GADM -#'#-------shp.file---------- #'shp.file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" -#'#-------ref.grid---------- #'ref.grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', #' 'tas_f6h/tas_20191212.nc') -#-------reg.ids----------- #'GADM.id <- c("ESP", "ITA") -#-------reg.names----------- #'GADM.name <- c("Spain", "Italy") -#--------------------------- #'mask1 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, #' reg.ids = GADM.id, shp.system = "GADM") #'mask2 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, @@ -86,11 +77,13 @@ #'@export shp_mask <- function(shp.file, ref.grid = NULL, shp.system = "NUTS", reg.ids = NULL, reg.names = NULL, - reg.level = 3, lat_dim = NULL, lon_dim = NULL, savefile = FALSE) { -#NOTE: One region is one number; need to have the option to combine them? -#TODO: Suppress warnings? -#TODO: Substitute packages -#################################################### + reg.level = 3, lat_dim = NULL, lon_dim = NULL, + savefile = FALSE) { + + # NOTE: One region is one number; need to have the option to combine them? + # TODO: Suppress warnings? + # TODO: Substitute packages + # Step 1: Load the shapefile shp <- sf::st_read(shp.file) # class sf @@ -136,9 +129,7 @@ shp_mask <- function(shp.file, ref.grid = NULL, shp <- shp_tmp } } - # plot(shp) -#################################################### # Step 2: Use the reference file to get lat and lon if (all(tools::file_ext(ref.grid) == 'nc')) { @@ -162,7 +153,7 @@ shp_mask <- function(shp.file, ref.grid = NULL, stop("If 'ref.grid' is a list, it must have two items for longitude and latitude.") } if (is.null(lat_dim) | is.null(lon_dim)) { - #NOTE: the names come from s2dv:::.KnownLonNames and .KnownLatNames + # NOTE: the names come from s2dv:::.KnownLonNames and .KnownLatNames lon_known_names <- c(s2dv:::.KnownLonNames(), 'lons') lat_known_names <- c(s2dv:::.KnownLatNames(), 'lats') lon_dim <- lon_known_names[which(lon_known_names %in% names(ref.grid))] @@ -189,7 +180,6 @@ shp_mask <- function(shp.file, ref.grid = NULL, xy.sfc <- st_sfc(xy.sfg) st_crs(xy.sfc) <- sf::st_crs(shp) -#################################################### # Step 3: Create mask ## Create mask array with 0; 1, 2, etc. will be filled in for each shp region mask <- array(0, dim = c(length(lon), length(lat))) @@ -197,7 +187,7 @@ shp_mask <- function(shp.file, ref.grid = NULL, ## Loop through each shp region for (shp_i in 1:nrow(shp)) { - #NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may + # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ]) tmp_coords <- st_coordinates(shp_pol)[, 1:2] @@ -207,8 +197,8 @@ shp_mask <- function(shp.file, ref.grid = NULL, pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) mask[which.min(abs(lon - tmp_coords[ii, 1])), which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i - #NOTE: This line can't catch correct point when the NOTE above happens. - #mask[which(lon == tmp_coords[ii, 1]), which(lat == tmp_coords[ii, 2])] <- shp_i + # NOTE: This line can't catch correct point when the NOTE above happens. + # mask[which(lon == tmp_coords[ii, 1]), which(lat == tmp_coords[ii, 2])] <- shp_i } } else { if (shp.system == "NUTS") { @@ -222,7 +212,6 @@ shp_mask <- function(shp.file, ref.grid = NULL, } } -#################################################### # Step 4: Add attributes attr(mask, lon_dim) <- lon attr(mask, lat_dim) <- lat @@ -238,7 +227,6 @@ shp_mask <- function(shp.file, ref.grid = NULL, ## Return all the info from shp attr(mask, "shapefile") <- attributes(shp) -#################################################### # Step 5: Save the file or return the array if (!savefile) { return(mask) diff --git a/man/shp_mask.Rd b/man/shp_mask.Rd new file mode 100644 index 0000000..ad018c4 --- /dev/null +++ b/man/shp_mask.Rd @@ -0,0 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shp_mask.R +\name{shp_mask} +\alias{shp_mask} +\title{Convert Shapefile to Mask Array} +\usage{ +shp_mask( + shp.file, + ref.grid = NULL, + shp.system = "NUTS", + reg.ids = NULL, + reg.names = NULL, + reg.level = 3, + lat_dim = NULL, + lon_dim = NULL, + savefile = FALSE +) +} +\arguments{ +\item{shp.file}{A character string indicating the shp file path.} + +\item{ref.grid}{A character string indicating the path to the reference +data. Either (1) a netCDF file or (2) a list of lon and lat to provide the +reference grid points. It is NULL by default.} + +\item{shp.system}{A character string containing the Shapefile System Database +name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to +'NUTS' by default.} + +\item{reg.ids}{A character string indicating the unique ID in shapefile. +It is NULL by default.} + +\item{reg.names}{A named list of character string vectors indicating the +country and the region name. The name of the list stands for the country +name code and the vector character strings indicate the region name for +each country. It is NULL by default.} + +\item{reg.level}{An integer number from 1 to 3 indicating the 'NUTS' dataset +level. For other datasets this parameter is not used. One mask can only have +a unique level. It is set to 3 by default.} + +\item{lat_dim}{A character string indicating the latitudinal dimension. If it +is NULL, the latitudinal name will be searched using an internal function +with the following possible names: 'lat', 'latitude', 'y', 'j' and +'nav_lat'. It is NULL by default.} + +\item{lon_dim}{A character string indicating the longitudinal dimension. If it +is NULL, the longitudinal name will be searched using an internal function +with the following possible names: 'lon', 'longitude', 'x', 'i' and +'nav_lon'. It is NULL by default.} + +\item{savefile}{A logical value indicating wether to save the mask array into +a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by +default.} +} +\description{ +This function reads a shapefile (.shp) containing information about polygonal +regions. It then transfers the shapefile data into an array and subsets the +output based on requested region names or IDs. The accepted shapefile +databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. +To ensure accurate comparison with the shapefile, the function loads a +reference dataset that provides longitude and latitude information. By +intersecting each subset of the shapefile with the reference coordinates, the +function selects only the desired regions. The final step involves creating a +mask array. Depending on the chosen option, the mask array is either returned +as the function's output or saved into a NetCDF format in the specified +directory. +} +\details{ +Note: Modules GDAL, PROJ and GEOS are required. +} +\examples{ +# Exmple (1): NUTS +shp.file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', + 'NUTS_RG_60M_2021_4326.shp') +# shp.file <- paste0('/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/', +# 'tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp') +# ref.grid <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/', +# 'tas_f6h/tas_20170201.nc') +# ref.grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', +# 'tas_f6h/tas_20191212.nc') +ref.grid <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', + 'tas_f1h/tas_201006.nc') +# ref.grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) + +NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) +NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) +mask1 <- shp_mask(shp.file, ref.grid, reg.ids = NUTS.id) +mask2 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, + reg.names = NUTS.name) + +# Exmple (2): GADM +shp.file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" +ref.grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', + 'tas_f6h/tas_20191212.nc') +GADM.id <- c("ESP", "ITA") +GADM.name <- c("Spain", "Italy") +mask1 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, + reg.ids = GADM.id, shp.system = "GADM") +mask2 <- shp_mask(shp.file = shp.file, ref.grid = ref.grid, + reg.names = GADM.name, shp.system = "GADM") +} -- GitLab From 394d55647aec0a8d99fb2cb0d5cb428cd29fdcc3 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 27 Jun 2023 17:28:36 +0200 Subject: [PATCH 05/11] Correct case: no intersection between shapefile and reference grid --- R/shp_mask.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/R/shp_mask.R b/R/shp_mask.R index b3ed114..dcd52d4 100644 --- a/R/shp_mask.R +++ b/R/shp_mask.R @@ -191,10 +191,16 @@ shp_mask <- function(shp.file, ref.grid = NULL, # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ]) tmp_coords <- st_coordinates(shp_pol)[, 1:2] - if (nrow(tmp_coords) != 0) { + if (length(tmp_coords) == 0) { + dim(tmp_coords) <- NULL + } else if (is.null(dim(tmp_coords))) { + tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords))) + } + if (!is.null(dim(tmp_coords))) { + # polygon_instersection for (ii in 1:nrow(tmp_coords)) { - pt_x <- which(lon == tmp_coords[ii, 1]) - pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) + # pt_x <- which(lon == tmp_coords[ii, 1]) + # pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) mask[which.min(abs(lon - tmp_coords[ii, 1])), which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i # NOTE: This line can't catch correct point when the NOTE above happens. -- GitLab From 4e3fe9ba05932d8b64bfa2a1e4edd8e05de35e4f Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 9 Aug 2023 18:41:33 +0200 Subject: [PATCH 06/11] Add parameters and improve function --- R/shp_mask.R | 89 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 21 deletions(-) diff --git a/R/shp_mask.R b/R/shp_mask.R index dcd52d4..3d4891b 100644 --- a/R/shp_mask.R +++ b/R/shp_mask.R @@ -21,6 +21,8 @@ #'@param shp.system A character string containing the Shapefile System Database #' name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to #' 'NUTS' by default. +#'@param shp.system.name A character string indicating the column name of the +#' column in where the specified 'reg.ids' will be taken. #'@param reg.ids A character string indicating the unique ID in shapefile. #' It is NULL by default. #'@param reg.names A named list of character string vectors indicating the @@ -38,10 +40,18 @@ #' is NULL, the longitudinal name will be searched using an internal function #' with the following possible names: 'lon', 'longitude', 'x', 'i' and #' 'nav_lon'. It is NULL by default. +#'@param target_crs A character string indicating the target 'Coordinate +#' Reference System'. +#'@param region A logical value indicating if we want a dimension for the +#' regions in the resulting mask array. It is FALSE by default. #'@param savefile A logical value indicating wether to save the mask array into #' a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by #' default. #' +#'@return A multidimensional array containing a mask array with longitude and +#'latitude diemensions. If 'region' is TRUE, there will be a dimension for +#'the region. +#' #'@examples #'# Exmple (1): NUTS #'shp.file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', @@ -76,9 +86,11 @@ #'@import sf #'@export shp_mask <- function(shp.file, ref.grid = NULL, - shp.system = "NUTS", reg.ids = NULL, reg.names = NULL, + shp.system = "NUTS", shp.system.name = NULL, + reg.ids = NULL, reg.names = NULL, reg.level = 3, lat_dim = NULL, lon_dim = NULL, - savefile = FALSE) { + savefile = FALSE, region = FALSE, target_crs = NULL, + check_valid = FALSE) { # NOTE: One region is one number; need to have the option to combine them? # TODO: Suppress warnings? @@ -86,13 +98,22 @@ shp_mask <- function(shp.file, ref.grid = NULL, # Step 1: Load the shapefile shp <- sf::st_read(shp.file) # class sf + if (!is.null(target_crs)) { + transformed_shapefile <- st_transform(shp, crs = target_crs) + shp <- transformed_shapefile + } if (all(is.null(reg.ids), is.null(reg.names))) { stop("Either provide parameter 'reg.ids' or 'reg.names'.") - } else if (!is.null(reg.ids)) { ## Method 1: Directly use IDs - if (shp.system == "NUTS") { + if (!is.null(shp.system.name)) { + if (shp.system.name %in% names(shp)) { + shp <- subset(shp, get(shp.system.name) %in% reg.ids) + } else { + stop("Shape system name not found in shapefile names.") + } + } else if (shp.system == "NUTS") { shp <- subset(shp, NUTS_ID %in% reg.ids) } else if (shp.system == "ADM") { shp <- subset(shp, ADM1_PCODE %in% reg.ids) @@ -178,17 +199,34 @@ shp_mask <- function(shp.file, ref.grid = NULL, xy.sfg <- st_multipoint(coord) xy.sfc <- st_sfc(xy.sfg) - st_crs(xy.sfc) <- sf::st_crs(shp) + # Assign crs of original shapefile + if (!is.null(target_crs)) { + st_crs(xy.sfc) <- sf::st_crs(target_crs) #initial_crs # asign crs of original shapefile + xy.sfc <- sf::st_transform(xy.sfc, st_crs(shp)) + } else { + st_crs(xy.sfc) <- sf::st_crs(shp) + } # Step 3: Create mask ## Create mask array with 0; 1, 2, etc. will be filled in for each shp region mask <- array(0, dim = c(length(lon), length(lat))) names(dim(mask)) <- c(lon_dim, lat_dim) + if (region) { + mask <- array(0, dim = c(nrow(shp), length(lon), length(lat))) + names(dim(mask)) <- c('region', lon_dim, lat_dim) + } + if (check_valid) { + xy.sfc <- st_make_valid(xy.sfc) + shp <- st_make_valid(shp) + } + ## Loop through each shp region for (shp_i in 1:nrow(shp)) { + print(shp_i) # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). + shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ]) tmp_coords <- st_coordinates(shp_pol)[, 1:2] if (length(tmp_coords) == 0) { @@ -201,12 +239,22 @@ shp_mask <- function(shp.file, ref.grid = NULL, for (ii in 1:nrow(tmp_coords)) { # pt_x <- which(lon == tmp_coords[ii, 1]) # pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) - mask[which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i + if (length(dim(mask)) == 2) { + # min(abs(lon - tmp_coords[ii, 1])) + # min(abs(lat - tmp_coords[ii, 2])) + mask[which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i + } else { + mask[shp_i, which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 + } + # NOTE: This line can't catch correct point when the NOTE above happens. # mask[which(lon == tmp_coords[ii, 1]), which(lat == tmp_coords[ii, 2])] <- shp_i } } else { + print('No intersection') + if (shp.system == "NUTS") { tmp <- shp$NUTS_ID[shp_i] } else if (shp.system == "ADM") { @@ -219,19 +267,19 @@ shp_mask <- function(shp.file, ref.grid = NULL, } # Step 4: Add attributes - attr(mask, lon_dim) <- lon - attr(mask, lat_dim) <- lat - if (shp.system == "NUTS") { - attr(mask, "index") <- as.list(shp$NUTS_ID) - } else if (shp.system == "ADM") { - attr(mask, "index") <- as.list(shp$ADM1_PCODE) - } else if (shp.system == "GADM") { - attr(mask, "index") <- as.list(shp$ISO) - } - names(attr(mask, "index")) <- 1:nrow(shp) + # attr(mask, lon_dim) <- lon + # attr(mask, lat_dim) <- lat + # if (shp.system == "NUTS") { + # attr(mask, "index") <- as.list(shp$NUTS_ID) + # } else if (shp.system == "ADM") { + # attr(mask, "index") <- as.list(shp$ADM1_PCODE) + # } else if (shp.system == "GADM") { + # attr(mask, "index") <- as.list(shp$ISO) + # } + # names(attr(mask, "index")) <- 1:nrow(shp) - ## Return all the info from shp - attr(mask, "shapefile") <- attributes(shp) + # ## Return all the info from shp + # attr(mask, "shapefile") <- attributes(shp) # Step 5: Save the file or return the array if (!savefile) { @@ -241,5 +289,4 @@ shp_mask <- function(shp.file, ref.grid = NULL, # TODO # ArrayToNc() } -} - +} \ No newline at end of file -- GitLab From 8447210069abe90cc5e4eea36e1962948b8beab4 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 9 Aug 2023 18:42:29 +0200 Subject: [PATCH 07/11] Improve document --- man/shp_mask.Rd | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/man/shp_mask.Rd b/man/shp_mask.Rd index ad018c4..59fc54d 100644 --- a/man/shp_mask.Rd +++ b/man/shp_mask.Rd @@ -8,12 +8,16 @@ shp_mask( shp.file, ref.grid = NULL, shp.system = "NUTS", + shp.system.name = NULL, reg.ids = NULL, reg.names = NULL, reg.level = 3, lat_dim = NULL, lon_dim = NULL, - savefile = FALSE + savefile = FALSE, + region = FALSE, + target_crs = NULL, + check_valid = FALSE ) } \arguments{ @@ -27,6 +31,9 @@ reference grid points. It is NULL by default.} name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to 'NUTS' by default.} +\item{shp.system.name}{A character string indicating the column name of the +column in where the specified 'reg.ids' will be taken.} + \item{reg.ids}{A character string indicating the unique ID in shapefile. It is NULL by default.} @@ -52,6 +59,17 @@ with the following possible names: 'lon', 'longitude', 'x', 'i' and \item{savefile}{A logical value indicating wether to save the mask array into a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by default.} + +\item{region}{A logical value indicating if we want a dimension for the +regions in the resulting mask array. It is FALSE by default.} + +\item{target_crs}{A character string indicating the target 'Coordinate +Reference System'.} +} +\value{ +A multidimensional array containing a mask array with longitude and +latitude diemensions. If 'region' is TRUE, there will be a dimension for +the region. } \description{ This function reads a shapefile (.shp) containing information about polygonal -- GitLab From 1abd91f3d49379505b6f9a4a0fa739073d41d890 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 10 Aug 2023 14:42:29 +0200 Subject: [PATCH 08/11] Update documentation description --- R/shp_mask.R | 9 +++++++-- man/shp_mask.Rd | 14 ++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/R/shp_mask.R b/R/shp_mask.R index 3d4891b..a03fcd2 100644 --- a/R/shp_mask.R +++ b/R/shp_mask.R @@ -3,7 +3,10 @@ #'This function reads a shapefile (.shp) containing information about polygonal #'regions. It then transfers the shapefile data into an array and subsets the #'output based on requested region names or IDs. The accepted shapefile -#'databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. +#'databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. +#'However, the function can use other shapefiles databases with specifying the +#'categories names with the parameter 'shp.system.name'. +#' #'To ensure accurate comparison with the shapefile, the function loads a #'reference dataset that provides longitude and latitude information. By #'intersecting each subset of the shapefile with the reference coordinates, the @@ -44,12 +47,14 @@ #' Reference System'. #'@param region A logical value indicating if we want a dimension for the #' regions in the resulting mask array. It is FALSE by default. +#'@param check_valid A logical value that when it is TRUE it uses the function +#' 'sf::st_make_valid' applied to the shapefile and to the coordinates. #'@param savefile A logical value indicating wether to save the mask array into #' a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by #' default. #' #'@return A multidimensional array containing a mask array with longitude and -#'latitude diemensions. If 'region' is TRUE, there will be a dimension for +#'latitude dimensions. If 'region' is TRUE, there will be a dimension for #'the region. #' #'@examples diff --git a/man/shp_mask.Rd b/man/shp_mask.Rd index 59fc54d..b13e777 100644 --- a/man/shp_mask.Rd +++ b/man/shp_mask.Rd @@ -65,17 +65,24 @@ regions in the resulting mask array. It is FALSE by default.} \item{target_crs}{A character string indicating the target 'Coordinate Reference System'.} + +\item{check_valid}{A logical value that when it is TRUE it uses the function +'sf::st_make_valid' applied to the shapefile and to the coordinates.} } \value{ A multidimensional array containing a mask array with longitude and -latitude diemensions. If 'region' is TRUE, there will be a dimension for +latitude dimensions. If 'region' is TRUE, there will be a dimension for the region. } \description{ This function reads a shapefile (.shp) containing information about polygonal regions. It then transfers the shapefile data into an array and subsets the output based on requested region names or IDs. The accepted shapefile -databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. +databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. +However, the function can use other shapefiles databases with specifying the +categories names with the parameter 'shp.system.name'. +} +\details{ To ensure accurate comparison with the shapefile, the function loads a reference dataset that provides longitude and latitude information. By intersecting each subset of the shapefile with the reference coordinates, the @@ -83,8 +90,7 @@ function selects only the desired regions. The final step involves creating a mask array. Depending on the chosen option, the mask array is either returned as the function's output or saved into a NetCDF format in the specified directory. -} -\details{ + Note: Modules GDAL, PROJ and GEOS are required. } \examples{ -- GitLab From 32929fb11976d1d1214e2e61420e3715abf67faa Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 10 Aug 2023 15:52:38 +0200 Subject: [PATCH 09/11] Added ... parameter as input in st_intersection --- R/shp_mask.R | 10 ++++++---- man/shp_mask.Rd | 8 ++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/R/shp_mask.R b/R/shp_mask.R index a03fcd2..cd7a38d 100644 --- a/R/shp_mask.R +++ b/R/shp_mask.R @@ -51,7 +51,9 @@ #' 'sf::st_make_valid' applied to the shapefile and to the coordinates. #'@param savefile A logical value indicating wether to save the mask array into #' a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by -#' default. +#' default. This functionality is not developed yet. +#'@param ... Arguments passed on to 's2_options' in function 'st_intersection'. +#' See 's2 package'. #' #'@return A multidimensional array containing a mask array with longitude and #'latitude dimensions. If 'region' is TRUE, there will be a dimension for @@ -95,7 +97,7 @@ shp_mask <- function(shp.file, ref.grid = NULL, reg.ids = NULL, reg.names = NULL, reg.level = 3, lat_dim = NULL, lon_dim = NULL, savefile = FALSE, region = FALSE, target_crs = NULL, - check_valid = FALSE) { + check_valid = FALSE, ...) { # NOTE: One region is one number; need to have the option to combine them? # TODO: Suppress warnings? @@ -232,7 +234,7 @@ shp_mask <- function(shp.file, ref.grid = NULL, # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). - shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ]) + shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ], ...) tmp_coords <- st_coordinates(shp_pol)[, 1:2] if (length(tmp_coords) == 0) { dim(tmp_coords) <- NULL @@ -258,7 +260,7 @@ shp_mask <- function(shp.file, ref.grid = NULL, # mask[which(lon == tmp_coords[ii, 1]), which(lat == tmp_coords[ii, 2])] <- shp_i } } else { - print('No intersection') + print('No intersection.') if (shp.system == "NUTS") { tmp <- shp$NUTS_ID[shp_i] diff --git a/man/shp_mask.Rd b/man/shp_mask.Rd index b13e777..31d0bfa 100644 --- a/man/shp_mask.Rd +++ b/man/shp_mask.Rd @@ -17,7 +17,8 @@ shp_mask( savefile = FALSE, region = FALSE, target_crs = NULL, - check_valid = FALSE + check_valid = FALSE, + ... ) } \arguments{ @@ -58,7 +59,7 @@ with the following possible names: 'lon', 'longitude', 'x', 'i' and \item{savefile}{A logical value indicating wether to save the mask array into a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by -default.} +default. This functionality is not developed yet.} \item{region}{A logical value indicating if we want a dimension for the regions in the resulting mask array. It is FALSE by default.} @@ -68,6 +69,9 @@ Reference System'.} \item{check_valid}{A logical value that when it is TRUE it uses the function 'sf::st_make_valid' applied to the shapefile and to the coordinates.} + +\item{...}{Arguments passed on to 's2_options' in function 'st_intersection'. +See 's2 package'.} } \value{ A multidimensional array containing a mask array with longitude and -- GitLab From 8fde434bbb1decfc49c2dd966e7fb0b9d04eb6e7 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 10 Aug 2023 16:02:44 +0200 Subject: [PATCH 10/11] Remove unecessary prints --- R/shp_mask.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/shp_mask.R b/R/shp_mask.R index cd7a38d..8eb30a9 100644 --- a/R/shp_mask.R +++ b/R/shp_mask.R @@ -230,7 +230,6 @@ shp_mask <- function(shp.file, ref.grid = NULL, ## Loop through each shp region for (shp_i in 1:nrow(shp)) { - print(shp_i) # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). @@ -260,8 +259,6 @@ shp_mask <- function(shp.file, ref.grid = NULL, # mask[which(lon == tmp_coords[ii, 1]), which(lat == tmp_coords[ii, 2])] <- shp_i } } else { - print('No intersection.') - if (shp.system == "NUTS") { tmp <- shp$NUTS_ID[shp_i] } else if (shp.system == "ADM") { -- GitLab From 3b89ea5f3671382cd7dc0207c01574ce9cf5f4d8 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 11 Aug 2023 17:23:26 +0200 Subject: [PATCH 11/11] Add development when there is no intersection betweent the shapefile and the reference grid --- R/shp_mask.R | 50 +++++++++++++++++++++++++++++++++++-------------- man/shp_mask.Rd | 5 +++++ 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/R/shp_mask.R b/R/shp_mask.R index 8eb30a9..969f879 100644 --- a/R/shp_mask.R +++ b/R/shp_mask.R @@ -49,6 +49,9 @@ #' regions in the resulting mask array. It is FALSE by default. #'@param check_valid A logical value that when it is TRUE it uses the function #' 'sf::st_make_valid' applied to the shapefile and to the coordinates. +#'@param max_dist A numeric value indicating the maximum distance is accepted +#' to the closest gridpoint when there is no intersection between the shapefile +#' and the reference grid. #'@param savefile A logical value indicating wether to save the mask array into #' a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by #' default. This functionality is not developed yet. @@ -97,7 +100,7 @@ shp_mask <- function(shp.file, ref.grid = NULL, reg.ids = NULL, reg.names = NULL, reg.level = 3, lat_dim = NULL, lon_dim = NULL, savefile = FALSE, region = FALSE, target_crs = NULL, - check_valid = FALSE, ...) { + check_valid = FALSE, max_dist = 99999999, ...) { # NOTE: One region is one number; need to have the option to combine them? # TODO: Suppress warnings? @@ -122,10 +125,13 @@ shp_mask <- function(shp.file, ref.grid = NULL, } } else if (shp.system == "NUTS") { shp <- subset(shp, NUTS_ID %in% reg.ids) + shp.system.name <- NUTS_ID } else if (shp.system == "ADM") { shp <- subset(shp, ADM1_PCODE %in% reg.ids) + shp.system.name <- ADM1_PCODE } else if (shp.system == "GADM") { shp <- subset(shp, ISO %in% reg.ids) + shp.system.name <- ISO } else { stop("shp.system ", shp.system, " is not defined yet.") } @@ -134,16 +140,20 @@ shp_mask <- function(shp.file, ref.grid = NULL, } } else if (!is.null(reg.names)) { + shp.system.name <- NULL ## Method 2: Use country code & region name for (cntr_i in 1:length(reg.names)) { if (shp.system == "NUTS") { tmp <- subset(shp, CNTR_CODE == names(reg.names)[cntr_i]) tmp <- subset(tmp, NUTS_NAME %in% reg.names[[cntr_i]]) + shp.system.name <- NUTS_NAME } else if (shp.system == "ADM") { tmp <- subset(shp, ADM0_EN == names(reg.names)[cntr_i]) tmp <- subset(tmp, ADM1_EN %in% reg.names[[cntr_i]]) + shp.system.name <- ADM1_EN } else if (shp.system == "GADM") { tmp <- subset(shp, Name %in% reg.names) + shp.system.name <- Name } if (cntr_i == 1) { shp_tmp <- tmp @@ -231,9 +241,9 @@ shp_mask <- function(shp.file, ref.grid = NULL, ## Loop through each shp region for (shp_i in 1:nrow(shp)) { # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may - # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). - - shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ], ...) + # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). + shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ]) + # shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ], ...) tmp_coords <- st_coordinates(shp_pol)[, 1:2] if (length(tmp_coords) == 0) { dim(tmp_coords) <- NULL @@ -241,6 +251,7 @@ shp_mask <- function(shp.file, ref.grid = NULL, tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords))) } if (!is.null(dim(tmp_coords))) { + # polygon_instersection for (ii in 1:nrow(tmp_coords)) { # pt_x <- which(lon == tmp_coords[ii, 1]) @@ -254,19 +265,30 @@ shp_mask <- function(shp.file, ref.grid = NULL, mask[shp_i, which.min(abs(lon - tmp_coords[ii, 1])), which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 } - - # NOTE: This line can't catch correct point when the NOTE above happens. - # mask[which(lon == tmp_coords[ii, 1]), which(lat == tmp_coords[ii, 2])] <- shp_i } } else { - if (shp.system == "NUTS") { - tmp <- shp$NUTS_ID[shp_i] - } else if (shp.system == "ADM") { - tmp <- shp$ADM1_PCODE[shp_i] - } else if (shp.system == "GADM") { - tmp <- shp$ISO[shp_i] + x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,1] + y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,2] + dist <- sqrt((xy.sfg[,1] - x.centroid.shpi)**2 + (xy.sfg[,2] - y.centroid.shpi)**2) + tmp_coords <- array(xy.sfg[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) + colnames(tmp_coords) <- c('X', 'Y') + if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & + any(round(lon,2) == round(tmp_coords[1,1],2))) ) { + if (length(dim(mask)) == 2) { + mask[which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- shp_i + } else { + mask[shp_i, which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- 1 + } + warning(paste0('The reference grid has no intersection with region ', + ifelse(is.character(shp.system.name), shp[shp_i,][[shp.system.name]], paste0('n° ', shp_i)), + ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], + ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) + } else { + warning(paste0('The reference grid has no intersection with region ', + ifelse(is.character(shp.system.name), shp[shp_i,][[shp.system.name]], paste0('n° ', shp_i)))) } - warning("shp ID '", tmp, "' cannot be identified in this reference grid.") } } diff --git a/man/shp_mask.Rd b/man/shp_mask.Rd index 31d0bfa..e562135 100644 --- a/man/shp_mask.Rd +++ b/man/shp_mask.Rd @@ -18,6 +18,7 @@ shp_mask( region = FALSE, target_crs = NULL, check_valid = FALSE, + max_dist = 99999999, ... ) } @@ -70,6 +71,10 @@ Reference System'.} \item{check_valid}{A logical value that when it is TRUE it uses the function 'sf::st_make_valid' applied to the shapefile and to the coordinates.} +\item{max_dist}{A numeric value indicating the maximum distance is accepted +to the closest gridpoint when there is no intersection between the shapefile +and the reference grid.} + \item{...}{Arguments passed on to 's2_options' in function 'st_intersection'. See 's2 package'.} } -- GitLab