From 274cbe1c9c284d8110cc04254d37866f3a263214 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 26 Jan 2024 16:57:14 +0100 Subject: [PATCH 01/12] Add area coverage development for shapetomask --- R/ShapeToMask.R | 156 +++++++++++++++++++++++++++--------------------- 1 file changed, 88 insertions(+), 68 deletions(-) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 350e38b..cf80a6e 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -81,7 +81,7 @@ #'@import foreach #'@importFrom doParallel registerDoParallel #'@export -ShapeToMask <- function(shp_file, ref_grid, +ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, shp_system = "NUTS", reg_names = NULL, reg_ids = NULL, shp_col_name_ids = NULL, reg_level = 3, lat_dim = NULL, lon_dim = NULL, @@ -175,6 +175,10 @@ ShapeToMask <- function(shp_file, ref_grid, if (!is.character(target_crs)) { stop("Parameter 'target_crs' must be a character string.") } + transform_shp_crs <- TRUE + } else { + transform_shp_crs <- FALSE + target_crs <- sf::st_crs(shp) } # check_valid if (!is.logical(check_valid)) { @@ -203,7 +207,7 @@ ShapeToMask <- function(shp_file, ref_grid, # Step 1: Load the shapefile shp <- sf::st_read(shp_file) # class sf - if (!is.null(target_crs)) { + if (transform_shp_crs) { transformed_shapefile <- st_transform(shp, crs = target_crs) shp <- transformed_shapefile } @@ -261,7 +265,7 @@ ShapeToMask <- function(shp_file, ref_grid, } } - # Step 2: Use the reference file to get lat and lon + # Step (2.1): Use the reference file to get lat and lon if (all(tools::file_ext(ref_grid) == 'nc')) { ## Method 1: ref_grid is a netCDF file if (is.null(lat_dim) | is.null(lon_dim)) { @@ -278,29 +282,35 @@ ShapeToMask <- function(shp_file, ref_grid, lon <- ref_grid[[lon_dim]] } - ## Create data frame & sp class for ref grid - ref.df <- data.frame(data = 0, - lon = rep(lon, times = length(lat)), - lat = rep(lat, each = length(lon))) - coord <- as.matrix(data.frame(x = ref.df$lon, y = ref.df$lat)) - - xy.sfg <- sf::st_multipoint(coord) - xy.sfc <- sf::st_sfc(xy.sfg) - # Assign crs of original shapefile - if (!is.null(target_crs)) { + # Step (2.2): Create the grid + lonlat_df <- data.frame(lon = rep(as.vector(lon), length(lat)), + lat = sort(rep(as.vector(lat), length(lon)), decreasing = TRUE)) + + if (return_area) { + locations <- array(1:(length(lon)*length(lat)), c(lon = length(lon), + lat = length(lat))) + xy.sfc <- areagrid(data = locations, lon = lon, lat = lat, + target_proj = target_proj) + region <- TRUE + } else { + ref.df <- data.frame(data = 0, + lon = rep(lon, times = length(lat)), + lat = rep(lat, each = length(lon))) + coord <- as.matrix(data.frame(x = ref.df$lon, y = ref.df$lat)) + + xy.sfg <- sf::st_multipoint(coord) + xy.sfc <- sf::st_sfc(xy.sfg) + # Assign crs of original shapefile st_crs(xy.sfc) <- sf::st_crs(target_crs) #initial_crs # asign crs of original shapefile xy.sfc <- sf::st_transform(xy.sfc, st_crs(shp)) - } else { - st_crs(xy.sfc) <- sf::st_crs(shp) - } - - # Step 3: Create mask - if (check_valid) { - xy.sfc <- st_make_valid(xy.sfc) - shp <- st_make_valid(shp) + # Check valid + if (check_valid) { + xy.sfc <- st_make_valid(xy.sfc) + shp <- st_make_valid(shp) + } } - ## Loop through each shp region + ## Step (3): Loop through each shp region to create the mask if (region) { cfun <- function(a, b) { if (length(dim(a)) == 2) { @@ -320,14 +330,16 @@ ShapeToMask <- function(shp_file, ref_grid, if (is.null(ncores)) { mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun') %do% .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, - xy.sfc = xy.sfc, find_min_dist = find_min_dist, + xy.sfc = xy.sfc, return_area = return_area, + find_min_dist = find_min_dist, shp_col_name_ids = shp_col_name_ids, max_dist = max_dist, region = region, ...) } else { registerDoParallel(ncores) mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages='sf') %dopar% .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, - xy.sfc = xy.sfc, find_min_dist = find_min_dist, + xy.sfc = xy.sfc, return_area = return_area, + find_min_dist = find_min_dist, shp_col_name_ids = shp_col_name_ids, max_dist = max_dist, region = region, ...) registerDoSEQ() @@ -357,58 +369,66 @@ ShapeToMask <- function(shp_file, ref_grid, return(mask) } -.shapetomask <- function(shp, n, lon, lat, xy.sfc, find_min_dist = FALSE, +.shapetomask <- function(shp, n, lon, lat, xy.sfc, return_area = FALSE, + find_min_dist = FALSE, shp_col_name_ids = NULL, max_dist = 50, region = FALSE, ...) { mask <- array(0, dim = c(length(lon), length(lat))) shpi <- shp[n, ] - # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may - # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). - shp_pol <- sf::st_intersection(x = xy.sfc, y = shpi, ...) - tmp_coords <- sf::st_coordinates(shp_pol)[, 1:2] - if (length(tmp_coords) == 0) { - dim(tmp_coords) <- NULL - } else if (is.null(dim(tmp_coords))) { - tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords))) - } - if (!is.null(dim(tmp_coords))) { + if (return_area) { + datapoly_int <- xy.sfc %>% dplyr::mutate(int = area_cov(xy.sfc$geometry, shpi)) + vals <- datapoly_int %>% dplyr::select(int, value) + mask <- vals[order(vals$value),] %>% dplyr::pull(int) + dim(mask) <- c(length(lon), length(lat)) + } else { + # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may + # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). + shp_pol <- sf::st_intersection(x = xy.sfc, y = shpi, ...) + tmp_coords <- sf::st_coordinates(shp_pol)[, 1:2] + if (length(tmp_coords) == 0) { + dim(tmp_coords) <- NULL + } else if (is.null(dim(tmp_coords))) { + tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords))) + } + if (!is.null(dim(tmp_coords))) { - # polygon_instersection - for (ii in 1:nrow(tmp_coords)) { - # pt_x <- which(lon == tmp_coords[ii, 1]) - # pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) - if (!region) { - # min(abs(lon - tmp_coords[ii, 1])) - # min(abs(lat - tmp_coords[ii, 2])) - mask[which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- n - } else { - mask[which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 + # polygon_instersection + for (ii in 1:nrow(tmp_coords)) { + # pt_x <- which(lon == tmp_coords[ii, 1]) + # pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) + if (!region) { + # min(abs(lon - tmp_coords[ii, 1])) + # min(abs(lat - tmp_coords[ii, 2])) + mask[which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- n + } else { + mask[which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 + } } - } - } else if (find_min_dist) { - x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,1] - y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,2] - dist <- sqrt((xy.sfc[,1] - x.centroid.shpi)**2 + (xy.sfc[,2] - y.centroid.shpi)**2) - tmp_coords <- array(xy.sfc[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) - colnames(tmp_coords) <- c('X', 'Y') - if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & - any(round(lon,2) == round(tmp_coords[1,1],2))) ) { - if (length(dim(mask)) == 2) { - mask[which.min(abs(lon - tmp_coords[, 1])), - which.min(abs(lat - tmp_coords[, 2]))] <- n + } else if (find_min_dist) { + x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,1] + y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,2] + dist <- sqrt((xy.sfc[,1] - x.centroid.shpi)**2 + (xy.sfc[,2] - y.centroid.shpi)**2) + tmp_coords <- array(xy.sfc[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) + colnames(tmp_coords) <- c('X', 'Y') + if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & + any(round(lon,2) == round(tmp_coords[1,1],2))) ) { + if (length(dim(mask)) == 2) { + mask[which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- n + } else { + mask[which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- 1 + } + # warning(paste0('The reference grid has no intersection with region ', + # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)), + # ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], + # ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) } else { - mask[which.min(abs(lon - tmp_coords[, 1])), - which.min(abs(lat - tmp_coords[, 2]))] <- 1 + # warning(paste0('The reference grid has no intersection with region ', + # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)))) } - # warning(paste0('The reference grid has no intersection with region ', - # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)), - # ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], - # ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) - } else { - # warning(paste0('The reference grid has no intersection with region ', - # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)))) } } return(mask) -- GitLab From 218585ee55bbca13c398c539144d99a89ed18c8c Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 26 Jan 2024 16:58:01 +0100 Subject: [PATCH 02/12] Add auxiliary function areagrid --- R/ShapeToMask.R | 68 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index cf80a6e..2053b42 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -432,4 +432,70 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, } } return(mask) -} \ No newline at end of file +} + + +areagrid <- function(data, lon, lat, target_proj) { + + # Build data dataframe + lonlat_df <- data.frame(lon = rep(as.vector(lon), length(lat)), + lat = sort(rep(as.vector(lat), length(lon)), decreasing = TRUE)) + + data_df <- lonlat_df %>% + dplyr::mutate(dat = as.vector(data)) + + lonlat_df_ori <- NULL + + #NOTE: if target_proj = "ESRI:54030", Nord3v2 has different behavior from hub and ws!! + data_df <- st_as_sf(data_df, coords = c("lon", "lat"), crs = target_proj) + # data_df <- st_transform(data_df, crs = target_proj) + data_df <- data_df %>% + dplyr::mutate(long = st_coordinates(data_df)[, 1], + lat = st_coordinates(data_df)[, 2]) + + # Calculate polygon points from regular lat/lon + #NOTE: The original grid must be regular grid with same space + d_lon <- abs(lon[2] - lon[1]) / 2 + d_lat <- abs(lat[2] - lat[1]) / 2 + lon_poly <- lat_poly <- rep(NA, 4 * dim(lonlat_df)[1]) + for (ii in 1:dim(lonlat_df)[1]) { + lon_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lon[ii] - d_lon, lonlat_df$lon[ii] + d_lon, + lonlat_df$lon[ii] + d_lon, lonlat_df$lon[ii] - d_lon) + lat_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lat[ii] - d_lat, lonlat_df$lat[ii] - d_lat, + lonlat_df$lat[ii] + d_lat, lonlat_df$lat[ii] + d_lat) + } + # # To prevent out-of-global lon + # lon_poly[which(lon_poly >= 180)] <- 179.9 + # lon_poly[which(lon_poly < -180)] <- -180 + # To prevent out-of-global lat + lat_poly[which(lat_poly > 90)] <- 90 + lat_poly[which(lat_poly < -90)] <- -90 + + lonlat_df <- data.frame(lon = lon_poly, lat = lat_poly) + # Transfer lon/lat to projection + proj_lonlat <- st_as_sf(lonlat_df, coords = c("lon", "lat"), crs = target_proj) + #NOTE: if target_proj = "ESRI:54030", on Nord3v2, st_transform has lon and lat swapped! + # proj_lonlat <- st_transform(proj_lonlat, crs = target_proj) + lonlat_df_proj <- st_coordinates(proj_lonlat) + + # Use id to create groups for each polygon + ids <- factor(paste0("id_", 1:dim(data_df)[1])) + values <- data.frame(id = ids, + value = data_df$dat) + positions <- data.frame(id = rep(ids, each = 4), + x = lonlat_df_proj[, 1], + y = lonlat_df_proj[, 2]) + datapoly <- merge(values, positions, by = "id") + datapoly <- st_as_sf(datapoly, coords = c("x", "y"), crs = target_proj) + + datapoly <- datapoly %>% + dplyr::group_by(.data$id) %>% + dplyr::summarise() %>% #NOTE: VERY SLOW if plot global + dplyr::mutate(value = values[order(values$id), ]$value) %>% + st_cast("POLYGON") %>% + st_convex_hull() # maintain outer polygen (no bowtie shape) + + return(datapoly) + +} + -- GitLab From 78ad04d3a5d06fb9dfe4d342bcb1360f20780e55 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 29 Jan 2024 15:56:49 +0100 Subject: [PATCH 03/12] Improve part of the area coverage --- R/ShapeToMask.R | 171 ++++++++++++++++++++++++++---------------------- 1 file changed, 93 insertions(+), 78 deletions(-) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 2053b42..2e06f63 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -79,6 +79,7 @@ #'@import easyNCDF #'@import sf #'@import foreach +#'@import dplyr #'@importFrom doParallel registerDoParallel #'@export ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, @@ -91,6 +92,7 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, # TODO: Suppress warnings? # TODO: Add saving option? + # sf_use_s2(FALSE) # Initial checks # shp_file @@ -170,16 +172,6 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, if (!is.logical(region)) { stop("Parameter 'region' must be a logical value.") } - # target_crs - if (!is.null(target_crs)) { - if (!is.character(target_crs)) { - stop("Parameter 'target_crs' must be a character string.") - } - transform_shp_crs <- TRUE - } else { - transform_shp_crs <- FALSE - target_crs <- sf::st_crs(shp) - } # check_valid if (!is.logical(check_valid)) { stop("Parameter 'check_valid' must be a logical value.") @@ -207,9 +199,20 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, # Step 1: Load the shapefile shp <- sf::st_read(shp_file) # class sf + + # target_crs + if (!is.null(target_crs)) { + if (!is.character(target_crs)) { + stop("Parameter 'target_crs' must be a character string.") + } + transform_shp_crs <- TRUE + } else { + transform_shp_crs <- FALSE + target_crs <- sf::st_crs(shp) + } + if (transform_shp_crs) { - transformed_shapefile <- st_transform(shp, crs = target_crs) - shp <- transformed_shapefile + shp <- st_transform(shp, crs = target_crs) } NUTS_ID <- ADM1_PCODE <- ISO <- NULL @@ -283,14 +286,26 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, } # Step (2.2): Create the grid - lonlat_df <- data.frame(lon = rep(as.vector(lon), length(lat)), - lat = sort(rep(as.vector(lat), length(lon)), decreasing = TRUE)) - if (return_area) { locations <- array(1:(length(lon)*length(lat)), c(lon = length(lon), lat = length(lat))) - xy.sfc <- areagrid(data = locations, lon = lon, lat = lat, - target_proj = target_proj) + # Build data dataframe + lonlat_df <- data.frame(lon = rep(as.vector(lon), length(lat)), + lat = sort(rep(as.vector(lat), length(lon)), decreasing = TRUE)) + + data_df <- lonlat_df %>% + mutate(dat = as.vector(locations)) + + lonlat_df_ori <- NULL + + # NOTE: if target_proj = "ESRI:54030", Nord3v2 has different behavior from hub and ws!! + data_df <- st_as_sf(data_df, coords = c("lon", "lat"), crs = target_proj) + # data_df <- st_transform(data_df, crs = target_proj) + data_df <- data_df %>% + mutate(long = st_coordinates(data_df)[, 1], + lat = st_coordinates(data_df)[, 2]) + xy.sfc <- polygonize(lonlat_df = lonlat_df, data_df = data_df, + lon = lon, lat = lat, target_proj = target_crs) region <- TRUE } else { ref.df <- data.frame(data = 0, @@ -336,7 +351,7 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, max_dist = max_dist, region = region, ...) } else { registerDoParallel(ncores) - mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages='sf') %dopar% + mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages = c('sf', 'dplyr')) %dopar% .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, xy.sfc = xy.sfc, return_area = return_area, find_min_dist = find_min_dist, @@ -373,14 +388,15 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, find_min_dist = FALSE, shp_col_name_ids = NULL, max_dist = 50, region = FALSE, ...) { - mask <- array(0, dim = c(length(lon), length(lat))) shpi <- shp[n, ] if (return_area) { - datapoly_int <- xy.sfc %>% dplyr::mutate(int = area_cov(xy.sfc$geometry, shpi)) - vals <- datapoly_int %>% dplyr::select(int, value) - mask <- vals[order(vals$value),] %>% dplyr::pull(int) + mask <- xy.sfc %>% + dplyr::mutate(int = areacov(geometry, shpi)) %>% + dplyr::arrange(value) %>% + dplyr::pull(int) dim(mask) <- c(length(lon), length(lat)) } else { + mask <- array(0, dim = c(length(lon), length(lat))) # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). shp_pol <- sf::st_intersection(x = xy.sfc, y = shpi, ...) @@ -434,68 +450,67 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, return(mask) } +# Function to create polygons from VizRobinson code +polygonize <- function(lonlat_df, data_df, lon, lat, target_proj) { -areagrid <- function(data, lon, lat, target_proj) { - - # Build data dataframe - lonlat_df <- data.frame(lon = rep(as.vector(lon), length(lat)), - lat = sort(rep(as.vector(lat), length(lon)), decreasing = TRUE)) - - data_df <- lonlat_df %>% - dplyr::mutate(dat = as.vector(data)) - - lonlat_df_ori <- NULL + # Calculate polygon points from regular lat/lon + # NOTE: The original grid must be regular grid with same space + d_lon <- abs(lon[2] - lon[1]) / 2 + d_lat <- abs(lat[2] - lat[1]) / 2 + lon_poly <- lat_poly <- rep(NA, 4 * dim(lonlat_df)[1]) + for (ii in 1:dim(lonlat_df)[1]) { + lon_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lon[ii] - d_lon, lonlat_df$lon[ii] + d_lon, + lonlat_df$lon[ii] + d_lon, lonlat_df$lon[ii] - d_lon) + lat_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lat[ii] - d_lat, lonlat_df$lat[ii] - d_lat, + lonlat_df$lat[ii] + d_lat, lonlat_df$lat[ii] + d_lat) + } - #NOTE: if target_proj = "ESRI:54030", Nord3v2 has different behavior from hub and ws!! - data_df <- st_as_sf(data_df, coords = c("lon", "lat"), crs = target_proj) - # data_df <- st_transform(data_df, crs = target_proj) - data_df <- data_df %>% - dplyr::mutate(long = st_coordinates(data_df)[, 1], - lat = st_coordinates(data_df)[, 2]) + # To prevent out-of-global lon + # lon_poly[which(lon_poly >= 180)] <- 179.9 + # lon_poly[which(lon_poly < -180)] <- -180 + # To prevent out-of-global lat + lat_poly[which(lat_poly > 90)] <- 90 + lat_poly[which(lat_poly < -90)] <- -90 - # Calculate polygon points from regular lat/lon - #NOTE: The original grid must be regular grid with same space - d_lon <- abs(lon[2] - lon[1]) / 2 - d_lat <- abs(lat[2] - lat[1]) / 2 - lon_poly <- lat_poly <- rep(NA, 4 * dim(lonlat_df)[1]) - for (ii in 1:dim(lonlat_df)[1]) { - lon_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lon[ii] - d_lon, lonlat_df$lon[ii] + d_lon, - lonlat_df$lon[ii] + d_lon, lonlat_df$lon[ii] - d_lon) - lat_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lat[ii] - d_lat, lonlat_df$lat[ii] - d_lat, - lonlat_df$lat[ii] + d_lat, lonlat_df$lat[ii] + d_lat) - } - # # To prevent out-of-global lon - # lon_poly[which(lon_poly >= 180)] <- 179.9 - # lon_poly[which(lon_poly < -180)] <- -180 - # To prevent out-of-global lat - lat_poly[which(lat_poly > 90)] <- 90 - lat_poly[which(lat_poly < -90)] <- -90 + lonlat_df <- data.frame(lon = lon_poly, lat = lat_poly) + # Transfer lon/lat to projection + proj_lonlat <- st_as_sf(lonlat_df, coords = c("lon", "lat"), crs = target_proj) + # NOTE: if target_proj = "ESRI:54030", on Nord3v2, st_transform has lon and lat swapped! + # proj_lonlat <- st_transform(proj_lonlat, crs = target_proj) + lonlat_df_proj <- st_coordinates(proj_lonlat) - lonlat_df <- data.frame(lon = lon_poly, lat = lat_poly) - # Transfer lon/lat to projection - proj_lonlat <- st_as_sf(lonlat_df, coords = c("lon", "lat"), crs = target_proj) - #NOTE: if target_proj = "ESRI:54030", on Nord3v2, st_transform has lon and lat swapped! - # proj_lonlat <- st_transform(proj_lonlat, crs = target_proj) - lonlat_df_proj <- st_coordinates(proj_lonlat) + # Use id to create groups for each polygon + ids <- factor(paste0("id_", 1:dim(data_df)[1])) + values <- data.frame(id = ids, + value = data_df$dat) + positions <- data.frame(id = rep(ids, each = 4), + x = lonlat_df_proj[, 1], + y = lonlat_df_proj[, 2]) + datapoly <- merge(values, positions, by = "id") + datapoly <- st_as_sf(datapoly, coords = c("x", "y"), crs = target_proj) - # Use id to create groups for each polygon - ids <- factor(paste0("id_", 1:dim(data_df)[1])) - values <- data.frame(id = ids, - value = data_df$dat) - positions <- data.frame(id = rep(ids, each = 4), - x = lonlat_df_proj[, 1], - y = lonlat_df_proj[, 2]) - datapoly <- merge(values, positions, by = "id") - datapoly <- st_as_sf(datapoly, coords = c("x", "y"), crs = target_proj) + datapoly <- datapoly %>% + group_by(.data$id) %>% + summarise() %>% #NOTE: VERY SLOW if plot global + mutate(value = values[order(values$id), ]$value) %>% + st_cast("POLYGON") %>% + st_convex_hull() # maintain outer polygen (no bowtie shape) - datapoly <- datapoly %>% - dplyr::group_by(.data$id) %>% - dplyr::summarise() %>% #NOTE: VERY SLOW if plot global - dplyr::mutate(value = values[order(values$id), ]$value) %>% - st_cast("POLYGON") %>% - st_convex_hull() # maintain outer polygen (no bowtie shape) + return(datapoly) +} - return(datapoly) +# Function to compute the coverage area ratio between the grid and the shapefile +areacov <- function(grid, shp) { + shp_int_grid <- sf::st_intersection(x = grid, y = shp) + idx <- attributes(shp_int_grid)$idx + i <- 0 + fr <- array(0, length(grid)) + for (idx_i in idx[, 1]) { + i <- i + 1 + area_grid <- sf::st_area(grid[idx_i]) + area_int <- sf::st_area(shp_int_grid[i]) + fr[idx_i] <- round(as.numeric(area_int/area_grid), digits = 6) + } + return(fr) } - -- GitLab From 881ef43103b64edacbc2221b049de11afe4b77ca Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 29 Jan 2024 16:30:01 +0100 Subject: [PATCH 04/12] Add option to save NetCDF in ShapeToMask --- NAMESPACE | 1 + R/ShapeToMask.R | 37 +++++++++++++++++++++++-------------- man/ShapeToMask.Rd | 8 ++++++++ man/VizRobinson.Rd | 10 ++++++---- 4 files changed, 38 insertions(+), 18 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e2f15f3..58ee625 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(VizTriangles4Categories) export(VizWeeklyClim) import(RColorBrewer) import(cowplot) +import(dplyr) import(easyNCDF) import(foreach) import(ggplot2) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 2e06f63..882a034 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -21,6 +21,8 @@ #'@param ref_grid A character string indicating the path to the reference #' data. Either (1) a netCDF file or (2) a list of lon and lat to provide the #' reference grid points. It is NULL by default. +#'@param return_area A logical value wether to return the area coverage or not. +#' It is FALSE by default. #'@param shp_system A character string containing the Shapefile System Database #' Name used to subset the shapefile into regions by using parameters 'reg_ids' #' or 'reg_names'. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. When it @@ -61,6 +63,8 @@ #' and the reference grid. #'@param ncores The number of parallel processes to spawn for the use for #' parallel computation in multiple cores. +#'@param fileout A character string of the path to save the NetCDF mask. If not +#' specified (default), the mask array will be returned. #'@param ... Arguments passed on to 's2_options' in function 'st_intersection'. #' See 's2 package'. #' @@ -88,7 +92,8 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, lat_dim = NULL, lon_dim = NULL, region = FALSE, target_crs = NULL, check_valid = FALSE, find_min_dist = FALSE, - max_dist = 50, ncores = NULL, ...) { + max_dist = 50, ncores = NULL, + fileout = NULL, ...) { # TODO: Suppress warnings? # TODO: Add saving option? @@ -367,20 +372,24 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, } # Step 4: Add attributes - # attr(mask, lon_dim) <- lon - # attr(mask, lat_dim) <- lat - # if (shp_system == "NUTS") { - # attr(mask, "index") <- as.list(shp$NUTS_ID) - # } else if (shp_system == "ADM") { - # attr(mask, "index") <- as.list(shp$ADM1_PCODE) - # } else if (shp_system == "GADM") { - # attr(mask, "index") <- as.list(shp$ISO) - # } - # names(attr(mask, "index")) <- 1:nrow(shp) + attr(mask, lon_dim) <- lon + attr(mask, lat_dim) <- lat + if (shp_system == "NUTS") { + attr(mask, "index") <- as.list(shp$NUTS_ID) + } else if (shp_system == "ADM") { + attr(mask, "index") <- as.list(shp$ADM1_PCODE) + } else if (shp_system == "GADM") { + attr(mask, "index") <- as.list(shp$ISO) + } + names(attr(mask, "index")) <- 1:nrow(shp) - # ## Return all the info from shp - # attr(mask, "shapefile") <- attributes(shp) - + ## Return all the info from shp + attr(mask, "shapefile") <- attributes(shp) + + # Step 5: Save to NetCDF + if (!is.null(fileout)) { + ArrayToNc(mask, fileout) + } return(mask) } diff --git a/man/ShapeToMask.Rd b/man/ShapeToMask.Rd index 5f4a660..b77136a 100644 --- a/man/ShapeToMask.Rd +++ b/man/ShapeToMask.Rd @@ -7,6 +7,7 @@ ShapeToMask( shp_file, ref_grid, + return_area = FALSE, shp_system = "NUTS", reg_names = NULL, reg_ids = NULL, @@ -20,6 +21,7 @@ ShapeToMask( find_min_dist = FALSE, max_dist = 50, ncores = NULL, + fileout = NULL, ... ) } @@ -30,6 +32,9 @@ ShapeToMask( data. Either (1) a netCDF file or (2) a list of lon and lat to provide the reference grid points. It is NULL by default.} +\item{return_area}{A logical value wether to return the area coverage or not. +It is FALSE by default.} + \item{shp_system}{A character string containing the Shapefile System Database Name used to subset the shapefile into regions by using parameters 'reg_ids' or 'reg_names'. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. When it @@ -83,6 +88,9 @@ and the reference grid.} \item{ncores}{The number of parallel processes to spawn for the use for parallel computation in multiple cores.} +\item{fileout}{A character string of the path to save the NetCDF mask. If not +specified (default), the mask array will be returned.} + \item{...}{Arguments passed on to 's2_options' in function 'st_intersection'. See 's2 package'.} } diff --git a/man/VizRobinson.Rd b/man/VizRobinson.Rd index 48687f1..b0f0e81 100644 --- a/man/VizRobinson.Rd +++ b/man/VizRobinson.Rd @@ -10,7 +10,7 @@ VizRobinson( lat, lon_dim = NULL, lat_dim = NULL, - target_proj = "ESRI:54030", + target_proj = NULL, drawleg = "bar", style = "point", dots = NULL, @@ -62,9 +62,11 @@ of ascending or descending order.} \code{esviz:::.KnownLatNames}. The default value is NULL.} \item{target_proj}{A character string indicating the target projection. It -should be a valid crs string. The default projection is Robinson -(ESRI:54030). Note that the character string may work differently depending -on PROJ and GDAL module version.} +should be a valid crs string. The default projection is Robinson: +"ESRI:54030". Note that the character string may work differently depending +on PROJ and GDAL module version. If package version 'sf' is lower than +"1.0.10" and an error appears regarding the target crs, you can try with +numeric crs (e.g. target_proj = 54030).} \item{drawleg}{A character string indicating the legend style. It can be 'bar' (color bar by \code{ColorBarContinuous()}), 'ggplot2' (discrete legend -- GitLab From 3225ad03248b3409833c3df29e00f7392fc816ef Mon Sep 17 00:00:00 2001 From: EVA RIFA ROVIRA Date: Tue, 30 Jan 2024 15:55:45 +0100 Subject: [PATCH 05/12] Improve code ShapeToMask; add testing file and usecase field --- DESCRIPTION | 2 +- R/ShapeToMask.R | 87 ++++++-------- inst/doc/usecase.md | 6 + inst/doc/usecase/ex1_ShapeToMask.R | 186 +++++++++++++++++++++++++++++ man/ShapeToMask.Rd | 15 ++- 5 files changed, 238 insertions(+), 58 deletions(-) create mode 100644 inst/doc/usecase.md create mode 100644 inst/doc/usecase/ex1_ShapeToMask.R diff --git a/DESCRIPTION b/DESCRIPTION index e4d4332..ae32af8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,6 +43,6 @@ URL: https://earth.bsc.es/gitlab/es/esviz/ BugReports: https://earth.bsc.es/gitlab/es/esviz/-/issues SystemRequirements: GDAL (>= 2.0.1), GEOS (>= 3.4.0), PROJ (>= 4.8.0) Encoding: UTF-8 -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Config/testthat/edition: 3 LazyData: true diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 882a034..35641c7 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -21,8 +21,11 @@ #'@param ref_grid A character string indicating the path to the reference #' data. Either (1) a netCDF file or (2) a list of lon and lat to provide the #' reference grid points. It is NULL by default. -#'@param return_area A logical value wether to return the area coverage or not. -#' It is FALSE by default. +#'@param compute_area_coverage A logical value indicating the method to find +#' the intersection of the reference grid and the shapefile. When it is TRUE, +#' the method used is the calculation of the area coverage fraction of +#' intersection. If it is FALSE, the method used is searching if the centroid +#' of the grid cell falls inside the shapefile or not. It is FALSE by default. #'@param shp_system A character string containing the Shapefile System Database #' Name used to subset the shapefile into regions by using parameters 'reg_ids' #' or 'reg_names'. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. When it @@ -48,8 +51,6 @@ #' is NULL, the longitudinal name will be searched using an internal function #' with the following possible names: 'lon', 'longitude', 'x', 'i' and #' 'nav_lon'. It is set to NULL by default. -#'@param target_crs A character string indicating the target 'Coordinate -#' Reference System'. #'@param region A logical value indicating if we want a dimension for the #' regions in the resulting mask array. It is FALSE by default. #'@param check_valid A logical value that when it is TRUE it uses the function @@ -64,7 +65,7 @@ #'@param ncores The number of parallel processes to spawn for the use for #' parallel computation in multiple cores. #'@param fileout A character string of the path to save the NetCDF mask. If not -#' specified (default), the mask array will be returned. +#' specified (default), only the mask array will be returned. #'@param ... Arguments passed on to 's2_options' in function 'st_intersection'. #' See 's2 package'. #' @@ -86,13 +87,12 @@ #'@import dplyr #'@importFrom doParallel registerDoParallel #'@export -ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, +ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, shp_system = "NUTS", reg_names = NULL, reg_ids = NULL, shp_col_name_ids = NULL, reg_level = 3, lat_dim = NULL, lon_dim = NULL, - region = FALSE, target_crs = NULL, - check_valid = FALSE, find_min_dist = FALSE, - max_dist = 50, ncores = NULL, + region = FALSE, check_valid = FALSE, + find_min_dist = FALSE, max_dist = 50, ncores = NULL, fileout = NULL, ...) { # TODO: Suppress warnings? @@ -205,20 +205,7 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, # Step 1: Load the shapefile shp <- sf::st_read(shp_file) # class sf - # target_crs - if (!is.null(target_crs)) { - if (!is.character(target_crs)) { - stop("Parameter 'target_crs' must be a character string.") - } - transform_shp_crs <- TRUE - } else { - transform_shp_crs <- FALSE - target_crs <- sf::st_crs(shp) - } - - if (transform_shp_crs) { - shp <- st_transform(shp, crs = target_crs) - } + shp_crs <- sf::st_crs(shp) NUTS_ID <- ADM1_PCODE <- ISO <- NULL CNTR_CODE <- NUTS_NAME <- ADM0_EN <- ADM1_EN <- Name <- LEVL_CODE <- NULL @@ -291,7 +278,7 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, } # Step (2.2): Create the grid - if (return_area) { + if (compute_area_coverage) { locations <- array(1:(length(lon)*length(lat)), c(lon = length(lon), lat = length(lat))) # Build data dataframe @@ -304,13 +291,14 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, lonlat_df_ori <- NULL # NOTE: if target_proj = "ESRI:54030", Nord3v2 has different behavior from hub and ws!! - data_df <- st_as_sf(data_df, coords = c("lon", "lat"), crs = target_proj) - # data_df <- st_transform(data_df, crs = target_proj) + data_df <- st_as_sf(data_df, coords = c("lon", "lat"), crs = shp_crs) + data_df <- st_transform(data_df, crs = shp_crs) data_df <- data_df %>% mutate(long = st_coordinates(data_df)[, 1], lat = st_coordinates(data_df)[, 2]) xy.sfc <- polygonize(lonlat_df = lonlat_df, data_df = data_df, - lon = lon, lat = lat, target_proj = target_crs) + lon = lon, lat = lat, target_proj = shp_crs, + original_proj = shp_crs) region <- TRUE } else { ref.df <- data.frame(data = 0, @@ -321,8 +309,8 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, xy.sfg <- sf::st_multipoint(coord) xy.sfc <- sf::st_sfc(xy.sfg) # Assign crs of original shapefile - st_crs(xy.sfc) <- sf::st_crs(target_crs) #initial_crs # asign crs of original shapefile - xy.sfc <- sf::st_transform(xy.sfc, st_crs(shp)) + st_crs(xy.sfc) <- sf::st_crs(shp) # asign crs of shapefile + # Check valid if (check_valid) { xy.sfc <- st_make_valid(xy.sfc) @@ -350,7 +338,7 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, if (is.null(ncores)) { mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun') %do% .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, - xy.sfc = xy.sfc, return_area = return_area, + xy.sfc = xy.sfc, compute_area_coverage = compute_area_coverage, find_min_dist = find_min_dist, shp_col_name_ids = shp_col_name_ids, max_dist = max_dist, region = region, ...) @@ -358,7 +346,7 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, registerDoParallel(ncores) mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages = c('sf', 'dplyr')) %dopar% .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, - xy.sfc = xy.sfc, return_area = return_area, + xy.sfc = xy.sfc, compute_area_coverage = compute_area_coverage, find_min_dist = find_min_dist, shp_col_name_ids = shp_col_name_ids, max_dist = max_dist, region = region, ...) @@ -374,31 +362,32 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, # Step 4: Add attributes attr(mask, lon_dim) <- lon attr(mask, lat_dim) <- lat - if (shp_system == "NUTS") { - attr(mask, "index") <- as.list(shp$NUTS_ID) - } else if (shp_system == "ADM") { - attr(mask, "index") <- as.list(shp$ADM1_PCODE) - } else if (shp_system == "GADM") { - attr(mask, "index") <- as.list(shp$ISO) + if (!is.null(shp_system)) { + if (shp_system == "NUTS") { + attr(mask, "index") <- as.list(shp$NUTS_ID) + } else if (shp_system == "ADM") { + attr(mask, "index") <- as.list(shp$ADM1_PCODE) + } else if (shp_system == "GADM") { + attr(mask, "index") <- as.list(shp$ISO) + } } - names(attr(mask, "index")) <- 1:nrow(shp) - + attr(mask, "index") <- 1:nrow(shp) ## Return all the info from shp attr(mask, "shapefile") <- attributes(shp) # Step 5: Save to NetCDF if (!is.null(fileout)) { - ArrayToNc(mask, fileout) + ArrayToNc(list(mask = mask), fileout) } return(mask) } -.shapetomask <- function(shp, n, lon, lat, xy.sfc, return_area = FALSE, +.shapetomask <- function(shp, n, lon, lat, xy.sfc, compute_area_coverage = FALSE, find_min_dist = FALSE, shp_col_name_ids = NULL, max_dist = 50, region = FALSE, ...) { shpi <- shp[n, ] - if (return_area) { + if (compute_area_coverage) { mask <- xy.sfc %>% dplyr::mutate(int = areacov(geometry, shpi)) %>% dplyr::arrange(value) %>% @@ -460,7 +449,8 @@ ShapeToMask <- function(shp_file, ref_grid, return_area = FALSE, } # Function to create polygons from VizRobinson code -polygonize <- function(lonlat_df, data_df, lon, lat, target_proj) { +polygonize <- function(lonlat_df, data_df, lon, lat, target_proj, + original_proj) { # Calculate polygon points from regular lat/lon # NOTE: The original grid must be regular grid with same space @@ -483,9 +473,9 @@ polygonize <- function(lonlat_df, data_df, lon, lat, target_proj) { lonlat_df <- data.frame(lon = lon_poly, lat = lat_poly) # Transfer lon/lat to projection - proj_lonlat <- st_as_sf(lonlat_df, coords = c("lon", "lat"), crs = target_proj) + proj_lonlat <- st_as_sf(lonlat_df, coords = c("lon", "lat"), crs = original_proj) # NOTE: if target_proj = "ESRI:54030", on Nord3v2, st_transform has lon and lat swapped! - # proj_lonlat <- st_transform(proj_lonlat, crs = target_proj) + proj_lonlat <- st_transform(proj_lonlat, crs = target_proj) lonlat_df_proj <- st_coordinates(proj_lonlat) # Use id to create groups for each polygon @@ -499,9 +489,9 @@ polygonize <- function(lonlat_df, data_df, lon, lat, target_proj) { datapoly <- st_as_sf(datapoly, coords = c("x", "y"), crs = target_proj) datapoly <- datapoly %>% - group_by(.data$id) %>% - summarise() %>% #NOTE: VERY SLOW if plot global - mutate(value = values[order(values$id), ]$value) %>% + dplyr::group_by(.data$id) %>% + dplyr::summarise() %>% # NOTE: VERY SLOW if plot global + dplyr::mutate(value = values[order(values$id), ]$value) %>% st_cast("POLYGON") %>% st_convex_hull() # maintain outer polygen (no bowtie shape) @@ -509,7 +499,6 @@ polygonize <- function(lonlat_df, data_df, lon, lat, target_proj) { } # Function to compute the coverage area ratio between the grid and the shapefile - areacov <- function(grid, shp) { shp_int_grid <- sf::st_intersection(x = grid, y = shp) idx <- attributes(shp_int_grid)$idx diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md new file mode 100644 index 0000000..b64af94 --- /dev/null +++ b/inst/doc/usecase.md @@ -0,0 +1,6 @@ +# Use case and example scripts + +In this document, you will find example scripts of the package. + +1. **Scripts with function examples** + 1. [Create an 's2dv_cube'](inst/doc/usecase/ex1_ShapeToMask.R) diff --git a/inst/doc/usecase/ex1_ShapeToMask.R b/inst/doc/usecase/ex1_ShapeToMask.R new file mode 100644 index 0000000..1fe5d1f --- /dev/null +++ b/inst/doc/usecase/ex1_ShapeToMask.R @@ -0,0 +1,186 @@ +#******************************************************************************* +# Script to test ShapeToMask +# Eva Rifà Rovira +#******************************************************************************* +devtools::load_all("/esarchive/scratch/erifarov/git/esviz") +#------------------------------------------------------------------------------- +setwd("/esarchive/scratch/erifarov/rpackages/esviz/shapefile/dev-shapetomask-area/out-testing") + +################################################################################ +# Shapefiles +colombia_cajamarca <- '/esarchive/scratch/erifarov/rpackages/esviz/shapefile/dev-shapetomask-area/scripts-alba/spatial_aggregation_alba/shp_test/colombia_cajamarca.shp' +colombia_sanvicent <- '/esarchive/scratch/erifarov/rpackages/esviz/shapefile/dev-shapetomask-area/scripts-alba/spatial_aggregation_alba/shp_test/colombia_sanvicentedelcaguan.shp' +NUTS_EU <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', + 'NUTS_RG_60M_2021_4326.shp') +# st_read +sf_colombia_cajamarca <- st_read(colombia_cajamarca) +sf_colombia_sanvicent <- st_read(colombia_sanvicent) +sf_NUTS_EU <- st_read(NUTS_EU) + +# reference grid: dataset = 'era5land' +lat <- array(seq(6, -1, -0.1), c(lat = 71)) +lon <- array(seq(-80, -70, 0.1), c(lon = 101)) +ref_grid_era5land_sample <- list(lat = lat, lon = lon) +# Data from era5land +load("/esarchive/scratch/erifarov/rpackages/esviz/shapefile/dev-shapetomask-area/scripts-alba/spatial_aggregation_alba/data/data.era5land.RData") +ref_grid_era5land <- list(latitude = attr(data.era5land, "Variable")$dat$latitude, + longitude = attr(data.era5land, "Variable")$dat$longitude) +################################################################################ +# Function to test plots +plot_shp <- function(fileout, x, y = NULL) { + png(file = fileout) + plot(x) + if (!is.null(y)) { + plot(y, add = TRUE) + } + dev.off() +} +################################################################################ +#----------------------------------------------------- +# Tests 1: colombia_cajamarca (only 1 region) +#----------------------------------------------------- +source("/esarchive/scratch/erifarov/git/esviz/R/ShapeToMask.R") +# (1.1) Centroid method + +# NOTE: We set shp_system = NULL because we don't want to subset any region +era5land_cajamarca_cntr <- ShapeToMask(shp_file = colombia_cajamarca, + shp_system = NULL, + ref_grid = ref_grid_era5land, region = TRUE, + find_min_dist = FALSE) +# (1.2) Area method +# target_crs = NULL +era5land_cajamarca_area <- ShapeToMask(shp_file = colombia_cajamarca, + ref_grid = ref_grid_era5land, + shp_system = NULL, + compute_area_coverage = TRUE, + region = TRUE) +# Error: +# Error in wk_handle.wk_wkb(wkb, s2_geography_writer(oriented = oriented, : +# Loop 0 is not valid: Edge 1 crosses edge 3 + +# Solution: Set sf::sf_use_s2(FALSE) +sf::sf_use_s2(FALSE) +era5land_cajamarca_area <- ShapeToMask(shp_file = colombia_cajamarca, + ref_grid = ref_grid_era5land, + shp_system = NULL, + compute_area_coverage = TRUE, + region = TRUE) +sf::sf_use_s2(TRUE) + +# Compare results different solutions +sum(era5land_cajamarca_area) +# [1] 4.140404 +sum(era5land_cajamarca_cntr) +[1] 4 + +# Positions +which(era5land_cajamarca_cntr != 0) +# [1] 1561 1662 1663 1762 +which(era5land_cajamarca_area != 0) +# [1] 1460 1560 1561 1562 1661 1662 1663 1762 1763 1863 1864 + +all(which(era5land_cajamarca_cntr != 0) %in% which(era5land_cajamarca_area != 0)) +# [1] TRUE + +# Apply the results to the data + +mean_extract <- function(data_cube, mask) { + locations <- which(mask != 0) + res <- mean(data_cube[locations]) + return(res) +} + +aggregated_area <- Apply(data = list(data_cube = data.era5land, mask = era5land_cajamarca_area), + target_dims = list(data_cube = c('longitude', 'latitude'), + mask = c('longitude', 'latitude')), + fun = mean_extract)$output1 +aggregated_ctr <- Apply(data = list(data_cube = data.era5land, mask = era5land_cajamarca_cntr), + target_dims = list(data_cube = c('longitude', 'latitude'), + mask = c('longitude', 'latitude')), + fun = mean_extract)$output1 +dim(aggregated_area) +# time syear region +# 12 1 1 +summary(aggregated_area) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 11.58 11.98 12.24 12.20 12.47 12.63 +summary(aggregated_ctr) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 11.49 11.87 12.12 12.09 12.33 12.50 + +# Test (1.3): Raster method with exactextractr +# Source the function +source("/esarchive/scratch/erifarov/rpackages/esviz/shapefile/dev-shapetomask-area/scripts-alba/spatial_aggregation_alba/functions/raster_extract_to_array.R") +raster_era5land_cajamarca <- raster.extract(data = data.era5land, + shp = sf_colombia_cajamarca) +dim(raster_era5land_cajamarca) +# region syear time ensemble +# 10 1 12 1 + +summary(raster_era5land_cajamarca) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 9.124 11.092 11.788 11.978 13.026 15.151 +################################################################################ + +#----------------------------------------------------- +# Tests 1: colombia_cajamarca (only 1 region) +#----------------------------------------------------- + +# Example ShapeToMask: NUTS +shp_file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', + 'NUTS_RG_60M_2021_4326.shp') +ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) +NUTS_name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) + +# Mask computed with area coverage +sf::sf_use_s2(FALSE) +mask_area <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, + compute_area_coverage = TRUE, reg_names = NUTS_name, + fileout = "mask_area.nc") +mask_cntr <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, + compute_area_coverage = FALSE, reg_names = NUTS_name, + fileout = "mask_cntr.nc") +dim(mask_area) +# lon lat region +# 61 91 4 +summary(mask_area) + +# Mask computed with centroid approach +mask_cntr <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, region = T, + compute_area_coverage = FALSE, reg_names = NUTS_name) +dim(mask_cntr) +# lon lat region +# 61 91 4 + +################################################################################ + +#***************************************************************************** +# Examples function: ShapeToMask +# Source: https://earth.bsc.es/gitlab/es/esviz/-/blob/main/R/ShapeToMask.R +# An-Chi & Eva (2023) +#***************************************************************************** +# Exmple (1): NUTS +shp_file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', + 'NUTS_RG_60M_2021_4326.shp') +ref_grid <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', + 'tas_f1h/tas_201006.nc') +# ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) + +NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) +NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) +mask1 <- ShapeToMask(shp_file, ref_grid, reg_ids = NUTS.id) +mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, + reg_names = NUTS.name) + +# Exmple (2): GADM +shp_file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" +ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', + 'tas_f6h/tas_20191212.nc') +GADM.id <- c("ESP", "ITA") +GADM.name <- c("Spain", "Italy") +mask1 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, + reg_ids = GADM.id, shp_system = "GADM") +mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, + reg_names = GADM.name, shp_system = "GADM") + +################################################################################ \ No newline at end of file diff --git a/man/ShapeToMask.Rd b/man/ShapeToMask.Rd index b77136a..4684214 100644 --- a/man/ShapeToMask.Rd +++ b/man/ShapeToMask.Rd @@ -7,7 +7,7 @@ ShapeToMask( shp_file, ref_grid, - return_area = FALSE, + compute_area_coverage = FALSE, shp_system = "NUTS", reg_names = NULL, reg_ids = NULL, @@ -16,7 +16,6 @@ ShapeToMask( lat_dim = NULL, lon_dim = NULL, region = FALSE, - target_crs = NULL, check_valid = FALSE, find_min_dist = FALSE, max_dist = 50, @@ -32,8 +31,11 @@ ShapeToMask( data. Either (1) a netCDF file or (2) a list of lon and lat to provide the reference grid points. It is NULL by default.} -\item{return_area}{A logical value wether to return the area coverage or not. -It is FALSE by default.} +\item{compute_area_coverage}{A logical value indicating the method to find +the intersection of the reference grid and the shapefile. When it is TRUE, +the method used is the calculation of the area coverage fraction of +intersection. If it is FALSE, the method used is searching if the centroid +of the grid cell falls inside the shapefile or not. It is FALSE by default.} \item{shp_system}{A character string containing the Shapefile System Database Name used to subset the shapefile into regions by using parameters 'reg_ids' @@ -70,9 +72,6 @@ with the following possible names: 'lon', 'longitude', 'x', 'i' and \item{region}{A logical value indicating if we want a dimension for the regions in the resulting mask array. It is FALSE by default.} -\item{target_crs}{A character string indicating the target 'Coordinate -Reference System'.} - \item{check_valid}{A logical value that when it is TRUE it uses the function 'sf::st_make_valid' applied to the shapefile and to the coordinates.} @@ -89,7 +88,7 @@ and the reference grid.} parallel computation in multiple cores.} \item{fileout}{A character string of the path to save the NetCDF mask. If not -specified (default), the mask array will be returned.} +specified (default), only the mask array will be returned.} \item{...}{Arguments passed on to 's2_options' in function 'st_intersection'. See 's2 package'.} -- GitLab From fe0eece4d10642b1191ea340fd4a971fea7872d3 Mon Sep 17 00:00:00 2001 From: Raul Capellan Date: Fri, 7 Jun 2024 10:10:25 +0200 Subject: [PATCH 06/12] Cambios para incluir lats lons y crs --- R/ShapeToMask.R | 92 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 5 deletions(-) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 35641c7..91fd1f2 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -85,6 +85,7 @@ #'@import sf #'@import foreach #'@import dplyr +#'@import jsonlite #'@importFrom doParallel registerDoParallel #'@export ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, @@ -93,7 +94,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, lat_dim = NULL, lon_dim = NULL, region = FALSE, check_valid = FALSE, find_min_dist = FALSE, max_dist = 50, ncores = NULL, - fileout = NULL, ...) { + fileout = NULL, units = 'degrees', ...) { # TODO: Suppress warnings? # TODO: Add saving option? @@ -204,7 +205,14 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, # Step 1: Load the shapefile shp <- sf::st_read(shp_file) # class sf + shp_0 <- shp + if (units == 'degrees') { + shp <- sf::st_transform(shp, 4326) + } else if (units == 'meters') { + shp <- sf::st_transform(shp, 3857) + } + shp_crs <- sf::st_crs(shp) NUTS_ID <- ADM1_PCODE <- ISO <- NULL @@ -278,7 +286,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, } # Step (2.2): Create the grid - if (compute_area_coverage) { + if (compute_area_coverage) { locations <- array(1:(length(lon)*length(lat)), c(lon = length(lon), lat = length(lat))) # Build data dataframe @@ -375,9 +383,24 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, ## Return all the info from shp attr(mask, "shapefile") <- attributes(shp) + # Generate attributes related with the shape file + shape_attrs = list() + for (i in 1:nrow(shp)) { + entry <- list( + id = shp$NUTS_ID[i], + name = shp$NUTS_NAME[i], + shape_val = i + ) + shape_attrs <- append(shape_attrs, list(entry)) + } + # Step 5: Save to NetCDF if (!is.null(fileout)) { - ArrayToNc(list(mask = mask), fileout) + # Lat Lon Max Min + lat_list <- attr(mask, 'lat') + lon_list <- attr(mask, 'lon') + + transformToNc (mask, lat_list, lon_list, NULL, fileout, units, shape_attrs) } return(mask) } @@ -414,10 +437,10 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, # min(abs(lon - tmp_coords[ii, 1])) # min(abs(lat - tmp_coords[ii, 2])) mask[which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- n + which.min(abs(lat - tmp_coords[ii, 2]))] <- n # AQUI ES DONDE ASOCIA EL N A CADA VALOR DEL SHAPE PROBABLEMENTE RAUL } else { mask[which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 + which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 # SI SOLO HAY UNA REGION O NO HAY QUE DIVIDIR POR REGIONES } } } else if (find_min_dist) { @@ -445,6 +468,13 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, } } } + + # Flip the matrix when the area_coverage is TRUE + if (compute_area_coverage) { + mask <- mask[, ncol(mask):1] # Flip in the horizontal axis + # mask <- mask[nrow(mask):1, ] # Flip in vertical axis + } + return(mask) } @@ -512,3 +542,55 @@ areacov <- function(grid, shp) { } return(fr) } + +transformToNc <- function(values, lat=NULL, lon=NULL, time=NULL, out_file, crs='degrees', shape_values = NULL) { + # # Take the name of lat lon dimensions + # possible_lon_names <- c("lon", "longitude", "x") + # possible_lat_names <- c("lat", "latitude" , "y") + + # rigth_lon_name <- possible_lon_names[possible_lon_names %in% tolower(names(dim(values)))] + # rigth_lat_name <- possible_lat_names[possible_lat_names %in% tolower(names(dim(values)))] + + # Change latitude and longitude names to lat, lon + + # Add coordinates + if (!is.null(lon) && length(lon) > 0) { + dim(lon) <- length(lon) + metadata <- list(lon = list(units = crs)) + attr(lon, 'variables') <- metadata + names(dim(lon)) <- 'lon' # rigth_lon_name + } + + if (!is.null(lat) && length(lat) > 0) { + dim(lat) <- length(lat) + metadata <- list(lat = list(units = crs)) + attr(lat, 'variables') <- metadata + names(dim(lat)) <- 'lat' # rigth_lat_name + } + + # Add the projection to the file + if (crs == 'degrees') { + crs = 'EPSG4326' + } else { + crs = 'EPSG3857' + } + + # Format the attributes before introducing to the file + formatted_string <- paste(shape_values, collapse="") + formatted_string <- gsub("\\(", "", formatted_string) + formatted_string <- gsub("\\)", "\n", formatted_string) + formatted_string <- gsub("list", "", formatted_string) + formatted_string <- gsub('\\"', "", formatted_string) + + # Add the atributes to the file + attrs <- list(global_attrs = list( + crs = crs, + shapes_values = formatted_string + )) + attributes(values) <- c(attributes(values), attrs) + + # Export to Nc + ArrayToNc(list(values, lon, lat), out_file) + +} + -- GitLab From 1202822360bac69cbd87e7c9f2209f159029a7bd Mon Sep 17 00:00:00 2001 From: Raul Capellan Date: Fri, 7 Jun 2024 17:08:27 +0200 Subject: [PATCH 07/12] Corregidos la sustitucion de nombres lat lon --- R/ShapeToMask.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 91fd1f2..611e74c 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -544,14 +544,13 @@ areacov <- function(grid, shp) { } transformToNc <- function(values, lat=NULL, lon=NULL, time=NULL, out_file, crs='degrees', shape_values = NULL) { - # # Take the name of lat lon dimensions - # possible_lon_names <- c("lon", "longitude", "x") - # possible_lat_names <- c("lat", "latitude" , "y") - - # rigth_lon_name <- possible_lon_names[possible_lon_names %in% tolower(names(dim(values)))] - # rigth_lat_name <- possible_lat_names[possible_lat_names %in% tolower(names(dim(values)))] + # Take the name of lat lon dimensions + possible_lon_names <- c("lon", "longitude", "x") + possible_lat_names <- c("lat", "latitude" , "y") # Change latitude and longitude names to lat, lon + names(dim(values)) <- gsub(paste(possible_lon_names, collapse = "|"), "lon", names(dim(values)), ignore.case = TRUE) + names(dim(values)) <- gsub(paste(possible_lat_names, collapse = "|"), "lat", names(dim(values)), ignore.case = TRUE) # Add coordinates if (!is.null(lon) && length(lon) > 0) { -- GitLab From cc59ff34ca18b6de4bb62b3abf290f9256de2277 Mon Sep 17 00:00:00 2001 From: Raul Capellan Date: Wed, 26 Jun 2024 09:42:29 +0200 Subject: [PATCH 08/12] bug: compute_area_coverage with GADM shape files --- R/ShapeToMask.R | 70 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 18 deletions(-) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 611e74c..83d42ab 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -285,6 +285,11 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, lon <- ref_grid[[lon_dim]] } + # Set degrees longitudes to -180 to 180 + lon <- ifelse(lon > 180, lon - 360, lon) + order_idx <- order(lon) + lon <- lon[order_idx] + # Step (2.2): Create the grid if (compute_area_coverage) { locations <- array(1:(length(lon)*length(lat)), c(lon = length(lon), @@ -292,7 +297,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, # Build data dataframe lonlat_df <- data.frame(lon = rep(as.vector(lon), length(lat)), lat = sort(rep(as.vector(lat), length(lon)), decreasing = TRUE)) - + data_df <- lonlat_df %>% mutate(dat = as.vector(locations)) @@ -312,6 +317,8 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, ref.df <- data.frame(data = 0, lon = rep(lon, times = length(lat)), lat = rep(lat, each = length(lon))) + + ref.df <- modifyLonsTo180(units, ref.df, order_idx) coord <- as.matrix(data.frame(x = ref.df$lon, y = ref.df$lat)) xy.sfg <- sf::st_multipoint(coord) @@ -349,7 +356,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, xy.sfc = xy.sfc, compute_area_coverage = compute_area_coverage, find_min_dist = find_min_dist, shp_col_name_ids = shp_col_name_ids, - max_dist = max_dist, region = region, ...) + max_dist = max_dist, region = region, shp_system = shp_system, ...) } else { registerDoParallel(ncores) mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages = c('sf', 'dplyr')) %dopar% @@ -357,7 +364,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, xy.sfc = xy.sfc, compute_area_coverage = compute_area_coverage, find_min_dist = find_min_dist, shp_col_name_ids = shp_col_name_ids, - max_dist = max_dist, region = region, ...) + max_dist = max_dist, region = region, shp_system = shp_system, ...) registerDoSEQ() } if (region) { @@ -366,29 +373,37 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, } else { names(dim(mask)) <- c(lon_dim, lat_dim) } - + # Step 4: Add attributes attr(mask, lon_dim) <- lon attr(mask, lat_dim) <- lat - if (!is.null(shp_system)) { - if (shp_system == "NUTS") { - attr(mask, "index") <- as.list(shp$NUTS_ID) - } else if (shp_system == "ADM") { - attr(mask, "index") <- as.list(shp$ADM1_PCODE) - } else if (shp_system == "GADM") { - attr(mask, "index") <- as.list(shp$ISO) - } - } attr(mask, "index") <- 1:nrow(shp) ## Return all the info from shp attr(mask, "shapefile") <- attributes(shp) # Generate attributes related with the shape file shape_attrs = list() + for (i in 1:nrow(shp)) { + if (!is.null(shp_system)) { + if (shp_system == "NUTS") { + id_aux = shp$NUTS_ID[i] + name_aux = shp$NUTS_NAME[i] + } else if (shp_system == "ADM") { + id_aux = shp$ADM1_PCODE[i] + name_aux = shp$ADM1_NAME[i] + } else if (shp_system == "GADM") { + id_aux = shp$ISO[i] + name_aux = shp$Name[i] + } else { + id_aux = NULL + name_aux = NULL + } + } + entry <- list( - id = shp$NUTS_ID[i], - name = shp$NUTS_NAME[i], + id = id_aux, + name = name_aux, shape_val = i ) shape_attrs <- append(shape_attrs, list(entry)) @@ -408,7 +423,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, .shapetomask <- function(shp, n, lon, lat, xy.sfc, compute_area_coverage = FALSE, find_min_dist = FALSE, shp_col_name_ids = NULL, max_dist = 50, - region = FALSE, ...) { + region = FALSE, shp_system = 'NUTS', ...) { shpi <- shp[n, ] if (compute_area_coverage) { mask <- xy.sfc %>% @@ -470,7 +485,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, } # Flip the matrix when the area_coverage is TRUE - if (compute_area_coverage) { + if (compute_area_coverage && grepl("NUTS", shp_system)) { mask <- mask[, ncol(mask):1] # Flip in the horizontal axis # mask <- mask[nrow(mask):1, ] # Flip in vertical axis } @@ -520,7 +535,8 @@ polygonize <- function(lonlat_df, data_df, lon, lat, target_proj, datapoly <- datapoly %>% dplyr::group_by(.data$id) %>% - dplyr::summarise() %>% # NOTE: VERY SLOW if plot global + dplyr::summarise(geometry = st_combine(geometry)) %>% + # dplyr::summarise() %>% # NOTE: VERY SLOW if plot global dplyr::mutate(value = values[order(values$id), ]$value) %>% st_cast("POLYGON") %>% st_convex_hull() # maintain outer polygen (no bowtie shape) @@ -593,3 +609,21 @@ transformToNc <- function(values, lat=NULL, lon=NULL, time=NULL, out_file, crs=' } +modifyLonsTo180 <- function(units, df, order_idx) { + + print('Check errors 1:') + # browser() + + if (units == 'degrees'){ + my_list <- c("lon", "longitude", "x") + data <- df[colnames(df)[1]] + name_lon <- my_list[tolower(my_list) %in% tolower(colnames(df))] + lon_data <- df[[name_lon]] + data_reorder <- df[[colnames(df)[1]]] + + df[colnames(df)[1]] <- data_reorder[order_idx] + } + + return (df) +} + -- GitLab From 89c98fa1518b711e54ca673d5008f50b5c2c4147 Mon Sep 17 00:00:00 2001 From: Raul Capellan Date: Wed, 26 Jun 2024 11:58:59 +0200 Subject: [PATCH 09/12] Correcting tests shapesToMask --- tests/testthat/test-ShapeToMask.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-ShapeToMask.R b/tests/testthat/test-ShapeToMask.R index 550f2ee..c4be789 100644 --- a/tests/testthat/test-ShapeToMask.R +++ b/tests/testthat/test-ShapeToMask.R @@ -100,7 +100,7 @@ test_that("2. Output", { ) expect_equal( sum(mask4), - 48 + 64 ) mask5 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, reg_names = GADM_name2, shp_system = "GADM", @@ -111,6 +111,6 @@ test_that("2. Output", { ) expect_equal( sum(mask5), - 56 + 88 ) }) \ No newline at end of file -- GitLab From 16d2f7b9d8e9e45c0f7a2086374479fd7e379a72 Mon Sep 17 00:00:00 2001 From: Raul Capellan Date: Wed, 26 Jun 2024 21:40:04 +0200 Subject: [PATCH 10/12] test: uploaded coverage for shapeToMask --- tests/testthat/test-ShapeToMask.R | 52 ++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-ShapeToMask.R b/tests/testthat/test-ShapeToMask.R index c4be789..49e8a23 100644 --- a/tests/testthat/test-ShapeToMask.R +++ b/tests/testthat/test-ShapeToMask.R @@ -87,10 +87,24 @@ test_that("2. Output", { sum(mask3), 105 ) + # Test a single region mask3_1 <- ShapeToMask(shp_file1, ref_grid = ref_grid1_1, shp_col_name_ids = "NUTS_ID", lon_dim = 'lon', lat_dim = 'lat', reg_ids = "DE149", region = TRUE) + + mask3_2 <- ShapeToMask(shp_file1, ref_grid = ref_grid1_1, + shp_col_name_ids = "NUTS_ID", lon_dim = 'lon', + lat_dim = 'lat', reg_ids = "DE149", compute_area_coverage = TRUE) + expect_equal( + dim(mask3_2), + c(lon = 61, lat = 91, region = 1) + ) + expect_equal( + sum(mask3_2), + 0 + ) + # Test GADM mask4 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, reg_ids = GADM_id2, shp_system = "GADM") @@ -113,4 +127,40 @@ test_that("2. Output", { sum(mask5), 88 ) -}) \ No newline at end of file + mask6 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, + reg_names = GADM_name2, shp_system = "GADM", + region = TRUE, compute_area_coverage = TRUE) + expect_equal( + dim(mask6), + c(longitude = 240, latitude = 121, region = 4) + ) + expect_equal( + as.integer(sum(mask6)), + 77 + ) + + mask7 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, compute_area_coverage = TRUE, + reg_names = GADM_name2, shp_system = "GADM", fileout="test_GADM_1_5.nc") + + expect_equal( + dim(mask7), + c(longitude = 240, latitude = 121, region = 4) + ) + expect_equal( + as.integer(sum(mask7)), + 77 + ) + + mask7 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, compute_area_coverage = TRUE, + reg_names = GADM_name2, shp_system = "GADM", fileout="test_GADM_1_5.nc", find_min_dist = TRUE, ncores=2) + + expect_equal( + dim(mask7), + c(longitude = 240, latitude = 121, region = 4) + ) + expect_equal( + as.integer(sum(mask7)), + 77 + ) + +}) -- GitLab From 3f7a6785cd047f45a01ca67c64de52e380ce21b9 Mon Sep 17 00:00:00 2001 From: Raul Capellan Date: Thu, 11 Jul 2024 15:15:36 +0200 Subject: [PATCH 11/12] hotfix: Added parameters and documentation to the function --- R/ShapeToMask.R | 67 ++++++++++++++++++++++++------- tests/testthat/test-ShapeToMask.R | 10 ++++- 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 83d42ab..3032f81 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -68,6 +68,13 @@ #' specified (default), only the mask array will be returned. #'@param ... Arguments passed on to 's2_options' in function 'st_intersection'. #' See 's2 package'. +#'@param units A character string indicating if your GIS files has a grid in degrees or meters. If it +#' is NULL, the units will be set as "meters" +#' with the following possible names: 'degrees', 'meters' +#'@param name_shp_col A character string indicating in the shape file which is the name of the column +#' with the values of the names of the different polygons. It is NULL by default. +#'@param id_shp_col A character string indicating in the shape file which is the name of the column +#' with the values of the IDs of the different polygons. It is NULL by default. #' #'@return A multidimensional array containing a mask array with longitude and #'latitude dimensions. If 'region' is TRUE, there will be a dimension for @@ -94,7 +101,8 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, lat_dim = NULL, lon_dim = NULL, region = FALSE, check_valid = FALSE, find_min_dist = FALSE, max_dist = 50, ncores = NULL, - fileout = NULL, units = 'degrees', ...) { + fileout = NULL, units = 'degrees', id_shape_col = NULL, + name_shape_col = NULL, ...) { # TODO: Suppress warnings? # TODO: Add saving option? @@ -119,6 +127,21 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, stop("Parameter 'lat_dim' must be a character string.") } } + if (!is.null(id_shape_col)) { + if (!is.character(id_shape_col)) { + stop("Parameter 'id_shape_col' must be a character string.") + } + } + if (!is.null(name_shape_col)) { + if (!is.character(name_shape_col)) { + stop("Parameter 'name_shape_col' must be a character string.") + } + } + if (!is.null(units)) { + if (!is.character(units)) { + stop("Parameter 'units' must be a character string.") + } + } # ref_grid if (is.null(ref_grid)) { stop("Parameter 'ref_grid' cannot be NULL.") @@ -386,19 +409,24 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, for (i in 1:nrow(shp)) { if (!is.null(shp_system)) { - if (shp_system == "NUTS") { + if (is.null(id_shape_col) && is.null(name_shape_col)) { + if (shp_system == "NUTS") { id_aux = shp$NUTS_ID[i] name_aux = shp$NUTS_NAME[i] - } else if (shp_system == "ADM") { - id_aux = shp$ADM1_PCODE[i] - name_aux = shp$ADM1_NAME[i] - } else if (shp_system == "GADM") { - id_aux = shp$ISO[i] - name_aux = shp$Name[i] - } else { - id_aux = NULL - name_aux = NULL + } else if (shp_system == "ADM") { + id_aux = shp$ADM1_PCODE[i] + name_aux = shp$ADM1_NAME[i] + } else if (shp_system == "GADM") { + id_aux = shp$ISO[i] + name_aux = shp$Name[i] + } else { + id_aux = NULL + name_aux = NULL + } } + } else { + id_aux = id_shape_col + name_aux = name_shape_col } entry <- list( @@ -415,7 +443,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, lat_list <- attr(mask, 'lat') lon_list <- attr(mask, 'lon') - transformToNc (mask, lat_list, lon_list, NULL, fileout, units, shape_attrs) + transformToNc (mask, lat_list, lon_list, NULL, fileout, units, shape_attrs, lat_dim, lon_dim) } return(mask) } @@ -559,10 +587,19 @@ areacov <- function(grid, shp) { return(fr) } -transformToNc <- function(values, lat=NULL, lon=NULL, time=NULL, out_file, crs='degrees', shape_values = NULL) { +transformToNc <- function(values, lat=NULL, lon=NULL, time=NULL, out_file, crs='degrees', shape_values = NULL, lat_dim, lon_dim) { # Take the name of lat lon dimensions - possible_lon_names <- c("lon", "longitude", "x") - possible_lat_names <- c("lat", "latitude" , "y") + if (is.null(lon_dim)) { + possible_lon_names <- c("lon", "longitude", "x", "i", "nav_lon") + } else { + possible_lon_names <- c(lon_dim) + } + + if (is.null(lat_dim)) { + possible_lat_names <- c("lat", "latitude" , "y", "j", "nav_lat") + } else { + possible_lat_names <- c(lat_dim) + } # Change latitude and longitude names to lat, lon names(dim(values)) <- gsub(paste(possible_lon_names, collapse = "|"), "lon", names(dim(values)), ignore.case = TRUE) diff --git a/tests/testthat/test-ShapeToMask.R b/tests/testthat/test-ShapeToMask.R index 49e8a23..e44c92d 100644 --- a/tests/testthat/test-ShapeToMask.R +++ b/tests/testthat/test-ShapeToMask.R @@ -46,6 +46,14 @@ test_that("1. Input checks", { ShapeToMask(shp_file1, lat_dim = 1, lon_dim = 'lat'), "Parameter 'lat_dim' must be a character string." ) + expect_error( + ShapeToMask(shp_file1, ref_grid1, reg_ids = NUTS_id1, id_shp_col = 1, name_shape_col = 'ESP'), + "Parameter 'id_shp_col' must be a character string." + ) + expect_error( + ShapeToMask(shp_file1, ref_grid1, reg_ids = NUTS_id1, name_shp_col = 1, id_shp_col = '4BJd'), + "Parameter 'name_shp_col' must be a character string." + ) # shp_system, reg_ids, reg_names, shp_col_name_ids # reg_level # region @@ -152,7 +160,7 @@ test_that("2. Output", { ) mask7 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, compute_area_coverage = TRUE, - reg_names = GADM_name2, shp_system = "GADM", fileout="test_GADM_1_5.nc", find_min_dist = TRUE, ncores=2) + reg_names = GADM_name2, shp_system = "GADM", fileout="test_GADM_1_5.nc", ncores=2) expect_equal( dim(mask7), -- GitLab From 1c61d21faa9cb8fd137cfa2cb9fc14a4d2eea858 Mon Sep 17 00:00:00 2001 From: Raul Capellan Date: Thu, 11 Jul 2024 16:59:05 +0200 Subject: [PATCH 12/12] hotfix: Correcting the names of the parameters --- .gitignore | 1 + R/ShapeToMask.R | 60 +++++++++++++++---------------- tests/testthat/test-ShapeToMask.R | 14 ++++---- 3 files changed, 37 insertions(+), 38 deletions(-) diff --git a/.gitignore b/.gitignore index b26d556..8e64182 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ master_pull.txt *.ps Rplots.pdf .nfs* +*.nc diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 3032f81..85d4c44 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -5,7 +5,7 @@ #'output based on requested region names or IDs. The accepted shapefile #'databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. #'However, the function can use other shapefiles databases with specifying the -#'categories names with the parameter 'shp_col_name_ids'. +#'categories names with the parameter 'id_shape_col'. #' #'To ensure accurate comparison with the shapefile, the function loads a #'reference dataset that provides longitude and latitude information. By @@ -32,8 +32,6 @@ #' is used, you must specify either 'reg_ids' or 'reg_names'; if you don't need #' to subset different regions, set it to NULL. It is set to "NUTS" by default #' (optional). -#'@param shp_col_name_ids A character string indicating the column name of the -#' column in where the specified 'reg_ids' will be taken (optional). #'@param reg_ids A character string indicating the unique ID in shapefile. #' It is NULL by default (optional). #'@param reg_names A named list of character string vectors indicating the @@ -71,9 +69,9 @@ #'@param units A character string indicating if your GIS files has a grid in degrees or meters. If it #' is NULL, the units will be set as "meters" #' with the following possible names: 'degrees', 'meters' -#'@param name_shp_col A character string indicating in the shape file which is the name of the column +#'@param name_shape_col A character string indicating in the shape file which is the name of the column #' with the values of the names of the different polygons. It is NULL by default. -#'@param id_shp_col A character string indicating in the shape file which is the name of the column +#'@param id_shape_col A character string indicating in the shape file which is the name of the column #' with the values of the IDs of the different polygons. It is NULL by default. #' #'@return A multidimensional array containing a mask array with longitude and @@ -97,7 +95,7 @@ #'@export ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, shp_system = "NUTS", reg_names = NULL, reg_ids = NULL, - shp_col_name_ids = NULL, reg_level = 3, + reg_level = 3, lat_dim = NULL, lon_dim = NULL, region = FALSE, check_valid = FALSE, find_min_dist = FALSE, max_dist = 50, ncores = NULL, @@ -171,7 +169,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, } } - # shp_system, reg_ids, reg_names, shp_col_name_ids + # shp_system, reg_ids, reg_names, id_shape_col if (!is.null(shp_system)) { if (!is.character(shp_system)) { stop("Parameter 'shp_system' must be a character strinig.") @@ -180,12 +178,12 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, stop("If 'shp_system' is used, you must provide either parameter ", "'reg_ids' or 'reg_names'.") } - } else if (!is.null(shp_col_name_ids)) { + } else if (!is.null(id_shape_col)) { if (is.null(reg_ids)) { - stop("If 'shp_col_name_ids' is used, parameter 'reg_ids' must be provided.") + stop("If 'id_shape_col' is used, parameter 'reg_ids' must be provided.") } - if (!is.character(shp_col_name_ids)) { - stop("Parameter 'shp_col_name_ids' must be a character strinig.") + if (!is.character(id_shape_col)) { + stop("Parameter 'id_shape_col' must be a character strinig.") } } if (all(!is.null(reg_ids), !is.null(reg_names))) { @@ -243,40 +241,40 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, if (!is.null(reg_ids)) { ## Method 1: Directly use IDs - if (!is.null(shp_col_name_ids)) { - if (shp_col_name_ids %in% names(shp)) { - shp <- subset(shp, get(shp_col_name_ids) %in% reg_ids) + if (!is.null(id_shape_col)) { + if (id_shape_col %in% names(shp)) { + shp <- subset(shp, get(id_shape_col) %in% reg_ids) } else { stop("Shape system name not found in shapefile names.") } } else if (shp_system == "NUTS") { shp <- subset(shp, NUTS_ID %in% reg_ids) - shp_col_name_ids <- "NUTS_ID" + id_shape_col <- "NUTS_ID" } else if (shp_system == "ADM") { shp <- subset(shp, ADM1_PCODE %in% reg_ids) - shp_col_name_ids <- "ADM1_PCODE" + id_shape_col <- "ADM1_PCODE" } else if (shp_system == "GADM") { shp <- subset(shp, ISO %in% reg_ids) - shp_col_name_ids <- "ISO" + id_shape_col <- "ISO" } else { stop("shp_system ", shp_system, " is not defined yet.") } } else if (!is.null(reg_names)) { - shp_col_name_ids <- NULL + id_shape_col <- NULL ## Method 2: Use country code & region name for (cntr_i in 1:length(reg_names)) { if (shp_system == "NUTS") { tmp <- subset(shp, CNTR_CODE == names(reg_names)[cntr_i]) tmp <- subset(tmp, NUTS_NAME %in% reg_names[[cntr_i]]) - shp_col_name_ids <- 'NUTS_NAME' + id_shape_col <- 'NUTS_NAME' } else if (shp_system == "ADM") { tmp <- subset(shp, ADM0_EN == names(reg_names)[cntr_i]) tmp <- subset(tmp, ADM1_EN %in% reg_names[[cntr_i]]) - shp_col_name_ids <- 'ADM1_EN' + id_shape_col <- 'ADM1_EN' } else if (shp_system == "GADM") { tmp <- subset(shp, Name %in% reg_names) - shp_col_name_ids <- 'Name' + id_shape_col <- 'Name' } if (cntr_i == 1) { shp_tmp <- tmp @@ -378,7 +376,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, xy.sfc = xy.sfc, compute_area_coverage = compute_area_coverage, find_min_dist = find_min_dist, - shp_col_name_ids = shp_col_name_ids, + id_shape_col = id_shape_col, max_dist = max_dist, region = region, shp_system = shp_system, ...) } else { registerDoParallel(ncores) @@ -386,7 +384,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, xy.sfc = xy.sfc, compute_area_coverage = compute_area_coverage, find_min_dist = find_min_dist, - shp_col_name_ids = shp_col_name_ids, + id_shape_col = id_shape_col, max_dist = max_dist, region = region, shp_system = shp_system, ...) registerDoSEQ() } @@ -411,8 +409,8 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, if (!is.null(shp_system)) { if (is.null(id_shape_col) && is.null(name_shape_col)) { if (shp_system == "NUTS") { - id_aux = shp$NUTS_ID[i] - name_aux = shp$NUTS_NAME[i] + id_aux = shp$NUTS_ID[i] + name_aux = shp$NUTS_NAME[i] } else if (shp_system == "ADM") { id_aux = shp$ADM1_PCODE[i] name_aux = shp$ADM1_NAME[i] @@ -423,10 +421,10 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, id_aux = NULL name_aux = NULL } + } else{ + id_aux = id_shape_col + name_aux = name_shape_col } - } else { - id_aux = id_shape_col - name_aux = name_shape_col } entry <- list( @@ -450,7 +448,7 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, .shapetomask <- function(shp, n, lon, lat, xy.sfc, compute_area_coverage = FALSE, find_min_dist = FALSE, - shp_col_name_ids = NULL, max_dist = 50, + id_shape_col = NULL, max_dist = 50, region = FALSE, shp_system = 'NUTS', ...) { shpi <- shp[n, ] if (compute_area_coverage) { @@ -502,12 +500,12 @@ ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, which.min(abs(lat - tmp_coords[, 2]))] <- 1 } # warning(paste0('The reference grid has no intersection with region ', - # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)), + # ifelse(is.character(id_shape_col), shpi[[id_shape_col]], paste0('n° ', n)), # ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], # ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) } else { # warning(paste0('The reference grid has no intersection with region ', - # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)))) + # ifelse(is.character(id_shape_col), shpi[[id_shape_col]], paste0('n° ', n)))) } } } diff --git a/tests/testthat/test-ShapeToMask.R b/tests/testthat/test-ShapeToMask.R index e44c92d..e99bee9 100644 --- a/tests/testthat/test-ShapeToMask.R +++ b/tests/testthat/test-ShapeToMask.R @@ -47,14 +47,14 @@ test_that("1. Input checks", { "Parameter 'lat_dim' must be a character string." ) expect_error( - ShapeToMask(shp_file1, ref_grid1, reg_ids = NUTS_id1, id_shp_col = 1, name_shape_col = 'ESP'), - "Parameter 'id_shp_col' must be a character string." + ShapeToMask(shp_file1, ref_grid1, reg_ids = NUTS_id1, id_shape_col = 1, name_shape_col = 'ESP'), + "Parameter 'id_shape_col' must be a character string." ) expect_error( - ShapeToMask(shp_file1, ref_grid1, reg_ids = NUTS_id1, name_shp_col = 1, id_shp_col = '4BJd'), - "Parameter 'name_shp_col' must be a character string." + ShapeToMask(shp_file1, ref_grid1, reg_ids = NUTS_id1, name_shape_col = 1, id_shape_col = '4BJd'), + "Parameter 'name_shape_col' must be a character string." ) - # shp_system, reg_ids, reg_names, shp_col_name_ids + # shp_system, reg_ids, reg_names, sname_shape_col # reg_level # region # target_crs @@ -98,11 +98,11 @@ test_that("2. Output", { # Test a single region mask3_1 <- ShapeToMask(shp_file1, ref_grid = ref_grid1_1, - shp_col_name_ids = "NUTS_ID", lon_dim = 'lon', + name_shape_col = "NUTS_ID", lon_dim = 'lon', lat_dim = 'lat', reg_ids = "DE149", region = TRUE) mask3_2 <- ShapeToMask(shp_file1, ref_grid = ref_grid1_1, - shp_col_name_ids = "NUTS_ID", lon_dim = 'lon', + name_shape_col = "NUTS_ID", lon_dim = 'lon', lat_dim = 'lat', reg_ids = "DE149", compute_area_coverage = TRUE) expect_equal( dim(mask3_2), -- GitLab