diff --git a/.gitignore b/.gitignore index b26d556a841a100900ec7e4824aded4dd3e3b82a..8e6418228b74aca9d93b13b5e4377dec74a92f98 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ master_pull.txt *.ps Rplots.pdf .nfs* +*.nc diff --git a/DESCRIPTION b/DESCRIPTION index e4d4332e1d33dd4ddc5aa99afac6a5166cbc457a..ae32af84b25228bc2253c69f874dd61c92f3c45f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,6 +43,6 @@ URL: https://earth.bsc.es/gitlab/es/esviz/ BugReports: https://earth.bsc.es/gitlab/es/esviz/-/issues SystemRequirements: GDAL (>= 2.0.1), GEOS (>= 3.4.0), PROJ (>= 4.8.0) Encoding: UTF-8 -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Config/testthat/edition: 3 LazyData: true diff --git a/NAMESPACE b/NAMESPACE index 254964cb4ab9e9a01c42ee49cf532f53d7bce3e3..09342a68244f435715e98dd0dbd3925f0d26edf9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(VizTriangles4Categories) export(VizWeeklyClim) import(RColorBrewer) import(cowplot) +import(dplyr) import(easyNCDF) import(foreach) import(ggplot2) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 350e38bbb793502c98ed3a1eae3234ed41a0e496..85d4c444606fe5a12ab40db69581ce9c2a9e9a69 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -5,7 +5,7 @@ #'output based on requested region names or IDs. The accepted shapefile #'databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. #'However, the function can use other shapefiles databases with specifying the -#'categories names with the parameter 'shp_col_name_ids'. +#'categories names with the parameter 'id_shape_col'. #' #'To ensure accurate comparison with the shapefile, the function loads a #'reference dataset that provides longitude and latitude information. By @@ -21,14 +21,17 @@ #'@param ref_grid A character string indicating the path to the reference #' data. Either (1) a netCDF file or (2) a list of lon and lat to provide the #' reference grid points. It is NULL by default. +#'@param compute_area_coverage A logical value indicating the method to find +#' the intersection of the reference grid and the shapefile. When it is TRUE, +#' the method used is the calculation of the area coverage fraction of +#' intersection. If it is FALSE, the method used is searching if the centroid +#' of the grid cell falls inside the shapefile or not. It is FALSE by default. #'@param shp_system A character string containing the Shapefile System Database #' Name used to subset the shapefile into regions by using parameters 'reg_ids' #' or 'reg_names'. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. When it #' is used, you must specify either 'reg_ids' or 'reg_names'; if you don't need #' to subset different regions, set it to NULL. It is set to "NUTS" by default #' (optional). -#'@param shp_col_name_ids A character string indicating the column name of the -#' column in where the specified 'reg_ids' will be taken (optional). #'@param reg_ids A character string indicating the unique ID in shapefile. #' It is NULL by default (optional). #'@param reg_names A named list of character string vectors indicating the @@ -46,8 +49,6 @@ #' is NULL, the longitudinal name will be searched using an internal function #' with the following possible names: 'lon', 'longitude', 'x', 'i' and #' 'nav_lon'. It is set to NULL by default. -#'@param target_crs A character string indicating the target 'Coordinate -#' Reference System'. #'@param region A logical value indicating if we want a dimension for the #' regions in the resulting mask array. It is FALSE by default. #'@param check_valid A logical value that when it is TRUE it uses the function @@ -61,8 +62,17 @@ #' and the reference grid. #'@param ncores The number of parallel processes to spawn for the use for #' parallel computation in multiple cores. +#'@param fileout A character string of the path to save the NetCDF mask. If not +#' specified (default), only the mask array will be returned. #'@param ... Arguments passed on to 's2_options' in function 'st_intersection'. #' See 's2 package'. +#'@param units A character string indicating if your GIS files has a grid in degrees or meters. If it +#' is NULL, the units will be set as "meters" +#' with the following possible names: 'degrees', 'meters' +#'@param name_shape_col A character string indicating in the shape file which is the name of the column +#' with the values of the names of the different polygons. It is NULL by default. +#'@param id_shape_col A character string indicating in the shape file which is the name of the column +#' with the values of the IDs of the different polygons. It is NULL by default. #' #'@return A multidimensional array containing a mask array with longitude and #'latitude dimensions. If 'region' is TRUE, there will be a dimension for @@ -79,18 +89,22 @@ #'@import easyNCDF #'@import sf #'@import foreach +#'@import dplyr +#'@import jsonlite #'@importFrom doParallel registerDoParallel #'@export -ShapeToMask <- function(shp_file, ref_grid, +ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, shp_system = "NUTS", reg_names = NULL, reg_ids = NULL, - shp_col_name_ids = NULL, reg_level = 3, + reg_level = 3, lat_dim = NULL, lon_dim = NULL, - region = FALSE, target_crs = NULL, - check_valid = FALSE, find_min_dist = FALSE, - max_dist = 50, ncores = NULL, ...) { + region = FALSE, check_valid = FALSE, + find_min_dist = FALSE, max_dist = 50, ncores = NULL, + fileout = NULL, units = 'degrees', id_shape_col = NULL, + name_shape_col = NULL, ...) { # TODO: Suppress warnings? # TODO: Add saving option? + # sf_use_s2(FALSE) # Initial checks # shp_file @@ -111,6 +125,21 @@ ShapeToMask <- function(shp_file, ref_grid, stop("Parameter 'lat_dim' must be a character string.") } } + if (!is.null(id_shape_col)) { + if (!is.character(id_shape_col)) { + stop("Parameter 'id_shape_col' must be a character string.") + } + } + if (!is.null(name_shape_col)) { + if (!is.character(name_shape_col)) { + stop("Parameter 'name_shape_col' must be a character string.") + } + } + if (!is.null(units)) { + if (!is.character(units)) { + stop("Parameter 'units' must be a character string.") + } + } # ref_grid if (is.null(ref_grid)) { stop("Parameter 'ref_grid' cannot be NULL.") @@ -140,7 +169,7 @@ ShapeToMask <- function(shp_file, ref_grid, } } - # shp_system, reg_ids, reg_names, shp_col_name_ids + # shp_system, reg_ids, reg_names, id_shape_col if (!is.null(shp_system)) { if (!is.character(shp_system)) { stop("Parameter 'shp_system' must be a character strinig.") @@ -149,12 +178,12 @@ ShapeToMask <- function(shp_file, ref_grid, stop("If 'shp_system' is used, you must provide either parameter ", "'reg_ids' or 'reg_names'.") } - } else if (!is.null(shp_col_name_ids)) { + } else if (!is.null(id_shape_col)) { if (is.null(reg_ids)) { - stop("If 'shp_col_name_ids' is used, parameter 'reg_ids' must be provided.") + stop("If 'id_shape_col' is used, parameter 'reg_ids' must be provided.") } - if (!is.character(shp_col_name_ids)) { - stop("Parameter 'shp_col_name_ids' must be a character strinig.") + if (!is.character(id_shape_col)) { + stop("Parameter 'id_shape_col' must be a character strinig.") } } if (all(!is.null(reg_ids), !is.null(reg_names))) { @@ -170,12 +199,6 @@ ShapeToMask <- function(shp_file, ref_grid, if (!is.logical(region)) { stop("Parameter 'region' must be a logical value.") } - # target_crs - if (!is.null(target_crs)) { - if (!is.character(target_crs)) { - stop("Parameter 'target_crs' must be a character string.") - } - } # check_valid if (!is.logical(check_valid)) { stop("Parameter 'check_valid' must be a logical value.") @@ -203,50 +226,55 @@ ShapeToMask <- function(shp_file, ref_grid, # Step 1: Load the shapefile shp <- sf::st_read(shp_file) # class sf - if (!is.null(target_crs)) { - transformed_shapefile <- st_transform(shp, crs = target_crs) - shp <- transformed_shapefile + shp_0 <- shp + + if (units == 'degrees') { + shp <- sf::st_transform(shp, 4326) + } else if (units == 'meters') { + shp <- sf::st_transform(shp, 3857) } + + shp_crs <- sf::st_crs(shp) NUTS_ID <- ADM1_PCODE <- ISO <- NULL CNTR_CODE <- NUTS_NAME <- ADM0_EN <- ADM1_EN <- Name <- LEVL_CODE <- NULL if (!is.null(reg_ids)) { ## Method 1: Directly use IDs - if (!is.null(shp_col_name_ids)) { - if (shp_col_name_ids %in% names(shp)) { - shp <- subset(shp, get(shp_col_name_ids) %in% reg_ids) + if (!is.null(id_shape_col)) { + if (id_shape_col %in% names(shp)) { + shp <- subset(shp, get(id_shape_col) %in% reg_ids) } else { stop("Shape system name not found in shapefile names.") } } else if (shp_system == "NUTS") { shp <- subset(shp, NUTS_ID %in% reg_ids) - shp_col_name_ids <- "NUTS_ID" + id_shape_col <- "NUTS_ID" } else if (shp_system == "ADM") { shp <- subset(shp, ADM1_PCODE %in% reg_ids) - shp_col_name_ids <- "ADM1_PCODE" + id_shape_col <- "ADM1_PCODE" } else if (shp_system == "GADM") { shp <- subset(shp, ISO %in% reg_ids) - shp_col_name_ids <- "ISO" + id_shape_col <- "ISO" } else { stop("shp_system ", shp_system, " is not defined yet.") } } else if (!is.null(reg_names)) { - shp_col_name_ids <- NULL + id_shape_col <- NULL ## Method 2: Use country code & region name for (cntr_i in 1:length(reg_names)) { if (shp_system == "NUTS") { tmp <- subset(shp, CNTR_CODE == names(reg_names)[cntr_i]) tmp <- subset(tmp, NUTS_NAME %in% reg_names[[cntr_i]]) - shp_col_name_ids <- 'NUTS_NAME' + id_shape_col <- 'NUTS_NAME' } else if (shp_system == "ADM") { tmp <- subset(shp, ADM0_EN == names(reg_names)[cntr_i]) tmp <- subset(tmp, ADM1_EN %in% reg_names[[cntr_i]]) - shp_col_name_ids <- 'ADM1_EN' + id_shape_col <- 'ADM1_EN' } else if (shp_system == "GADM") { tmp <- subset(shp, Name %in% reg_names) - shp_col_name_ids <- 'Name' + id_shape_col <- 'Name' } if (cntr_i == 1) { shp_tmp <- tmp @@ -261,7 +289,7 @@ ShapeToMask <- function(shp_file, ref_grid, } } - # Step 2: Use the reference file to get lat and lon + # Step (2.1): Use the reference file to get lat and lon if (all(tools::file_ext(ref_grid) == 'nc')) { ## Method 1: ref_grid is a netCDF file if (is.null(lat_dim) | is.null(lon_dim)) { @@ -278,29 +306,55 @@ ShapeToMask <- function(shp_file, ref_grid, lon <- ref_grid[[lon_dim]] } - ## Create data frame & sp class for ref grid - ref.df <- data.frame(data = 0, - lon = rep(lon, times = length(lat)), - lat = rep(lat, each = length(lon))) - coord <- as.matrix(data.frame(x = ref.df$lon, y = ref.df$lat)) - - xy.sfg <- sf::st_multipoint(coord) - xy.sfc <- sf::st_sfc(xy.sfg) - # Assign crs of original shapefile - if (!is.null(target_crs)) { - st_crs(xy.sfc) <- sf::st_crs(target_crs) #initial_crs # asign crs of original shapefile - xy.sfc <- sf::st_transform(xy.sfc, st_crs(shp)) + # Set degrees longitudes to -180 to 180 + lon <- ifelse(lon > 180, lon - 360, lon) + order_idx <- order(lon) + lon <- lon[order_idx] + + # Step (2.2): Create the grid + if (compute_area_coverage) { + locations <- array(1:(length(lon)*length(lat)), c(lon = length(lon), + lat = length(lat))) + # Build data dataframe + lonlat_df <- data.frame(lon = rep(as.vector(lon), length(lat)), + lat = sort(rep(as.vector(lat), length(lon)), decreasing = TRUE)) + + data_df <- lonlat_df %>% + mutate(dat = as.vector(locations)) + + lonlat_df_ori <- NULL + + # NOTE: if target_proj = "ESRI:54030", Nord3v2 has different behavior from hub and ws!! + data_df <- st_as_sf(data_df, coords = c("lon", "lat"), crs = shp_crs) + data_df <- st_transform(data_df, crs = shp_crs) + data_df <- data_df %>% + mutate(long = st_coordinates(data_df)[, 1], + lat = st_coordinates(data_df)[, 2]) + xy.sfc <- polygonize(lonlat_df = lonlat_df, data_df = data_df, + lon = lon, lat = lat, target_proj = shp_crs, + original_proj = shp_crs) + region <- TRUE } else { - st_crs(xy.sfc) <- sf::st_crs(shp) - } + ref.df <- data.frame(data = 0, + lon = rep(lon, times = length(lat)), + lat = rep(lat, each = length(lon))) + + ref.df <- modifyLonsTo180(units, ref.df, order_idx) + coord <- as.matrix(data.frame(x = ref.df$lon, y = ref.df$lat)) + + xy.sfg <- sf::st_multipoint(coord) + xy.sfc <- sf::st_sfc(xy.sfg) + # Assign crs of original shapefile + st_crs(xy.sfc) <- sf::st_crs(shp) # asign crs of shapefile - # Step 3: Create mask - if (check_valid) { - xy.sfc <- st_make_valid(xy.sfc) - shp <- st_make_valid(shp) + # Check valid + if (check_valid) { + xy.sfc <- st_make_valid(xy.sfc) + shp <- st_make_valid(shp) + } } - ## Loop through each shp region + ## Step (3): Loop through each shp region to create the mask if (region) { cfun <- function(a, b) { if (length(dim(a)) == 2) { @@ -320,16 +374,18 @@ ShapeToMask <- function(shp_file, ref_grid, if (is.null(ncores)) { mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun') %do% .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, - xy.sfc = xy.sfc, find_min_dist = find_min_dist, - shp_col_name_ids = shp_col_name_ids, - max_dist = max_dist, region = region, ...) + xy.sfc = xy.sfc, compute_area_coverage = compute_area_coverage, + find_min_dist = find_min_dist, + id_shape_col = id_shape_col, + max_dist = max_dist, region = region, shp_system = shp_system, ...) } else { registerDoParallel(ncores) - mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages='sf') %dopar% + mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages = c('sf', 'dplyr')) %dopar% .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, - xy.sfc = xy.sfc, find_min_dist = find_min_dist, - shp_col_name_ids = shp_col_name_ids, - max_dist = max_dist, region = region, ...) + xy.sfc = xy.sfc, compute_area_coverage = compute_area_coverage, + find_min_dist = find_min_dist, + id_shape_col = id_shape_col, + max_dist = max_dist, region = region, shp_system = shp_system, ...) registerDoSEQ() } if (region) { @@ -338,78 +394,271 @@ ShapeToMask <- function(shp_file, ref_grid, } else { names(dim(mask)) <- c(lon_dim, lat_dim) } - + # Step 4: Add attributes - # attr(mask, lon_dim) <- lon - # attr(mask, lat_dim) <- lat - # if (shp_system == "NUTS") { - # attr(mask, "index") <- as.list(shp$NUTS_ID) - # } else if (shp_system == "ADM") { - # attr(mask, "index") <- as.list(shp$ADM1_PCODE) - # } else if (shp_system == "GADM") { - # attr(mask, "index") <- as.list(shp$ISO) - # } - # names(attr(mask, "index")) <- 1:nrow(shp) - - # ## Return all the info from shp - # attr(mask, "shapefile") <- attributes(shp) - + attr(mask, lon_dim) <- lon + attr(mask, lat_dim) <- lat + attr(mask, "index") <- 1:nrow(shp) + ## Return all the info from shp + attr(mask, "shapefile") <- attributes(shp) + + # Generate attributes related with the shape file + shape_attrs = list() + + for (i in 1:nrow(shp)) { + if (!is.null(shp_system)) { + if (is.null(id_shape_col) && is.null(name_shape_col)) { + if (shp_system == "NUTS") { + id_aux = shp$NUTS_ID[i] + name_aux = shp$NUTS_NAME[i] + } else if (shp_system == "ADM") { + id_aux = shp$ADM1_PCODE[i] + name_aux = shp$ADM1_NAME[i] + } else if (shp_system == "GADM") { + id_aux = shp$ISO[i] + name_aux = shp$Name[i] + } else { + id_aux = NULL + name_aux = NULL + } + } else{ + id_aux = id_shape_col + name_aux = name_shape_col + } + } + + entry <- list( + id = id_aux, + name = name_aux, + shape_val = i + ) + shape_attrs <- append(shape_attrs, list(entry)) + } + + # Step 5: Save to NetCDF + if (!is.null(fileout)) { + # Lat Lon Max Min + lat_list <- attr(mask, 'lat') + lon_list <- attr(mask, 'lon') + + transformToNc (mask, lat_list, lon_list, NULL, fileout, units, shape_attrs, lat_dim, lon_dim) + } return(mask) } -.shapetomask <- function(shp, n, lon, lat, xy.sfc, find_min_dist = FALSE, - shp_col_name_ids = NULL, max_dist = 50, - region = FALSE, ...) { - mask <- array(0, dim = c(length(lon), length(lat))) +.shapetomask <- function(shp, n, lon, lat, xy.sfc, compute_area_coverage = FALSE, + find_min_dist = FALSE, + id_shape_col = NULL, max_dist = 50, + region = FALSE, shp_system = 'NUTS', ...) { shpi <- shp[n, ] - # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may - # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). - shp_pol <- sf::st_intersection(x = xy.sfc, y = shpi, ...) - tmp_coords <- sf::st_coordinates(shp_pol)[, 1:2] - if (length(tmp_coords) == 0) { - dim(tmp_coords) <- NULL - } else if (is.null(dim(tmp_coords))) { - tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords))) - } - if (!is.null(dim(tmp_coords))) { - - # polygon_instersection - for (ii in 1:nrow(tmp_coords)) { - # pt_x <- which(lon == tmp_coords[ii, 1]) - # pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) - if (!region) { - # min(abs(lon - tmp_coords[ii, 1])) - # min(abs(lat - tmp_coords[ii, 2])) - mask[which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- n - } else { - mask[which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 - } + if (compute_area_coverage) { + mask <- xy.sfc %>% + dplyr::mutate(int = areacov(geometry, shpi)) %>% + dplyr::arrange(value) %>% + dplyr::pull(int) + dim(mask) <- c(length(lon), length(lat)) + } else { + mask <- array(0, dim = c(length(lon), length(lat))) + # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may + # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). + shp_pol <- sf::st_intersection(x = xy.sfc, y = shpi, ...) + tmp_coords <- sf::st_coordinates(shp_pol)[, 1:2] + if (length(tmp_coords) == 0) { + dim(tmp_coords) <- NULL + } else if (is.null(dim(tmp_coords))) { + tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords))) } - } else if (find_min_dist) { - x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,1] - y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,2] - dist <- sqrt((xy.sfc[,1] - x.centroid.shpi)**2 + (xy.sfc[,2] - y.centroid.shpi)**2) - tmp_coords <- array(xy.sfc[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) - colnames(tmp_coords) <- c('X', 'Y') - if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & - any(round(lon,2) == round(tmp_coords[1,1],2))) ) { - if (length(dim(mask)) == 2) { - mask[which.min(abs(lon - tmp_coords[, 1])), - which.min(abs(lat - tmp_coords[, 2]))] <- n + if (!is.null(dim(tmp_coords))) { + + # polygon_instersection + for (ii in 1:nrow(tmp_coords)) { + # pt_x <- which(lon == tmp_coords[ii, 1]) + # pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) + if (!region) { + # min(abs(lon - tmp_coords[ii, 1])) + # min(abs(lat - tmp_coords[ii, 2])) + mask[which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- n # AQUI ES DONDE ASOCIA EL N A CADA VALOR DEL SHAPE PROBABLEMENTE RAUL + } else { + mask[which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 # SI SOLO HAY UNA REGION O NO HAY QUE DIVIDIR POR REGIONES + } + } + } else if (find_min_dist) { + x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,1] + y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,2] + dist <- sqrt((xy.sfc[,1] - x.centroid.shpi)**2 + (xy.sfc[,2] - y.centroid.shpi)**2) + tmp_coords <- array(xy.sfc[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) + colnames(tmp_coords) <- c('X', 'Y') + if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & + any(round(lon,2) == round(tmp_coords[1,1],2))) ) { + if (length(dim(mask)) == 2) { + mask[which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- n + } else { + mask[which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- 1 + } + # warning(paste0('The reference grid has no intersection with region ', + # ifelse(is.character(id_shape_col), shpi[[id_shape_col]], paste0('n° ', n)), + # ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], + # ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) } else { - mask[which.min(abs(lon - tmp_coords[, 1])), - which.min(abs(lat - tmp_coords[, 2]))] <- 1 + # warning(paste0('The reference grid has no intersection with region ', + # ifelse(is.character(id_shape_col), shpi[[id_shape_col]], paste0('n° ', n)))) } - # warning(paste0('The reference grid has no intersection with region ', - # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)), - # ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], - # ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) - } else { - # warning(paste0('The reference grid has no intersection with region ', - # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)))) } } + + # Flip the matrix when the area_coverage is TRUE + if (compute_area_coverage && grepl("NUTS", shp_system)) { + mask <- mask[, ncol(mask):1] # Flip in the horizontal axis + # mask <- mask[nrow(mask):1, ] # Flip in vertical axis + } + return(mask) -} \ No newline at end of file +} + +# Function to create polygons from VizRobinson code +polygonize <- function(lonlat_df, data_df, lon, lat, target_proj, + original_proj) { + + # Calculate polygon points from regular lat/lon + # NOTE: The original grid must be regular grid with same space + d_lon <- abs(lon[2] - lon[1]) / 2 + d_lat <- abs(lat[2] - lat[1]) / 2 + lon_poly <- lat_poly <- rep(NA, 4 * dim(lonlat_df)[1]) + for (ii in 1:dim(lonlat_df)[1]) { + lon_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lon[ii] - d_lon, lonlat_df$lon[ii] + d_lon, + lonlat_df$lon[ii] + d_lon, lonlat_df$lon[ii] - d_lon) + lat_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lat[ii] - d_lat, lonlat_df$lat[ii] - d_lat, + lonlat_df$lat[ii] + d_lat, lonlat_df$lat[ii] + d_lat) + } + + # To prevent out-of-global lon + # lon_poly[which(lon_poly >= 180)] <- 179.9 + # lon_poly[which(lon_poly < -180)] <- -180 + # To prevent out-of-global lat + lat_poly[which(lat_poly > 90)] <- 90 + lat_poly[which(lat_poly < -90)] <- -90 + + lonlat_df <- data.frame(lon = lon_poly, lat = lat_poly) + # Transfer lon/lat to projection + proj_lonlat <- st_as_sf(lonlat_df, coords = c("lon", "lat"), crs = original_proj) + # NOTE: if target_proj = "ESRI:54030", on Nord3v2, st_transform has lon and lat swapped! + proj_lonlat <- st_transform(proj_lonlat, crs = target_proj) + lonlat_df_proj <- st_coordinates(proj_lonlat) + + # Use id to create groups for each polygon + ids <- factor(paste0("id_", 1:dim(data_df)[1])) + values <- data.frame(id = ids, + value = data_df$dat) + positions <- data.frame(id = rep(ids, each = 4), + x = lonlat_df_proj[, 1], + y = lonlat_df_proj[, 2]) + datapoly <- merge(values, positions, by = "id") + datapoly <- st_as_sf(datapoly, coords = c("x", "y"), crs = target_proj) + + datapoly <- datapoly %>% + dplyr::group_by(.data$id) %>% + dplyr::summarise(geometry = st_combine(geometry)) %>% + # dplyr::summarise() %>% # NOTE: VERY SLOW if plot global + dplyr::mutate(value = values[order(values$id), ]$value) %>% + st_cast("POLYGON") %>% + st_convex_hull() # maintain outer polygen (no bowtie shape) + + return(datapoly) +} + +# Function to compute the coverage area ratio between the grid and the shapefile +areacov <- function(grid, shp) { + shp_int_grid <- sf::st_intersection(x = grid, y = shp) + idx <- attributes(shp_int_grid)$idx + i <- 0 + fr <- array(0, length(grid)) + for (idx_i in idx[, 1]) { + i <- i + 1 + area_grid <- sf::st_area(grid[idx_i]) + area_int <- sf::st_area(shp_int_grid[i]) + fr[idx_i] <- round(as.numeric(area_int/area_grid), digits = 6) + } + return(fr) +} + +transformToNc <- function(values, lat=NULL, lon=NULL, time=NULL, out_file, crs='degrees', shape_values = NULL, lat_dim, lon_dim) { + # Take the name of lat lon dimensions + if (is.null(lon_dim)) { + possible_lon_names <- c("lon", "longitude", "x", "i", "nav_lon") + } else { + possible_lon_names <- c(lon_dim) + } + + if (is.null(lat_dim)) { + possible_lat_names <- c("lat", "latitude" , "y", "j", "nav_lat") + } else { + possible_lat_names <- c(lat_dim) + } + + # Change latitude and longitude names to lat, lon + names(dim(values)) <- gsub(paste(possible_lon_names, collapse = "|"), "lon", names(dim(values)), ignore.case = TRUE) + names(dim(values)) <- gsub(paste(possible_lat_names, collapse = "|"), "lat", names(dim(values)), ignore.case = TRUE) + + # Add coordinates + if (!is.null(lon) && length(lon) > 0) { + dim(lon) <- length(lon) + metadata <- list(lon = list(units = crs)) + attr(lon, 'variables') <- metadata + names(dim(lon)) <- 'lon' # rigth_lon_name + } + + if (!is.null(lat) && length(lat) > 0) { + dim(lat) <- length(lat) + metadata <- list(lat = list(units = crs)) + attr(lat, 'variables') <- metadata + names(dim(lat)) <- 'lat' # rigth_lat_name + } + + # Add the projection to the file + if (crs == 'degrees') { + crs = 'EPSG4326' + } else { + crs = 'EPSG3857' + } + + # Format the attributes before introducing to the file + formatted_string <- paste(shape_values, collapse="") + formatted_string <- gsub("\\(", "", formatted_string) + formatted_string <- gsub("\\)", "\n", formatted_string) + formatted_string <- gsub("list", "", formatted_string) + formatted_string <- gsub('\\"', "", formatted_string) + + # Add the atributes to the file + attrs <- list(global_attrs = list( + crs = crs, + shapes_values = formatted_string + )) + attributes(values) <- c(attributes(values), attrs) + + # Export to Nc + ArrayToNc(list(values, lon, lat), out_file) + +} + +modifyLonsTo180 <- function(units, df, order_idx) { + + print('Check errors 1:') + # browser() + + if (units == 'degrees'){ + my_list <- c("lon", "longitude", "x") + data <- df[colnames(df)[1]] + name_lon <- my_list[tolower(my_list) %in% tolower(colnames(df))] + lon_data <- df[[name_lon]] + data_reorder <- df[[colnames(df)[1]]] + + df[colnames(df)[1]] <- data_reorder[order_idx] + } + + return (df) +} + diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md new file mode 100644 index 0000000000000000000000000000000000000000..b64af943fbf0a1546d389ba51970edf664404df3 --- /dev/null +++ b/inst/doc/usecase.md @@ -0,0 +1,6 @@ +# Use case and example scripts + +In this document, you will find example scripts of the package. + +1. **Scripts with function examples** + 1. [Create an 's2dv_cube'](inst/doc/usecase/ex1_ShapeToMask.R) diff --git a/inst/doc/usecase/ex1_ShapeToMask.R b/inst/doc/usecase/ex1_ShapeToMask.R new file mode 100644 index 0000000000000000000000000000000000000000..1fe5d1f84a91519d7c6f4e26b4556afdc61905dc --- /dev/null +++ b/inst/doc/usecase/ex1_ShapeToMask.R @@ -0,0 +1,186 @@ +#******************************************************************************* +# Script to test ShapeToMask +# Eva Rifà Rovira +#******************************************************************************* +devtools::load_all("/esarchive/scratch/erifarov/git/esviz") +#------------------------------------------------------------------------------- +setwd("/esarchive/scratch/erifarov/rpackages/esviz/shapefile/dev-shapetomask-area/out-testing") + +################################################################################ +# Shapefiles +colombia_cajamarca <- '/esarchive/scratch/erifarov/rpackages/esviz/shapefile/dev-shapetomask-area/scripts-alba/spatial_aggregation_alba/shp_test/colombia_cajamarca.shp' +colombia_sanvicent <- '/esarchive/scratch/erifarov/rpackages/esviz/shapefile/dev-shapetomask-area/scripts-alba/spatial_aggregation_alba/shp_test/colombia_sanvicentedelcaguan.shp' +NUTS_EU <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', + 'NUTS_RG_60M_2021_4326.shp') +# st_read +sf_colombia_cajamarca <- st_read(colombia_cajamarca) +sf_colombia_sanvicent <- st_read(colombia_sanvicent) +sf_NUTS_EU <- st_read(NUTS_EU) + +# reference grid: dataset = 'era5land' +lat <- array(seq(6, -1, -0.1), c(lat = 71)) +lon <- array(seq(-80, -70, 0.1), c(lon = 101)) +ref_grid_era5land_sample <- list(lat = lat, lon = lon) +# Data from era5land +load("/esarchive/scratch/erifarov/rpackages/esviz/shapefile/dev-shapetomask-area/scripts-alba/spatial_aggregation_alba/data/data.era5land.RData") +ref_grid_era5land <- list(latitude = attr(data.era5land, "Variable")$dat$latitude, + longitude = attr(data.era5land, "Variable")$dat$longitude) +################################################################################ +# Function to test plots +plot_shp <- function(fileout, x, y = NULL) { + png(file = fileout) + plot(x) + if (!is.null(y)) { + plot(y, add = TRUE) + } + dev.off() +} +################################################################################ +#----------------------------------------------------- +# Tests 1: colombia_cajamarca (only 1 region) +#----------------------------------------------------- +source("/esarchive/scratch/erifarov/git/esviz/R/ShapeToMask.R") +# (1.1) Centroid method + +# NOTE: We set shp_system = NULL because we don't want to subset any region +era5land_cajamarca_cntr <- ShapeToMask(shp_file = colombia_cajamarca, + shp_system = NULL, + ref_grid = ref_grid_era5land, region = TRUE, + find_min_dist = FALSE) +# (1.2) Area method +# target_crs = NULL +era5land_cajamarca_area <- ShapeToMask(shp_file = colombia_cajamarca, + ref_grid = ref_grid_era5land, + shp_system = NULL, + compute_area_coverage = TRUE, + region = TRUE) +# Error: +# Error in wk_handle.wk_wkb(wkb, s2_geography_writer(oriented = oriented, : +# Loop 0 is not valid: Edge 1 crosses edge 3 + +# Solution: Set sf::sf_use_s2(FALSE) +sf::sf_use_s2(FALSE) +era5land_cajamarca_area <- ShapeToMask(shp_file = colombia_cajamarca, + ref_grid = ref_grid_era5land, + shp_system = NULL, + compute_area_coverage = TRUE, + region = TRUE) +sf::sf_use_s2(TRUE) + +# Compare results different solutions +sum(era5land_cajamarca_area) +# [1] 4.140404 +sum(era5land_cajamarca_cntr) +[1] 4 + +# Positions +which(era5land_cajamarca_cntr != 0) +# [1] 1561 1662 1663 1762 +which(era5land_cajamarca_area != 0) +# [1] 1460 1560 1561 1562 1661 1662 1663 1762 1763 1863 1864 + +all(which(era5land_cajamarca_cntr != 0) %in% which(era5land_cajamarca_area != 0)) +# [1] TRUE + +# Apply the results to the data + +mean_extract <- function(data_cube, mask) { + locations <- which(mask != 0) + res <- mean(data_cube[locations]) + return(res) +} + +aggregated_area <- Apply(data = list(data_cube = data.era5land, mask = era5land_cajamarca_area), + target_dims = list(data_cube = c('longitude', 'latitude'), + mask = c('longitude', 'latitude')), + fun = mean_extract)$output1 +aggregated_ctr <- Apply(data = list(data_cube = data.era5land, mask = era5land_cajamarca_cntr), + target_dims = list(data_cube = c('longitude', 'latitude'), + mask = c('longitude', 'latitude')), + fun = mean_extract)$output1 +dim(aggregated_area) +# time syear region +# 12 1 1 +summary(aggregated_area) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 11.58 11.98 12.24 12.20 12.47 12.63 +summary(aggregated_ctr) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 11.49 11.87 12.12 12.09 12.33 12.50 + +# Test (1.3): Raster method with exactextractr +# Source the function +source("/esarchive/scratch/erifarov/rpackages/esviz/shapefile/dev-shapetomask-area/scripts-alba/spatial_aggregation_alba/functions/raster_extract_to_array.R") +raster_era5land_cajamarca <- raster.extract(data = data.era5land, + shp = sf_colombia_cajamarca) +dim(raster_era5land_cajamarca) +# region syear time ensemble +# 10 1 12 1 + +summary(raster_era5land_cajamarca) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 9.124 11.092 11.788 11.978 13.026 15.151 +################################################################################ + +#----------------------------------------------------- +# Tests 1: colombia_cajamarca (only 1 region) +#----------------------------------------------------- + +# Example ShapeToMask: NUTS +shp_file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', + 'NUTS_RG_60M_2021_4326.shp') +ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) +NUTS_name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) + +# Mask computed with area coverage +sf::sf_use_s2(FALSE) +mask_area <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, + compute_area_coverage = TRUE, reg_names = NUTS_name, + fileout = "mask_area.nc") +mask_cntr <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, + compute_area_coverage = FALSE, reg_names = NUTS_name, + fileout = "mask_cntr.nc") +dim(mask_area) +# lon lat region +# 61 91 4 +summary(mask_area) + +# Mask computed with centroid approach +mask_cntr <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, region = T, + compute_area_coverage = FALSE, reg_names = NUTS_name) +dim(mask_cntr) +# lon lat region +# 61 91 4 + +################################################################################ + +#***************************************************************************** +# Examples function: ShapeToMask +# Source: https://earth.bsc.es/gitlab/es/esviz/-/blob/main/R/ShapeToMask.R +# An-Chi & Eva (2023) +#***************************************************************************** +# Exmple (1): NUTS +shp_file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', + 'NUTS_RG_60M_2021_4326.shp') +ref_grid <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', + 'tas_f1h/tas_201006.nc') +# ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) + +NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) +NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) +mask1 <- ShapeToMask(shp_file, ref_grid, reg_ids = NUTS.id) +mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, + reg_names = NUTS.name) + +# Exmple (2): GADM +shp_file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" +ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', + 'tas_f6h/tas_20191212.nc') +GADM.id <- c("ESP", "ITA") +GADM.name <- c("Spain", "Italy") +mask1 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, + reg_ids = GADM.id, shp_system = "GADM") +mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, + reg_names = GADM.name, shp_system = "GADM") + +################################################################################ \ No newline at end of file diff --git a/man/ShapeToMask.Rd b/man/ShapeToMask.Rd index 5f4a6600baeccb14e0cda51d4c8c31ca4d561d5e..4684214356b79354a6934435fff7f6dfce1ece36 100644 --- a/man/ShapeToMask.Rd +++ b/man/ShapeToMask.Rd @@ -7,6 +7,7 @@ ShapeToMask( shp_file, ref_grid, + compute_area_coverage = FALSE, shp_system = "NUTS", reg_names = NULL, reg_ids = NULL, @@ -15,11 +16,11 @@ ShapeToMask( lat_dim = NULL, lon_dim = NULL, region = FALSE, - target_crs = NULL, check_valid = FALSE, find_min_dist = FALSE, max_dist = 50, ncores = NULL, + fileout = NULL, ... ) } @@ -30,6 +31,12 @@ ShapeToMask( data. Either (1) a netCDF file or (2) a list of lon and lat to provide the reference grid points. It is NULL by default.} +\item{compute_area_coverage}{A logical value indicating the method to find +the intersection of the reference grid and the shapefile. When it is TRUE, +the method used is the calculation of the area coverage fraction of +intersection. If it is FALSE, the method used is searching if the centroid +of the grid cell falls inside the shapefile or not. It is FALSE by default.} + \item{shp_system}{A character string containing the Shapefile System Database Name used to subset the shapefile into regions by using parameters 'reg_ids' or 'reg_names'. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. When it @@ -65,9 +72,6 @@ with the following possible names: 'lon', 'longitude', 'x', 'i' and \item{region}{A logical value indicating if we want a dimension for the regions in the resulting mask array. It is FALSE by default.} -\item{target_crs}{A character string indicating the target 'Coordinate -Reference System'.} - \item{check_valid}{A logical value that when it is TRUE it uses the function 'sf::st_make_valid' applied to the shapefile and to the coordinates.} @@ -83,6 +87,9 @@ and the reference grid.} \item{ncores}{The number of parallel processes to spawn for the use for parallel computation in multiple cores.} +\item{fileout}{A character string of the path to save the NetCDF mask. If not +specified (default), only the mask array will be returned.} + \item{...}{Arguments passed on to 's2_options' in function 'st_intersection'. See 's2 package'.} } diff --git a/man/VizRobinson.Rd b/man/VizRobinson.Rd index 48687f1fda6332c03701794280dee8e858020e40..b0f0e817e9156db1a5514d591c0827575a5e6dd4 100644 --- a/man/VizRobinson.Rd +++ b/man/VizRobinson.Rd @@ -10,7 +10,7 @@ VizRobinson( lat, lon_dim = NULL, lat_dim = NULL, - target_proj = "ESRI:54030", + target_proj = NULL, drawleg = "bar", style = "point", dots = NULL, @@ -62,9 +62,11 @@ of ascending or descending order.} \code{esviz:::.KnownLatNames}. The default value is NULL.} \item{target_proj}{A character string indicating the target projection. It -should be a valid crs string. The default projection is Robinson -(ESRI:54030). Note that the character string may work differently depending -on PROJ and GDAL module version.} +should be a valid crs string. The default projection is Robinson: +"ESRI:54030". Note that the character string may work differently depending +on PROJ and GDAL module version. If package version 'sf' is lower than +"1.0.10" and an error appears regarding the target crs, you can try with +numeric crs (e.g. target_proj = 54030).} \item{drawleg}{A character string indicating the legend style. It can be 'bar' (color bar by \code{ColorBarContinuous()}), 'ggplot2' (discrete legend diff --git a/tests/testthat/test-ShapeToMask.R b/tests/testthat/test-ShapeToMask.R index 550f2ee008d8dca00fe7307cda69c071ac04d049..e99bee9a102d7101b0b55edeaffb7e7071cbd428 100644 --- a/tests/testthat/test-ShapeToMask.R +++ b/tests/testthat/test-ShapeToMask.R @@ -46,7 +46,15 @@ test_that("1. Input checks", { ShapeToMask(shp_file1, lat_dim = 1, lon_dim = 'lat'), "Parameter 'lat_dim' must be a character string." ) - # shp_system, reg_ids, reg_names, shp_col_name_ids + expect_error( + ShapeToMask(shp_file1, ref_grid1, reg_ids = NUTS_id1, id_shape_col = 1, name_shape_col = 'ESP'), + "Parameter 'id_shape_col' must be a character string." + ) + expect_error( + ShapeToMask(shp_file1, ref_grid1, reg_ids = NUTS_id1, name_shape_col = 1, id_shape_col = '4BJd'), + "Parameter 'name_shape_col' must be a character string." + ) + # shp_system, reg_ids, reg_names, sname_shape_col # reg_level # region # target_crs @@ -87,10 +95,24 @@ test_that("2. Output", { sum(mask3), 105 ) + # Test a single region mask3_1 <- ShapeToMask(shp_file1, ref_grid = ref_grid1_1, - shp_col_name_ids = "NUTS_ID", lon_dim = 'lon', + name_shape_col = "NUTS_ID", lon_dim = 'lon', lat_dim = 'lat', reg_ids = "DE149", region = TRUE) + + mask3_2 <- ShapeToMask(shp_file1, ref_grid = ref_grid1_1, + name_shape_col = "NUTS_ID", lon_dim = 'lon', + lat_dim = 'lat', reg_ids = "DE149", compute_area_coverage = TRUE) + expect_equal( + dim(mask3_2), + c(lon = 61, lat = 91, region = 1) + ) + expect_equal( + sum(mask3_2), + 0 + ) + # Test GADM mask4 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, reg_ids = GADM_id2, shp_system = "GADM") @@ -100,7 +122,7 @@ test_that("2. Output", { ) expect_equal( sum(mask4), - 48 + 64 ) mask5 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, reg_names = GADM_name2, shp_system = "GADM", @@ -111,6 +133,42 @@ test_that("2. Output", { ) expect_equal( sum(mask5), - 56 + 88 + ) + mask6 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, + reg_names = GADM_name2, shp_system = "GADM", + region = TRUE, compute_area_coverage = TRUE) + expect_equal( + dim(mask6), + c(longitude = 240, latitude = 121, region = 4) + ) + expect_equal( + as.integer(sum(mask6)), + 77 ) -}) \ No newline at end of file + + mask7 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, compute_area_coverage = TRUE, + reg_names = GADM_name2, shp_system = "GADM", fileout="test_GADM_1_5.nc") + + expect_equal( + dim(mask7), + c(longitude = 240, latitude = 121, region = 4) + ) + expect_equal( + as.integer(sum(mask7)), + 77 + ) + + mask7 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, compute_area_coverage = TRUE, + reg_names = GADM_name2, shp_system = "GADM", fileout="test_GADM_1_5.nc", ncores=2) + + expect_equal( + dim(mask7), + c(longitude = 240, latitude = 121, region = 4) + ) + expect_equal( + as.integer(sum(mask7)), + 77 + ) + +})