diff --git a/DESCRIPTION b/DESCRIPTION index 88e38bd4b91c0bd1a7ec3722747fbcb07964a827..09c1dbc2ee01809acd2a07d9bddbea541c9e653a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,9 @@ Imports: reshape2, scales, stats, - utils + utils, + foreach, + doParallel Suggests: testthat License: GPL-3 diff --git a/NAMESPACE b/NAMESPACE index a5dbed7bdec09f2186ac294fd965a48d432292cb..9139e77251726d2938431ba60c824aff6f61d8e5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(VizWeeklyClim) import(RColorBrewer) import(cowplot) import(easyNCDF) +import(foreach) import(ggplot2) import(graphics) import(mapproj) @@ -41,6 +42,7 @@ importFrom(RColorBrewer,brewer.pal) importFrom(data.table,CJ) importFrom(data.table,data.table) importFrom(data.table,setkey) +importFrom(doParallel,registerDoParallel) importFrom(dplyr,group_by) importFrom(dplyr,mutate) importFrom(dplyr,summarise) diff --git a/R/ShapeToMask.R b/R/ShapeToMask.R index 7bf8de45efdcf6bea8db58f698b77a3de36531b3..248775eec5371271d2e997867145ae512fae951b 100644 --- a/R/ShapeToMask.R +++ b/R/ShapeToMask.R @@ -5,7 +5,7 @@ #'output based on requested region names or IDs. The accepted shapefile #'databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. #'However, the function can use other shapefiles databases with specifying the -#'categories names with the parameter 'shp_system_name'. +#'categories names with the parameter 'shp_col_name_ids'. #' #'To ensure accurate comparison with the shapefile, the function loads a #'reference dataset that provides longitude and latitude information. By @@ -24,7 +24,7 @@ #'@param shp_system A character string containing the Shapefile System Database #' name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to #' 'NUTS' by default. -#'@param shp_system_name A character string indicating the column name of the +#'@param shp_col_name_ids A character string indicating the column name of the #' column in where the specified 'reg_ids' will be taken. #'@param reg_ids A character string indicating the unique ID in shapefile. #' It is NULL by default. @@ -42,7 +42,7 @@ #'@param lon_dim A character string indicating the longitudinal dimension. If it #' is NULL, the longitudinal name will be searched using an internal function #' with the following possible names: 'lon', 'longitude', 'x', 'i' and -#' 'nav_lon'. It is NULL by default. +#' 'nav_lon'. #'@param target_crs A character string indicating the target 'Coordinate #' Reference System'. #'@param region A logical value indicating if we want a dimension for the @@ -56,9 +56,8 @@ #'@param max_dist A numeric value indicating the maximum distance is accepted #' to the closest gridpoint when there is no intersection between the shapefile #' and the reference grid. -#'@param savefile A logical value indicating wether to save the mask array into -#' a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by -#' default. This functionality is not developed yet. +#'@param ncores The number of parallel processes to spawn for the use for +#' parallel computation in multiple cores. #'@param ... Arguments passed on to 's2_options' in function 'st_intersection'. #' See 's2 package'. #' @@ -70,48 +69,135 @@ #'# Exmple (1): NUTS #'shp_file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', #' 'NUTS_RG_60M_2021_4326.shp') -#'# shp_file <- paste0('/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/', -#'# 'tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp') -#'# ref_grid <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/', -#'# 'tas_f6h/tas_20170201.nc') -#'# ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', -#'# 'tas_f6h/tas_20191212.nc') -#'ref_grid <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', -#' 'tas_f1h/tas_201006.nc') -#'# ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) -#' -#'NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) -#'NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) -#'mask1 <- ShapeToMask(shp_file, ref_grid, reg_ids = NUTS.id) -#'mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, -#' reg_names = NUTS.name) -#' -#'# Exmple (2): GADM -#'shp_file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" -#'ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', -#' 'tas_f6h/tas_20191212.nc') -#'GADM.id <- c("ESP", "ITA") -#'GADM.name <- c("Spain", "Italy") -#'mask1 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, -#' reg_ids = GADM.id, shp_system = "GADM") +#'ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) +#'NUTS_name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) #'mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, -#' reg_names = GADM.name, shp_system = "GADM") +#' reg_names = NUTS_name) #'@import easyNCDF #'@import sf +#'@import foreach +#'@importFrom doParallel registerDoParallel #'@export -ShapeToMask <- function(shp_file, ref_grid = NULL, - shp_system = "NUTS", shp_system_name = NULL, - reg_ids = NULL, reg_names = NULL, +ShapeToMask <- function(shp_file, ref_grid, + shp_system = "NUTS", reg_names = NULL, + reg_ids = NULL, shp_col_name_ids = NULL, reg_level = 3, lat_dim = NULL, lon_dim = NULL, - savefile = FALSE, region = FALSE, target_crs = NULL, + region = FALSE, target_crs = NULL, check_valid = FALSE, find_min_dist = FALSE, - max_dist = 50, ...) { + max_dist = 50, ncores = NULL, ...) { - # NOTE: One region is one number; need to have the option to combine them? # TODO: Suppress warnings? - # TODO: Substitute packages - # TODO: Substitute loop for each region with multiApply - # TODO: Add initial checks + # TODO: Add saving option? + + # Initial checks + # shp_file + if (is.null(shp_file)) { + stop("Parameter 'shp_file' cannot be NULL.") + } + if (!is.character(shp_file)) { + stop("Parameter 'shp_file' must be a character string.") + } + # lon_dim, lat_dim + if (!is.null(lon_dim)) { + if (!is.character(lon_dim)) { + stop("Parameter 'lon_dim' must be a character string.") + } + } + if (!is.null(lat_dim)) { + if (!is.character(lat_dim)) { + stop("Parameter 'lat_dim' must be a character string.") + } + } + # ref_grid + if (is.null(ref_grid)) { + stop("Parameter 'ref_grid' cannot be NULL.") + } + if (!any(is.character(ref_grid) | is.list(ref_grid))) { + stop("Parameter 'ref_grid' must be either a netCDF file or a list of lon and lat.") + } + if (is.character(ref_grid)) { + if (!file.exists(ref_grid)) { + stop("ref_grid file does not exist.") + } + } + if (is.list(ref_grid)) { + if (length(ref_grid) != 2) { + stop("If 'ref_grid' is a list, it must have two items for longitude and latitude.") + } + if (is.null(lat_dim) | is.null(lon_dim)) { + lon_known_names <- c(.KnownLonNames(), 'lons') + lat_known_names <- c(.KnownLatNames(), 'lats') + lon_dim <- lon_known_names[which(lon_known_names %in% names(ref_grid))] + lat_dim <- lat_known_names[which(lat_known_names %in% names(ref_grid))] + + if (identical(lon_dim, character(0)) | identical(lat_dim, character(0))) { + stop("longitude and latitude names are not recognized in 'ref_grid'. ", + "Please specify 'lon_dim' and 'lat_dim'.") + } + } + } + + # shp_system, reg_ids, reg_names, shp_col_name_ids + if (!is.null(shp_system)) { + if (!is.character(shp_system)) { + stop("Parameter 'shp_system' must be a character strinig.") + } + if (all(is.null(reg_ids), is.null(reg_names))) { + stop("Either provide parameter 'reg_ids' or 'reg_names'.") + } + } else if (is.null(shp_col_name_ids)) { + stop("Either provide parameter 'shp_system' or 'shp_col_name_ids'.") + } else { + if (is.null(reg_ids)) { + stop("If 'shp_col_name_ids' is used, parameter 'reg_ids' must be provided.") + } + if (!is.character(shp_col_name_ids)) { + stop("Parameter 'shp_col_name_ids' must be a character strinig.") + } + } + if (all(!is.null(reg_ids), !is.null(reg_names))) { + warning("Only use 'reg_ids' to get the shape region. 'reg_names' is not used.") + } + # reg_level + if (!is.null(reg_level)) { + if (!is.numeric(reg_level)) { + stop("Parameter 'reg_level' must be numeric.") + } + } + # region + if (!is.logical(region)) { + stop("Parameter 'region' must be a logical value.") + } + # target_crs + if (!is.null(target_crs)) { + if (!is.character(target_crs)) { + stop("Parameter 'target_crs' must be a character string.") + } + } + # check_valid + if (!is.logical(check_valid)) { + stop("Parameter 'check_valid' must be a logical value.") + } + # find_min_dist + if (!is.logical(find_min_dist)) { + stop("Parameter 'find_min_dist' must be a logical value.") + } + # max_dist + if (!is.null(max_dist)) { + if (!is.numeric(max_dist)) { + stop("Parameter 'max_dist' must be a numeric.") + } + } + # ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores)) { + stop("Parameter 'ncores' must be numeric.") + } + ncores <- round(ncores) + if (ncores < 2) { + ncores <- NULL + } + } # Step 1: Load the shapefile shp <- sf::st_read(shp_file) # class sf @@ -119,48 +205,46 @@ ShapeToMask <- function(shp_file, ref_grid = NULL, transformed_shapefile <- st_transform(shp, crs = target_crs) shp <- transformed_shapefile } - - if (all(is.null(reg_ids), is.null(reg_names))) { - stop("Either provide parameter 'reg_ids' or 'reg_names'.") - } else if (!is.null(reg_ids)) { + + NUTS_ID <- ADM1_PCODE <- ISO <- NULL + CNTR_CODE <- NUTS_NAME <- ADM0_EN <- ADM1_EN <- Name <- LEVL_CODE <- NULL + + if (!is.null(reg_ids)) { ## Method 1: Directly use IDs - if (!is.null(shp_system_name)) { - if (shp_system_name %in% names(shp)) { - shp <- subset(shp, get(shp_system_name) %in% reg_ids) + if (!is.null(shp_col_name_ids)) { + if (shp_col_name_ids %in% names(shp)) { + shp <- subset(shp, get(shp_col_name_ids) %in% reg_ids) } else { stop("Shape system name not found in shapefile names.") } } else if (shp_system == "NUTS") { shp <- subset(shp, NUTS_ID %in% reg_ids) - shp_system_name <- "NUTS_ID" + shp_col_name_ids <- "NUTS_ID" } else if (shp_system == "ADM") { shp <- subset(shp, ADM1_PCODE %in% reg_ids) - shp_system_name <- "ADM1_PCODE" + shp_col_name_ids <- "ADM1_PCODE" } else if (shp_system == "GADM") { shp <- subset(shp, ISO %in% reg_ids) - shp_system_name <- "ISO" + shp_col_name_ids <- "ISO" } else { stop("shp_system ", shp_system, " is not defined yet.") } - if (!is.null(reg_names)) { - warning("Only use 'reg_ids' to get the shape region. 'reg_names' is not used.") - } } else if (!is.null(reg_names)) { - shp_system_name <- NULL + shp_col_name_ids <- NULL ## Method 2: Use country code & region name for (cntr_i in 1:length(reg_names)) { if (shp_system == "NUTS") { tmp <- subset(shp, CNTR_CODE == names(reg_names)[cntr_i]) tmp <- subset(tmp, NUTS_NAME %in% reg_names[[cntr_i]]) - shp_system_name <- NUTS_NAME + shp_col_name_ids <- 'NUTS_NAME' } else if (shp_system == "ADM") { tmp <- subset(shp, ADM0_EN == names(reg_names)[cntr_i]) tmp <- subset(tmp, ADM1_EN %in% reg_names[[cntr_i]]) - shp_system_name <- ADM1_EN + shp_col_name_ids <- 'ADM1_EN' } else if (shp_system == "GADM") { tmp <- subset(shp, Name %in% reg_names) - shp_system_name <- Name + shp_col_name_ids <- 'Name' } if (cntr_i == 1) { shp_tmp <- tmp @@ -176,42 +260,20 @@ ShapeToMask <- function(shp_file, ref_grid = NULL, } # Step 2: Use the reference file to get lat and lon - if (all(tools::file_ext(ref_grid) == 'nc')) { - if (!file.exists(ref_grid)) { - stop("ref_grid file does not exist.") - } else { - - ## Method 1: ref_grid is a netCDF file - if (is.null(lat_dim) | is.null(lon_dim)) { - var_names <- easyNCDF::NcReadVarNames(ref_grid) - lat_dim <- var_names[which(var_names %in% .KnownLatNames())] - lon_dim <- var_names[which(var_names %in% .KnownLonNames())] - } - latlon <- NcToArray(ref_grid, vars_to_read = c(lat_dim, lon_dim)) - lat <- NcToArray(ref_grid, vars_to_read = lat_dim)[1, ] - lon <- NcToArray(ref_grid, vars_to_read = lon_dim)[1, ] + ## Method 1: ref_grid is a netCDF file + if (is.null(lat_dim) | is.null(lon_dim)) { + var_names <- easyNCDF::NcReadVarNames(ref_grid) + lat_dim <- var_names[which(var_names %in% .KnownLatNames())] + lon_dim <- var_names[which(var_names %in% .KnownLonNames())] } + latlon <- easyNCDF::NcToArray(ref_grid, vars_to_read = c(lat_dim, lon_dim)) + lat <- easyNCDF::NcToArray(ref_grid, vars_to_read = lat_dim)[1, ] + lon <- easyNCDF::NcToArray(ref_grid, vars_to_read = lon_dim)[1, ] } else if (is.list(ref_grid)) { ## Method 2: ref_grid is a list of lon and lat - if (length(ref_grid) != 2) { - stop("If 'ref_grid' is a list, it must have two items for longitude and latitude.") - } - if (is.null(lat_dim) | is.null(lon_dim)) { - lon_known_names <- c(.KnownLonNames(), 'lons') - lat_known_names <- c(.KnownLatNames(), 'lats') - lon_dim <- lon_known_names[which(lon_known_names %in% names(ref_grid))] - lat_dim <- lat_known_names[which(lat_known_names %in% names(ref_grid))] - - if (identical(lon_dim, character(0)) | identical(lat_dim, character(0))) { - stop("longitude and latitude names are not recognized in 'ref_grid'. Please specify 'lon_dim' and 'lat_dim'.") - } - } lat <- ref_grid[[lat_dim]] lon <- ref_grid[[lon_dim]] - - } else { - stop("Parameter 'ref_grid' must be either a netCDF file or a list of lon and lat.") } ## Create data frame & sp class for ref grid @@ -220,8 +282,8 @@ ShapeToMask <- function(shp_file, ref_grid = NULL, lat = rep(lat, each = length(lon))) coord <- as.matrix(data.frame(x = ref.df$lon, y = ref.df$lat)) - xy.sfg <- st_multipoint(coord) - xy.sfc <- st_sfc(xy.sfg) + xy.sfg <- sf::st_multipoint(coord) + xy.sfc <- sf::st_sfc(xy.sfg) # Assign crs of original shapefile if (!is.null(target_crs)) { st_crs(xy.sfc) <- sf::st_crs(target_crs) #initial_crs # asign crs of original shapefile @@ -231,72 +293,48 @@ ShapeToMask <- function(shp_file, ref_grid = NULL, } # Step 3: Create mask - ## Create mask array with 0; 1, 2, etc. will be filled in for each shp region - mask <- array(0, dim = c(length(lon), length(lat))) - names(dim(mask)) <- c(lon_dim, lat_dim) - if (region) { - mask <- array(0, dim = c(nrow(shp), length(lon), length(lat))) - names(dim(mask)) <- c('region', lon_dim, lat_dim) - } - if (check_valid) { xy.sfc <- st_make_valid(xy.sfc) shp <- st_make_valid(shp) } ## Loop through each shp region - for (shp_i in 1:nrow(shp)) { - # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may - # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). - shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ]) - # shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ], ...) - tmp_coords <- st_coordinates(shp_pol)[, 1:2] - if (length(tmp_coords) == 0) { - dim(tmp_coords) <- NULL - } else if (is.null(dim(tmp_coords))) { - tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords))) - } - if (!is.null(dim(tmp_coords))) { - - # polygon_instersection - for (ii in 1:nrow(tmp_coords)) { - # pt_x <- which(lon == tmp_coords[ii, 1]) - # pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) - if (length(dim(mask)) == 2) { - # min(abs(lon - tmp_coords[ii, 1])) - # min(abs(lat - tmp_coords[ii, 2])) - mask[which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i - } else { - mask[shp_i, which.min(abs(lon - tmp_coords[ii, 1])), - which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 - } - } - } else if (find_min_dist) { - x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,1] - y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,2] - dist <- sqrt((xy.sfg[,1] - x.centroid.shpi)**2 + (xy.sfg[,2] - y.centroid.shpi)**2) - tmp_coords <- array(xy.sfg[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) - colnames(tmp_coords) <- c('X', 'Y') - if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & - any(round(lon,2) == round(tmp_coords[1,1],2))) ) { - if (length(dim(mask)) == 2) { - mask[which.min(abs(lon - tmp_coords[, 1])), - which.min(abs(lat - tmp_coords[, 2]))] <- shp_i - } else { - mask[shp_i, which.min(abs(lon - tmp_coords[, 1])), - which.min(abs(lat - tmp_coords[, 2]))] <- 1 - } - warning(paste0('The reference grid has no intersection with region ', - ifelse(is.character(shp_system_name), shp[shp_i,][[shp_system_name]], paste0('n° ', shp_i)), - ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], - ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) + if (region) { + cfun <- function(a, b) { + if (length(dim(a)) == 2) { + dims <- c(dim(a), 2) } else { - warning(paste0('The reference grid has no intersection with region ', - ifelse(is.character(shp_system_name), shp[shp_i,][[shp_system_name]], paste0('n° ', shp_i)))) + dims <- c(dim(a)[1:2], dim(a)[length(dim(a))]+1) } + return(array(c(a,b), dim = dims)) + } + } else { + cfun <- function(a, b) { + a[which(b != 0)] <- b[which(b != 0)] + return(a) } } + shp_i <- NULL + if (is.null(ncores)) { + mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun') %do% + .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, + xy.sfc = xy.sfc, find_min_dist = find_min_dist, + shp_col_name_ids = shp_col_name_ids, + max_dist = max_dist, region = region, ...) + } else { + registerDoParallel(ncores) + mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages='sf') %dopar% + .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, + xy.sfc = xy.sfc, find_min_dist = find_min_dist, + shp_col_name_ids = shp_col_name_ids, + max_dist = max_dist, region = region, ...) + registerDoSEQ() + } + if (region) { + names(dim(mask)) <- c(lon_dim, lat_dim, 'region') + } else { + names(dim(mask)) <- c(lon_dim, lat_dim) + } # Step 4: Add attributes # attr(mask, lon_dim) <- lon @@ -313,12 +351,62 @@ ShapeToMask <- function(shp_file, ref_grid = NULL, # ## Return all the info from shp # attr(mask, "shapefile") <- attributes(shp) - # Step 5: Save the file or return the array - if (!savefile) { - return(mask) - } else { - warning("This functionality is not developed yet.") - # TODO - # ArrayToNc() + return(mask) +} + +.shapetomask <- function(shp, n, lon, lat, xy.sfc, find_min_dist = FALSE, + shp_col_name_ids = NULL, max_dist = 50, + region = FALSE, ...) { + mask <- array(0, dim = c(length(lon), length(lat))) + shpi <- shp[n, ] + # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may + # not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65). + shp_pol <- sf::st_intersection(x = xy.sfc, y = shpi, ...) + tmp_coords <- sf::st_coordinates(shp_pol)[, 1:2] + if (length(tmp_coords) == 0) { + dim(tmp_coords) <- NULL + } else if (is.null(dim(tmp_coords))) { + tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords))) + } + if (!is.null(dim(tmp_coords))) { + + # polygon_instersection + for (ii in 1:nrow(tmp_coords)) { + # pt_x <- which(lon == tmp_coords[ii, 1]) + # pt_y <- which.min(abs(lat - tmp_coords[ii, 2])) + if (!region) { + # min(abs(lon - tmp_coords[ii, 1])) + # min(abs(lat - tmp_coords[ii, 2])) + mask[which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- n + } else { + mask[which.min(abs(lon - tmp_coords[ii, 1])), + which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 + } + } + } else if (find_min_dist) { + x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,1] + y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,2] + dist <- sqrt((xy.sfc[,1] - x.centroid.shpi)**2 + (xy.sfc[,2] - y.centroid.shpi)**2) + tmp_coords <- array(xy.sfc[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2)) + colnames(tmp_coords) <- c('X', 'Y') + if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & + any(round(lon,2) == round(tmp_coords[1,1],2))) ) { + if (length(dim(mask)) == 2) { + mask[which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- n + } else { + mask[which.min(abs(lon - tmp_coords[, 1])), + which.min(abs(lat - tmp_coords[, 2]))] <- 1 + } + # warning(paste0('The reference grid has no intersection with region ', + # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)), + # ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], + # ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).')) + } else { + # warning(paste0('The reference grid has no intersection with region ', + # ifelse(is.character(shp_col_name_ids), shpi[[shp_col_name_ids]], paste0('n° ', n)))) + } } + return(mask) } \ No newline at end of file diff --git a/R/Viz2VarsVsLTime.R b/R/Viz2VarsVsLTime.R index 9a61df6a4dd569405826494d3b2cfff7b5df325f..47aae43ce632c5e7c6f9b9da17e182363ee3b5d6 100644 --- a/R/Viz2VarsVsLTime.R +++ b/R/Viz2VarsVsLTime.R @@ -47,8 +47,8 @@ #'@examples #'clim <- s2dv::Clim(ts_temp$exp, ts_temp$obs, time_dim = "sdate", #' dat_dim = c("dat", "member")) -#'ano_exp <- Ano(ts_temp$exp, clim$clim_exp) -#'ano_obs <- Ano(ts_temp$obs, clim$clim_obs) +#'ano_exp <- s2dv::Ano(ts_temp$exp, clim$clim_exp) +#'ano_obs <- s2dv::Ano(ts_temp$obs, clim$clim_obs) #'corr_ano <- s2dv::Corr(s2dv::MeanDims(ano_exp, 'member'), ano_obs, #' time_dim = 'sdate', dat_dim = 'dat') #'input_cor <- array(dim = c(dat = 1, 3, time = 5)) diff --git a/R/VizLayout.R b/R/VizLayout.R index 4cf0a546b92336c9c1b1f817b091a912a1c57486..c9984f76989871eac0e17adcd7a10710ee229f36 100644 --- a/R/VizLayout.R +++ b/R/VizLayout.R @@ -185,7 +185,7 @@ #'lats <- attr(map_temp$exp, "Variables")$common$lat #'lons <- attr(map_temp$exp, "Variables")$common$lon #' -#'VizLayout(fun = VizquiMap, plot_dims = c('lat', 'lon'), +#'VizLayout(fun = VizEquiMap, plot_dims = c('lat', 'lon'), #' var = var[, 1, 1, 1, , ], lon = lons, lat = lats, #' filled.continents = FALSE, #' toptitle = 'Near-surface temperature Nov.', diff --git a/R/VizRobinson.R b/R/VizRobinson.R index 285e8b44bfcad6e70b2a536fb9b1f4de4264f2fa..048677a5e0064a28a36de8e67a2ded73590e6e7d 100644 --- a/R/VizRobinson.R +++ b/R/VizRobinson.R @@ -103,10 +103,10 @@ #'dots[which(dots != 0)] <- 1 #'VizRobinson(data, lon = 0:359, lat = -90:90, dots = dots, #' brks = seq(-10, 10, length.out = 11), -#' toptitle = 'synthetic example', vertical = F, +#' toptitle = 'synthetic example', vertical = FALSE, #' caption = 'Robinson Projection', #' bar_extra_margin = c(0, 1, 0, 1), width = 8, height = 6) -#'Vizobinson(data, lon = 0:359, lat = -90:90, mask = dots, legend = 'ggplot2', +#'VizRobinson(data, lon = 0:359, lat = -90:90, mask = dots, legend = 'ggplot2', #' target_proj = "+proj=moll", brks = seq(-10, 10, length.out = 11), #' color_fun = ClimPalette("purpleorange"), colNA = 'green', #' toptitle = 'synthetic example', caption = 'Mollweide Projection', diff --git a/R/VizSection.R b/R/VizSection.R index 06a272413ad2618ab0abf557a421ac908a5ffaea..b375ea78bbd1d702e8b8f240c54c985a8d31c7f9 100644 --- a/R/VizSection.R +++ b/R/VizSection.R @@ -41,7 +41,6 @@ #'data <- array(rep(seq(25, 10, length.out = 7), each = 21) - rnorm(147), #' dim = c(lat = 21, depth = 7)) #'VizSection(data, horiz = 0:20, depth = seq(0, 300, length.out = 7), -#' intydep = 50, intxhoriz = 5, brks = 11, #' toptitle = 'Temperature cross-section', units = "degC") #'@importFrom grDevices dev.cur dev.new dev.off rainbow #'@export diff --git a/R/VizVsLTime.R b/R/VizVsLTime.R index 7dfdae95f170519e9cb1522f83dce5df3c6fee75..cff8550fbe7e268c39687879318ef01155da96f8 100644 --- a/R/VizVsLTime.R +++ b/R/VizVsLTime.R @@ -68,8 +68,8 @@ #'@examples #'clim <- s2dv::Clim(ts_temp$exp, ts_temp$obs, time_dim = "sdate", #' dat_dim = c("dat", "member")) -#'ano_exp <- Ano(ts_temp$exp, clim$clim_exp) -#'ano_obs <- Ano(ts_temp$obs, clim$clim_obs) +#'ano_exp <- s2dv::Ano(ts_temp$exp, clim$clim_exp) +#'ano_obs <- s2dv::Ano(ts_temp$obs, clim$clim_obs) #'corr_ano <- s2dv::Corr(s2dv::MeanDims(ano_exp, 'member'), ano_obs, #' time_dim = 'sdate', dat_dim = 'dat') #'input_cor <- array(dim = c(dat = 1, 4, time = 5)) diff --git a/R/VizWeeklyClim.R b/R/VizWeeklyClim.R index 3fbe62b67fb1b3bd53c6628a523d40d1d5659e0e..01f71d84dbfdc1592d22558aa9d49bce6eb30125 100644 --- a/R/VizWeeklyClim.R +++ b/R/VizWeeklyClim.R @@ -287,9 +287,9 @@ VizWeeklyClim <- function(data, first_date, ref_period, last_date = NULL, minor_breaks = NULL, expand = c(0.03, 0.03), labels = scales::date_format("%d %b %Y")) + theme(axis.text.x = element_text(angle = 45, hjust = 1), - panel.grid.major = element_line(linewidth = 0.5, linetype = 'solid', + panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "gray92"), - panel.grid.minor = element_line(linewidth = 0.25, linetype = 'solid', + panel.grid.minor = element_line(size = 0.25, linetype = 'solid', colour = "gray92"), legend.spacing = unit(-0.2, "cm")) + scale_fill_manual(name = NULL, diff --git a/man/ShapeToMask.Rd b/man/ShapeToMask.Rd index 87826d3daf36405c4e706bbc8f391a89dcec107c..f5beede0a5562e3688e3bbd87995736a793f1b23 100644 --- a/man/ShapeToMask.Rd +++ b/man/ShapeToMask.Rd @@ -6,20 +6,20 @@ \usage{ ShapeToMask( shp_file, - ref_grid = NULL, + ref_grid, shp_system = "NUTS", - shp_system_name = NULL, - reg_ids = NULL, reg_names = NULL, + reg_ids = NULL, + shp_col_name_ids = NULL, reg_level = 3, lat_dim = NULL, lon_dim = NULL, - savefile = FALSE, region = FALSE, target_crs = NULL, check_valid = FALSE, find_min_dist = FALSE, max_dist = 50, + ncores = NULL, ... ) } @@ -34,17 +34,17 @@ reference grid points. It is NULL by default.} name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to 'NUTS' by default.} -\item{shp_system_name}{A character string indicating the column name of the -column in where the specified 'reg_ids' will be taken.} - -\item{reg_ids}{A character string indicating the unique ID in shapefile. -It is NULL by default.} - \item{reg_names}{A named list of character string vectors indicating the country and the region name. The name of the list stands for the country name code and the vector character strings indicate the region name for each country. It is NULL by default.} +\item{reg_ids}{A character string indicating the unique ID in shapefile. +It is NULL by default.} + +\item{shp_col_name_ids}{A character string indicating the column name of the +column in where the specified 'reg_ids' will be taken.} + \item{reg_level}{An integer number from 1 to 3 indicating the 'NUTS' dataset level. For other datasets this parameter is not used. One mask can only have a unique level. It is set to 3 by default.} @@ -57,11 +57,7 @@ with the following possible names: 'lat', 'latitude', 'y', 'j' and \item{lon_dim}{A character string indicating the longitudinal dimension. If it is NULL, the longitudinal name will be searched using an internal function with the following possible names: 'lon', 'longitude', 'x', 'i' and -'nav_lon'. It is NULL by default.} - -\item{savefile}{A logical value indicating wether to save the mask array into -a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by -default. This functionality is not developed yet.} +'nav_lon'.} \item{region}{A logical value indicating if we want a dimension for the regions in the resulting mask array. It is FALSE by default.} @@ -81,6 +77,9 @@ FALSE by default.} to the closest gridpoint when there is no intersection between the shapefile and the reference grid.} +\item{ncores}{The number of parallel processes to spawn for the use for +parallel computation in multiple cores.} + \item{...}{Arguments passed on to 's2_options' in function 'st_intersection'. See 's2 package'.} } @@ -95,7 +94,7 @@ regions. It then transfers the shapefile data into an array and subsets the output based on requested region names or IDs. The accepted shapefile databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. However, the function can use other shapefiles databases with specifying the -categories names with the parameter 'shp_system_name'. +categories names with the parameter 'shp_col_name_ids'. } \details{ To ensure accurate comparison with the shapefile, the function loads a @@ -112,30 +111,8 @@ Note: Modules GDAL, PROJ and GEOS are required. # Exmple (1): NUTS shp_file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', 'NUTS_RG_60M_2021_4326.shp') -# shp_file <- paste0('/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/', -# 'tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp') -# ref_grid <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/', -# 'tas_f6h/tas_20170201.nc') -# ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', -# 'tas_f6h/tas_20191212.nc') -ref_grid <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', - 'tas_f1h/tas_201006.nc') -# ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) - -NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9)) -NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) -mask1 <- ShapeToMask(shp_file, ref_grid, reg_ids = NUTS.id) -mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, - reg_names = NUTS.name) - -# Exmple (2): GADM -shp_file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" -ref_grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', - 'tas_f6h/tas_20191212.nc') -GADM.id <- c("ESP", "ITA") -GADM.name <- c("Spain", "Italy") -mask1 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, - reg_ids = GADM.id, shp_system = "GADM") +ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) +NUTS_name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) mask2 <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, - reg_names = GADM.name, shp_system = "GADM") + reg_names = NUTS_name) } diff --git a/man/Viz2VarsVsLTime.Rd b/man/Viz2VarsVsLTime.Rd index 1e7d388283b5101ae44745d1be1482d1598b7a3a..c74534b87628b2aa842a27f8a4abc219a5099e85 100644 --- a/man/Viz2VarsVsLTime.Rd +++ b/man/Viz2VarsVsLTime.Rd @@ -99,8 +99,8 @@ The input variables should have dimensions (nexp/nmod, nltime). \examples{ clim <- s2dv::Clim(ts_temp$exp, ts_temp$obs, time_dim = "sdate", dat_dim = c("dat", "member")) -ano_exp <- Ano(ts_temp$exp, clim$clim_exp) -ano_obs <- Ano(ts_temp$obs, clim$clim_obs) +ano_exp <- s2dv::Ano(ts_temp$exp, clim$clim_exp) +ano_obs <- s2dv::Ano(ts_temp$obs, clim$clim_obs) corr_ano <- s2dv::Corr(s2dv::MeanDims(ano_exp, 'member'), ano_obs, time_dim = 'sdate', dat_dim = 'dat') input_cor <- array(dim = c(dat = 1, 3, time = 5)) diff --git a/man/VizLayout.Rd b/man/VizLayout.Rd index b025a19a3c80ceed68eeb7f95b9970c66dc0253b..525b11e20611eb86301dbc0417cbf06e0e80eaf2 100644 --- a/man/VizLayout.Rd +++ b/man/VizLayout.Rd @@ -271,7 +271,7 @@ var <- s2dv::MeanDims(ano$exp, "member") lats <- attr(map_temp$exp, "Variables")$common$lat lons <- attr(map_temp$exp, "Variables")$common$lon -VizLayout(fun = VizquiMap, plot_dims = c('lat', 'lon'), +VizLayout(fun = VizEquiMap, plot_dims = c('lat', 'lon'), var = var[, 1, 1, 1, , ], lon = lons, lat = lats, filled.continents = FALSE, toptitle = 'Near-surface temperature Nov.', diff --git a/man/VizRobinson.Rd b/man/VizRobinson.Rd index 7f2ee853838f31bfe9f5081cfd733bfc091216f9..fd0dd94c469375f0db7c32e478bb2d778f227eaf 100644 --- a/man/VizRobinson.Rd +++ b/man/VizRobinson.Rd @@ -174,10 +174,10 @@ dots[which(dots < 4 & dots > -4)] <- 0 dots[which(dots != 0)] <- 1 VizRobinson(data, lon = 0:359, lat = -90:90, dots = dots, brks = seq(-10, 10, length.out = 11), - toptitle = 'synthetic example', vertical = F, + toptitle = 'synthetic example', vertical = FALSE, caption = 'Robinson Projection', bar_extra_margin = c(0, 1, 0, 1), width = 8, height = 6) -Vizobinson(data, lon = 0:359, lat = -90:90, mask = dots, legend = 'ggplot2', +VizRobinson(data, lon = 0:359, lat = -90:90, mask = dots, legend = 'ggplot2', target_proj = "+proj=moll", brks = seq(-10, 10, length.out = 11), color_fun = ClimPalette("purpleorange"), colNA = 'green', toptitle = 'synthetic example', caption = 'Mollweide Projection', diff --git a/man/VizSection.Rd b/man/VizSection.Rd index f60d0aa61ed2ba8f8db555657c6976b52e1fa8ed..c11f2c910976505c534c966cbce49ed7fdfb4bad 100644 --- a/man/VizSection.Rd +++ b/man/VizSection.Rd @@ -85,6 +85,5 @@ Plot a (longitude,depth) or (latitude,depth) section. data <- array(rep(seq(25, 10, length.out = 7), each = 21) - rnorm(147), dim = c(lat = 21, depth = 7)) VizSection(data, horiz = 0:20, depth = seq(0, 300, length.out = 7), - intydep = 50, intxhoriz = 5, brks = 11, toptitle = 'Temperature cross-section', units = "degC") } diff --git a/man/VizVsLTime.Rd b/man/VizVsLTime.Rd index 52f79ba0f910596fccbbfcf3e9aede1323157e9c..f8905175adeae244d4b37f5b0ac4023264cee25c 100644 --- a/man/VizVsLTime.Rd +++ b/man/VizVsLTime.Rd @@ -118,8 +118,8 @@ of the forecast time. \examples{ clim <- s2dv::Clim(ts_temp$exp, ts_temp$obs, time_dim = "sdate", dat_dim = c("dat", "member")) -ano_exp <- Ano(ts_temp$exp, clim$clim_exp) -ano_obs <- Ano(ts_temp$obs, clim$clim_obs) +ano_exp <- s2dv::Ano(ts_temp$exp, clim$clim_exp) +ano_obs <- s2dv::Ano(ts_temp$obs, clim$clim_obs) corr_ano <- s2dv::Corr(s2dv::MeanDims(ano_exp, 'member'), ano_obs, time_dim = 'sdate', dat_dim = 'dat') input_cor <- array(dim = c(dat = 1, 4, time = 5)) diff --git a/tests/testthat/test-ShapeToMask.R b/tests/testthat/test-ShapeToMask.R new file mode 100644 index 0000000000000000000000000000000000000000..0217da5ab4511cdb2e69cd430cc4caf49901365a --- /dev/null +++ b/tests/testthat/test-ShapeToMask.R @@ -0,0 +1,111 @@ +############################################## + +# data1 +shp_file1 <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', + 'NUTS_RG_60M_2021_4326.shp') +ref_grid1 <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/', + 'tas_f1h/tas_201006.nc') +ref_grid1_1 <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5)) +NUTS_id1 <- paste0("FI1D", c(1:3, 5, 7:9)) +NUTS_name1 <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska')) + +# data2 +shp_file2 <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp" +ref_grid2 <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/', + 'tas_f6h/tas_20191212.nc') +GADM_id2 <- c("ESP", "ITA") +GADM_name2 <- c("Spain", "Italy") + +############################################## + +test_that("1. Input checks", { + # shp_file + expect_error( + ShapeToMask(NULL), + "Parameter 'shp_file' cannot be NULL." + ) + expect_error( + ShapeToMask(1), + "Parameter 'shp_file' must be a character string." + ) + # ref_grid + expect_error( + ShapeToMask(shp_file1, ref_grid = NULL), + "Parameter 'ref_grid' cannot be NULL." + ) + expect_error( + ShapeToMask(shp_file1, ref_grid = 1), + "Parameter 'ref_grid' must be either a netCDF file or a list of lon and lat." + ) + # lon_dim, lat_dim + expect_error( + ShapeToMask(shp_file1, lon_dim = 1, lat_dim = 'lat'), + "Parameter 'lon_dim' must be a character string." + ) + expect_error( + ShapeToMask(shp_file1, lat_dim = 1, lon_dim = 'lat'), + "Parameter 'lat_dim' must be a character string." + ) + # shp_system, reg_ids, reg_names, shp_col_name_ids + # reg_level + # region + # target_crs + # check_valid + # find_min_dist + # max_dist + # ncores +}) + +############################################## + +test_that("2. Output", { + mask1 <- ShapeToMask(shp_file1, ref_grid1, reg_ids = NUTS_id1) + expect_equal( + dim(mask1), + c(lon = 3600, lat = 1801) + ) + expect_equal( + sum(mask1), + c(20899) + ) + mask2 <- ShapeToMask(shp_file1, ref_grid = ref_grid1_1, reg_ids = NUTS_id1) + expect_equal( + sum(mask2), + 830 + ) + expect_equal( + dim(mask2), + c(lon = 61, lat = 91) + ) + mask3 <- ShapeToMask(shp_file1, ref_grid = ref_grid1_1, reg_names = NUTS_name1, + region = TRUE) + expect_equal( + dim(mask3), + c(lon = 61, lat = 91, region = 4) + ) + expect_equal( + sum(mask3), + 105 + ) + mask4 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, + reg_ids = GADM_id2, shp_system = "GADM") + expect_equal( + dim(mask4), + c(longitude = 240, latitude = 121) + ) + expect_equal( + sum(mask4), + 48 + ) + mask5 <- ShapeToMask(shp_file = shp_file2, ref_grid = ref_grid2, + reg_names = GADM_name2, shp_system = "GADM", + region = TRUE) + expect_equal( + dim(mask5), + c(longitude = 240, latitude = 121, region = 4) + ) + expect_equal( + sum(mask5), + 56 + ) +}) \ No newline at end of file