Newer
Older
#library(easyNCDF)
########################
# module load GDAL PROJ GEOS
########################
# shp.file: The shp file
# ref.grid: Either (1) a netCDF file or (2) a list of lon and lat to provide the reference grid points
# reg.ids: The unique ID in shapefile
# reg.names: A list of country and the region name
# reg.level: Can only have the same level in one mask; only for NUTS
# lat_dim: 'latitude' for example
# lon_dim: 'longitude' for example
# savefile: If NULL, return an array
### Some example inputs ###
#-------shp.file----------
#shp.file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/NUTS_RG_60M_2021_4326.shp')
#shp.file <- '/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp'
#-------ref.grid----------
#ref.grid <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20170201.nc'
#ref.grid <- '/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/tas_f6h/tas_20191212.nc'
#ref.grid <- '/esarchive/recon/ecmwf/era5land/monthly_mean/tas_f1h/tas_201006.nc'
#ref.grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5))
#-------reg.ids-----------
#NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9))
#ADM.id <-
#-------reg.names-----------
#NUTS.name <- list(FI = c('Lappi', 'Kainuu')) #NOTE: NUTS.name may be repetitive; use level to filter
#NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska'))
#---------------------------
# mask1 <- shp_mask(shp.file, ref.grid, reg.ids = NUTS.id)
# mask2 <- shp_mask(shp.file, ref.grid, reg.names = NUTS.name)
#NOTE: One region is one number; need to have the option to combine them?
#TODO: Suppress warnings?
#TODO: Substitute packages
shp_mask <- function(shp.file, ref.grid = NULL,
shp.system = "NUTS", reg.ids = NULL, reg.names = NULL,
reg.level = 3, lat_dim = NULL, lon_dim = NULL, savefile = NULL) {
####################################################
# Step 1: Load the shapefile
if (all(is.null(reg.ids), is.null(reg.names))) {
stop("Either provide parameter 'reg.ids' or 'reg.names'.")
} else {
stop("shp.system ", shp.system, " is not defined yet.")
}
if (!is.null(reg.names)) {
warning("Only use 'reg.ids' to get the shape region. 'reg.names' is not used.")
}
## Method 2: Use country code & region name
tmp <- subset(shp, CNTR_CODE == names(reg.names)[cntr_i])
tmp <- subset(tmp, NUTS_NAME %in% reg.names[[cntr_i]])
tmp <- subset(shp, ADM0_EN == names(reg.names)[cntr_i])
tmp <- subset(tmp, ADM1_EN %in% reg.names[[cntr_i]])
if (cntr_i == 1) {
shp_tmp <- tmp
} else {
shp_tmp <- rbind(shp_tmp, tmp)
}
}
} else if (shp.system == "ADM") {
shp <- shp_tmp
}
# plot(shp)
####################################################
# Step 2: Use the reference file to get lat and lon
if (all(tools::file_ext(ref.grid) == 'nc')) {
stop("ref.grid file does not exist.")
} else {
## Method 1: ref.grid is a netCDF file
if (is.null(lat_dim) | is.null(lon_dim)) {
lat_dim <- var_names[which(var_names %in% s2dv:::.KnownLatNames())]
lon_dim <- var_names[which(var_names %in% s2dv:::.KnownLonNames())]
}
latlon <- NcToArray(ref.grid, vars_to_read = c(lat_dim, lon_dim))
lat <- NcToArray(ref.grid, vars_to_read = lat_dim)[1, ]
lon <- NcToArray(ref.grid, vars_to_read = lon_dim)[1, ]
}
} else if (is.list(ref.grid)) {
## Method 2: ref.grid is a list of lon and lat
if (length(ref.grid) != 2) {
stop("If 'ref.grid' is a list, it must have two items for longitude and latitude.")
}
if (is.null(lat_dim) | is.null(lon_dim)) {
#NOTE: the names come from s2dv:::.KnownLonNames and .KnownLatNames
lon_known_names <- c(s2dv:::.KnownLonNames(), 'lons')
lat_known_names <- c(s2dv:::.KnownLatNames(), 'lats')
lon_dim <- lon_known_names[which(lon_known_names %in% names(ref.grid))]
lat_dim <- lat_known_names[which(lat_known_names %in% names(ref.grid))]
if (identical(lon_dim, character(0)) | identical(lat_dim, character(0))) {
stop("longitude and latitude names are not recognized in 'ref.grid'. Please specify 'lon_dim' and 'lat_dim'.")
}
}
lat <- ref.grid[[lat_dim]]
lon <- ref.grid[[lon_dim]]
} else {
stop("Parameter 'ref.grid' must be either a netCDF file or a list of lon and lat.")
## Create data frame & sp class for ref grid
ref.df <- data.frame(data = 0,
lon = rep(lon, times = length(lat)),
lat = rep(lat, each = length(lon)))
xy.sfg <- st_multipoint(coord)
xy.sfc <- st_sfc(xy.sfg)
st_crs(xy.sfc) <- sf::st_crs(shp)
####################################################
# Step 3: Create mask
## Create mask array with 0; 1, 2, etc. will be filled in for each shp region
mask <- array(0, dim = c(length(lon), length(lat)))
names(dim(mask)) <- c(lon_dim, lat_dim)
## Loop through each shp region
for (shp_i in 1:nrow(shp)) {
#NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may
# not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65).
shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ])
tmp_coords <- st_coordinates(shp_pol)[, 1:2]
if (nrow(tmp_coords) != 0) {
for (ii in 1:nrow(tmp_coords)) {
pt_x <- which(lon == tmp_coords[ii, 1])
pt_y <- which.min(abs(lat - tmp_coords[ii, 2]))
mask[which.min(abs(lon - tmp_coords[ii, 1])),
which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i
#NOTE: This line can't catch correct point when the NOTE above happens.
#mask[which(lon == tmp_coords[ii, 1]), which(lat == tmp_coords[ii, 2])] <- shp_i
}
} else {
if (shp.system == "NUTS") {
tmp <- shp$NUTS_ID[shp_i]
} else if (shp.system == "ADM") {
tmp <- shp$ADM1_PCODE[shp_i]
}
warning("shp ID '", tmp, "' cannot be identified in this reference grid.")
}
}
####################################################
# Step 4: Add attributes
attr(mask, lon_dim) <- lon
attr(mask, lat_dim) <- lat
if (shp.system == "NUTS") {
attr(mask, "index") <- as.list(shp$NUTS_ID)
} else if (shp.system == "ADM") {
attr(mask, "index") <- as.list(shp$ADM1_PCODE)
}
## Return all the info from shp
attr(mask, "shapefile") <- attributes(shp)
####################################################
# Step 5: Save the file or return the array
if (is.null(savefile)) {
return(mask)
} else {
#TODO
ArrayToNc()
}
}