Newer
Older
#library(easyNCDF)
#library(sp)
#library(rgeos)
#library(rgdal)
########################
# module load GDAL PROJ GEOS
########################
# shp.file: The shp file
# ref.grid: Either (1) a netCDF file or (2) a list of lon and lat to provide the reference grid points
# reg.ids: The unique ID in shapefile
# reg.names: A list of country and the region name
# reg.level: Can only have the same level in one mask; only for NUTS
# lat_dim: 'latitude' for example
# lon_dim: 'longitude' for example
# savefile: If NULL, return an array
### Some example inputs ###
#-------shp.file----------
#shp.file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/NUTS_RG_60M_2021_4326.shp')
#shp.file <- '/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp'
#-------ref.grid----------
#ref.grid <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20170201.nc'
#ref.grid <- '/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/tas_f6h/tas_20191212.nc'
#ref.grid <- '/esarchive/recon/ecmwf/era5land/monthly_mean/tas_f1h/tas_201006.nc'
#ref.grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5))
#-------reg.ids-----------
#NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9))
#ADM.id <-
#-------reg.names-----------
#NUTS.name <- list(FI = c('Lappi', 'Kainuu')) #NOTE: NUTS.name may be repetitive; use level to filter
#NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska'))
#---------------------------
# mask1 <- shp_mask(shp.file, ref.grid, reg.ids = NUTS.id)
# mask2 <- shp_mask(shp.file, ref.grid, reg.names = NUTS.name)
#NOTE: One region is one number; need to have the option to combine them?
#TODO: Suppress warnings?
#TODO: Substitute packages
shp_mask <- function(shp.file, ref.grid = NULL,
shp.system = "NUTS", reg.ids = NULL, reg.names = NULL,
reg.level = 3, lat_dim = NULL, lon_dim = NULL, savefile = NULL) {
####################################################
# Step 1: Load the shapefile
shp <- rgdal::readOGR(shp.file)
if (all(is.null(reg.ids), is.null(reg.names))) {
stop("Either provide parameter 'reg.ids' or 'reg.names'.")
} else {
stop("shp.system ", shp.system, " is not defined yet.")
}
if (!is.null(reg.names)) {
warning("Only use 'reg.ids' to get the shape region. 'reg.names' is not used.")
}
## Method 2: Use country code & region name
tmp <- subset(shp, CNTR_CODE == names(reg.names)[cntr_i])
tmp <- subset(tmp, NUTS_NAME %in% reg.names[[cntr_i]])
tmp <- subset(shp, ADM0_EN == names(reg.names)[cntr_i])
tmp <- subset(tmp, ADM1_EN %in% reg.names[[cntr_i]])
if (cntr_i == 1) {
shp_tmp <- tmp
} else {
shp_tmp <- rbind(shp_tmp, tmp)
}
}
shp <- subset(shp_tmp, LEVL_CODE == NUTS.level)
} else if (shp.system == "ADM") {
shp <- shp_tmp
}
# plot(shp)
####################################################
# Step 2: Use the reference file to get lat and lon
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
if (all(tools::file_ext(ref.grid) == 'nc')) {
if (!file_exists(ref.grid)) {
stop("ref.grid file does not exist.")
} else {
## Method 1: ref.grid is a netCDF file
if (is.null(lat_dim) | is.null(lon_dim)) {
var_names <- easyNCDF::NcReadVarNames(ref.file)
lat_dim <- var_names[which(var_names %in% s2dv:::.KnownLatNames())]
lon_dim <- var_names[which(var_names %in% s2dv:::.KnownLonNames())]
}
latlon <- NcToArray(ref.file, vars_to_read = c(lat_dim, lon_dim))
lat <- NcToArray(ref.file, vars_to_read = lat_dim)[1, ]
lon <- NcToArray(ref.file, vars_to_read = lon_dim)[1, ]
}
} else if (is.list(ref.grid)) {
## Method 2: ref.grid is a list of lon and lat
if (length(ref.grid) != 2) {
stop("If 'ref.grid' is a list, it must have two items for longitude and latitude.")
}
if (is.null(lat_dim) | is.null(lon_dim)) {
#NOTE: the names come from s2dv:::.KnownLonNames and .KnownLatNames
lon_known_names <- c(s2dv:::.KnownLonNames(), 'lons')
lat_known_names <- c(s2dv:::.KnownLatNames(), 'lats')
lon_dim <- lon_known_names[which(lon_known_names %in% names(ref.grid))]
lat_dim <- lat_known_names[which(lat_known_names %in% names(ref.grid))]
if (identical(lon_dim, character(0)) | identical(lat_dim, character(0))) {
stop("longitude and latitude names are not recognized in 'ref.grid'. Please specify 'lon_dim' and 'lat_dim'.")
}
}
lat <- ref.grid[[lat_dim]]
lon <- ref.grid[[lon_dim]]
} else {
stop("Parameter 'ref.grid' must be either a netCDF file or a list of lon and lat.")
## Create data frame & sp class for ref grid
ref.df <- data.frame(data = 0,
lon = rep(lon, times = length(lat)),
lat = rep(lat, each = length(lon)))
coord <- data.frame(x = ref.df$lon, y = ref.df$lat)
ref.sp <- SpatialPointsDataFrame(coord,
data = data.frame(ref.df$data))
proj4string(ref.sp) <- sp::proj4string(shp)
####################################################
# Step 3: Create mask
## Create mask array with 0; 1, 2, etc. will be filled in for each shp region
mask <- array(0, dim = c(length(lon), length(lat)))
names(dim(mask)) <- c(lon_dim, lat_dim)
## Loop through each shp region
for (shp_i in 1:nrow(shp)) {
shp_pol <- gIntersection(ref.sp, shp[shp_i, ])
if (!is.null(shp_pol)) {
for (ii in 1:nrow(shp_pol@coords)) {
mask[which(lon == shp_pol@coords[ii, 1]), which(lat == shp_pol@coords[ii, 2])] <- shp_i
}
} else {
if (shp.system == "NUTS") {
tmp <- shp$NUTS_ID[shp_i]
} else if (shp.system == "ADM") {
tmp <- shp$ADM1_PCODE[shp_i]
}
warning("shp ID '", tmp, "' cannot be identified in this reference grid.")
}
}
####################################################
# Step 4: Add attributes
attr(mask, lon_dim) <- lon
attr(mask, lat_dim) <- lat
if (shp.system == "NUTS") {
attr(mask, "index") <- as.list(shp$NUTS_ID)
} else if (shp.system == "ADM") {
attr(mask, "index") <- as.list(shp$ADM1_PCODE)
}
## Return all the info from shp
attr(mask, "shapefile") <- attributes(shp)
####################################################
# Step 5: Save the file or return the array
if (is.null(savefile)) {
return(mask)
} else {
#TODO
ArrayToNc()
}
}