Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
#'Convert Shapefile to Mask Array
#'
#'This function reads a shapefile (.shp) containing information about polygonal
#'regions. It then transfers the shapefile data into an array and subsets the
#'output based on requested region names or IDs. The accepted shapefile
#'databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format.
#'However, the function can use other shapefiles databases with specifying the
#'categories names with the parameter 'shp.system.name'.
#'
#'To ensure accurate comparison with the shapefile, the function loads a
#'reference dataset that provides longitude and latitude information. By
#'intersecting each subset of the shapefile with the reference coordinates, the
#'function selects only the desired regions. The final step involves creating a
#'mask array. Depending on the chosen option, the mask array is either returned
#'as the function's output or saved into a NetCDF format in the specified
#'directory.
#'
#'Note: Modules GDAL, PROJ and GEOS are required.
#'
#'@param shp.file A character string indicating the shp file path.
#'@param ref.grid A character string indicating the path to the reference
#' data. Either (1) a netCDF file or (2) a list of lon and lat to provide the
#' reference grid points. It is NULL by default.
#'@param shp.system A character string containing the Shapefile System Database
#' name. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. It is set to
#' 'NUTS' by default.
#'@param shp.system.name A character string indicating the column name of the
#' column in where the specified 'reg.ids' will be taken.
#'@param reg.ids A character string indicating the unique ID in shapefile.
#' It is NULL by default.
#'@param reg.names A named list of character string vectors indicating the
#' country and the region name. The name of the list stands for the country
#' name code and the vector character strings indicate the region name for
#' each country. It is NULL by default.
#'@param reg.level An integer number from 1 to 3 indicating the 'NUTS' dataset
#' level. For other datasets this parameter is not used. One mask can only have
#' a unique level. It is set to 3 by default.
#'@param lat_dim A character string indicating the latitudinal dimension. If it
#' is NULL, the latitudinal name will be searched using an internal function
#' with the following possible names: 'lat', 'latitude', 'y', 'j' and
#' 'nav_lat'. It is NULL by default.
#'@param lon_dim A character string indicating the longitudinal dimension. If it
#' is NULL, the longitudinal name will be searched using an internal function
#' with the following possible names: 'lon', 'longitude', 'x', 'i' and
#' 'nav_lon'. It is NULL by default.
#'@param target_crs A character string indicating the target 'Coordinate
#' Reference System'.
#'@param region A logical value indicating if we want a dimension for the
#' regions in the resulting mask array. It is FALSE by default.
#'@param check_valid A logical value that when it is TRUE it uses the function
#' 'sf::st_make_valid' applied to the shapefile and to the coordinates.
#'@param max_dist A numeric value indicating the maximum distance is accepted
#' to the closest gridpoint when there is no intersection between the shapefile
#' and the reference grid.
#'@param savefile A logical value indicating wether to save the mask array into
#' a NetCDF format file (TRUE) or to return an array (FALSE). It is FALSE by
#' default. This functionality is not developed yet.
#'@param ... Arguments passed on to 's2_options' in function 'st_intersection'.
#' See 's2 package'.
#'
#'@return A multidimensional array containing a mask array with longitude and
#'latitude dimensions. If 'region' is TRUE, there will be a dimension for
#'the region.
#'
#'@examples
#'# Exmple (1): NUTS
#'shp.file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/',
#' 'NUTS_RG_60M_2021_4326.shp')
#'# shp.file <- paste0('/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/',
#'# 'tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp')
#'# ref.grid <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/',
#'# 'tas_f6h/tas_20170201.nc')
#'# ref.grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/',
#'# 'tas_f6h/tas_20191212.nc')
#'ref.grid <- paste0('/esarchive/recon/ecmwf/era5land/monthly_mean/',
#' 'tas_f1h/tas_201006.nc')
#'# ref.grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5))
#'
#'NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9))
#'NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska'))
#'mask1 <- ShapeToMask(shp.file, ref.grid, reg.ids = NUTS.id)
#'mask2 <- ShapeToMask(shp.file = shp.file, ref.grid = ref.grid,
#' reg.names = NUTS.name)
#'
#'# Exmple (2): GADM
#'shp.file <- "/esarchive/shapefiles/gadm_country_mask/gadm_country_ISO3166.shp"
#'ref.grid <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/',
#' 'tas_f6h/tas_20191212.nc')
#'GADM.id <- c("ESP", "ITA")
#'GADM.name <- c("Spain", "Italy")
#'mask1 <- ShapeToMask(shp.file = shp.file, ref.grid = ref.grid,
#' reg.ids = GADM.id, shp.system = "GADM")
#'mask2 <- ShapeToMask(shp.file = shp.file, ref.grid = ref.grid,
#' reg.names = GADM.name, shp.system = "GADM")
#'@import easyNCDF
#'@import sf
#'@export
ShapeToMask <- function(shp.file, ref.grid = NULL,
shp.system = "NUTS", shp.system.name = NULL,
reg.ids = NULL, reg.names = NULL,
reg.level = 3, lat_dim = NULL, lon_dim = NULL,
savefile = FALSE, region = FALSE, target_crs = NULL,
check_valid = FALSE, max_dist = 99999999, ...) {
# NOTE: One region is one number; need to have the option to combine them?
# TODO: Suppress warnings?
# TODO: Substitute packages
# Step 1: Load the shapefile
shp <- sf::st_read(shp.file) # class sf
if (!is.null(target_crs)) {
transformed_shapefile <- st_transform(shp, crs = target_crs)
shp <- transformed_shapefile
}
if (all(is.null(reg.ids), is.null(reg.names))) {
stop("Either provide parameter 'reg.ids' or 'reg.names'.")
} else if (!is.null(reg.ids)) {
## Method 1: Directly use IDs
if (!is.null(shp.system.name)) {
if (shp.system.name %in% names(shp)) {
shp <- subset(shp, get(shp.system.name) %in% reg.ids)
} else {
stop("Shape system name not found in shapefile names.")
}
} else if (shp.system == "NUTS") {
shp <- subset(shp, NUTS_ID %in% reg.ids)
shp.system.name <- NUTS_ID
} else if (shp.system == "ADM") {
shp <- subset(shp, ADM1_PCODE %in% reg.ids)
shp.system.name <- ADM1_PCODE
} else if (shp.system == "GADM") {
shp <- subset(shp, ISO %in% reg.ids)
shp.system.name <- ISO
} else {
stop("shp.system ", shp.system, " is not defined yet.")
}
if (!is.null(reg.names)) {
warning("Only use 'reg.ids' to get the shape region. 'reg.names' is not used.")
}
} else if (!is.null(reg.names)) {
shp.system.name <- NULL
## Method 2: Use country code & region name
for (cntr_i in 1:length(reg.names)) {
if (shp.system == "NUTS") {
tmp <- subset(shp, CNTR_CODE == names(reg.names)[cntr_i])
tmp <- subset(tmp, NUTS_NAME %in% reg.names[[cntr_i]])
shp.system.name <- NUTS_NAME
} else if (shp.system == "ADM") {
tmp <- subset(shp, ADM0_EN == names(reg.names)[cntr_i])
tmp <- subset(tmp, ADM1_EN %in% reg.names[[cntr_i]])
shp.system.name <- ADM1_EN
} else if (shp.system == "GADM") {
tmp <- subset(shp, Name %in% reg.names)
shp.system.name <- Name
}
if (cntr_i == 1) {
shp_tmp <- tmp
} else {
shp_tmp <- rbind(shp_tmp, tmp)
}
}
if (shp.system == "NUTS") {
shp <- subset(shp_tmp, LEVL_CODE == reg.level)
} else if (shp.system == "ADM" | shp.system == "GADM") {
shp <- shp_tmp
}
}
# Step 2: Use the reference file to get lat and lon
if (all(tools::file_ext(ref.grid) == 'nc')) {
if (!file.exists(ref.grid)) {
stop("ref.grid file does not exist.")
} else {
## Method 1: ref.grid is a netCDF file
if (is.null(lat_dim) | is.null(lon_dim)) {
var_names <- easyNCDF::NcReadVarNames(ref.grid)
lat_dim <- var_names[which(var_names %in% s2dv:::.KnownLatNames())]
lon_dim <- var_names[which(var_names %in% s2dv:::.KnownLonNames())]
}
latlon <- NcToArray(ref.grid, vars_to_read = c(lat_dim, lon_dim))
lat <- NcToArray(ref.grid, vars_to_read = lat_dim)[1, ]
lon <- NcToArray(ref.grid, vars_to_read = lon_dim)[1, ]
}
} else if (is.list(ref.grid)) {
## Method 2: ref.grid is a list of lon and lat
if (length(ref.grid) != 2) {
stop("If 'ref.grid' is a list, it must have two items for longitude and latitude.")
}
if (is.null(lat_dim) | is.null(lon_dim)) {
# NOTE: the names come from s2dv:::.KnownLonNames and .KnownLatNames
lon_known_names <- c(s2dv:::.KnownLonNames(), 'lons')
lat_known_names <- c(s2dv:::.KnownLatNames(), 'lats')
lon_dim <- lon_known_names[which(lon_known_names %in% names(ref.grid))]
lat_dim <- lat_known_names[which(lat_known_names %in% names(ref.grid))]
if (identical(lon_dim, character(0)) | identical(lat_dim, character(0))) {
stop("longitude and latitude names are not recognized in 'ref.grid'. Please specify 'lon_dim' and 'lat_dim'.")
}
}
lat <- ref.grid[[lat_dim]]
lon <- ref.grid[[lon_dim]]
} else {
stop("Parameter 'ref.grid' must be either a netCDF file or a list of lon and lat.")
}
## Create data frame & sp class for ref grid
ref.df <- data.frame(data = 0,
lon = rep(lon, times = length(lat)),
lat = rep(lat, each = length(lon)))
coord <- as.matrix(data.frame(x = ref.df$lon, y = ref.df$lat))
xy.sfg <- st_multipoint(coord)
xy.sfc <- st_sfc(xy.sfg)
# Assign crs of original shapefile
if (!is.null(target_crs)) {
st_crs(xy.sfc) <- sf::st_crs(target_crs) #initial_crs # asign crs of original shapefile
xy.sfc <- sf::st_transform(xy.sfc, st_crs(shp))
} else {
st_crs(xy.sfc) <- sf::st_crs(shp)
}
# Step 3: Create mask
## Create mask array with 0; 1, 2, etc. will be filled in for each shp region
mask <- array(0, dim = c(length(lon), length(lat)))
names(dim(mask)) <- c(lon_dim, lat_dim)
if (region) {
mask <- array(0, dim = c(nrow(shp), length(lon), length(lat)))
names(dim(mask)) <- c('region', lon_dim, lat_dim)
}
if (check_valid) {
xy.sfc <- st_make_valid(xy.sfc)
shp <- st_make_valid(shp)
}
## Loop through each shp region
for (shp_i in 1:nrow(shp)) {
# NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may
# not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65).
shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ])
# shp_pol <- sf::st_intersection(xy.sfc, shp[shp_i, ], ...)
tmp_coords <- st_coordinates(shp_pol)[, 1:2]
if (length(tmp_coords) == 0) {
dim(tmp_coords) <- NULL
} else if (is.null(dim(tmp_coords))) {
tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords)))
}
if (!is.null(dim(tmp_coords))) {
# polygon_instersection
for (ii in 1:nrow(tmp_coords)) {
# pt_x <- which(lon == tmp_coords[ii, 1])
# pt_y <- which.min(abs(lat - tmp_coords[ii, 2]))
if (length(dim(mask)) == 2) {
# min(abs(lon - tmp_coords[ii, 1]))
# min(abs(lat - tmp_coords[ii, 2]))
mask[which.min(abs(lon - tmp_coords[ii, 1])),
which.min(abs(lat - tmp_coords[ii, 2]))] <- shp_i
} else {
mask[shp_i, which.min(abs(lon - tmp_coords[ii, 1])),
which.min(abs(lat - tmp_coords[ii, 2]))] <- 1
}
}
} else {
x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,1]
y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shp[shp_i,]))[,2]
dist <- sqrt((xy.sfg[,1] - x.centroid.shpi)**2 + (xy.sfg[,2] - y.centroid.shpi)**2)
tmp_coords <- array(xy.sfg[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2))
colnames(tmp_coords) <- c('X', 'Y')
if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) &
any(round(lon,2) == round(tmp_coords[1,1],2))) ) {
if (length(dim(mask)) == 2) {
mask[which.min(abs(lon - tmp_coords[, 1])),
which.min(abs(lat - tmp_coords[, 2]))] <- shp_i
} else {
mask[shp_i, which.min(abs(lon - tmp_coords[, 1])),
which.min(abs(lat - tmp_coords[, 2]))] <- 1
}
warning(paste0('The reference grid has no intersection with region ',
ifelse(is.character(shp.system.name), shp[shp_i,][[shp.system.name]], paste0('n° ', shp_i)),
' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))],
' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).'))
} else {
warning(paste0('The reference grid has no intersection with region ',
ifelse(is.character(shp.system.name), shp[shp_i,][[shp.system.name]], paste0('n° ', shp_i))))
}
}
}
# Step 4: Add attributes
# attr(mask, lon_dim) <- lon
# attr(mask, lat_dim) <- lat
# if (shp.system == "NUTS") {
# attr(mask, "index") <- as.list(shp$NUTS_ID)
# } else if (shp.system == "ADM") {
# attr(mask, "index") <- as.list(shp$ADM1_PCODE)
# } else if (shp.system == "GADM") {
# attr(mask, "index") <- as.list(shp$ISO)
# }
# names(attr(mask, "index")) <- 1:nrow(shp)
# ## Return all the info from shp
# attr(mask, "shapefile") <- attributes(shp)
# Step 5: Save the file or return the array
if (!savefile) {
return(mask)
} else {
warning("This functionality is not developed yet.")
# TODO
# ArrayToNc()
}
}