shp_mask.R 6.83 KB
Newer Older
aho's avatar
aho committed
##library(sf)
#library(sp)
#library(rgeos)
#library(rgdal)

########################
# module load GDAL PROJ GEOS
########################
# shp.file: The shp file
# ref.grid: Either (1) a netCDF file or (2) a list of lon and lat to provide the reference grid points
# reg.ids: The unique ID in shapefile
# reg.names: A list of country and the region name
# reg.level: Can only have the same level in one mask; only for NUTS
# lat_dim: 'latitude' for example
# lon_dim: 'longitude' for example
# savefile: If NULL, return an array

### Some example inputs ###
#-------shp.file----------
#shp.file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/NUTS_RG_60M_2021_4326.shp')
#shp.file <- '/esarchive/scratch/cdelgado/focus_outputs/Shapefiles/tza_admbnda_adm1/tza_admbnda_adm1_20181019.shp'
#-------ref.grid----------
#ref.grid <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20170201.nc'
#ref.grid <- '/esarchive/exp/ecmwf/s2s-monthly_ensfor/weekly_mean/tas_f6h/tas_20191212.nc'
#ref.grid <- '/esarchive/recon/ecmwf/era5land/monthly_mean/tas_f1h/tas_201006.nc'
#ref.grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5))
#-------reg.ids-----------
#NUTS.id <- paste0("FI1D", c(1:3, 5, 7:9))
#ADM.id <- 
#-------reg.names-----------
#NUTS.name <- list(FI = c('Lappi', 'Kainuu'))  #NOTE: NUTS.name may be repetitive; use level to filter
#NUTS.name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska'))
#---------------------------
# mask1 <- shp_mask(shp.file, ref.file, reg.ids = NUTS.id)
# mask2 <- shp_mask(shp.file, ref.file, reg.names = NUTS.name)

#NOTE: One region is one number; need to have the option to combine them?
#TODO: Suppress warnings?
#TODO: Substitute packages
shp_mask <- function(shp.file, ref.grid = NULL, 
                     shp.system = "NUTS", reg.ids = NULL, reg.names = NULL,
                     reg.level = 3, lat_dim = NULL, lon_dim = NULL, savefile = NULL) {

####################################################
  # Step 1: Load the shapefile
  shp <- rgdal::readOGR(shp.file)
  
  if (all(is.null(reg.ids), is.null(reg.names))) {
    stop("Either provide parameter 'reg.ids' or 'reg.names'.")
  } else if (!is.null(reg.ids)) {
aho's avatar
aho committed
    ## Method 1: Directly use IDs
    if (shp.system == "NUTS") {
      shp <- subset(shp, NUTS_ID %in% reg.ids)
aho's avatar
aho committed
    } else if (shp.system == "ADM") {
      shp <- subset(shp, ADM1_PCODE %in% reg.ids)
aho's avatar
aho committed
    } else {
      stop("shp.system ", shp.system, " is not defined yet.")
    }
    if (!is.null(reg.names)) {
      warning("Only use 'reg.ids' to get the shape region. 'reg.names' is not used.")
  } else if (!is.null(reg.names)) {
    ## Method 2: Use country code & region name
    for (cntr_i in 1:length(reg.names)) {
aho's avatar
aho committed
      if (shp.system == "NUTS") {
        tmp <- subset(shp, CNTR_CODE == names(reg.names)[cntr_i])
        tmp <- subset(tmp, NUTS_NAME %in% reg.names[[cntr_i]])
aho's avatar
aho committed
      } else if (shp.system == "ADM") {
        tmp <- subset(shp, ADM0_EN == names(reg.names)[cntr_i])
        tmp <- subset(tmp, ADM1_EN %in% reg.names[[cntr_i]])
aho's avatar
aho committed
      }
      if (cntr_i == 1) {
        shp_tmp <- tmp
      } else {
        shp_tmp <- rbind(shp_tmp, tmp)
      }
    }
aho's avatar
aho committed
    if (shp.system == "NUTS") {
      shp <- subset(shp_tmp, LEVL_CODE == NUTS.level)
    } else if (shp.system == "ADM") {
      shp <- shp_tmp
aho's avatar
aho committed
    }
  }
  # plot(shp)
  
####################################################
  # Step 2: Use the reference file to get lat and lon
 
  if (all(tools::file_ext(ref.grid) == 'nc')) {
    if (!file_exists(ref.grid)) {
      stop("ref.grid file does not exist.")
    } else {
    
      ## Method 1: ref.grid is a netCDF file
      if (is.null(lat_dim) | is.null(lon_dim)) {
        var_names <- easyNCDF::NcReadVarNames(ref.file)
        lat_dim <- var_names[which(var_names %in% s2dv:::.KnownLatNames())]
        lon_dim <- var_names[which(var_names %in% s2dv:::.KnownLonNames())]
      }
      latlon <- NcToArray(ref.file, vars_to_read = c(lat_dim, lon_dim))
      lat <- NcToArray(ref.file, vars_to_read = lat_dim)[1, ]
      lon <- NcToArray(ref.file, vars_to_read = lon_dim)[1, ]
    }
  } else if (is.list(ref.grid)) {
    ## Method 2: ref.grid is a list of lon and lat
    if (length(ref.grid) != 2) {
      stop("If 'ref.grid' is a list, it must have two items for longitude and latitude.")
    }
    if (is.null(lat_dim) | is.null(lon_dim)) {
      #NOTE: the names come from s2dv:::.KnownLonNames and .KnownLatNames
      lon_known_names <- c(s2dv:::.KnownLonNames(), 'lons')
      lat_known_names <- c(s2dv:::.KnownLatNames(), 'lats')
      lon_dim <- lon_known_names[which(lon_known_names %in% names(ref.grid))]
      lat_dim <- lat_known_names[which(lat_known_names %in% names(ref.grid))]

      if (identical(lon_dim, character(0)) | identical(lat_dim, character(0))) {
        stop("longitude and latitude names are not recognized in 'ref.grid'. Please specify 'lon_dim' and 'lat_dim'.")
      }
    }
    lat <- ref.grid[[lat_dim]]
    lon <- ref.grid[[lon_dim]]

  } else {
    stop("Parameter 'ref.grid' must be either a netCDF file or a list of lon and lat.")
  ## Create data frame & sp class for ref grid
  ref.df <- data.frame(data = 0,
                       lon = rep(lon, times = length(lat)),
                       lat = rep(lat, each = length(lon)))
  coord <- data.frame(x = ref.df$lon, y = ref.df$lat)
  ref.sp <- SpatialPointsDataFrame(coord,
                                   data = data.frame(ref.df$data))
  proj4string(ref.sp) <- sp::proj4string(shp)
  
####################################################
  # Step 3: Create mask 
  ## Create mask array with 0; 1, 2, etc. will be filled in for each shp region
  mask <- array(0, dim = c(length(lon), length(lat)))
  names(dim(mask)) <- c(lon_dim, lat_dim)
  
  ## Loop through each shp region
  for (shp_i in 1:nrow(shp)) {
    shp_pol <- gIntersection(ref.sp, shp[shp_i, ])
    if (!is.null(shp_pol)) {
      for (ii in 1:nrow(shp_pol@coords)) {
        mask[which(lon == shp_pol@coords[ii, 1]), which(lat == shp_pol@coords[ii, 2])] <- shp_i
      }
    } else {
      if (shp.system == "NUTS") {
        tmp <- shp$NUTS_ID[shp_i]
      } else if (shp.system == "ADM") {
        tmp <- shp$ADM1_PCODE[shp_i]
      }
      warning("shp ID '", tmp, "' cannot be identified in this reference grid.")
    }
  }
  
####################################################
  # Step 4: Add attributes
  attr(mask, lon_dim) <- lon
  attr(mask, lat_dim) <- lat
aho's avatar
aho committed
  if (shp.system == "NUTS") {
    attr(mask, "index") <- as.list(shp$NUTS_ID)
  } else if (shp.system == "ADM") {
    attr(mask, "index") <- as.list(shp$ADM1_PCODE)
  }
aho's avatar
aho committed
  names(attr(mask, "index")) <- 1:nrow(shp)

  ## Return all the info from shp
  attr(mask, "shapefile") <- attributes(shp)
  
####################################################
  # Step 5: Save the file or return the array 
  if (is.null(savefile)) {
    return(mask)
  } else {
    #TODO
    ArrayToNc()
  }

}