diff --git a/CHANGELOG b/CHANGELOG index 41756c634261e8959995e73ce6bb55b9ddd088df..74ed4172dcb29a5c80ced82920839bf4f290b75c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,10 +3,12 @@ CHANGELOG ## 0.1.1 - 2025/02/28 - Code refactoring -DRY code +DRY code more tests of the processing data and Grid restructure ### Features - Drying of code, moving duplicated code to Parent class and creating Multiprocessing class +- More tests of the processing data and Grid restructure +- Grids split into base grid rotated and regular files with the ## 0.1.0 - 2025/02/21 - First version of MAPIES diff --git a/mapies/config/satellite_config.yaml b/mapies/config/satellite_config.yaml index b94f53247a4560ddaef13571c76444d03edfb296..36ef5e04732b6184eeea08320309e1a193b31631 100644 --- a/mapies/config/satellite_config.yaml +++ b/mapies/config/satellite_config.yaml @@ -10,22 +10,6 @@ viirs: da: obsid: 34 uncertainty_constants: [0.2, 0.05, 0.02] - grids: - grid_repr: "" # default grid repr - rotated: - centre_lat: 35 - centre_lon: 20 - west: -51 - south: -35 - dlon: 0.1 - dlat: 0.1 - regular: - west: -180 - south: -90 - dlon: 0.1 - dlat: 0.1 - #nlon: 3600 - #nlat: 1800 qa: qa_variables: qa_land: @@ -47,22 +31,6 @@ tropomi: lon_variable: "longitude" lat_variable: "latitude" obs_variable: "nitrogendioxide_tropospheric_column" - grids: - grid_repr: "" # default grid repr - rotated: - centre_lat: 35 - centre_lon: 20 - west: -51 - south: -35 - dlon: 0.1 - dlat: 0.1 - regular: - west: -180 - south: -90 - dlon: 0.1 - dlat: 0.1 - #nlon: 3600 - #nlat: 1800 qa: qa_variables: qa_value: @@ -72,5 +40,20 @@ tropomi: qa_condition: "greater_than_or_equal" # Examples, greater_than, less_than, equal, greater_than_or_equal, less_than_or_equal - +grids: + grid_repr: "" # default grid repr + rotated: + centre_lat: 35 + centre_lon: 20 + west: -51 + south: -35 + dlon: 0.1 + dlat: 0.1 + regular: + west: -180 + south: -90 + dlon: 0.1 + dlat: 0.1 + #nlon: 3600 + #nlat: 1800 diff --git a/mapies/grids/base_grid.py b/mapies/grids/base_grid.py new file mode 100644 index 0000000000000000000000000000000000000000..31ef319fc9a1f245db8541d5db3234ca479e1493 --- /dev/null +++ b/mapies/grids/base_grid.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python +from dataclasses import dataclass +import pandas as pd +import numpy as np +from typing import Tuple, Dict, Optional +from numpy import cos, sin, arctan, pi, nan +import geopandas as gpd +from geopandas import GeoDataFrame +from shapely.geometry import Polygon, Point +from mapies.util.func_tools import * +import logging + +#logger = logging.getLogger(__name__) +#logging.basicConfig(filename='./logs/mapies.log', level=logging.INFO) + + + + +@dataclass +class Grid: + dlon: float = None + dlat: float = None + nlon: float = None + nlat: float = None + west: float = None + south: float = None + centre_lat: Optional[float] = None + centre_lon: Optional[float] = None + + + def calculate_grid_coords(self): + """ + This method does two things: + 1. It calculates the centre values of the grid + 2. Before Calculating the boundary points of the regular grid + + This can then be used in many grid representations, for example in a rotated grid used in the ihouse Monarch model + """ + #logger.info("Creating Grid") + if (self.nlon == None) and (self.nlat == None): + self.nlat = int((abs(self.south) / self.dlat) * 2) + self.nlon = int((abs(self.west) / self.dlon) * 2) + elif (self.dlon == None) and (self.dlat == None): + self.dlat = abs(self.south) * 2 / (self.nlat) + self.dlon = abs(self.west) * 2 / (self.nlon) + else: + print("Enter resolution or number of grid cells") + + self.cenlat = self.south + self.dlat/2 + self.cenlon = self.west + self.dlon/2 + self.center_latitudes = np.linspace(self.cenlat, self.cenlat + self.dlat * (self.nlat) - self.dlat, self.nlat, dtype=float) + self.center_longitudes = np.linspace(self.cenlon, self.cenlon + self.dlon * (self.nlon) - self.dlat, self.nlon, dtype=float) + + counter = 0 + self.coords = np.empty((self.center_latitudes.shape[0] * self.center_longitudes.shape[0], 2)) + for i in range(self.center_longitudes.shape[0]): + for j in range(self.center_latitudes.shape[0]): + self.coords[counter, 0] = self.center_longitudes[i] + self.coords[counter, 1] = self.center_latitudes[j] + counter += 1 + + + self.b_lats = self.create_bounds(self.coords[:, 1], self.dlat, number_vertices=4, inverse=True) + self.b_lons = self.create_bounds(self.coords[:, 0], self.dlon, number_vertices=4) + + return True + + + @staticmethod + def create_bounds(coordinates, inc, number_vertices=2, inverse=False): + """ + Calculate the boundaries of each grid points + """ + # Create new arrays moving the centers half increment less and more. + coords_left = coordinates - inc / 2 + coords_right = coordinates + inc / 2 + + # Defining the number of corners needed. 2 to regular grids and 4 for irregular ones. + if number_vertices == 2: + # Create an array of N arrays of 2 elements to store the floor and the ceil values for each cell + bound_coords = np.dstack((coords_left, coords_right)) + bound_coords = bound_coords.reshape((len(coordinates), number_vertices)) + elif number_vertices == 4: + # Create an array of N arrays of 4 elements to store the corner values for each cell + # It can be stored in clockwise starting form the left-top element, or in inverse mode. + if inverse: + bound_coords = np.dstack((coords_left, coords_left, coords_right, coords_right)) + else: + bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left)) + else: + print('The number of vertices of the boundaries must be 2 or 4.') + return bound_coords + + + def building_grid(self): + """ + Here we build the grid using Shapely polygons. And attribute it to a geopandas dataframe + + This is also where we decide whether to use a regular grid for global or a rotated grid for regional + + """ + + + # May need to add centre lats and centre lons to this dataframe as well + + # Re-shape + aux_b_lats = self.b_lats.squeeze() + aux_b_lons = self.b_lons.squeeze() + + + geometry = [] + # Create one dataframe with 8 columns, 4 points with two coordinates each one + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + self.gdf = GeoDataFrame(index=range(aux_b_lons.shape[0]), geometry=geometry, crs='epsg:4326') + self.gdf["grid_cell"] = ["grid_cell_{}".format(i+1) for i in range(len(self.gdf))] + + # Append the center lat and lon values to this dataframe + self.gdf["lon_cen"] = self.coords[:, 0] + self.gdf["lat_cen"] = self.coords[:, 1] + + + return True diff --git a/mapies/grids/monarch.py b/mapies/grids/monarch.py deleted file mode 100644 index 2134dde1026da7267873ed400334cb962e8c9860..0000000000000000000000000000000000000000 --- a/mapies/grids/monarch.py +++ /dev/null @@ -1,428 +0,0 @@ -#!/usr/bin/env python -from dataclasses import dataclass -import pandas as pd -import numpy as np -from typing import Tuple, Dict, Optional -from numpy import cos, sin, arctan, pi, nan -import geopandas as gpd -from geopandas import GeoDataFrame -from shapely.geometry import Polygon, Point -from mapies.util.func_tools import * -import logging - -#logger = logging.getLogger(__name__) -#logging.basicConfig(filename='./logs/mapies.log', level=logging.INFO) - - -def regridding_function(r, lon, lat, obs, obserr=None): - - # If obserr is selected return that - if obserr is not None: - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) - return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) - return lon_agg, lat_agg, obs_agg, obserr_agg, count_obs - else: - raise ValueError("Invalid grid representation") - - else: - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) - return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) - return lon_agg, lat_agg, obs_agg, count_obs - else: - raise ValueError("Invalid grid representation") - -def geo_to_rot(lons, lats, centre_lon: float, centre_lat: float): - """ - Rotating coordinates from cartesian lat/lon to rotated rlon/rlat - """ - distance_lons = np.radians(lons - centre_lon) - lons = np.radians(lons) - lats = np.radians(lats) - centre_lon = np.radians(centre_lon) - centre_lat = np.radians(centre_lat) - - x = cos(centre_lat) * sin(lats) - sin(centre_lat) * cos(lats) * cos(distance_lons) - y = cos(lats) * sin(distance_lons) - z = cos(centre_lat) * cos(lats) * cos(distance_lons) + sin(centre_lat) * sin(lats) - # Arctan2 used - # Explanation of the difference https://geo.libretexts.org/Courses/University_of_California_Davis/GEL_056%3A_Introduction_to_Geophysics/Geophysics_is_everywhere_in_geology.../zz%3A_Back_Matter/Arctan_vs_Arctan2 - rlon = np.arctan2(y, z) - rlat = np.arcsin(x) - # Convert back to degrees - rlon = np.degrees(rlon) - rlat = np.degrees(rlat) - return rlon, rlat - - -@dataclass -class Grid: - dlon: float = None - dlat: float = None - nlon: float = None - nlat: float = None - west: float = None - south: float = None - centre_lat: Optional[float] = None - centre_lon: Optional[float] = None - - - def calculate_grid_coords(self): - """ - This method does two things: - 1. It calculates the centre values of the grid - 2. Before Calculating the boundary points of the regular grid - - This can then be used in many grid representations, for example in a rotated grid used in the ihouse Monarch model - """ - #logger.info("Creating Grid") - if (self.nlon == None) and (self.nlat == None): - self.nlat = int((abs(self.south) / self.dlat) * 2) - self.nlon = int((abs(self.west) / self.dlon) * 2) - elif (self.dlon == None) and (self.dlat == None): - self.dlat = abs(self.south) * 2 / (self.nlat) - self.dlon = abs(self.west) * 2 / (self.nlon) - else: - print("Enter resolution or number of grid cells") - - self.cenlat = self.south + self.dlat/2 - self.cenlon = self.west + self.dlon/2 - self.center_latitudes = np.linspace(self.cenlat, self.cenlat + self.dlat * (self.nlat) - self.dlat, self.nlat, dtype=float) - self.center_longitudes = np.linspace(self.cenlon, self.cenlon + self.dlon * (self.nlon) - self.dlat, self.nlon, dtype=float) - - counter = 0 - self.coords = np.empty((self.center_latitudes.shape[0] * self.center_longitudes.shape[0], 2)) - for i in range(self.center_longitudes.shape[0]): - for j in range(self.center_latitudes.shape[0]): - self.coords[counter, 0] = self.center_longitudes[i] - self.coords[counter, 1] = self.center_latitudes[j] - counter += 1 - - - self.b_lats = self.create_bounds(self.coords[:, 1], self.dlat, number_vertices=4, inverse=True) - self.b_lons = self.create_bounds(self.coords[:, 0], self.dlon, number_vertices=4) - - return True - - - @staticmethod - def create_bounds(coordinates, inc, number_vertices=2, inverse=False): - """ - Calculate the boundaries of each grid points - """ - # Create new arrays moving the centers half increment less and more. - coords_left = coordinates - inc / 2 - coords_right = coordinates + inc / 2 - - # Defining the number of corners needed. 2 to regular grids and 4 for irregular ones. - if number_vertices == 2: - # Create an array of N arrays of 2 elements to store the floor and the ceil values for each cell - bound_coords = np.dstack((coords_left, coords_right)) - bound_coords = bound_coords.reshape((len(coordinates), number_vertices)) - elif number_vertices == 4: - # Create an array of N arrays of 4 elements to store the corner values for each cell - # It can be stored in clockwise starting form the left-top element, or in inverse mode. - if inverse: - bound_coords = np.dstack((coords_left, coords_left, coords_right, coords_right)) - else: - bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left)) - else: - print('The number of vertices of the boundaries must be 2 or 4.') - return bound_coords - - - def building_grid(self): - """ - Here we build the grid using Shapely polygons. And attribute it to a geopandas dataframe - - This is also where we decide whether to use a regular grid for global or a rotated grid for regional - - """ - - - # May need to add centre lats and centre lons to this dataframe as well - - # Re-shape - aux_b_lats = self.b_lats.squeeze() - aux_b_lons = self.b_lons.squeeze() - - - geometry = [] - # Create one dataframe with 8 columns, 4 points with two coordinates each one - for i in range(aux_b_lons.shape[0]): - geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), - (aux_b_lons[i, 1], aux_b_lats[i, 1]), - (aux_b_lons[i, 2], aux_b_lats[i, 2]), - (aux_b_lons[i, 3], aux_b_lats[i, 3]), - (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - - self.gdf = GeoDataFrame(index=range(aux_b_lons.shape[0]), geometry=geometry, crs='epsg:4326') - self.gdf["grid_cell"] = ["grid_cell_{}".format(i+1) for i in range(len(self.gdf))] - - # Append the center lat and lon values to this dataframe - self.gdf["lon_cen"] = self.coords[:, 0] - self.gdf["lat_cen"] = self.coords[:, 1] - - - return True - - -@dataclass -class RegularGrid(Grid): - """ - Regridding if the Grid we want is regular, but also want to aggregate the observations to a regular grid. - - Useful for a global grid representation - """ - projection: str = "regular" - - def __post_init__(self): - self.calculate_grid_coords() - - def aggregate(self, lon, lat, obs, obserr=None): - """ - Aggregate the observations to the grid cells - - Parameters: - lon, lat, obs, obserr (optional) -- Longitude, Latitude, Observations, and Errors - - Returns: - Aggregated lon, lat, obs values - """ - - # Make sure lon and lat are within the borders of the grid - # Filter out values that are outside the grid bounds - mask = (lon >= self.west) & (lon <= self.west + (self.dlon * (self.nlon - 1))) & \ - (lat >= self.south) & (lat <= self.south + (self.dlat * (self.nlat - 1))) & \ - (~np.isnan(obs)) - - - # Apply the mask to filter out the out-of-bounds points - lon = lon[mask] - lat = lat[mask] - obs = obs[mask] - if obserr is not None: - obserr = obserr[mask] - - # Vectorized calculation of grid indices - lon_idx = np.searchsorted(self.center_longitudes, lon) - 1 # Index of longitude grid - lat_idx = np.searchsorted(self.center_latitudes, lat) - 1 # Index of latitude grid - - # Ensure indices are within bounds - lon_idx = np.clip(lon_idx, 0, self.nlon - 1) - lat_idx = np.clip(lat_idx, 0, self.nlat - 1) - - # Initialize arrays for aggregation - aggregated_obs = np.zeros((self.nlat, self.nlon)) - aggregated_lon = np.zeros((self.nlat, self.nlon)) - aggregated_lat = np.zeros((self.nlat, self.nlon)) - aggregated_obs_err = np.zeros((self.nlat, self.nlon)) if obserr is not None else None - count_obs = np.zeros((self.nlat, self.nlon), dtype=int) - - # Efficient accumulation using np.add.at - np.add.at(aggregated_obs, (lat_idx, lon_idx), obs) # Accumulate observations - np.add.at(aggregated_lon, (lat_idx, lon_idx), lon) # Accumulate lon values - np.add.at(aggregated_lat, (lat_idx, lon_idx), lat) # Accumulate lat values - np.add.at(count_obs, (lat_idx, lon_idx), 1) # Count observations per grid cell - - if obserr is not None: - np.add.at(aggregated_obs_err, (lat_idx, lon_idx), obserr) # Accumulate errors - - # Compute averages by dividing by counts - non_zero_counts = count_obs > 0 - aggregated_obs[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_lon[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_lat[non_zero_counts] /= count_obs[non_zero_counts] - - if aggregated_obs_err is not None: - aggregated_obs_err[non_zero_counts] /= count_obs[non_zero_counts] - - # TODO: Add a function that takes the center or aggreagated value - # Return the results - lon_out = aggregated_lon.flatten() #self.center_longitudes - lat_out = aggregated_lat.flatten() #self.center_latitudes - obs_out = aggregated_obs.flatten() - count_obs = count_obs.flatten() - - if aggregated_obs_err is not None: - obs_err_out = aggregated_obs_err.flatten() - return lon_out, lat_out, obs_out, obs_err_out, count_obs - else: - return lon_out, lat_out, obs_out, count_obs - - -@dataclass -class RotatedGrid(Grid): - """ - Regridding if the Grid we want is regular, but also want to aggregate the observations to a regular grid. - - Useful for a global grid representation - """ - - projection: str = "rotated" - - def __post_init__(self): - self.calculate_grid_coords() - - def aggregate(self, lon, lat, obs, obserr=None): - """ - Aggregate the observations to the grid cells - - Parameters: - lon, lat, obs, obserr (optional) -- Longitude, Latitude, Observations, and Errors - - Returns: - Aggregated lon, lat, rlon, rlat, obs values - """ - # Perform the rotation - rlon, rlat = geo_to_rot(lon, lat, centre_lon=self.centre_lon, centre_lat=self.centre_lat) - - # Make sure rlon and rlat are within the borders of the grid - # Filter out values that are outside the grid bounds - mask = (rlon >= self.west) & (rlon <= self.west + (self.dlon * (self.nlon - 1))) & \ - (rlat >= self.south) & (rlat <= self.south + (self.dlat * (self.nlat - 1))) & \ - (~np.isnan(obs)) - - - # Apply the mask to filter out the out-of-bounds points - rlon = rlon[mask] - rlat = rlat[mask] - lon = lon[mask] - lat = lat[mask] - obs = obs[mask] - if obserr is not None: - obserr = obserr[mask] - - - # Vectorized calculation of grid indices - rlon_idx = np.searchsorted(self.center_longitudes, rlon) - 1 # Index of longitude grid - rlat_idx = np.searchsorted(self.center_latitudes, rlat) - 1 # Index of latitude grid - - - # Ensure indices are within bounds - rlon_idx = np.clip(rlon_idx, 0, self.nlon - 1) - rlat_idx = np.clip(rlat_idx, 0, self.nlat - 1) - - - # Initialize arrays for aggregation - aggregated_obs = np.zeros((self.nlat, self.nlon)) - aggregated_lon = np.zeros((self.nlat, self.nlon)) - aggregated_lat = np.zeros((self.nlat, self.nlon)) - aggregated_rlon = np.zeros((self.nlat, self.nlon)) - aggregated_rlat = np.zeros((self.nlat, self.nlon)) - aggregated_obs_err = np.ma.zeros((self.nlat, self.nlon)) if obserr is not None else None - count_obs = np.zeros((self.nlat, self.nlon), dtype=int) - - - - # Efficient accumulation using np.add.at - np.add.at(aggregated_obs, (rlat_idx, rlon_idx), obs) # Accumulate observations - np.add.at(aggregated_lon, (rlat_idx, rlon_idx), lon) # Accumulate lon values - np.add.at(aggregated_lat, (rlat_idx, rlon_idx), lat) # Accumulate lat values - np.add.at(aggregated_rlon, (rlat_idx, rlon_idx), rlon) # Accumulate lon values - np.add.at(aggregated_rlat, (rlat_idx, rlon_idx), rlat) # Accumulate lat values - np.add.at(count_obs, (rlat_idx, rlon_idx), 1) # Count observations per grid cell - - - if obserr is not None: - np.add.at(aggregated_obs_err, (rlat_idx, rlon_idx), obserr) # Accumulate errors - - - # Compute averages by dividing by counts - non_zero_counts = count_obs > 0 - aggregated_obs[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_lon[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_lat[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_rlon[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_rlat[non_zero_counts] /= count_obs[non_zero_counts] - - if aggregated_obs_err is not None: - aggregated_obs_err[non_zero_counts] /= count_obs[non_zero_counts] - - # Return the results - lon_out = aggregated_lon.flatten() - lat_out = aggregated_lat.flatten() - rlon_out = aggregated_lon.flatten() #self.center_longitudes - rlat_out = aggregated_lat.flatten() #self.center_latitudess - obs_out = aggregated_obs.flatten() - count_obs = count_obs.flatten() - - - if aggregated_obs_err is not None: - obs_err_out = aggregated_obs_err.flatten() - return lon_out, lat_out, rlon_out, rlat_out, obs_out, obs_err_out, count_obs - else: - return lon_out, lat_out, rlon_out, rlat_out, obs_out, count_obs - - -@dataclass -class IrregularRotatedGrid(Grid): - - projection: str = "rotated" - - def __post_init__(self): - self.calculate_grid_coords() - self.building_grid() - - - def aggregate(self, lon, lat, obs, obserr=None): - """ - Aggregate - - Parameters: - lon, lat, obs values Optional obserr - - Returns: - lon, lat, rlon, rlat, obs values - """ - # Rotate the observations - rlon, rlat = geo_to_rot(lon, lat, centre_lon=self.centre_lon, centre_lat=self.centre_lat) - - if obserr is not None: - df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "rlon":rlon, "rlat":rlat, "obserr": obserr}) - else: - df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "rlon":rlon, "rlat":rlat}) - - df['coords'] = list(zip(df['rlon'],df['rlat'])) - df['coords'] = df['coords'].apply(Point) - - - # Create a geodataframe of the rotated obseravtional lon/ lat points - points = gpd.GeoDataFrame(df, geometry='coords', crs='epsg:4326') - - - # Geopandas sjoin calculates if these shapely points are within the shapely grid pologons - try: - gdf = self.gdf.sjoin(points, how="left") # , predicate="within") - except TypeError: - print("Empty dataframe messing up some versions of geopandas") - gdf = pd.DataFrame(columns=[ - 'obstype', 'obslon', 'obslat', 'obsn', 'obsid', 'obslev', 'obs', - 'obserr', 'time', 'lon', 'lat', 'coords', 'index_right', - 'lon_cen', 'lat_cen', 'grid_cell'], - ) - # print(f'gdf: {gdf}', flush=True) - - # Need to also return the original lat lon - # Calculate the mean of all points within each grid cell - counts = gdf[["grid_cell","obs"]].groupby("grid_cell").count().reset_index() - gdf = gdf[["grid_cell", "lon_cen", "lat_cen", "obs", "lon", "lat", "rlon", "rlat"]].groupby("grid_cell").mean().reset_index() - - # Remove nan values - gdf = gdf.dropna(subset="obs") - counts = counts.dropna(subset="obs") - - - # Return the data we are interested in - if obserr is not None: - return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, gdf["obserr"].values, counts["obs"].values - else: - return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, counts["obs"].values - - - \ No newline at end of file diff --git a/mapies/grids/regular.py b/mapies/grids/regular.py new file mode 100644 index 0000000000000000000000000000000000000000..0ca54c99bfaf9f1e3e566829e071d7cac5c1a4da --- /dev/null +++ b/mapies/grids/regular.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +from dataclasses import dataclass +import pandas as pd +import numpy as np +from typing import Tuple, Dict, Optional +from mapies.util.func_tools import * +from mapies.grids.base_grid import Grid +import logging + + + +@dataclass +class RegularGrid(Grid): + """ + Regridding if the Grid we want is regular, but also want to aggregate the observations to a regular grid. + + Useful for a global grid representation + """ + projection: str = "regular" + + def __post_init__(self): + self.calculate_grid_coords() + + def aggregate(self, lon, lat, obs, obserr=None): + """ + Aggregate the observations to the grid cells + + Parameters: + lon, lat, obs, obserr (optional) -- Longitude, Latitude, Observations, and Errors + + Returns: + Aggregated lon, lat, obs values + """ + + # Make sure lon and lat are within the borders of the grid + # Filter out values that are outside the grid bounds + mask = (lon >= self.west) & (lon <= self.west + (self.dlon * (self.nlon - 1))) & \ + (lat >= self.south) & (lat <= self.south + (self.dlat * (self.nlat - 1))) & \ + (~np.isnan(obs)) + + + # Apply the mask to filter out the out-of-bounds points + lon = lon[mask] + lat = lat[mask] + obs = obs[mask] + if obserr is not None: + obserr = obserr[mask] + + # Vectorized calculation of grid indices + lon_idx = np.searchsorted(self.center_longitudes, lon) - 1 # Index of longitude grid + lat_idx = np.searchsorted(self.center_latitudes, lat) - 1 # Index of latitude grid + + # Ensure indices are within bounds + lon_idx = np.clip(lon_idx, 0, self.nlon - 1) + lat_idx = np.clip(lat_idx, 0, self.nlat - 1) + + # Initialize arrays for aggregation + aggregated_obs = np.zeros((self.nlat, self.nlon)) + aggregated_lon = np.zeros((self.nlat, self.nlon)) + aggregated_lat = np.zeros((self.nlat, self.nlon)) + aggregated_obs_err = np.zeros((self.nlat, self.nlon)) if obserr is not None else None + count_obs = np.zeros((self.nlat, self.nlon), dtype=int) + + # Efficient accumulation using np.add.at + np.add.at(aggregated_obs, (lat_idx, lon_idx), obs) # Accumulate observations + np.add.at(aggregated_lon, (lat_idx, lon_idx), lon) # Accumulate lon values + np.add.at(aggregated_lat, (lat_idx, lon_idx), lat) # Accumulate lat values + np.add.at(count_obs, (lat_idx, lon_idx), 1) # Count observations per grid cell + + if obserr is not None: + np.add.at(aggregated_obs_err, (lat_idx, lon_idx), obserr) # Accumulate errors + + # Compute averages by dividing by counts + non_zero_counts = count_obs > 0 + aggregated_obs[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lon[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lat[non_zero_counts] /= count_obs[non_zero_counts] + + if aggregated_obs_err is not None: + aggregated_obs_err[non_zero_counts] /= count_obs[non_zero_counts] + + # TODO: Add a function that takes the center or aggreagated value + # Return the results + lon_out = aggregated_lon.flatten() #self.center_longitudes + lat_out = aggregated_lat.flatten() #self.center_latitudes + obs_out = aggregated_obs.flatten() + count_obs = count_obs.flatten() + + if aggregated_obs_err is not None: + obs_err_out = aggregated_obs_err.flatten() + return lon_out, lat_out, obs_out, obs_err_out, count_obs + else: + return lon_out, lat_out, obs_out, count_obs diff --git a/mapies/grids/rotated.py b/mapies/grids/rotated.py new file mode 100644 index 0000000000000000000000000000000000000000..5cfdaa2fbf7fcfa1b5da8519a4e99ea96db11076 --- /dev/null +++ b/mapies/grids/rotated.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python +from dataclasses import dataclass +import pandas as pd +import numpy as np +from typing import Tuple, Dict, Optional +from numpy import cos, sin, arctan, pi, nan +import geopandas as gpd +from geopandas import GeoDataFrame +from shapely.geometry import Polygon, Point +from mapies.util.func_tools import * +from mapies.grids.base_grid import Grid +import logging + + +def geo_to_rot(lons, lats, centre_lon: float, centre_lat: float): + """ + Rotating coordinates from cartesian lat/lon to rotated rlon/rlat + """ + distance_lons = np.radians(lons - centre_lon) + lons = np.radians(lons) + lats = np.radians(lats) + centre_lon = np.radians(centre_lon) + centre_lat = np.radians(centre_lat) + + x = cos(centre_lat) * sin(lats) - sin(centre_lat) * cos(lats) * cos(distance_lons) + y = cos(lats) * sin(distance_lons) + z = cos(centre_lat) * cos(lats) * cos(distance_lons) + sin(centre_lat) * sin(lats) + # Arctan2 used + # Explanation of the difference https://geo.libretexts.org/Courses/University_of_California_Davis/GEL_056%3A_Introduction_to_Geophysics/Geophysics_is_everywhere_in_geology.../zz%3A_Back_Matter/Arctan_vs_Arctan2 + rlon = np.arctan2(y, z) + rlat = np.arcsin(x) + # Convert back to degrees + rlon = np.degrees(rlon) + rlat = np.degrees(rlat) + return rlon, rlat + + +@dataclass +class RotatedGrid(Grid): + """ + Regridding if the Grid we want is regular, but also want to aggregate the observations to a regular grid. + + Useful for a global grid representation + """ + + projection: str = "rotated" + + def __post_init__(self): + self.calculate_grid_coords() + + def aggregate(self, lon, lat, obs, obserr=None): + """ + Aggregate the observations to the grid cells + + Parameters: + lon, lat, obs, obserr (optional) -- Longitude, Latitude, Observations, and Errors + + Returns: + Aggregated lon, lat, rlon, rlat, obs values + """ + # Perform the rotation + rlon, rlat = geo_to_rot(lon, lat, centre_lon=self.centre_lon, centre_lat=self.centre_lat) + + # Make sure rlon and rlat are within the borders of the grid + # Filter out values that are outside the grid bounds + mask = (rlon >= self.west) & (rlon <= self.west + (self.dlon * (self.nlon - 1))) & \ + (rlat >= self.south) & (rlat <= self.south + (self.dlat * (self.nlat - 1))) & \ + (~np.isnan(obs)) + + + # Apply the mask to filter out the out-of-bounds points + rlon = rlon[mask] + rlat = rlat[mask] + lon = lon[mask] + lat = lat[mask] + obs = obs[mask] + if obserr is not None: + obserr = obserr[mask] + + + # Vectorized calculation of grid indices + rlon_idx = np.searchsorted(self.center_longitudes, rlon) - 1 # Index of longitude grid + rlat_idx = np.searchsorted(self.center_latitudes, rlat) - 1 # Index of latitude grid + + + # Ensure indices are within bounds + rlon_idx = np.clip(rlon_idx, 0, self.nlon - 1) + rlat_idx = np.clip(rlat_idx, 0, self.nlat - 1) + + + # Initialize arrays for aggregation + aggregated_obs = np.zeros((self.nlat, self.nlon)) + aggregated_lon = np.zeros((self.nlat, self.nlon)) + aggregated_lat = np.zeros((self.nlat, self.nlon)) + aggregated_rlon = np.zeros((self.nlat, self.nlon)) + aggregated_rlat = np.zeros((self.nlat, self.nlon)) + aggregated_obs_err = np.ma.zeros((self.nlat, self.nlon)) if obserr is not None else None + count_obs = np.zeros((self.nlat, self.nlon), dtype=int) + + + + # Efficient accumulation using np.add.at + np.add.at(aggregated_obs, (rlat_idx, rlon_idx), obs) # Accumulate observations + np.add.at(aggregated_lon, (rlat_idx, rlon_idx), lon) # Accumulate lon values + np.add.at(aggregated_lat, (rlat_idx, rlon_idx), lat) # Accumulate lat values + np.add.at(aggregated_rlon, (rlat_idx, rlon_idx), rlon) # Accumulate lon values + np.add.at(aggregated_rlat, (rlat_idx, rlon_idx), rlat) # Accumulate lat values + np.add.at(count_obs, (rlat_idx, rlon_idx), 1) # Count observations per grid cell + + + if obserr is not None: + np.add.at(aggregated_obs_err, (rlat_idx, rlon_idx), obserr) # Accumulate errors + + + # Compute averages by dividing by counts + non_zero_counts = count_obs > 0 + aggregated_obs[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lon[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lat[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_rlon[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_rlat[non_zero_counts] /= count_obs[non_zero_counts] + + if aggregated_obs_err is not None: + aggregated_obs_err[non_zero_counts] /= count_obs[non_zero_counts] + + # Return the results + lon_out = aggregated_lon.flatten() + lat_out = aggregated_lat.flatten() + rlon_out = aggregated_lon.flatten() #self.center_longitudes + rlat_out = aggregated_lat.flatten() #self.center_latitudess + obs_out = aggregated_obs.flatten() + count_obs = count_obs.flatten() + + + if aggregated_obs_err is not None: + obs_err_out = aggregated_obs_err.flatten() + return lon_out, lat_out, rlon_out, rlat_out, obs_out, obs_err_out, count_obs + else: + return lon_out, lat_out, rlon_out, rlat_out, obs_out, count_obs + + +@dataclass +class CAMSGrid(RotatedGrid): + dlon: float = 0.2 + dlat: float = 0.2 + west: float = -35. + south: float = -27. + centre_lat: float = 35. + centre_lon: float = 15. + projection: str = "cams" + +@dataclass +class BDRCGrid(RotatedGrid): + dlon: float = 0.1 + dlat: float = 0.1 + west: float = -51. + south: float = -35. + centre_lat: float = 35. + centre_lon: float = 20. + projection: str = "bdrc" + + + +@dataclass +class IrregularRotatedGrid(Grid): + + projection: str = "rotated" + + def __post_init__(self): + self.calculate_grid_coords() + self.building_grid() + + + def aggregate(self, lon, lat, obs, obserr=None): + """ + Aggregate + + Parameters: + lon, lat, obs values Optional obserr + + Returns: + lon, lat, rlon, rlat, obs values + """ + # Rotate the observations + rlon, rlat = geo_to_rot(lon, lat, centre_lon=self.centre_lon, centre_lat=self.centre_lat) + + if obserr is not None: + df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "rlon":rlon, "rlat":rlat, "obserr": obserr}) + else: + df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "rlon":rlon, "rlat":rlat}) + + df['coords'] = list(zip(df['rlon'],df['rlat'])) + df['coords'] = df['coords'].apply(Point) + + + # Create a geodataframe of the rotated obseravtional lon/ lat points + points = gpd.GeoDataFrame(df, geometry='coords', crs='epsg:4326') + + + # Geopandas sjoin calculates if these shapely points are within the shapely grid pologons + try: + gdf = self.gdf.sjoin(points, how="left") # , predicate="within") + except TypeError: + print("Empty dataframe messing up some versions of geopandas") + gdf = pd.DataFrame(columns=[ + 'obstype', 'obslon', 'obslat', 'obsn', 'obsid', 'obslev', 'obs', + 'obserr', 'time', 'lon', 'lat', 'coords', 'index_right', + 'lon_cen', 'lat_cen', 'grid_cell'], + ) + # print(f'gdf: {gdf}', flush=True) + + # Need to also return the original lat lon + # Calculate the mean of all points within each grid cell + counts = gdf[["grid_cell","obs"]].groupby("grid_cell").count().reset_index() + gdf = gdf[["grid_cell", "lon_cen", "lat_cen", "obs", "lon", "lat", "rlon", "rlat"]].groupby("grid_cell").mean().reset_index() + + # Remove nan values + gdf = gdf.dropna(subset="obs") + counts = counts.dropna(subset="obs") + + + # Return the data we are interested in + if obserr is not None: + return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, gdf["obserr"].values, counts["obs"].values + else: + return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, counts["obs"].values + + + \ No newline at end of file diff --git a/mapies/mapies.py b/mapies/mapies.py index 5b840ed28c4fc150d282fd3854af1a2fc5b527d5..0b4577689f59710a070ddc3e7e84359e68f5d406 100644 --- a/mapies/mapies.py +++ b/mapies/mapies.py @@ -4,7 +4,8 @@ from datetime import datetime from mapies.util.func_tools import * -from mapies.grids.monarch import RotatedGrid, IrregularRotatedGrid, RegularGrid +from mapies.grids.rotated import RotatedGrid, IrregularRotatedGrid, CAMSGrid, BDRCGrid +from mapies.grids.regular import RegularGrid import time import logging import numpy as np @@ -45,7 +46,9 @@ class MAPIES: self.grid_dict = { "rotated":RotatedGrid, "regular":RegularGrid, - "ireg_rotated":IrregularRotatedGrid + "ireg_rotated":IrregularRotatedGrid, + "cams":CAMSGrid, + "bdrc":BDRCGrid } @@ -81,7 +84,7 @@ class MAPIES: self.obs_var = variable_dict["obs_variable"] - grid_dict = self.config[self.datatype]["grids"] + grid_dict = self.config["grids"] grid_repr = kwargs.get("grid_repr") if grid_repr: self.grid_repr = grid_repr @@ -90,8 +93,11 @@ class MAPIES: if self.grid_repr: # Cread grid object if grid repr is selected in the input - self.grid_config = grid_dict[self.grid_repr] - self.grid = self.grid_dict[self.grid_repr](**self.grid_config) + if self.grid_repr in grid_dict: + self.grid_config = grid_dict[self.grid_repr] + self.grid = self.grid_dict[self.grid_repr](**self.grid_config) + else: + self.grid = self.grid_dict[self.grid_repr]() print(self.grid) diff --git a/mapies/tests/test_monarch_grid.py b/mapies/tests/test_grids.py similarity index 92% rename from mapies/tests/test_monarch_grid.py rename to mapies/tests/test_grids.py index 12240f54ba9d5d9f29983ec9a21393a76fac1121..dd907897ef9a6bcd6a2b830112865a679607b130 100644 --- a/mapies/tests/test_monarch_grid.py +++ b/mapies/tests/test_grids.py @@ -3,11 +3,12 @@ import pytest import pandas as pd import numpy as np -from mapies.grids.monarch import RegularGrid, RotatedGrid, IrregularRotatedGrid, geo_to_rot +from mapies.grids.rotated import RotatedGrid, IrregularRotatedGrid, geo_to_rot +from mapies.grids.regular import RegularGrid @pytest.mark.parametrize( - "test_input,expected", + "test_input,expected", [ ((np.array([-10, -10, -10]), np.array([-10, -10, -10]), np.array([1, 2, 3])), (np.array([-10., 0., 0., 0.]), np.array([-10., 0., 0., 0.]), np.array([2., 0., 0., 0.]), np.array([[3., 0., 0., 0.]]))), diff --git a/mapies/util/multiprocessing.py b/mapies/util/multiprocessing.py index 3ce5bed9164dadd89d2c17bcdbcc4eb72379873f..65204c94a360614bea2c481cd9915b87e6c67a1e 100644 --- a/mapies/util/multiprocessing.py +++ b/mapies/util/multiprocessing.py @@ -8,11 +8,14 @@ import multiprocessing from dataclasses import dataclass from numpy.typing import DTypeLike, NDArray from mapies.util.func_tools import * -from mapies.grids.monarch import Grid, RotatedGrid, RegularGrid, regridding_function +from mapies.grids.base_grid import Grid +from mapies.grids.rotated import RotatedGrid +from mapies.grids.regular import RegularGrid + def process_single_file_tropomi( - r, + grid, file, obs_var, lat_var, @@ -66,23 +69,23 @@ def process_single_file_tropomi( if not flag: try: # Regrid and aggregate - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = regridding_function(r, lon, lat, obs) - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, count_obs = regridding_function(r, lon, lat, obs) + if isinstance(grid, RotatedGrid): + lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = grid.aggregate(lon, lat, obs) + elif isinstance(grid, RegularGrid): + lon_agg, lat_agg, obs_agg, count_obs = grid.aggregate(lon, lat, obs) ds.close() return obs_agg, lat_agg, lon_agg, count_obs except Exception as e: - print(f"Error performing regridding for file {file}: {e}") + print(f"Error performing regridding for file {file}: {e}") return None else: ds.close() - return obs, lat, lon, time_values + return obs, lat, lon, time_values def process_single_file_viirs( - r, + grid, file, obs_var, lat_var, @@ -136,10 +139,10 @@ def process_single_file_viirs( if not flag: try: # Regrid and aggregate - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = regridding_function(r, lon, lat, obs) - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, count_obs = regridding_function(r, lon, lat, obs) + if isinstance(grid, RotatedGrid): + lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = grid.aggregate(lon, lat, obs) + elif isinstance(grid, RegularGrid): + lon_agg, lat_agg, obs_agg, count_obs = grid.aggregate(lon, lat, obs) ds.close() return obs_agg, lat_agg, lon_agg, count_obs except Exception as e: @@ -147,7 +150,7 @@ def process_single_file_viirs( return None else: ds.close() - return obs, lat, lon, time_values + return obs, lat, lon, time_values diff --git a/mapies/viirs.py b/mapies/viirs.py index 8629f8351d5a06e7fc95bc0cce5d37fa383b8c90..17ab2c160d3d53cb6e5fd8e22c5ce4a1cc05dd29 100644 --- a/mapies/viirs.py +++ b/mapies/viirs.py @@ -8,10 +8,10 @@ import dask.array as da import numpy as np import pandas as pd -import cartopy.crs as ccrs from .mapies import MAPIES +from mapies.grids.regular import RegularGrid +from mapies.grids.rotated import RotatedGrid from mapies.util.func_tools import * -from mapies.grids.monarch import regridding_function from mapies.util.variables_names import VARS_ATTRIBUTES from pathlib import Path @@ -23,6 +23,7 @@ os.environ["OMP_NUM_THREADS"] = "8" #logging.basicConfig(filename='../logs/mapies.log', level=logging.INFO) + class VIIRS(MAPIES): """ Class for VIIRS specific data @@ -198,10 +199,10 @@ class VIIRS(MAPIES): # Run aggregation with grid representation try: - if self.grid_repr == "rotated": - lon, lat, rlon, rlat, obs, obserr, count_obs = regridding_function(self.grid, lon, lat, obs, obserr) - elif self.grid_repr == "regular": - lon, lat, obs, obserr, count_obs = regridding_function(self.grid, lon, lat, obs, obserr) + if isinstance(self.grid, RotatedGrid): + lon, lat, rlon, rlat, obs, obserr, count_obs = self.grid.aggregate(lon, lat, obs, obserr) + elif isinstance(self.grid, RegularGrid): + lon, lat, obs, obserr, count_obs = self.grid.aggregate(lon, lat, obs, obserr) except ValueError: raise Exception("Have you specified a grid_repr") diff --git a/run/run_tropomi.py b/run/run_tropomi.py index f818894351fe6a42c646c6fd0fc8996fc2f2919d..fd74d50ee01a016b8def005fdf99292e9f3d1e8d 100644 --- a/run/run_tropomi.py +++ b/run/run_tropomi.py @@ -12,7 +12,7 @@ if __name__ == "__main__": start_time = time.time() - c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="regular") + c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="bdrc") c.gather_nc_files() c.process_avg_data(monthly_avg=False, batch_size = 14, apply_qa=True, geospatial_crop=[[-40, 60], [0, 70]]) diff --git a/run/run_viirs.py b/run/run_viirs.py index b644c4b41af6af38fafb6564e9058edc529cf892..d4ced0cd7adce443dbee19923ef0af653636763a 100644 --- a/run/run_viirs.py +++ b/run/run_viirs.py @@ -6,21 +6,20 @@ import time if __name__ == "__main__": # Change these variables # 00:30 to 02:36 from 2024-01-01 to 2024-01-03 - start_date = "202401310000" - end_date = "202401312359" + start_date = "202401010000" + end_date = "202401012359" - # outdir="/home/cmeikle/Projects/data/" - # indir="/home/cmeikle/Projects/data/VIIRS" + outdir="/home/cmeikle/Projects/data/" + indir="/home/cmeikle/Projects/data/VIIRS" + - outdir="/home/cgile/Documents/mapies/figures" - indir="/home/cgile/Documents/mapies/VIIRS" start_time = time.time() - c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated") + c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="cams") c.gather_nc_files()