From 3d03b8d56663b7d84534100ae598eb5538ea94c8 Mon Sep 17 00:00:00 2001 From: Calum Meikle Date: Tue, 4 Mar 2025 16:55:31 +0100 Subject: [PATCH 1/4] Splitting of grid files --- mapies/grids/base_grid.py | 128 ++++++++++ mapies/grids/monarch.py | 428 --------------------------------- mapies/grids/regular.py | 93 +++++++ mapies/grids/rotated.py | 219 +++++++++++++++++ mapies/mapies.py | 3 +- mapies/util/multiprocessing.py | 30 ++- mapies/viirs.py | 29 ++- 7 files changed, 498 insertions(+), 432 deletions(-) create mode 100644 mapies/grids/base_grid.py delete mode 100644 mapies/grids/monarch.py create mode 100644 mapies/grids/regular.py create mode 100644 mapies/grids/rotated.py diff --git a/mapies/grids/base_grid.py b/mapies/grids/base_grid.py new file mode 100644 index 00000000..31ef319f --- /dev/null +++ b/mapies/grids/base_grid.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python +from dataclasses import dataclass +import pandas as pd +import numpy as np +from typing import Tuple, Dict, Optional +from numpy import cos, sin, arctan, pi, nan +import geopandas as gpd +from geopandas import GeoDataFrame +from shapely.geometry import Polygon, Point +from mapies.util.func_tools import * +import logging + +#logger = logging.getLogger(__name__) +#logging.basicConfig(filename='./logs/mapies.log', level=logging.INFO) + + + + +@dataclass +class Grid: + dlon: float = None + dlat: float = None + nlon: float = None + nlat: float = None + west: float = None + south: float = None + centre_lat: Optional[float] = None + centre_lon: Optional[float] = None + + + def calculate_grid_coords(self): + """ + This method does two things: + 1. It calculates the centre values of the grid + 2. Before Calculating the boundary points of the regular grid + + This can then be used in many grid representations, for example in a rotated grid used in the ihouse Monarch model + """ + #logger.info("Creating Grid") + if (self.nlon == None) and (self.nlat == None): + self.nlat = int((abs(self.south) / self.dlat) * 2) + self.nlon = int((abs(self.west) / self.dlon) * 2) + elif (self.dlon == None) and (self.dlat == None): + self.dlat = abs(self.south) * 2 / (self.nlat) + self.dlon = abs(self.west) * 2 / (self.nlon) + else: + print("Enter resolution or number of grid cells") + + self.cenlat = self.south + self.dlat/2 + self.cenlon = self.west + self.dlon/2 + self.center_latitudes = np.linspace(self.cenlat, self.cenlat + self.dlat * (self.nlat) - self.dlat, self.nlat, dtype=float) + self.center_longitudes = np.linspace(self.cenlon, self.cenlon + self.dlon * (self.nlon) - self.dlat, self.nlon, dtype=float) + + counter = 0 + self.coords = np.empty((self.center_latitudes.shape[0] * self.center_longitudes.shape[0], 2)) + for i in range(self.center_longitudes.shape[0]): + for j in range(self.center_latitudes.shape[0]): + self.coords[counter, 0] = self.center_longitudes[i] + self.coords[counter, 1] = self.center_latitudes[j] + counter += 1 + + + self.b_lats = self.create_bounds(self.coords[:, 1], self.dlat, number_vertices=4, inverse=True) + self.b_lons = self.create_bounds(self.coords[:, 0], self.dlon, number_vertices=4) + + return True + + + @staticmethod + def create_bounds(coordinates, inc, number_vertices=2, inverse=False): + """ + Calculate the boundaries of each grid points + """ + # Create new arrays moving the centers half increment less and more. + coords_left = coordinates - inc / 2 + coords_right = coordinates + inc / 2 + + # Defining the number of corners needed. 2 to regular grids and 4 for irregular ones. + if number_vertices == 2: + # Create an array of N arrays of 2 elements to store the floor and the ceil values for each cell + bound_coords = np.dstack((coords_left, coords_right)) + bound_coords = bound_coords.reshape((len(coordinates), number_vertices)) + elif number_vertices == 4: + # Create an array of N arrays of 4 elements to store the corner values for each cell + # It can be stored in clockwise starting form the left-top element, or in inverse mode. + if inverse: + bound_coords = np.dstack((coords_left, coords_left, coords_right, coords_right)) + else: + bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left)) + else: + print('The number of vertices of the boundaries must be 2 or 4.') + return bound_coords + + + def building_grid(self): + """ + Here we build the grid using Shapely polygons. And attribute it to a geopandas dataframe + + This is also where we decide whether to use a regular grid for global or a rotated grid for regional + + """ + + + # May need to add centre lats and centre lons to this dataframe as well + + # Re-shape + aux_b_lats = self.b_lats.squeeze() + aux_b_lons = self.b_lons.squeeze() + + + geometry = [] + # Create one dataframe with 8 columns, 4 points with two coordinates each one + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + self.gdf = GeoDataFrame(index=range(aux_b_lons.shape[0]), geometry=geometry, crs='epsg:4326') + self.gdf["grid_cell"] = ["grid_cell_{}".format(i+1) for i in range(len(self.gdf))] + + # Append the center lat and lon values to this dataframe + self.gdf["lon_cen"] = self.coords[:, 0] + self.gdf["lat_cen"] = self.coords[:, 1] + + + return True diff --git a/mapies/grids/monarch.py b/mapies/grids/monarch.py deleted file mode 100644 index 2134dde1..00000000 --- a/mapies/grids/monarch.py +++ /dev/null @@ -1,428 +0,0 @@ -#!/usr/bin/env python -from dataclasses import dataclass -import pandas as pd -import numpy as np -from typing import Tuple, Dict, Optional -from numpy import cos, sin, arctan, pi, nan -import geopandas as gpd -from geopandas import GeoDataFrame -from shapely.geometry import Polygon, Point -from mapies.util.func_tools import * -import logging - -#logger = logging.getLogger(__name__) -#logging.basicConfig(filename='./logs/mapies.log', level=logging.INFO) - - -def regridding_function(r, lon, lat, obs, obserr=None): - - # If obserr is selected return that - if obserr is not None: - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) - return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) - return lon_agg, lat_agg, obs_agg, obserr_agg, count_obs - else: - raise ValueError("Invalid grid representation") - - else: - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) - return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) - return lon_agg, lat_agg, obs_agg, count_obs - else: - raise ValueError("Invalid grid representation") - -def geo_to_rot(lons, lats, centre_lon: float, centre_lat: float): - """ - Rotating coordinates from cartesian lat/lon to rotated rlon/rlat - """ - distance_lons = np.radians(lons - centre_lon) - lons = np.radians(lons) - lats = np.radians(lats) - centre_lon = np.radians(centre_lon) - centre_lat = np.radians(centre_lat) - - x = cos(centre_lat) * sin(lats) - sin(centre_lat) * cos(lats) * cos(distance_lons) - y = cos(lats) * sin(distance_lons) - z = cos(centre_lat) * cos(lats) * cos(distance_lons) + sin(centre_lat) * sin(lats) - # Arctan2 used - # Explanation of the difference https://geo.libretexts.org/Courses/University_of_California_Davis/GEL_056%3A_Introduction_to_Geophysics/Geophysics_is_everywhere_in_geology.../zz%3A_Back_Matter/Arctan_vs_Arctan2 - rlon = np.arctan2(y, z) - rlat = np.arcsin(x) - # Convert back to degrees - rlon = np.degrees(rlon) - rlat = np.degrees(rlat) - return rlon, rlat - - -@dataclass -class Grid: - dlon: float = None - dlat: float = None - nlon: float = None - nlat: float = None - west: float = None - south: float = None - centre_lat: Optional[float] = None - centre_lon: Optional[float] = None - - - def calculate_grid_coords(self): - """ - This method does two things: - 1. It calculates the centre values of the grid - 2. Before Calculating the boundary points of the regular grid - - This can then be used in many grid representations, for example in a rotated grid used in the ihouse Monarch model - """ - #logger.info("Creating Grid") - if (self.nlon == None) and (self.nlat == None): - self.nlat = int((abs(self.south) / self.dlat) * 2) - self.nlon = int((abs(self.west) / self.dlon) * 2) - elif (self.dlon == None) and (self.dlat == None): - self.dlat = abs(self.south) * 2 / (self.nlat) - self.dlon = abs(self.west) * 2 / (self.nlon) - else: - print("Enter resolution or number of grid cells") - - self.cenlat = self.south + self.dlat/2 - self.cenlon = self.west + self.dlon/2 - self.center_latitudes = np.linspace(self.cenlat, self.cenlat + self.dlat * (self.nlat) - self.dlat, self.nlat, dtype=float) - self.center_longitudes = np.linspace(self.cenlon, self.cenlon + self.dlon * (self.nlon) - self.dlat, self.nlon, dtype=float) - - counter = 0 - self.coords = np.empty((self.center_latitudes.shape[0] * self.center_longitudes.shape[0], 2)) - for i in range(self.center_longitudes.shape[0]): - for j in range(self.center_latitudes.shape[0]): - self.coords[counter, 0] = self.center_longitudes[i] - self.coords[counter, 1] = self.center_latitudes[j] - counter += 1 - - - self.b_lats = self.create_bounds(self.coords[:, 1], self.dlat, number_vertices=4, inverse=True) - self.b_lons = self.create_bounds(self.coords[:, 0], self.dlon, number_vertices=4) - - return True - - - @staticmethod - def create_bounds(coordinates, inc, number_vertices=2, inverse=False): - """ - Calculate the boundaries of each grid points - """ - # Create new arrays moving the centers half increment less and more. - coords_left = coordinates - inc / 2 - coords_right = coordinates + inc / 2 - - # Defining the number of corners needed. 2 to regular grids and 4 for irregular ones. - if number_vertices == 2: - # Create an array of N arrays of 2 elements to store the floor and the ceil values for each cell - bound_coords = np.dstack((coords_left, coords_right)) - bound_coords = bound_coords.reshape((len(coordinates), number_vertices)) - elif number_vertices == 4: - # Create an array of N arrays of 4 elements to store the corner values for each cell - # It can be stored in clockwise starting form the left-top element, or in inverse mode. - if inverse: - bound_coords = np.dstack((coords_left, coords_left, coords_right, coords_right)) - else: - bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left)) - else: - print('The number of vertices of the boundaries must be 2 or 4.') - return bound_coords - - - def building_grid(self): - """ - Here we build the grid using Shapely polygons. And attribute it to a geopandas dataframe - - This is also where we decide whether to use a regular grid for global or a rotated grid for regional - - """ - - - # May need to add centre lats and centre lons to this dataframe as well - - # Re-shape - aux_b_lats = self.b_lats.squeeze() - aux_b_lons = self.b_lons.squeeze() - - - geometry = [] - # Create one dataframe with 8 columns, 4 points with two coordinates each one - for i in range(aux_b_lons.shape[0]): - geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), - (aux_b_lons[i, 1], aux_b_lats[i, 1]), - (aux_b_lons[i, 2], aux_b_lats[i, 2]), - (aux_b_lons[i, 3], aux_b_lats[i, 3]), - (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - - self.gdf = GeoDataFrame(index=range(aux_b_lons.shape[0]), geometry=geometry, crs='epsg:4326') - self.gdf["grid_cell"] = ["grid_cell_{}".format(i+1) for i in range(len(self.gdf))] - - # Append the center lat and lon values to this dataframe - self.gdf["lon_cen"] = self.coords[:, 0] - self.gdf["lat_cen"] = self.coords[:, 1] - - - return True - - -@dataclass -class RegularGrid(Grid): - """ - Regridding if the Grid we want is regular, but also want to aggregate the observations to a regular grid. - - Useful for a global grid representation - """ - projection: str = "regular" - - def __post_init__(self): - self.calculate_grid_coords() - - def aggregate(self, lon, lat, obs, obserr=None): - """ - Aggregate the observations to the grid cells - - Parameters: - lon, lat, obs, obserr (optional) -- Longitude, Latitude, Observations, and Errors - - Returns: - Aggregated lon, lat, obs values - """ - - # Make sure lon and lat are within the borders of the grid - # Filter out values that are outside the grid bounds - mask = (lon >= self.west) & (lon <= self.west + (self.dlon * (self.nlon - 1))) & \ - (lat >= self.south) & (lat <= self.south + (self.dlat * (self.nlat - 1))) & \ - (~np.isnan(obs)) - - - # Apply the mask to filter out the out-of-bounds points - lon = lon[mask] - lat = lat[mask] - obs = obs[mask] - if obserr is not None: - obserr = obserr[mask] - - # Vectorized calculation of grid indices - lon_idx = np.searchsorted(self.center_longitudes, lon) - 1 # Index of longitude grid - lat_idx = np.searchsorted(self.center_latitudes, lat) - 1 # Index of latitude grid - - # Ensure indices are within bounds - lon_idx = np.clip(lon_idx, 0, self.nlon - 1) - lat_idx = np.clip(lat_idx, 0, self.nlat - 1) - - # Initialize arrays for aggregation - aggregated_obs = np.zeros((self.nlat, self.nlon)) - aggregated_lon = np.zeros((self.nlat, self.nlon)) - aggregated_lat = np.zeros((self.nlat, self.nlon)) - aggregated_obs_err = np.zeros((self.nlat, self.nlon)) if obserr is not None else None - count_obs = np.zeros((self.nlat, self.nlon), dtype=int) - - # Efficient accumulation using np.add.at - np.add.at(aggregated_obs, (lat_idx, lon_idx), obs) # Accumulate observations - np.add.at(aggregated_lon, (lat_idx, lon_idx), lon) # Accumulate lon values - np.add.at(aggregated_lat, (lat_idx, lon_idx), lat) # Accumulate lat values - np.add.at(count_obs, (lat_idx, lon_idx), 1) # Count observations per grid cell - - if obserr is not None: - np.add.at(aggregated_obs_err, (lat_idx, lon_idx), obserr) # Accumulate errors - - # Compute averages by dividing by counts - non_zero_counts = count_obs > 0 - aggregated_obs[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_lon[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_lat[non_zero_counts] /= count_obs[non_zero_counts] - - if aggregated_obs_err is not None: - aggregated_obs_err[non_zero_counts] /= count_obs[non_zero_counts] - - # TODO: Add a function that takes the center or aggreagated value - # Return the results - lon_out = aggregated_lon.flatten() #self.center_longitudes - lat_out = aggregated_lat.flatten() #self.center_latitudes - obs_out = aggregated_obs.flatten() - count_obs = count_obs.flatten() - - if aggregated_obs_err is not None: - obs_err_out = aggregated_obs_err.flatten() - return lon_out, lat_out, obs_out, obs_err_out, count_obs - else: - return lon_out, lat_out, obs_out, count_obs - - -@dataclass -class RotatedGrid(Grid): - """ - Regridding if the Grid we want is regular, but also want to aggregate the observations to a regular grid. - - Useful for a global grid representation - """ - - projection: str = "rotated" - - def __post_init__(self): - self.calculate_grid_coords() - - def aggregate(self, lon, lat, obs, obserr=None): - """ - Aggregate the observations to the grid cells - - Parameters: - lon, lat, obs, obserr (optional) -- Longitude, Latitude, Observations, and Errors - - Returns: - Aggregated lon, lat, rlon, rlat, obs values - """ - # Perform the rotation - rlon, rlat = geo_to_rot(lon, lat, centre_lon=self.centre_lon, centre_lat=self.centre_lat) - - # Make sure rlon and rlat are within the borders of the grid - # Filter out values that are outside the grid bounds - mask = (rlon >= self.west) & (rlon <= self.west + (self.dlon * (self.nlon - 1))) & \ - (rlat >= self.south) & (rlat <= self.south + (self.dlat * (self.nlat - 1))) & \ - (~np.isnan(obs)) - - - # Apply the mask to filter out the out-of-bounds points - rlon = rlon[mask] - rlat = rlat[mask] - lon = lon[mask] - lat = lat[mask] - obs = obs[mask] - if obserr is not None: - obserr = obserr[mask] - - - # Vectorized calculation of grid indices - rlon_idx = np.searchsorted(self.center_longitudes, rlon) - 1 # Index of longitude grid - rlat_idx = np.searchsorted(self.center_latitudes, rlat) - 1 # Index of latitude grid - - - # Ensure indices are within bounds - rlon_idx = np.clip(rlon_idx, 0, self.nlon - 1) - rlat_idx = np.clip(rlat_idx, 0, self.nlat - 1) - - - # Initialize arrays for aggregation - aggregated_obs = np.zeros((self.nlat, self.nlon)) - aggregated_lon = np.zeros((self.nlat, self.nlon)) - aggregated_lat = np.zeros((self.nlat, self.nlon)) - aggregated_rlon = np.zeros((self.nlat, self.nlon)) - aggregated_rlat = np.zeros((self.nlat, self.nlon)) - aggregated_obs_err = np.ma.zeros((self.nlat, self.nlon)) if obserr is not None else None - count_obs = np.zeros((self.nlat, self.nlon), dtype=int) - - - - # Efficient accumulation using np.add.at - np.add.at(aggregated_obs, (rlat_idx, rlon_idx), obs) # Accumulate observations - np.add.at(aggregated_lon, (rlat_idx, rlon_idx), lon) # Accumulate lon values - np.add.at(aggregated_lat, (rlat_idx, rlon_idx), lat) # Accumulate lat values - np.add.at(aggregated_rlon, (rlat_idx, rlon_idx), rlon) # Accumulate lon values - np.add.at(aggregated_rlat, (rlat_idx, rlon_idx), rlat) # Accumulate lat values - np.add.at(count_obs, (rlat_idx, rlon_idx), 1) # Count observations per grid cell - - - if obserr is not None: - np.add.at(aggregated_obs_err, (rlat_idx, rlon_idx), obserr) # Accumulate errors - - - # Compute averages by dividing by counts - non_zero_counts = count_obs > 0 - aggregated_obs[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_lon[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_lat[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_rlon[non_zero_counts] /= count_obs[non_zero_counts] - aggregated_rlat[non_zero_counts] /= count_obs[non_zero_counts] - - if aggregated_obs_err is not None: - aggregated_obs_err[non_zero_counts] /= count_obs[non_zero_counts] - - # Return the results - lon_out = aggregated_lon.flatten() - lat_out = aggregated_lat.flatten() - rlon_out = aggregated_lon.flatten() #self.center_longitudes - rlat_out = aggregated_lat.flatten() #self.center_latitudess - obs_out = aggregated_obs.flatten() - count_obs = count_obs.flatten() - - - if aggregated_obs_err is not None: - obs_err_out = aggregated_obs_err.flatten() - return lon_out, lat_out, rlon_out, rlat_out, obs_out, obs_err_out, count_obs - else: - return lon_out, lat_out, rlon_out, rlat_out, obs_out, count_obs - - -@dataclass -class IrregularRotatedGrid(Grid): - - projection: str = "rotated" - - def __post_init__(self): - self.calculate_grid_coords() - self.building_grid() - - - def aggregate(self, lon, lat, obs, obserr=None): - """ - Aggregate - - Parameters: - lon, lat, obs values Optional obserr - - Returns: - lon, lat, rlon, rlat, obs values - """ - # Rotate the observations - rlon, rlat = geo_to_rot(lon, lat, centre_lon=self.centre_lon, centre_lat=self.centre_lat) - - if obserr is not None: - df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "rlon":rlon, "rlat":rlat, "obserr": obserr}) - else: - df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "rlon":rlon, "rlat":rlat}) - - df['coords'] = list(zip(df['rlon'],df['rlat'])) - df['coords'] = df['coords'].apply(Point) - - - # Create a geodataframe of the rotated obseravtional lon/ lat points - points = gpd.GeoDataFrame(df, geometry='coords', crs='epsg:4326') - - - # Geopandas sjoin calculates if these shapely points are within the shapely grid pologons - try: - gdf = self.gdf.sjoin(points, how="left") # , predicate="within") - except TypeError: - print("Empty dataframe messing up some versions of geopandas") - gdf = pd.DataFrame(columns=[ - 'obstype', 'obslon', 'obslat', 'obsn', 'obsid', 'obslev', 'obs', - 'obserr', 'time', 'lon', 'lat', 'coords', 'index_right', - 'lon_cen', 'lat_cen', 'grid_cell'], - ) - # print(f'gdf: {gdf}', flush=True) - - # Need to also return the original lat lon - # Calculate the mean of all points within each grid cell - counts = gdf[["grid_cell","obs"]].groupby("grid_cell").count().reset_index() - gdf = gdf[["grid_cell", "lon_cen", "lat_cen", "obs", "lon", "lat", "rlon", "rlat"]].groupby("grid_cell").mean().reset_index() - - # Remove nan values - gdf = gdf.dropna(subset="obs") - counts = counts.dropna(subset="obs") - - - # Return the data we are interested in - if obserr is not None: - return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, gdf["obserr"].values, counts["obs"].values - else: - return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, counts["obs"].values - - - \ No newline at end of file diff --git a/mapies/grids/regular.py b/mapies/grids/regular.py new file mode 100644 index 00000000..0ca54c99 --- /dev/null +++ b/mapies/grids/regular.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +from dataclasses import dataclass +import pandas as pd +import numpy as np +from typing import Tuple, Dict, Optional +from mapies.util.func_tools import * +from mapies.grids.base_grid import Grid +import logging + + + +@dataclass +class RegularGrid(Grid): + """ + Regridding if the Grid we want is regular, but also want to aggregate the observations to a regular grid. + + Useful for a global grid representation + """ + projection: str = "regular" + + def __post_init__(self): + self.calculate_grid_coords() + + def aggregate(self, lon, lat, obs, obserr=None): + """ + Aggregate the observations to the grid cells + + Parameters: + lon, lat, obs, obserr (optional) -- Longitude, Latitude, Observations, and Errors + + Returns: + Aggregated lon, lat, obs values + """ + + # Make sure lon and lat are within the borders of the grid + # Filter out values that are outside the grid bounds + mask = (lon >= self.west) & (lon <= self.west + (self.dlon * (self.nlon - 1))) & \ + (lat >= self.south) & (lat <= self.south + (self.dlat * (self.nlat - 1))) & \ + (~np.isnan(obs)) + + + # Apply the mask to filter out the out-of-bounds points + lon = lon[mask] + lat = lat[mask] + obs = obs[mask] + if obserr is not None: + obserr = obserr[mask] + + # Vectorized calculation of grid indices + lon_idx = np.searchsorted(self.center_longitudes, lon) - 1 # Index of longitude grid + lat_idx = np.searchsorted(self.center_latitudes, lat) - 1 # Index of latitude grid + + # Ensure indices are within bounds + lon_idx = np.clip(lon_idx, 0, self.nlon - 1) + lat_idx = np.clip(lat_idx, 0, self.nlat - 1) + + # Initialize arrays for aggregation + aggregated_obs = np.zeros((self.nlat, self.nlon)) + aggregated_lon = np.zeros((self.nlat, self.nlon)) + aggregated_lat = np.zeros((self.nlat, self.nlon)) + aggregated_obs_err = np.zeros((self.nlat, self.nlon)) if obserr is not None else None + count_obs = np.zeros((self.nlat, self.nlon), dtype=int) + + # Efficient accumulation using np.add.at + np.add.at(aggregated_obs, (lat_idx, lon_idx), obs) # Accumulate observations + np.add.at(aggregated_lon, (lat_idx, lon_idx), lon) # Accumulate lon values + np.add.at(aggregated_lat, (lat_idx, lon_idx), lat) # Accumulate lat values + np.add.at(count_obs, (lat_idx, lon_idx), 1) # Count observations per grid cell + + if obserr is not None: + np.add.at(aggregated_obs_err, (lat_idx, lon_idx), obserr) # Accumulate errors + + # Compute averages by dividing by counts + non_zero_counts = count_obs > 0 + aggregated_obs[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lon[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lat[non_zero_counts] /= count_obs[non_zero_counts] + + if aggregated_obs_err is not None: + aggregated_obs_err[non_zero_counts] /= count_obs[non_zero_counts] + + # TODO: Add a function that takes the center or aggreagated value + # Return the results + lon_out = aggregated_lon.flatten() #self.center_longitudes + lat_out = aggregated_lat.flatten() #self.center_latitudes + obs_out = aggregated_obs.flatten() + count_obs = count_obs.flatten() + + if aggregated_obs_err is not None: + obs_err_out = aggregated_obs_err.flatten() + return lon_out, lat_out, obs_out, obs_err_out, count_obs + else: + return lon_out, lat_out, obs_out, count_obs diff --git a/mapies/grids/rotated.py b/mapies/grids/rotated.py new file mode 100644 index 00000000..a958ca8a --- /dev/null +++ b/mapies/grids/rotated.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python +from dataclasses import dataclass +import pandas as pd +import numpy as np +from typing import Tuple, Dict, Optional +from numpy import cos, sin, arctan, pi, nan +import geopandas as gpd +from geopandas import GeoDataFrame +from shapely.geometry import Polygon, Point +from mapies.util.func_tools import * +from mapies.grids.base_grid import Grid +import logging + + +def geo_to_rot(lons, lats, centre_lon: float, centre_lat: float): + """ + Rotating coordinates from cartesian lat/lon to rotated rlon/rlat + """ + distance_lons = np.radians(lons - centre_lon) + lons = np.radians(lons) + lats = np.radians(lats) + centre_lon = np.radians(centre_lon) + centre_lat = np.radians(centre_lat) + + x = cos(centre_lat) * sin(lats) - sin(centre_lat) * cos(lats) * cos(distance_lons) + y = cos(lats) * sin(distance_lons) + z = cos(centre_lat) * cos(lats) * cos(distance_lons) + sin(centre_lat) * sin(lats) + # Arctan2 used + # Explanation of the difference https://geo.libretexts.org/Courses/University_of_California_Davis/GEL_056%3A_Introduction_to_Geophysics/Geophysics_is_everywhere_in_geology.../zz%3A_Back_Matter/Arctan_vs_Arctan2 + rlon = np.arctan2(y, z) + rlat = np.arcsin(x) + # Convert back to degrees + rlon = np.degrees(rlon) + rlat = np.degrees(rlat) + return rlon, rlat + + +@dataclass +class RotatedGrid(Grid): + """ + Regridding if the Grid we want is regular, but also want to aggregate the observations to a regular grid. + + Useful for a global grid representation + """ + + projection: str = "rotated" + + def __post_init__(self): + self.calculate_grid_coords() + + def aggregate(self, lon, lat, obs, obserr=None): + """ + Aggregate the observations to the grid cells + + Parameters: + lon, lat, obs, obserr (optional) -- Longitude, Latitude, Observations, and Errors + + Returns: + Aggregated lon, lat, rlon, rlat, obs values + """ + # Perform the rotation + rlon, rlat = geo_to_rot(lon, lat, centre_lon=self.centre_lon, centre_lat=self.centre_lat) + + # Make sure rlon and rlat are within the borders of the grid + # Filter out values that are outside the grid bounds + mask = (rlon >= self.west) & (rlon <= self.west + (self.dlon * (self.nlon - 1))) & \ + (rlat >= self.south) & (rlat <= self.south + (self.dlat * (self.nlat - 1))) & \ + (~np.isnan(obs)) + + + # Apply the mask to filter out the out-of-bounds points + rlon = rlon[mask] + rlat = rlat[mask] + lon = lon[mask] + lat = lat[mask] + obs = obs[mask] + if obserr is not None: + obserr = obserr[mask] + + + # Vectorized calculation of grid indices + rlon_idx = np.searchsorted(self.center_longitudes, rlon) - 1 # Index of longitude grid + rlat_idx = np.searchsorted(self.center_latitudes, rlat) - 1 # Index of latitude grid + + + # Ensure indices are within bounds + rlon_idx = np.clip(rlon_idx, 0, self.nlon - 1) + rlat_idx = np.clip(rlat_idx, 0, self.nlat - 1) + + + # Initialize arrays for aggregation + aggregated_obs = np.zeros((self.nlat, self.nlon)) + aggregated_lon = np.zeros((self.nlat, self.nlon)) + aggregated_lat = np.zeros((self.nlat, self.nlon)) + aggregated_rlon = np.zeros((self.nlat, self.nlon)) + aggregated_rlat = np.zeros((self.nlat, self.nlon)) + aggregated_obs_err = np.ma.zeros((self.nlat, self.nlon)) if obserr is not None else None + count_obs = np.zeros((self.nlat, self.nlon), dtype=int) + + + + # Efficient accumulation using np.add.at + np.add.at(aggregated_obs, (rlat_idx, rlon_idx), obs) # Accumulate observations + np.add.at(aggregated_lon, (rlat_idx, rlon_idx), lon) # Accumulate lon values + np.add.at(aggregated_lat, (rlat_idx, rlon_idx), lat) # Accumulate lat values + np.add.at(aggregated_rlon, (rlat_idx, rlon_idx), rlon) # Accumulate lon values + np.add.at(aggregated_rlat, (rlat_idx, rlon_idx), rlat) # Accumulate lat values + np.add.at(count_obs, (rlat_idx, rlon_idx), 1) # Count observations per grid cell + + + if obserr is not None: + np.add.at(aggregated_obs_err, (rlat_idx, rlon_idx), obserr) # Accumulate errors + + + # Compute averages by dividing by counts + non_zero_counts = count_obs > 0 + aggregated_obs[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lon[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lat[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_rlon[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_rlat[non_zero_counts] /= count_obs[non_zero_counts] + + if aggregated_obs_err is not None: + aggregated_obs_err[non_zero_counts] /= count_obs[non_zero_counts] + + # Return the results + lon_out = aggregated_lon.flatten() + lat_out = aggregated_lat.flatten() + rlon_out = aggregated_lon.flatten() #self.center_longitudes + rlat_out = aggregated_lat.flatten() #self.center_latitudess + obs_out = aggregated_obs.flatten() + count_obs = count_obs.flatten() + + + if aggregated_obs_err is not None: + obs_err_out = aggregated_obs_err.flatten() + return lon_out, lat_out, rlon_out, rlat_out, obs_out, obs_err_out, count_obs + else: + return lon_out, lat_out, rlon_out, rlat_out, obs_out, count_obs + + +@dataclass +class CAMS(RotatedGrid): + dlon: float = None + dlat: float = None + nlon: float = None + nlat: float = None + west: float = None + south: float = None + centre_lat: Optional[float] = None + centre_lon: Optional[float] = None + + +@dataclass +class IrregularRotatedGrid(Grid): + + projection: str = "rotated" + + def __post_init__(self): + self.calculate_grid_coords() + self.building_grid() + + + def aggregate(self, lon, lat, obs, obserr=None): + """ + Aggregate + + Parameters: + lon, lat, obs values Optional obserr + + Returns: + lon, lat, rlon, rlat, obs values + """ + # Rotate the observations + rlon, rlat = geo_to_rot(lon, lat, centre_lon=self.centre_lon, centre_lat=self.centre_lat) + + if obserr is not None: + df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "rlon":rlon, "rlat":rlat, "obserr": obserr}) + else: + df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "rlon":rlon, "rlat":rlat}) + + df['coords'] = list(zip(df['rlon'],df['rlat'])) + df['coords'] = df['coords'].apply(Point) + + + # Create a geodataframe of the rotated obseravtional lon/ lat points + points = gpd.GeoDataFrame(df, geometry='coords', crs='epsg:4326') + + + # Geopandas sjoin calculates if these shapely points are within the shapely grid pologons + try: + gdf = self.gdf.sjoin(points, how="left") # , predicate="within") + except TypeError: + print("Empty dataframe messing up some versions of geopandas") + gdf = pd.DataFrame(columns=[ + 'obstype', 'obslon', 'obslat', 'obsn', 'obsid', 'obslev', 'obs', + 'obserr', 'time', 'lon', 'lat', 'coords', 'index_right', + 'lon_cen', 'lat_cen', 'grid_cell'], + ) + # print(f'gdf: {gdf}', flush=True) + + # Need to also return the original lat lon + # Calculate the mean of all points within each grid cell + counts = gdf[["grid_cell","obs"]].groupby("grid_cell").count().reset_index() + gdf = gdf[["grid_cell", "lon_cen", "lat_cen", "obs", "lon", "lat", "rlon", "rlat"]].groupby("grid_cell").mean().reset_index() + + # Remove nan values + gdf = gdf.dropna(subset="obs") + counts = counts.dropna(subset="obs") + + + # Return the data we are interested in + if obserr is not None: + return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, gdf["obserr"].values, counts["obs"].values + else: + return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, counts["obs"].values + + + \ No newline at end of file diff --git a/mapies/mapies.py b/mapies/mapies.py index 5b840ed2..dcb565d2 100644 --- a/mapies/mapies.py +++ b/mapies/mapies.py @@ -4,7 +4,8 @@ from datetime import datetime from mapies.util.func_tools import * -from mapies.grids.monarch import RotatedGrid, IrregularRotatedGrid, RegularGrid +from mapies.grids.rotated import RotatedGrid, IrregularRotatedGrid +from mapies.grids.regular import RegularGrid import time import logging import numpy as np diff --git a/mapies/util/multiprocessing.py b/mapies/util/multiprocessing.py index 3ce5bed9..b455e3ac 100644 --- a/mapies/util/multiprocessing.py +++ b/mapies/util/multiprocessing.py @@ -8,7 +8,35 @@ import multiprocessing from dataclasses import dataclass from numpy.typing import DTypeLike, NDArray from mapies.util.func_tools import * -from mapies.grids.monarch import Grid, RotatedGrid, RegularGrid, regridding_function +from mapies.grids.base_grid import Grid +from mapies.grids.rotated import RotatedGrid +from mapies.grids.regular import RegularGrid + + + +def regridding_function(r, lon, lat, obs, obserr=None): + + # If obserr is selected return that + if obserr is not None: + if isinstance(r, RotatedGrid): + lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) + return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs + elif isinstance(r, RegularGrid): + lon_agg, lat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) + return lon_agg, lat_agg, obs_agg, obserr_agg, count_obs + else: + raise ValueError("Invalid grid representation") + + else: + if isinstance(r, RotatedGrid): + lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) + return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs + elif isinstance(r, RegularGrid): + lon_agg, lat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) + return lon_agg, lat_agg, obs_agg, count_obs + else: + raise ValueError("Invalid grid representation") + def process_single_file_tropomi( diff --git a/mapies/viirs.py b/mapies/viirs.py index 8629f835..78efe490 100644 --- a/mapies/viirs.py +++ b/mapies/viirs.py @@ -8,10 +8,10 @@ import dask.array as da import numpy as np import pandas as pd -import cartopy.crs as ccrs from .mapies import MAPIES +from mapies.grids.regular import RegularGrid +from mapies.grids.rotated import RotatedGrid from mapies.util.func_tools import * -from mapies.grids.monarch import regridding_function from mapies.util.variables_names import VARS_ATTRIBUTES from pathlib import Path @@ -23,6 +23,31 @@ os.environ["OMP_NUM_THREADS"] = "8" #logging.basicConfig(filename='../logs/mapies.log', level=logging.INFO) +def regridding_function(r, lon, lat, obs, obserr=None): + + # If obserr is selected return that + if obserr is not None: + if isinstance(r, RotatedGrid): + lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) + return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs + elif isinstance(r, RegularGrid): + lon_agg, lat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) + return lon_agg, lat_agg, obs_agg, obserr_agg, count_obs + else: + raise ValueError("Invalid grid representation") + + else: + if isinstance(r, RotatedGrid): + lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) + return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs + elif isinstance(r, RegularGrid): + lon_agg, lat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) + return lon_agg, lat_agg, obs_agg, count_obs + else: + raise ValueError("Invalid grid representation") + + + class VIIRS(MAPIES): """ Class for VIIRS specific data -- GitLab From 3c2bab6c00b4a682844ba5eab283c916fdd986f5 Mon Sep 17 00:00:00 2001 From: Calum Meikle Date: Wed, 5 Mar 2025 17:00:00 +0100 Subject: [PATCH 2/4] Adding CAMS and BDRC default grid repr and getting rid of regridding function --- mapies/config/satellite_config.yaml | 37 +++++++------------------ mapies/grids/rotated.py | 28 +++++++++++++------ mapies/mapies.py | 15 ++++++---- mapies/util/multiprocessing.py | 43 ++++++----------------------- mapies/viirs.py | 32 +++------------------ run/run_tropomi.py | 2 +- run/run_viirs.py | 21 +++++++------- 7 files changed, 63 insertions(+), 115 deletions(-) diff --git a/mapies/config/satellite_config.yaml b/mapies/config/satellite_config.yaml index b94f5324..49d6c112 100644 --- a/mapies/config/satellite_config.yaml +++ b/mapies/config/satellite_config.yaml @@ -10,22 +10,6 @@ viirs: da: obsid: 34 uncertainty_constants: [0.2, 0.05, 0.02] - grids: - grid_repr: "" # default grid repr - rotated: - centre_lat: 35 - centre_lon: 20 - west: -51 - south: -35 - dlon: 0.1 - dlat: 0.1 - regular: - west: -180 - south: -90 - dlon: 0.1 - dlat: 0.1 - #nlon: 3600 - #nlat: 1800 qa: qa_variables: qa_land: @@ -47,7 +31,16 @@ tropomi: lon_variable: "longitude" lat_variable: "latitude" obs_variable: "nitrogendioxide_tropospheric_column" - grids: + qa: + qa_variables: + qa_value: + qa_name: "qa_value" + qa_values: 0.5 + qa_combined: True + qa_condition: "greater_than_or_equal" # Examples, greater_than, less_than, equal, greater_than_or_equal, less_than_or_equal + + +grids: grid_repr: "" # default grid repr rotated: centre_lat: 35 @@ -63,14 +56,4 @@ tropomi: dlat: 0.1 #nlon: 3600 #nlat: 1800 - qa: - qa_variables: - qa_value: - qa_name: "qa_value" - qa_values: 0.5 - qa_combined: True - qa_condition: "greater_than_or_equal" # Examples, greater_than, less_than, equal, greater_than_or_equal, less_than_or_equal - - - diff --git a/mapies/grids/rotated.py b/mapies/grids/rotated.py index a958ca8a..5cfdaa2f 100644 --- a/mapies/grids/rotated.py +++ b/mapies/grids/rotated.py @@ -140,15 +140,25 @@ class RotatedGrid(Grid): @dataclass -class CAMS(RotatedGrid): - dlon: float = None - dlat: float = None - nlon: float = None - nlat: float = None - west: float = None - south: float = None - centre_lat: Optional[float] = None - centre_lon: Optional[float] = None +class CAMSGrid(RotatedGrid): + dlon: float = 0.2 + dlat: float = 0.2 + west: float = -35. + south: float = -27. + centre_lat: float = 35. + centre_lon: float = 15. + projection: str = "cams" + +@dataclass +class BDRCGrid(RotatedGrid): + dlon: float = 0.1 + dlat: float = 0.1 + west: float = -51. + south: float = -35. + centre_lat: float = 35. + centre_lon: float = 20. + projection: str = "bdrc" + @dataclass diff --git a/mapies/mapies.py b/mapies/mapies.py index dcb565d2..0b457768 100644 --- a/mapies/mapies.py +++ b/mapies/mapies.py @@ -4,7 +4,7 @@ from datetime import datetime from mapies.util.func_tools import * -from mapies.grids.rotated import RotatedGrid, IrregularRotatedGrid +from mapies.grids.rotated import RotatedGrid, IrregularRotatedGrid, CAMSGrid, BDRCGrid from mapies.grids.regular import RegularGrid import time import logging @@ -46,7 +46,9 @@ class MAPIES: self.grid_dict = { "rotated":RotatedGrid, "regular":RegularGrid, - "ireg_rotated":IrregularRotatedGrid + "ireg_rotated":IrregularRotatedGrid, + "cams":CAMSGrid, + "bdrc":BDRCGrid } @@ -82,7 +84,7 @@ class MAPIES: self.obs_var = variable_dict["obs_variable"] - grid_dict = self.config[self.datatype]["grids"] + grid_dict = self.config["grids"] grid_repr = kwargs.get("grid_repr") if grid_repr: self.grid_repr = grid_repr @@ -91,8 +93,11 @@ class MAPIES: if self.grid_repr: # Cread grid object if grid repr is selected in the input - self.grid_config = grid_dict[self.grid_repr] - self.grid = self.grid_dict[self.grid_repr](**self.grid_config) + if self.grid_repr in grid_dict: + self.grid_config = grid_dict[self.grid_repr] + self.grid = self.grid_dict[self.grid_repr](**self.grid_config) + else: + self.grid = self.grid_dict[self.grid_repr]() print(self.grid) diff --git a/mapies/util/multiprocessing.py b/mapies/util/multiprocessing.py index b455e3ac..f92d6914 100644 --- a/mapies/util/multiprocessing.py +++ b/mapies/util/multiprocessing.py @@ -14,33 +14,8 @@ from mapies.grids.regular import RegularGrid -def regridding_function(r, lon, lat, obs, obserr=None): - - # If obserr is selected return that - if obserr is not None: - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) - return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) - return lon_agg, lat_agg, obs_agg, obserr_agg, count_obs - else: - raise ValueError("Invalid grid representation") - - else: - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) - return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) - return lon_agg, lat_agg, obs_agg, count_obs - else: - raise ValueError("Invalid grid representation") - - - def process_single_file_tropomi( - r, + grid, file, obs_var, lat_var, @@ -94,18 +69,18 @@ def process_single_file_tropomi( if not flag: try: # Regrid and aggregate - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = regridding_function(r, lon, lat, obs) - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, count_obs = regridding_function(r, lon, lat, obs) + if isinstance(grid, RotatedGrid): + lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = grid.aggregate(lon, lat, obs) + elif isinstance(grid, RegularGrid): + lon_agg, lat_agg, obs_agg, count_obs = grid.aggregate(lon, lat, obs) ds.close() return obs_agg, lat_agg, lon_agg, count_obs except Exception as e: - print(f"Error performing regridding for file {file}: {e}") + print(f"Error performing regridding for file {file}: {e}") return None else: ds.close() - return obs, lat, lon, time_values + return obs, lat, lon, time_values @@ -165,9 +140,9 @@ def process_single_file_viirs( try: # Regrid and aggregate if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = regridding_function(r, lon, lat, obs) + lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, count_obs = regridding_function(r, lon, lat, obs) + lon_agg, lat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) ds.close() return obs_agg, lat_agg, lon_agg, count_obs except Exception as e: diff --git a/mapies/viirs.py b/mapies/viirs.py index 78efe490..17ab2c16 100644 --- a/mapies/viirs.py +++ b/mapies/viirs.py @@ -23,30 +23,6 @@ os.environ["OMP_NUM_THREADS"] = "8" #logging.basicConfig(filename='../logs/mapies.log', level=logging.INFO) -def regridding_function(r, lon, lat, obs, obserr=None): - - # If obserr is selected return that - if obserr is not None: - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) - return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, obserr_agg, count_obs - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, obserr_agg, count_obs = r.aggregate(lon, lat, obs, obserr) - return lon_agg, lat_agg, obs_agg, obserr_agg, count_obs - else: - raise ValueError("Invalid grid representation") - - else: - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) - return lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) - return lon_agg, lat_agg, obs_agg, count_obs - else: - raise ValueError("Invalid grid representation") - - class VIIRS(MAPIES): """ @@ -223,10 +199,10 @@ class VIIRS(MAPIES): # Run aggregation with grid representation try: - if self.grid_repr == "rotated": - lon, lat, rlon, rlat, obs, obserr, count_obs = regridding_function(self.grid, lon, lat, obs, obserr) - elif self.grid_repr == "regular": - lon, lat, obs, obserr, count_obs = regridding_function(self.grid, lon, lat, obs, obserr) + if isinstance(self.grid, RotatedGrid): + lon, lat, rlon, rlat, obs, obserr, count_obs = self.grid.aggregate(lon, lat, obs, obserr) + elif isinstance(self.grid, RegularGrid): + lon, lat, obs, obserr, count_obs = self.grid.aggregate(lon, lat, obs, obserr) except ValueError: raise Exception("Have you specified a grid_repr") diff --git a/run/run_tropomi.py b/run/run_tropomi.py index f8188943..fd74d50e 100644 --- a/run/run_tropomi.py +++ b/run/run_tropomi.py @@ -12,7 +12,7 @@ if __name__ == "__main__": start_time = time.time() - c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="regular") + c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="bdrc") c.gather_nc_files() c.process_avg_data(monthly_avg=False, batch_size = 14, apply_qa=True, geospatial_crop=[[-40, 60], [0, 70]]) diff --git a/run/run_viirs.py b/run/run_viirs.py index b644c4b4..fe1a2fee 100644 --- a/run/run_viirs.py +++ b/run/run_viirs.py @@ -6,28 +6,27 @@ import time if __name__ == "__main__": # Change these variables # 00:30 to 02:36 from 2024-01-01 to 2024-01-03 - start_date = "202401310000" - end_date = "202401312359" + start_date = "202401010000" + end_date = "202401012359" - # outdir="/home/cmeikle/Projects/data/" - # indir="/home/cmeikle/Projects/data/VIIRS" + outdir="/home/cmeikle/Projects/data/" + indir="/home/cmeikle/Projects/data/VIIRS" + - outdir="/home/cgile/Documents/mapies/figures" - indir="/home/cgile/Documents/mapies/VIIRS" start_time = time.time() - c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated") + c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="cams") c.gather_nc_files() - c.process_avg_data(monthly_avg=True, batch_size = 100, apply_qa=True, save=True) - c.yearly_average() - c.plot_2D_observations(months=None, filename="new.png", outdir=outdir) - c.plot_2D_num_obs(months=None, outdir=outdir) + c.process_avg_data(monthly_avg=False, batch_size = 100, apply_qa=True, save=True) + #c.yearly_average() + c.plot_2D_observations(months=[0], filename="new.png", outdir=outdir) + c.plot_2D_num_obs(months=[0], outdir=outdir) c.process_lazy_data(apply_qa=True, save=False) c.to_da(frequency="3H", save_figs=True) -- GitLab From 9e7209d0497bce53d866966c40fec30e1e52eced Mon Sep 17 00:00:00 2001 From: Calum Meikle Date: Wed, 5 Mar 2025 17:15:58 +0100 Subject: [PATCH 3/4] Updating of tests and small name changes --- mapies/config/satellite_config.yaml | 30 +++++++++---------- .../{test_monarch_grid.py => test_grids.py} | 5 ++-- mapies/util/multiprocessing.py | 12 ++++---- 3 files changed, 24 insertions(+), 23 deletions(-) rename mapies/tests/{test_monarch_grid.py => test_grids.py} (92%) diff --git a/mapies/config/satellite_config.yaml b/mapies/config/satellite_config.yaml index 49d6c112..36ef5e04 100644 --- a/mapies/config/satellite_config.yaml +++ b/mapies/config/satellite_config.yaml @@ -41,19 +41,19 @@ tropomi: grids: - grid_repr: "" # default grid repr - rotated: - centre_lat: 35 - centre_lon: 20 - west: -51 - south: -35 - dlon: 0.1 - dlat: 0.1 - regular: - west: -180 - south: -90 - dlon: 0.1 - dlat: 0.1 - #nlon: 3600 - #nlat: 1800 + grid_repr: "" # default grid repr + rotated: + centre_lat: 35 + centre_lon: 20 + west: -51 + south: -35 + dlon: 0.1 + dlat: 0.1 + regular: + west: -180 + south: -90 + dlon: 0.1 + dlat: 0.1 + #nlon: 3600 + #nlat: 1800 diff --git a/mapies/tests/test_monarch_grid.py b/mapies/tests/test_grids.py similarity index 92% rename from mapies/tests/test_monarch_grid.py rename to mapies/tests/test_grids.py index 12240f54..dd907897 100644 --- a/mapies/tests/test_monarch_grid.py +++ b/mapies/tests/test_grids.py @@ -3,11 +3,12 @@ import pytest import pandas as pd import numpy as np -from mapies.grids.monarch import RegularGrid, RotatedGrid, IrregularRotatedGrid, geo_to_rot +from mapies.grids.rotated import RotatedGrid, IrregularRotatedGrid, geo_to_rot +from mapies.grids.regular import RegularGrid @pytest.mark.parametrize( - "test_input,expected", + "test_input,expected", [ ((np.array([-10, -10, -10]), np.array([-10, -10, -10]), np.array([1, 2, 3])), (np.array([-10., 0., 0., 0.]), np.array([-10., 0., 0., 0.]), np.array([2., 0., 0., 0.]), np.array([[3., 0., 0., 0.]]))), diff --git a/mapies/util/multiprocessing.py b/mapies/util/multiprocessing.py index f92d6914..65204c94 100644 --- a/mapies/util/multiprocessing.py +++ b/mapies/util/multiprocessing.py @@ -85,7 +85,7 @@ def process_single_file_tropomi( def process_single_file_viirs( - r, + grid, file, obs_var, lat_var, @@ -139,10 +139,10 @@ def process_single_file_viirs( if not flag: try: # Regrid and aggregate - if isinstance(r, RotatedGrid): - lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) - elif isinstance(r, RegularGrid): - lon_agg, lat_agg, obs_agg, count_obs = r.aggregate(lon, lat, obs) + if isinstance(grid, RotatedGrid): + lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = grid.aggregate(lon, lat, obs) + elif isinstance(grid, RegularGrid): + lon_agg, lat_agg, obs_agg, count_obs = grid.aggregate(lon, lat, obs) ds.close() return obs_agg, lat_agg, lon_agg, count_obs except Exception as e: @@ -150,7 +150,7 @@ def process_single_file_viirs( return None else: ds.close() - return obs, lat, lon, time_values + return obs, lat, lon, time_values -- GitLab From 0cc6ba703f0ec74ccb7700ddc06baf9924a0b882 Mon Sep 17 00:00:00 2001 From: Calum Meikle Date: Wed, 5 Mar 2025 17:25:27 +0100 Subject: [PATCH 4/4] Updating of run viirs and changelog --- CHANGELOG | 4 +++- run/run_viirs.py | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 41756c63..74ed4172 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,10 +3,12 @@ CHANGELOG ## 0.1.1 - 2025/02/28 - Code refactoring -DRY code +DRY code more tests of the processing data and Grid restructure ### Features - Drying of code, moving duplicated code to Parent class and creating Multiprocessing class +- More tests of the processing data and Grid restructure +- Grids split into base grid rotated and regular files with the ## 0.1.0 - 2025/02/21 - First version of MAPIES diff --git a/run/run_viirs.py b/run/run_viirs.py index fe1a2fee..d4ced0cd 100644 --- a/run/run_viirs.py +++ b/run/run_viirs.py @@ -23,10 +23,10 @@ if __name__ == "__main__": c.gather_nc_files() - c.process_avg_data(monthly_avg=False, batch_size = 100, apply_qa=True, save=True) - #c.yearly_average() - c.plot_2D_observations(months=[0], filename="new.png", outdir=outdir) - c.plot_2D_num_obs(months=[0], outdir=outdir) + c.process_avg_data(monthly_avg=True, batch_size = 100, apply_qa=True, save=True) + c.yearly_average() + c.plot_2D_observations(months=None, filename="new.png", outdir=outdir) + c.plot_2D_num_obs(months=None, outdir=outdir) c.process_lazy_data(apply_qa=True, save=False) c.to_da(frequency="3H", save_figs=True) -- GitLab