diff --git a/mapies/grids/monarch.py b/mapies/grids/monarch.py index 9f468294efde3d96aca7d8d9955d7ec9580135b7..1a4e2b03473a529b1702813f65e4ca9168050080 100644 --- a/mapies/grids/monarch.py +++ b/mapies/grids/monarch.py @@ -4,7 +4,7 @@ from pandas import DataFrame import xarray as xr import pandas as pd import numpy as np -from typing import Tuple, Dict +from typing import Tuple, Dict, Optional from numpy import cos, sin, arctan, pi, nan from functools import partial, wraps import geopandas as gpd @@ -17,12 +17,14 @@ import logging @dataclass class Grid: - centre_lat: float - centre_lon: float - dlon: float - dlat: float + dlon: float = None + dlat: float = None + nlon: float = None + nlat: float = None west: float = None south: float = None + centre_lat: Optional[float] = None + centre_lon: Optional[float] = None def calculate_grid_coords(self): @@ -33,10 +35,20 @@ class Grid: This can then be used in many grid representations, for example in a rotated grid used in the ihouse Monarch model """ - self.nlat = int((abs(self.south) / self.dlat) * 2 + 1) - self.nlon = int((abs(self.west) / self.dlon) * 2 + 1) - self.center_latitudes = np.linspace(self.south, self.south + (self.dlat * (self.nlat - 1)), self.nlat, dtype=float) - self.center_longitudes = np.linspace(self.west, self.west + (self.dlon * (self.nlon - 1)), self.nlon, dtype=float) + if (self.nlon == None) and (self.nlat == None): + self.nlat = int((abs(self.south) / self.dlat) * 2) + self.nlon = int((abs(self.west) / self.dlon) * 2) + elif (self.dlon == None) and (self.dlat == None): + self.dlat = abs(self.south) * 2 / (self.nlat) + self.dlon = abs(self.west) * 2 / (self.nlon) + else: + print("Enter resolution or number of grid cells") + + self.cenlat = self.south + self.dlat/2 + self.cenlon = self.west + self.dlon/2 + self.center_latitudes = np.linspace(self.cenlat, self.cenlat + self.dlat * (self.nlat) - self.dlat, self.nlat, dtype=float) + self.center_longitudes = np.linspace(self.cenlon, self.cenlon + self.dlon * (self.nlon) - self.dlat, self.nlon, dtype=float) + counter = 0 self.coords = np.empty((self.center_latitudes.shape[0] * self.center_longitudes.shape[0], 2)) @@ -114,77 +126,219 @@ class Grid: return True - @dataclass class RegularGrid(Grid): """ Regridding if the Grid we want is regular, but also want to aggregate the observations to a regular grid. - + Useful for a global grid representation - """ + """ projection: str = "regular" def __post_init__(self): self.calculate_grid_coords() - self.building_grid() def aggregate(self, lon, lat, obs, obserr=None): """ - Aggregate + Aggregate the observations to the grid cells Parameters: - lon, lat, obs values Optional obserr + lon, lat, obs, obserr (optional) -- Longitude, Latitude, Observations, and Errors Returns: - lon, lat, obs values + Aggregated lon, lat, obs values """ + + # Make sure lon and lat are within the borders of the grid + # Filter out values that are outside the grid bounds + mask = (lon >= self.west) & (lon <= self.west + (self.dlon * (self.nlon - 1))) & \ + (lat >= self.south) & (lat <= self.south + (self.dlat * (self.nlat - 1))) & \ + (~np.isnan(obs)) + + # Apply the mask to filter out the out-of-bounds points + lon = lon[mask] + lat = lat[mask] + obs = obs[mask] if obserr is not None: - df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "obserr": obserr}) + obserr = obserr[mask] + + # Vectorized calculation of grid indices + lon_idx = np.searchsorted(self.center_longitudes, lon) - 1 # Index of longitude grid + lat_idx = np.searchsorted(self.center_latitudes, lat) - 1 # Index of latitude grid + + # Ensure indices are within bounds + lon_idx = np.clip(lon_idx, 0, self.nlon - 1) + lat_idx = np.clip(lat_idx, 0, self.nlat - 1) + + # Initialize arrays for aggregation + aggregated_obs = np.zeros((self.nlat, self.nlon)) + aggregated_lon = np.zeros((self.nlat, self.nlon)) + aggregated_lat = np.zeros((self.nlat, self.nlon)) + aggregated_obs_err = np.zeros((self.nlat, self.nlon)) if obserr is not None else None + count_obs = np.zeros((self.nlat, self.nlon), dtype=int) + + # Efficient accumulation using np.add.at + np.add.at(aggregated_obs, (lat_idx, lon_idx), obs) # Accumulate observations + np.add.at(aggregated_lon, (lat_idx, lon_idx), lon) # Accumulate lon values + np.add.at(aggregated_lat, (lat_idx, lon_idx), lat) # Accumulate lat values + np.add.at(count_obs, (lat_idx, lon_idx), 1) # Count observations per grid cell + + if obserr is not None: + np.add.at(aggregated_obs_err, (lat_idx, lon_idx), obserr) # Accumulate errors + + # Compute averages by dividing by counts + non_zero_counts = count_obs > 0 + aggregated_obs[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lon[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lat[non_zero_counts] /= count_obs[non_zero_counts] + + if aggregated_obs_err is not None: + aggregated_obs_err[non_zero_counts] /= count_obs[non_zero_counts] + + # Mask for zero counts + aggregated_obs = aggregated_obs[non_zero_counts] + aggregated_lon = aggregated_lon[non_zero_counts] + aggregated_lat = aggregated_lat[non_zero_counts] + count_obs = count_obs[non_zero_counts] + + if aggregated_obs_err is not None: + aggregated_obs_err = aggregated_obs_err[non_zero_counts] + + + # TODO: Add a function that takes the center or aggreagated value + # Return the results + lon_out = aggregated_lon.flatten() #self.center_longitudes + lat_out = aggregated_lat.flatten() #self.center_latitudes + obs_out = aggregated_obs.flatten() + count_obs = count_obs.flatten() + + if aggregated_obs_err is not None: + obs_err_out = aggregated_obs_err.flatten() + return lon_out, lat_out, obs_out, obs_err_out, count_obs else: - df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat}) + return lon_out, lat_out, obs_out, count_obs - df['coords'] = list(zip(df['lon'],df['lat'])) - df['coords'] = df['coords'].apply(Point) - # Create a geodataframe of the rotated obseravtional lon/ lat points - points = gpd.GeoDataFrame(df, geometry='coords', crs='epsg:4326') +@dataclass +class RotatedGrid(Grid): + """ + Regridding if the Grid we want is regular, but also want to aggregate the observations to a regular grid. + + Useful for a global grid representation + """ + + projection: str = "rotated" + + def __post_init__(self): + self.calculate_grid_coords() + def aggregate(self, lon, lat, obs, obserr=None): + """ + Aggregate the observations to the grid cells - # Geopandas sjoin calculates if these shapely points are within the shapely grid pologons - try: - gdf = self.gdf.sjoin(points, how="left") - except TypeError: - print("Empty dataframe messing up some versions of geopandas") - gdf = pd.DataFrame(columns=[ - 'obstype', 'obslon', 'obslat', 'obsn', 'obsid', 'obslev', 'obs', - 'obserr', 'time', 'lon', 'lat', 'coords', 'index_right', - 'lon_cen', 'lat_cen', 'grid_cell'], - ) + Parameters: + lon, lat, obs, obserr (optional) -- Longitude, Latitude, Observations, and Errors - # # Need to also return the original lat lon - # # Calculate the mean of all points within each grid cell - # gdf = gdf[["grid_cell", "lon_cen", "lat_cen", "obs", "obserr", "lon", "lat"]].groupby("grid_cell").mean().reset_index() + Returns: + Aggregated lon, lat, rlon, rlat, obs values + """ + # Perform the rotation + rlon, rlat = geo_to_rot(lon, lat, centre_lon=self.centre_lon, centre_lat=self.centre_lat) - # ============================================================================= - # CARLOTTA MODIFIED THIS - # ============================================================================= - # Handle the case where 'obserr' might not exist - columns_to_group = ["grid_cell", "lon_cen", "lat_cen", "obs", "lon", "lat"] - if "obserr" in gdf.columns: - columns_to_group.append("obserr") + # Make sure rlon and rlat are within the borders of the grid + # Filter out values that are outside the grid bounds + mask = (rlon >= self.west) & (rlon <= self.west + (self.dlon * (self.nlon - 1))) & \ + (rlat >= self.south) & (rlat <= self.south + (self.dlat * (self.nlat - 1))) & \ + (~np.isnan(obs)) - gdf = gdf[columns_to_group].groupby("grid_cell").mean().reset_index() - # ============================================================================= - # Return the data we are interested in + # Apply the mask to filter out the out-of-bounds points + rlon = rlon[mask] + rlat = rlat[mask] + lon = lon[mask] + lat = lat[mask] + obs = obs[mask] + if obserr is not None: + obserr = obserr[mask] + + + # Vectorized calculation of grid indices + rlon_idx = np.searchsorted(self.center_longitudes, rlon) - 1 # Index of longitude grid + rlat_idx = np.searchsorted(self.center_latitudes, rlat) - 1 # Index of latitude grid + + + # Ensure indices are within bounds + rlon_idx = np.clip(rlon_idx, 0, self.nlon - 1) + rlat_idx = np.clip(rlat_idx, 0, self.nlat - 1) + + + # Initialize arrays for aggregation + aggregated_obs = np.zeros((self.nlat, self.nlon)) + aggregated_lon = np.zeros((self.nlat, self.nlon)) + aggregated_lat = np.zeros((self.nlat, self.nlon)) + aggregated_rlon = np.zeros((self.nlat, self.nlon)) + aggregated_rlat = np.zeros((self.nlat, self.nlon)) + aggregated_obs_err = np.ma.zeros((self.nlat, self.nlon)) if obserr is not None else None + count_obs = np.zeros((self.nlat, self.nlon), dtype=int) + + + + # Efficient accumulation using np.add.at + np.add.at(aggregated_obs, (rlat_idx, rlon_idx), obs) # Accumulate observations + np.add.at(aggregated_lon, (rlat_idx, rlon_idx), lon) # Accumulate lon values + np.add.at(aggregated_lat, (rlat_idx, rlon_idx), lat) # Accumulate lat values + np.add.at(aggregated_rlon, (rlat_idx, rlon_idx), rlon) # Accumulate lon values + np.add.at(aggregated_rlat, (rlat_idx, rlon_idx), rlat) # Accumulate lat values + np.add.at(count_obs, (rlat_idx, rlon_idx), 1) # Count observations per grid cell + + if obserr is not None: - return gdf["lon"].values, gdf["lat"].values, gdf["obs"].values, gdf["obserr"].values + np.add.at(aggregated_obs_err, (rlat_idx, rlon_idx), obserr) # Accumulate errors + + + # Compute averages by dividing by counts + non_zero_counts = count_obs > 0 + aggregated_obs[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lon[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_lat[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_rlon[non_zero_counts] /= count_obs[non_zero_counts] + aggregated_rlat[non_zero_counts] /= count_obs[non_zero_counts] + + if aggregated_obs_err is not None: + aggregated_obs_err[non_zero_counts] /= count_obs[non_zero_counts] + + + # Mask for zero counts + aggregated_obs = aggregated_obs[non_zero_counts] + aggregated_lon = aggregated_lon[non_zero_counts] + aggregated_lat = aggregated_lat[non_zero_counts] + aggregated_rlon = aggregated_rlon[non_zero_counts] + aggregated_rlat = aggregated_rlat[non_zero_counts] + count_obs = count_obs[non_zero_counts] + + if aggregated_obs_err is not None: + aggregated_obs_err = aggregated_obs_err[non_zero_counts] + + + # Return the results + lon_out = aggregated_lon.flatten() + lat_out = aggregated_lat.flatten() + rlon_out = aggregated_lon.flatten() #self.center_longitudes + rlat_out = aggregated_lat.flatten() #self.center_latitudess + obs_out = aggregated_obs.flatten() + count_obs = count_obs.flatten() + + + if aggregated_obs_err is not None: + obs_err_out = aggregated_obs_err.flatten() + return lon_out, lat_out, rlon_out, rlat_out, obs_out, obs_err_out, count_obs else: - return gdf["lon"].values, gdf["lat"].values, gdf["obs"].values + return lon_out, lat_out, rlon_out, rlat_out, obs_out, count_obs @dataclass -class RotatedGrid(Grid): +class IrregularRotatedGrid(Grid): + projection: str = "rotated" def __post_init__(self): @@ -197,13 +351,14 @@ class RotatedGrid(Grid): Aggregate Parameters: - rlon, rlat, obs values Optional obserr + lon, lat, obs values Optional obserr Returns: - rlon, rlat, obs values + lon, lat, rlon, rlat, obs values """ # Rotate the observations rlon, rlat = geo_to_rot(lon, lat, centre_lon=self.centre_lon, centre_lat=self.centre_lat) + if obserr is not None: df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "rlon":rlon, "rlat":rlat, "obserr": obserr}) else: @@ -230,13 +385,16 @@ class RotatedGrid(Grid): # Need to also return the original lat lon # Calculate the mean of all points within each grid cell + counts = gdf[["grid_cell","obs"]].groupby("grid_cell").count().reset_index() gdf = gdf[["grid_cell", "lon_cen", "lat_cen", "obs", "lon", "lat", "rlon", "rlat"]].groupby("grid_cell").mean().reset_index() + # Remove nan values + gdf = gdf.dropna(subset="obs") + counts = counts.dropna(subset="obs") + # Return the data we are interested in if obserr is not None: - return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, gdf["obserr"].values + return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, gdf["obserr"].values, counts["obs"].values else: - return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values - - + return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, counts["obs"].values \ No newline at end of file diff --git a/mapies/mapies.py b/mapies/mapies.py index 097d98cd5a5a8913ad900aa9602c30bb02516c4f..05517be843e467abcb945e84ab6a417ced445a2a 100644 --- a/mapies/mapies.py +++ b/mapies/mapies.py @@ -137,10 +137,16 @@ class MAPIES: @timeit - def plot_2D_obs_custom(self, lon_values, lat_values, obs_values, outdir="/home/cgile/Documents/mapies/mapies/figures", title=None, filename=None): + def plot_2D_obs_custom(self, lon_values=None, lat_values=None, obs_values=None, outdir="./", title=None, filename=None): """ General method for plotting observations with CB-RdYlBu colormap and arrow-like ends for out-of-bound values. """ + if lon_values == None: + lon_values = self.lon_values + if lat_values == None: + lat_values = self.lat_values + if obs_values == None: + obs_values = self.obs # Set Basemap projection figsize = (15, 10) @@ -184,7 +190,7 @@ class MAPIES: # Ensure the output directory exists os.makedirs(outdir, exist_ok=True) - filepath = os.path.join(outdir, filename or "observation_2D_plot.png") + filepath = os.path.join(self.dest, filename or "observation_2D_plot.png") plt.savefig(filepath, format="png") plt.close(fig) print(f"Saved plot: {filepath}") diff --git a/mapies/tests/test_func_tools.py b/mapies/tests/test_func_tools.py index 9070ba6195d61291857aae17324ec8e4704266b7..23f8731604439eedee7bcbee5ecce1fedeb25802 100644 --- a/mapies/tests/test_func_tools.py +++ b/mapies/tests/test_func_tools.py @@ -3,7 +3,7 @@ import pytest import pandas as pd import numpy as np -from util.func_tools import time_converter, time_domain_selection, geo_to_rot +from mapies.util.func_tools import time_converter, time_domain_selection, geo_to_rot diff --git a/mapies/tests/test_monarch_grid.py b/mapies/tests/test_monarch_grid.py new file mode 100644 index 0000000000000000000000000000000000000000..8555e5074677a0fdb712ae96226181aa83cfd4ef --- /dev/null +++ b/mapies/tests/test_monarch_grid.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +import pytest +import pandas as pd +import numpy as np +from mapies.grids.monarch import RegularGrid, RotatedGrid, IrregularRotatedGrid + + +@pytest.mark.parametrize( + "test_input,expected", + [ + ((np.array([-10, -10, -10]), np.array([-10, -10, -10]), np.array([1, 2, 3])), + (np.array([-10.]), np.array([-10.]), np.array([2.]), np.array([[3]]))), + ] +) +def test_regular_grid_aggregation(test_input, expected): + r = RegularGrid(dlon=180, dlat=90, west=-180, south=-90) + agg = r.aggregate(*test_input) + assert agg[0].all() == expected[0].all() + assert agg[1].all() == expected[1].all() + assert agg[2].all() == expected[2].all() + assert agg[3].all() == expected[3].all() + + +@pytest.mark.parametrize( + "test_input,expected", + [ + ((np.array([0, 0, 0]), np.array([0, 0, 0]), np.array([1, 2, 3])), + (np.array([0.]), np.array([0.]), np.array([0.]), np.array([0.]), np.array([2.]), np.array([3]))), + ] +) +def test_rotated_grid_aggregation(test_input, expected): + r = RotatedGrid(dlon=1, dlat=1, west=-1, south=-1, centre_lon=0, centre_lat=0) + agg = r.aggregate(*test_input) + assert agg[0].all() == expected[0].all() + assert agg[1].all() == expected[1].all() + assert agg[2].all() == expected[2].all() + assert agg[3].all() == expected[3].all() + + + +@pytest.mark.parametrize( + "test_input,expected", + [ + ((np.array([0, 0, 0]), np.array([0, 0, 0]), np.array([1, 2, 3])), + (np.array([0.]), np.array([0.]), np.array([0.]), np.array([0.]), np.array([2.]), np.array([3]))), + ] +) +def test_irregular_rotated_grid_aggregation(test_input, expected): + r = IrregularRotatedGrid(dlon=1, dlat=1, west=-1, south=-1, centre_lon=0, centre_lat=0) + agg = r.aggregate(*test_input) + assert agg[0].all() == expected[0].all() + assert agg[1].all() == expected[1].all() + assert agg[2].all() == expected[2].all() + assert agg[3].all() == expected[3].all() \ No newline at end of file diff --git a/mapies/viirs.py b/mapies/viirs.py index b0b2b17dbef2c25210a6da3850633f069a3f4304..00d21110140eb86a8d2a00903801d077879f919a 100644 --- a/mapies/viirs.py +++ b/mapies/viirs.py @@ -11,10 +11,10 @@ import numpy as np import pandas as pd import xarray as xr -from mapies import MAPIES -from util.func_tools import * +from mapies.mapies import MAPIES +from mapies.util.func_tools import * #timeit, get_file_list, time_converter, frequency_int, error_estimation, time_domain_selection, geo_to_rot -from grids.monarch import RotatedGrid, RegularGrid +from mapies.grids.monarch import RotatedGrid, IrregularRotatedGrid, RegularGrid from pathlib import Path from glob import glob as glob_module @@ -257,12 +257,15 @@ class VIIRS(MAPIES): # Calculate rotated lon and lat values # Calculate the grid representation - r = RotatedGrid(centre_lon=20, centre_lat=35, dlon=.1, dlat=.1, west=-51, south=-35) + r = RotatedGrid(dlon=.1, dlat=.1, west=-51, south=-35, centre_lon=20, centre_lat=35) + #r = RegularGrid(dlon=.1, dlat=.1, west=-180, south=-90) # Aggregate the the observations to the grid representation - lon, lat, rlon, rlat, obs = r.aggregate(self.lon_values, self.lat_values, self.obs) + lon, lat, rlon, rlat, obs, count_obs = r.aggregate(self.lon_values, self.lat_values, self.obs) + #lon, lat, obs, count_obs = r.aggregate(self.lon_values, self.lat_values, self.obs) self.lon_values = lon self.lat_values = lat self.obs = obs + self.count_obs = count_obs @timeit def read_nc(self): @@ -371,6 +374,15 @@ class VIIRS(MAPIES): self.rotate() super().plot_2D_obs() + # Plots + def plot_2D_obs_custom(self, outdir="./", **kwargs): + """ + Plotting the observations specific to VIIRS + """ + if self.grid_repr == "rotated": + self.rotate() + super().plot_2D_obs_custom() + @timeit def plot_2D_average(self, month, outdir, batch_size = 10, chunk_size = 1): """ diff --git a/run/run_viirs.py b/run/run_viirs.py index ed879e64ea3de7ff9d3e64fddd906478522f2908..ec6858d9e94dd9038a35a7439ee8fcfc9c9034f6 100644 --- a/run/run_viirs.py +++ b/run/run_viirs.py @@ -1,4 +1,4 @@ -from viirs import VIIRS +from mapies.viirs import VIIRS import time @@ -11,10 +11,10 @@ if __name__ == "__main__": c = VIIRS(start_date, end_date, frequency="3H", dest=outdir, indir=indir) c.read_nc() c.preprocess_vars() - - c.plot_2D_obs() + #c.plot_2D_obs() + c.plot_2D_obs_custom() #c.to_da() # Monthly average plot (in 'month' variables specify the month) - c.discover_files() - c.plot_2D_average(month=1, outdir=outdir, batch_size = 10, chunk_size = 1) + #c.discover_files() + #c.plot_2D_average(month=1, outdir=outdir, batch_size = 10, chunk_size = 1) diff --git a/setup.py b/setup.py index e8e3567304f03cf6c6c39a88af791ceee23181f7..b2e6abad5bc7f7b819caaf2910cd9aa1c36d7972 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ from setuptools import find_packages from setuptools import setup # Could update this using versioneer -version="0.0.4" +version="0.0.5" setup( name="mapies",