diff --git a/.gitignore b/.gitignore index 9f3041738b34eab07c82e208bb5fa272f1d69500..32f77acd7c65666188985ec794b492215e7b67a5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,9 @@ .obsidian *.orig +venv out-logs __pycache__ +run/run-nord3.sh grids/__pycache__ util/__pycache__ tests/__pycache__ diff --git a/README.md b/README.md index 82b7a3993e6dca6b85ada05f593a8f9629b88583..2a532c7a9ef8d65b6e6b435d5fd8d1a0e16d3b67 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,23 @@ MAPIES Monarch Assimilation Preproc and Inversion of Emissions plus Satellites -To run presently it is only available on nord3 as we use in-house modules like NES. -You can run it off-line on a local machine by running only global runs with the local-machine-run -git branch. +To setup on your own machine, I would recommend creating a virtual environment t. For example, + +``` +python -m env venv +``` + +This will set up a virtual environment. From there you can install all python modules you want by running + +``` +pip install -r requirements.txt +``` + +Next to run the tool, we have examples in the `mapies/run` folder. For example you can run the run_viirs.py file with a simple `python run_viirs.py` after updating the necessary variables. The dates and the directories. + +Presently you will need to run `export PYTHONPATH=../mapies` from inside this directory for the tool to see the in-built modules. We are working on a way to incorporate this into the pipeline. + +Happy mapping! + + + diff --git a/mapies/grids/monarch.py b/mapies/grids/monarch.py index 93e49431ebd4178d9afb69f2664fe2e918e6d2b3..1e8b52d62b70d0116ba4014d4d98512712872c14 100644 --- a/mapies/grids/monarch.py +++ b/mapies/grids/monarch.py @@ -10,8 +10,13 @@ from functools import partial, wraps import geopandas as gpd from geopandas import GeoDataFrame from shapely.geometry import Polygon, Point +from util.func_tools import * import logging -from nes import * +#from nes import * + + +# Grid without using NES + @dataclass class Grid: @@ -21,33 +26,117 @@ class Grid: dlat: float west: float = None south: float = None - - + + def calculate_grid_coords(self): """ - Use Nes to create a grid representation + This method does two things: + 1. It calculates the centre values of the grid + 2. Before Calculating the boundary points of the regular grid + + This can then be used in many grid representations, for example in a rotated grid used in the ihouse Monarch model """ - nessy = create_nes(comm=None, info=False, projection=self.projection, - centre_lat=self.centre_lat, centre_lon=self.centre_lon, - west_boundary=self.west, south_boundary=self.south, - inc_rlat=self.dlon, inc_rlon=self.dlat) - nessy.create_shapefile() - self.gdf = nessy.shapefile + self.nlat = int((abs(self.south) / self.dlat) * 2 + 1) + self.nlon = int((abs(self.west) / self.dlon) * 2 + 1) + self.center_latitudes = np.linspace(self.south, self.south + (self.dlat * (self.nlat - 1)), self.nlat, dtype=float) + self.center_longitudes = np.linspace(self.west, self.west + (self.dlon * (self.nlon - 1)), self.nlon, dtype=float) + + counter = 0 + self.coords = np.empty((self.center_latitudes.shape[0] * self.center_longitudes.shape[0], 2)) + for i in range(self.center_longitudes.shape[0]): + for j in range(self.center_latitudes.shape[0]): + self.coords[counter, 0] = self.center_longitudes[i] + self.coords[counter, 1] = self.center_latitudes[j] + counter += 1 + + + self.b_lats = self.create_bounds(self.coords[:, 1], self.dlat, number_vertices=4, inverse=True) + self.b_lons = self.create_bounds(self.coords[:, 0], self.dlon, number_vertices=4) + + return True + + + @staticmethod + def create_bounds(coordinates, inc, number_vertices=2, inverse=False): + """ + Calculate the boundaries of each grid points + """ + # Create new arrays moving the centers half increment less and more. + coords_left = coordinates - inc / 2 + coords_right = coordinates + inc / 2 + + # Defining the number of corners needed. 2 to regular grids and 4 for irregular ones. + if number_vertices == 2: + # Create an array of N arrays of 2 elements to store the floor and the ceil values for each cell + bound_coords = np.dstack((coords_left, coords_right)) + bound_coords = bound_coords.reshape((len(coordinates), number_vertices)) + elif number_vertices == 4: + # Create an array of N arrays of 4 elements to store the corner values for each cell + # It can be stored in clockwise starting form the left-top element, or in inverse mode. + if inverse: + bound_coords = np.dstack((coords_left, coords_left, coords_right, coords_right)) + else: + bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left)) + else: + print('The number of vertices of the boundaries must be 2 or 4.') + return bound_coords + + + def building_grid(self): + """ + Here we build the grid using Shapely polygons. And attribute it to a geopandas dataframe + + This is also where we decide whether to use a regular grid for global or a rotated grid for regional + + """ + + + # May need to add centre lats and centre lons to this dataframe as well + + # Re-shape + aux_b_lats = self.b_lats.squeeze() + aux_b_lons = self.b_lons.squeeze() + + + geometry = [] + # Create one dataframe with 8 columns, 4 points with two coordinates each one + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + self.gdf = GeoDataFrame(index=range(aux_b_lons.shape[0]), geometry=geometry, crs='epsg:4326') self.gdf["grid_cell"] = ["grid_cell_{}".format(i+1) for i in range(len(self.gdf))] + + # Append the center lat and lon values to this dataframe + self.gdf["lon_cen"] = self.coords[:, 0] + self.gdf["lat_cen"] = self.coords[:, 1] -@dataclass -class RotatedGrid(Grid): - projection: str = "rotated" + return True + + +@dataclass +class RegularGrid(Grid): + """ + Regridding if the Grid we want is regular, but also want to aggregate the observations to a regular grid. + + Useful for a global grid representation + """ + projection: str = "regular" + def __post_init__(self): self.calculate_grid_coords() - + self.building_grid() + def aggregate(self, lon, lat, obs, obserr=None): """ Aggregate Parameters: - lon, lat, obs values OPtional obserr + lon, lat, obs values Optional obserr Returns: lon, lat, obs values @@ -60,14 +149,86 @@ class RotatedGrid(Grid): df['coords'] = list(zip(df['lon'],df['lat'])) df['coords'] = df['coords'].apply(Point) + # Create a geodataframe of the rotated obseravtional lon/ lat points points = gpd.GeoDataFrame(df, geometry='coords', crs='epsg:4326') - gdf = self.gdf.sjoin(points) - gdf = gdf.groupby("grid_cell").mean().reset_index() + # Geopandas sjoin calculates if these shapely points are within the shapely grid pologons + try: + gdf = self.gdf.sjoin(points, how="left") + except TypeError: + print("Empty dataframe messing up some versions of geopandas") + gdf = pd.DataFrame(columns=[ + 'obstype', 'obslon', 'obslat', 'obsn', 'obsid', 'obslev', 'obs', + 'obserr', 'time', 'lon', 'lat', 'coords', 'index_right', + 'lon_cen', 'lat_cen', 'grid_cell'], + ) + + # Need to also return the original lat lon + # Calculate the mean of all points within each grid cell + gdf = gdf[["grid_cell", "lon_cen", "lat_cen", "obs", "obserr", "lon", "lat"]].groupby("grid_cell").mean().reset_index() + + # Return the data we are interested in if obserr is not None: return gdf["lon"].values, gdf["lat"].values, gdf["obs"].values, gdf["obserr"].values else: - return gdf["lon"].values, gdf["lat"].values, gdf["obs"].values + return gdf["lon"].values, gdf["lat"].values, gdf["obs"].values + + +@dataclass +class RotatedGrid(Grid): + projection: str = "rotated" + + def __post_init__(self): + self.calculate_grid_coords() + self.building_grid() + + + def aggregate(self, lon, lat, obs, obserr=None): + """ + Aggregate + + Parameters: + rlon, rlat, obs values Optional obserr + + Returns: + rlon, rlat, obs values + """ + # Rotate the observations + rlon, rlat = geo_to_rot(lon, lat, centre_lon=self.centre_lon, centre_lat=self.centre_lat) + if obserr is not None: + df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "rlon":rlon, "rlat":rlat, "obserr": obserr}) + else: + df = pd.DataFrame({"obs":obs, "lon":lon, "lat":lat, "rlon":rlon, "rlat":rlat}) + + df['coords'] = list(zip(df['rlon'],df['rlat'])) + df['coords'] = df['coords'].apply(Point) + + + # Create a geodataframe of the rotated obseravtional lon/ lat points + points = gpd.GeoDataFrame(df, geometry='coords', crs='epsg:4326') + + + # Geopandas sjoin calculates if these shapely points are within the shapely grid pologons + try: + gdf = self.gdf.sjoin(points, how="left") + except TypeError: + print("Empty dataframe messing up some versions of geopandas") + gdf = pd.DataFrame(columns=[ + 'obstype', 'obslon', 'obslat', 'obsn', 'obsid', 'obslev', 'obs', + 'obserr', 'time', 'lon', 'lat', 'coords', 'index_right', + 'lon_cen', 'lat_cen', 'grid_cell'], + ) + + # Need to also return the original lat lon + # Calculate the mean of all points within each grid cell + gdf = gdf[["grid_cell", "lon_cen", "lat_cen", "obs", "lon", "lat", "rlon", "rlat"]].groupby("grid_cell").mean().reset_index() + + + # Return the data we are interested in + if obserr is not None: + return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values, gdf["obserr"].values + else: + return gdf["lon"].values, gdf["lat"].values, gdf["rlon"].values, gdf["rlat"].values, gdf["obs"].values diff --git a/mapies/mapies.py b/mapies/mapies.py index cbe07dce1e941c2feb7f1a9ac656eb5971033406..d8b64edfdcdd37ce056ce45ca2a623f6fe83c808 100644 --- a/mapies/mapies.py +++ b/mapies/mapies.py @@ -65,7 +65,7 @@ class MAPIES: @timeit - def plot_2D_obs(self, outdir="./", **kwargs): + def plot_2D_obs(self, **kwargs): """ Plotting the observations """ @@ -96,10 +96,8 @@ class MAPIES: fig.colorbar(im, ax=ax) ax.set_title(f'Observation 2D plot of {self.datatype.upper()} data from {self.start_date} to {self.end_date}') - print("Showing figure") - #plt.show() print("Saving Figure") - plt.savefig("/esarchive/scratch/cmeikle/Projects/data/VIIRS/obslocation_regional_april23.png", format="png") + plt.savefig(f"{self.dest}{self.datatype}_2D_obs.png", format="png") plt.close(fig) @@ -108,7 +106,7 @@ class MAPIES: dependent_var_index, independent_var_values, ): - """ + """" Recutting the data whenever a selction of the data has been made along one of the dimenisons Based off of how it has been done in Providentia diff --git a/mapies/tests/test_func_tools.py b/mapies/tests/test_func_tools.py index 94587e32f07d2b01b069610efbe6796b9ab97a67..9070ba6195d61291857aae17324ec8e4704266b7 100644 --- a/mapies/tests/test_func_tools.py +++ b/mapies/tests/test_func_tools.py @@ -3,7 +3,7 @@ import pytest import pandas as pd import numpy as np -from util.func_tools import time_converter, time_domain_selection +from util.func_tools import time_converter, time_domain_selection, geo_to_rot @@ -23,6 +23,18 @@ def test_time_converter_error(): with pytest.raises(ValueError): time_converter("20240607141") +@pytest.mark.parametrize( + "test_input,expected", + [ + ((0, 0, 20, 35), (-23.95680324, -32.61460715)), + ((10, 10, 20, 35), (-10.82847335, -24.45866947)), + ((-10, -10, 20, 35), (-39.42034458, -39.15567129)), + ((-100, -50, 20, 35), (-141.61239392, -26.30586515)) + ] +) +def test_geo_to_rot(test_input, expected): + assert (round(geo_to_rot(*test_input)[0], 8), round(geo_to_rot(*test_input)[1], 8)) == expected + """ @pytest.mark.parametrize( "test_input,expected", diff --git a/mapies/util/func_tools.py b/mapies/util/func_tools.py index 3e9d0ca6b466f54273032541349fd28cc68c690a..7bcc9cef46b590ffb295a69cb37ac1f5a7556c81 100644 --- a/mapies/util/func_tools.py +++ b/mapies/util/func_tools.py @@ -164,15 +164,17 @@ def geo_to_rot(lons, lats, centre_lon: float, centre_lat: float): """ Rotating coordinates from cartesian lat/lon to rotated rlon/rlat """ - + distance_lons = np.radians(lons - centre_lon) lons = np.radians(lons) lats = np.radians(lats) centre_lon = np.radians(centre_lon) centre_lat = np.radians(centre_lat) - x = cos(centre_lat) * sin(lats) - sin(centre_lat) * cos(lats) * cos(lons - centre_lon) - y = cos(lats) * sin(lons - centre_lon) - z = cos(centre_lat) * cos(lats) * cos(lons - centre_lon) - sin(centre_lat) * sin(lats) + x = cos(centre_lat) * sin(lats) - sin(centre_lat) * cos(lats) * cos(distance_lons) + y = cos(lats) * sin(distance_lons) + z = cos(centre_lat) * cos(lats) * cos(distance_lons) + sin(centre_lat) * sin(lats) + # Arctan2 used + # Explanation of the difference https://geo.libretexts.org/Courses/University_of_California_Davis/GEL_056%3A_Introduction_to_Geophysics/Geophysics_is_everywhere_in_geology.../zz%3A_Back_Matter/Arctan_vs_Arctan2 rlon = np.arctan2(y, z) rlat = np.arcsin(x) #rlon[x < 0] += pi diff --git a/mapies/viirs.py b/mapies/viirs.py index 5c0db33410340e2a65c09b6884629d5e915efc4f..090c2a75a90beebfc54fc011e10714a1401ac1ea 100644 --- a/mapies/viirs.py +++ b/mapies/viirs.py @@ -41,6 +41,7 @@ class VIIRS(MAPIES): self.datatype="viirs" frequency = kwargs.get("frequency") self.dest = kwargs.get("dest") + self.indir = kwargs.get("indir") self.dates_slice = pd.date_range( self.start_date, @@ -65,6 +66,8 @@ class VIIRS(MAPIES): Read yaml config file """ module_dir = os.path.dirname(__file__) + + # Let the user read in their own config config_file = kwargs.get("config_file") if config_file: @@ -75,6 +78,7 @@ class VIIRS(MAPIES): else: config = yaml.safe_load(open(os.path.join(module_dir, "config/satellite_config.yaml"))) + variable_dict = config[self.datatype]["variables"] self.time_var = variable_dict["time_variable"] self.lon_var = variable_dict["lon_variable"] @@ -155,10 +159,13 @@ class VIIRS(MAPIES): """ Perform Rotation of Grid representation """ - + # Calculate rotated lon and lat values + + # Calculate the grid representation r = RotatedGrid(centre_lon=20, centre_lat=35, dlon=.1, dlat=.1, west=-51, south=-35) - lon, lat, obs = r.aggregate(self.lon_values, self.lat_values, self.obs) - self.lon_values = lon + # Aggregate the the observations to the grid representation + lon, lat, rlon, rlat, obs = r.aggregate(self.lon_values, self.lat_values, self.obs) + self.lon_values = lon self.lat_values = lat self.obs = obs @@ -171,7 +178,7 @@ class VIIRS(MAPIES): file_patterns = [] for date in self.dates_slice: date = datetime.strptime(date, '%Y%m%d%H%M').strftime('%Y%j') - filepaths = f'/esarchive/obs/nasa/viirs_noaa20_aerdb_l2_nrt/original_files/VIIRS/{date[0:4]}/{date[4:]}/AERDB_L2_VIIRS_NOAA20.A{date}*' + filepaths = f'{self.indir}/{date[0:4]}/{date[4:]}/AERDB_L2_VIIRS_NOAA20.A{date}*' print(filepaths) file_patterns.append(filepaths) diff --git a/requirements.txt b/requirements.txt index d4d41236e476f28650484aa4025509c12b5c5b7b..2613cd0ca7edaa3b41efcdc2549ce8ff24db2f42 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,45 @@ -xarray==0.19.0 -pandas==1.3.5 -geopandas==0.10.2 -dask==2022.2.0 -Cartopy==0.20.3 -netcdf4-python==1.5.3 \ No newline at end of file +attrs==24.2.0 +Cartopy==0.21.1 +certifi==2024.8.30 +cftime==1.6.4.post1 +click==8.1.7 +click-plugins==1.1.1 +cligj==0.7.2 +cloudpickle==3.1.0 +contourpy==1.1.1 +cycler==0.12.1 +dask==2023.5.0 +exceptiongroup==1.2.2 +fiona==1.10.1 +fonttools==4.54.1 +fsspec==2024.10.0 +geopandas==0.13.2 +importlib-metadata==8.5.0 +importlib-resources==6.4.5 +iniconfig==2.0.0 +jinja2==3.1.4 +kiwisolver==1.4.7 +locket==1.0.0 +MarkupSafe==2.1.5 +matplotlib==3.7.5 +netCDF4==1.7.2 +numpy==1.24.4 +packaging==24.1 +pandas==2.0.3 +partd==1.4.1 +pillow==10.4.0 +pluggy==1.5.0 +pyparsing==3.1.4 +pyproj==3.5.0 +pyshp==2.3.1 +pytest==8.3.3 +python-dateutil==2.9.0.post0 +pytz==2024.2 +PyYAML==6.0.2 +shapely==2.0.6 +six==1.16.0 +tomli==2.0.2 +toolz==1.0.0 +tzdata==2024.2 +xarray==2023.1.0 +zipp==3.20.2 \ No newline at end of file diff --git a/run/run_viirs.py b/run/run_viirs.py index 738ec6df235641c4bd1b44d3c2568bbabba392f3..379cedbc416b213980fad5dfaf18e75cb4ce79e4 100644 --- a/run/run_viirs.py +++ b/run/run_viirs.py @@ -2,11 +2,14 @@ from viirs import VIIRS if __name__ == "__main__": + # Change these variables start_date = "202404230730" end_date = "202404231830" - c = VIIRS(start_date, end_date, frequency="3H", dest="/esarchive/scratch/cmeikle/Projects/data/VIIRS") + outdir="/home/cmeikle/Projects/data/" + indir="/home/cmeikle/Projects/data/VIIRS/original_files/AERDB_L2_VIIRS_NOAA20" + c = VIIRS(start_date, end_date, frequency="3H", dest=outdir, indir=indir) c.read_nc() c.preprocess_vars() - #c.plot_2D_obs() - c.to_da() \ No newline at end of file + c.plot_2D_obs() + #c.to_da() \ No newline at end of file diff --git a/setup.py b/setup.py index 8784c08d349c49399dd4e74dce55b880fa3f3624..ec3ed241fd020636b4f6288d3ea18dae8b5c7f3a 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ from setuptools import find_packages from setuptools import setup # Could update this using versioneer -version="0.0.2" +version="0.0.3" setup( name="mapies",