From c53dd89208d1bbed8333405c174e4b33efe0965d Mon Sep 17 00:00:00 2001 From: Calum Meikle Date: Mon, 23 Sep 2024 12:24:39 +0200 Subject: [PATCH 1/2] Added a run directory and examples --- .gitignore | 1 - mapies/mapies.py | 2 - mapies/tropomi.py | 19 +++++---- mapies/util/func_tools.py | 3 +- mapies/viirs.py | 87 ++++++++++++++++++--------------------- run/__init__.py | 0 run/run-nord3.sh | 25 +++++++++++ run/run_viirs.py | 12 ++++++ 8 files changed, 88 insertions(+), 61 deletions(-) create mode 100644 run/__init__.py create mode 100644 run/run-nord3.sh create mode 100644 run/run_viirs.py diff --git a/.gitignore b/.gitignore index 63be71a..c4a11ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ .obsidian *.orig -run-nord3.sh __pycache__ grids/__pycache__ util/__pycache__ diff --git a/mapies/mapies.py b/mapies/mapies.py index 5c84dee..cbe07dc 100644 --- a/mapies/mapies.py +++ b/mapies/mapies.py @@ -18,8 +18,6 @@ import cartopy.crs as ccrs - - class MAPIES: """ Base class for only base functions used in every Mapies run diff --git a/mapies/tropomi.py b/mapies/tropomi.py index 24b56ad..acba956 100644 --- a/mapies/tropomi.py +++ b/mapies/tropomi.py @@ -26,17 +26,20 @@ class TROPOMI(MAPIES): super().__init__(start_date, end_date) self.time_orig = np.datetime64("1993-01-01T00:00:00") - + self.lat=lat + self.lon=lon + self.quality_flag_limit + # Add quality value filter number - + if isinstance(self.start_date, str): self.start_date = time_converter(self.start_date) if isinstance(self.end_date, str): self.end_date = time_converter(self.end_date) - + self.read_config(**kwargs) @@ -46,13 +49,13 @@ class TROPOMI(MAPIES): Read yaml config file """ module_dir = os.path.dirname(__file__) - config = yaml.safe_load(open(os.path.join(module_dir,"config/satellite_config.yaml"))) + config = yaml.safe_load(open(os.path.join(module_dir, "config/satellite_config.yaml"))) print(config) variable_dict = config[self.datatype]["variables"] self.time_var = variable_dict["time_variable"] self.lon_var = variable_dict["lon_variable"] self.lat_var = variable_dict["lat_variable"] - + # If obs_var passed then run data analysis on that variable if not pull default obs_var = kwargs.get("obs_var") if obs_var: @@ -68,12 +71,12 @@ class TROPOMI(MAPIES): """ super().preprocess_vars() - + # no2 column values obs_dims = self.ds[self.obs_var].dims obs_shape = self.ds[self.obs_var].shape obs_attrs = self.ds[self.obs_var].attrs - + # Duplicate values in the time array to get the same shape as the flattened variable array self.time_values = inflate_array(self.time_values, self.time_shape, no2_shape) self.time_values_index = inflate_array(self.time_values_index, self.time_shape, obs_shape) @@ -110,8 +113,6 @@ class TROPOMI(MAPIES): self.obs, ) - # Need to also - @timeit def read_nc(self): diff --git a/mapies/util/func_tools.py b/mapies/util/func_tools.py index 1d955df..3e9d0ca 100644 --- a/mapies/util/func_tools.py +++ b/mapies/util/func_tools.py @@ -27,7 +27,6 @@ def timeit(func): # Function from Guillaume's script could be useful # Only supported in python3.10 -#def get_file_list(patterns: str | List[str]) -> List[str]: def get_file_list(patterns) -> List[str]: """ :param patterns: one or several glob patterns (or exact file names) pointing to the location of files on disk. @@ -41,7 +40,7 @@ def get_file_list(patterns) -> List[str]: # Then decode them one by one files = [] for pattern in patterns: - logging.debug(f"getting file list matching {pattern}") + logging.debug(f"Getting file list matching {pattern}") files.extend(glob(pattern)) return files diff --git a/mapies/viirs.py b/mapies/viirs.py index fe46ce5..72275ce 100644 --- a/mapies/viirs.py +++ b/mapies/viirs.py @@ -1,35 +1,36 @@ #!/usr/bin/env python -from dataclasses import dataclass, field -from typing import List, Tuple from datetime import datetime, timedelta - from functools import wraps -from mapies import MAPIES -from util.func_tools import timeit, get_file_list, time_converter, frequency_int, error_estimation, time_domain_selection, geo_to_rot -from grids.monarch import RotatedGrid -from pathlib import Path -import time import logging -import yaml import os +import time +import yaml + import numpy as np import pandas as pd import xarray as xr +from mapies import MAPIES +from util.func_tools import * +#timeit, get_file_list, time_converter, frequency_int, error_estimation, time_domain_selection, geo_to_rot +from grids.monarch import RotatedGrid +from pathlib import Path + + # TODO: still need to do a load of filtering huuuhhh # TODO: Create Yaml config file for time, lat, lon, uncertainty const etc. -# UNcertainty constants should be able to be passed as number, array or dict +# Uncertainty constants should be able to be passed as number, array or dict class VIIRS(MAPIES): """ Class for VIIRS specific data """ - + def __init__(self, start_date, end_date, **kwargs): """ Inherited init class with new variables @@ -39,7 +40,7 @@ class VIIRS(MAPIES): self.time_orig = np.datetime64("1993-01-01T00:00:00") self.datatype="viirs" frequency = kwargs.get("frequency") - self.dest = kwargs("dest") + self.dest = kwargs.get("dest") self.dates_slice = pd.date_range( self.start_date, @@ -48,7 +49,7 @@ class VIIRS(MAPIES): ).strftime('%Y%m%d%H%M') if frequency: self.frequency = frequency_int(frequency) - + if isinstance(self.start_date, str): self.start_date = time_converter(self.start_date) @@ -64,13 +65,13 @@ class VIIRS(MAPIES): Read yaml config file """ module_dir = os.path.dirname(__file__) - config = yaml.safe_load(open(os.path.join(module_dir,"config/satellite_config.yaml"))) - + config = yaml.safe_load(open(os.path.join(module_dir, "config/satellite_config.yaml"))) + variable_dict = config[self.datatype]["variables"] self.time_var = variable_dict["time_variable"] self.lon_var = variable_dict["lon_variable"] self.lat_var = variable_dict["lat_variable"] - + # If obs_var passed then run data analysis on that variable if not pull default obs_var = kwargs.get("obs_var") if obs_var: @@ -88,13 +89,13 @@ class VIIRS(MAPIES): else: self.unc_const = da_dict["uncertainty_constants"] #float, int, list, dict print(self.unc_const) - + grid_repr = kwargs.get("grid_repr") if grid_repr: self.grid_repr = grid_repr else: - self.grid_repr = da_dict["uncertainty_constants"] #str - + self.grid_repr = da_dict["grid_repr"] #str + @timeit @@ -139,31 +140,36 @@ class VIIRS(MAPIES): self.time_values_index, self.obs, ) - - # Load in unit conversion done by Dene and apply new formulas + @timeit def rotate(self): """ Perform Rotation of Grid representation """ - + r = RotatedGrid(centre_lon=20, centre_lat=35, dlon=.1, dlat=.1, west=-51, south=-35) lon, lat, obs = r.aggregate(self.lon_values, self.lat_values, self.obs) self.lon_values = lon self.lat_values = lat self.obs = obs - # This part needs improving with paralellization @timeit def read_nc(self): """ Read netcdf files with xarray """ + # Take in start and end date and convert to julian dates + file_patterns = [] + for date in self.dates_slice: + date = datetime.strptime(date, '%Y%m%d%H%M').strftime('%Y%j') + filepaths = f'/esarchive/obs/nasa/viirs_noaa20_aerdb_l2_nrt/original_files/VIIRS/{date[0:4]}/{date[4:]}/AERDB_L2_VIIRS_NOAA20.A{date}*' + print(filepaths) + file_patterns.append(filepaths) # TODO: maybe change this to NETCDF4 - files = get_file_list('/esarchive/obs/nasa/viirs_noaa20_aerdb_l2_nrt/original_files/VIIRS/2024/114/AERDB_L2_VIIRS_NOAA20.A2024114*') - + files = get_file_list(file_patterns) + # A bit of preprocessing to start before reading in the files def preprocess(ds): ds = ds.expand_dims(dim={"index": [ds.attrs["product_name"]]}) @@ -171,14 +177,13 @@ class VIIRS(MAPIES): # Open dataset with xarray and dask self.ds = xr.open_mfdataset(files, preprocess=preprocess) - + @timeit def to_da(self): """ Function that returns all the needed variables for the DA """ - #self.read_nc() - #self.preprocess_vars() + # TODO: Move this r = RotatedGrid(centre_lon=20, centre_lat=35, dlon=.1, dlat=.1, west=-51, south=-35) # Calculate error @@ -202,17 +207,17 @@ class VIIRS(MAPIES): l_border, r_border, ) - + # Reindex obs = self.reindex(frequency_index, self.obs) lon = self.reindex(frequency_index, self.lon_values) lat = self.reindex(frequency_index, self.lat_values) - obserr = self.reindex(frequency_index, self.obserr) + obserr = self.reindex(frequency_index, self.obserr) # Run aggregation with grid representation lon, lat, obs, obserr = r.aggregate(lon, lat, obs, obserr) rlon, rlat = geo_to_rot(lon, lat, centre_lon=20, centre_lat=35) - + # Create new arrays with same length as obs obsid = np.full(shape=obs.shape, fill_value=self.obsid_da, dtype=self.int_type) @@ -222,7 +227,7 @@ class VIIRS(MAPIES): time_values = np.full(shape=obs.shape, fill_value=date, dtype=str) # Coords equals index - + assert obs.shape == obserr.shape assert lon.shape == time_values.shape coords = dict(index=("index", np.indices(obs.shape)[0, :])) @@ -241,7 +246,7 @@ class VIIRS(MAPIES): ) ds = self.to_xarray(coords=coords, data_vars=data_vars) - + ds.to_netcdf(filename, encoding={}) outfiles.append(filename) @@ -252,18 +257,6 @@ class VIIRS(MAPIES): """ Plotting the observations specific to VIIRS """ - self.read_nc() - self.preprocess_vars() - self.rotate() + if self.grid_repr == "rotated": + self.rotate() super().plot_2D_obs() - - -if __name__ == "__main__": - start_date = "202404230730" - end_date = "202404231830" - c = VIIRS(start_date, end_date, frequency="3H", dest="/esarchive/scratch/cmeikle/Projects/data/VIIRS") - #c.read_nc() - #c.preprocess_vars() - - c.plot_2D_obs() - #c.to_da() \ No newline at end of file diff --git a/run/__init__.py b/run/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/run/run-nord3.sh b/run/run-nord3.sh new file mode 100644 index 0000000..c1dfeed --- /dev/null +++ b/run/run-nord3.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#SBATCH -A bsc32 +# +# +#SBATCH --cpus-per-task=1 +#16 +#SBATCH -n 1 +#SBATCH -t 0-01:00:00 +#SBATCH -J mapies +#SBATCH -e /esarchive/scratch/cmeikle/out-logs/mapies-%j.err +#SBATCH -o /esarchive/scratch/cmeikle/out-logs/mapies-%j.out +#BSUB -x + +export PYTHONPATH=/esarchive/scratch/cmeikle/git_projects/mapies/mapies + +module load xarray/0.19.0-foss-2019b-Python-3.7.4 +module load pandas/1.3.5-foss-2019b-Python-3.7.4 +module load geopandas/0.10.2-foss-2019b-Python-3.7.4 +module load dask/2022.2.0-foss-2019b-Python-3.7.4 +module load NES/1.1.3-nord3-v2-foss-2019b-Python-3.7.4 +module load Cartopy/0.20.3-foss-2019b-Python-3.7.4 +module load netcdf4-python/1.5.3-foss-2019b-Python-3.7.4 + + +python3 run_viirs.py \ No newline at end of file diff --git a/run/run_viirs.py b/run/run_viirs.py new file mode 100644 index 0000000..738ec6d --- /dev/null +++ b/run/run_viirs.py @@ -0,0 +1,12 @@ +from viirs import VIIRS + + +if __name__ == "__main__": + start_date = "202404230730" + end_date = "202404231830" + c = VIIRS(start_date, end_date, frequency="3H", dest="/esarchive/scratch/cmeikle/Projects/data/VIIRS") + c.read_nc() + c.preprocess_vars() + + #c.plot_2D_obs() + c.to_da() \ No newline at end of file -- GitLab From 79c38646ecf6719c17010e8ec36a36b386aca416 Mon Sep 17 00:00:00 2001 From: Calum Meikle Date: Mon, 23 Sep 2024 15:01:35 +0200 Subject: [PATCH 2/2] Add a requirements.txt --- README.md | 4 ++++ requirements.txt | 6 ++++++ run/run-nord3.sh | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 requirements.txt diff --git a/README.md b/README.md index 57c37d1..82b7a39 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,6 @@ MAPIES Monarch Assimilation Preproc and Inversion of Emissions plus Satellites + +To run presently it is only available on nord3 as we use in-house modules like NES. +You can run it off-line on a local machine by running only global runs with the local-machine-run +git branch. diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d4d4123 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +xarray==0.19.0 +pandas==1.3.5 +geopandas==0.10.2 +dask==2022.2.0 +Cartopy==0.20.3 +netcdf4-python==1.5.3 \ No newline at end of file diff --git a/run/run-nord3.sh b/run/run-nord3.sh index c1dfeed..2234324 100644 --- a/run/run-nord3.sh +++ b/run/run-nord3.sh @@ -11,7 +11,7 @@ #SBATCH -o /esarchive/scratch/cmeikle/out-logs/mapies-%j.out #BSUB -x -export PYTHONPATH=/esarchive/scratch/cmeikle/git_projects/mapies/mapies +export PYTHONPATH=../mapies module load xarray/0.19.0-foss-2019b-Python-3.7.4 module load pandas/1.3.5-foss-2019b-Python-3.7.4 -- GitLab