diff --git a/mapies/config/satellite_config.yaml b/mapies/config/satellite_config.yaml index 44c2b8a00eef468ea25fd0b201ba4f6230ab77d3..847a47d71a4792fb8f4467cc2c95294d6269738b 100644 --- a/mapies/config/satellite_config.yaml +++ b/mapies/config/satellite_config.yaml @@ -10,7 +10,22 @@ viirs: da: obsid: 34 uncertainty_constants: [0.2, 0.05, 0.02] - grid_repr: "rotated" + grids: + grid_repr: "" # default grid repr + rotated: + centre_lat: 35 + centre_lon: 20 + west: -51 + south: -35 + dlon: 0.1 + dlat: 0.1 + regular: + west: -180 + south: -90 + dlon: 0.1 + dlat: 0.1 + #nlon: 3600 + #nlat: 1800 qa: qa_variables: qa_land: @@ -26,7 +41,6 @@ viirs: qa_values: [0] qa_combined: True - tropomi: variables: time_variable: "delta_time" diff --git a/mapies/mapies.py b/mapies/mapies.py index 2129a03311e79e4d92c5f2a6d8458c75f42d4de1..67b93688d137d8177f5f222e8e3a4dbd826d33c7 100644 --- a/mapies/mapies.py +++ b/mapies/mapies.py @@ -5,6 +5,7 @@ from functools import wraps from datetime import datetime, timedelta import sys from mapies.util.func_tools import timeit, time_domain_selection, frequency_int, error_estimation +from mapies.grids.monarch import RotatedGrid, IrregularRotatedGrid, RegularGrid import time import logging import numpy as np @@ -16,6 +17,7 @@ import matplotlib.pyplot as plt import cartopy import cartopy.crs as ccrs import os +import yaml import matplotlib.cm as cm from matplotlib.colors import ListedColormap import matplotlib.colors as mcolors @@ -38,6 +40,68 @@ class MAPIES: self.start_date = start_date self.end_date = end_date + self.grid_dict = { + "rotated":RotatedGrid, + "regular":RegularGrid, + "ireg_rotated":IrregularRotatedGrid + } + + + def read_config(self, **kwargs): + """ + Read yaml config file + """ + module_dir = os.path.dirname(__file__) + + + # Let the user read in their own config + config_file = kwargs.get("config_file") + if config_file: + try: + self.config = yaml.safe_load(open(config_file)) + except FileNotFoundError: + logging.error("This is not a config file") + else: + self.config = yaml.safe_load(open(os.path.join(module_dir, "config/satellite_config.yaml"))) + + + variable_dict = self.config[self.datatype]["variables"] + self.time_var = variable_dict["time_variable"] + self.lon_var = variable_dict["lon_variable"] + self.lat_var = variable_dict["lat_variable"] + + # If obs_var passed then run data analysis on that variable if not pull default + obs_var = kwargs.get("obs_var") + if obs_var: + self.obs_var = obs_var + else: + self.obs_var = variable_dict["obs_variable"] + + + grid_dict = self.config[self.datatype]["grids"] + grid_repr = kwargs.get("grid_repr") + if grid_repr: + self.grid_repr = grid_repr + else: + self.grid_repr = grid_dict["grid_repr"] #str + + # Cread grid object if grid repr is selected in the input + self.grid_config = grid_dict[self.grid_repr] + self.grid = self.grid_dict[self.grid_repr](**self.grid_config) + print(self.grid) + + + qa_dict = self.config[self.datatype]["qa"] + #Check if the user wants qa flags + apply_qa = kwargs.get("apply_qa") + + if apply_qa is None: + self.apply_qa = True + else: + self.apply_qa = apply_qa + self.qualityFlags = qa_dict["qa_variables"] + + def preprocess_vars(self): """ @@ -64,9 +128,6 @@ class MAPIES: # Time domain selection self.time_values, self.time_values_index = time_domain_selection(self.time_values, self.start_date, self.end_date) - # TODO: We need to do some reindexing of the longitude and latitude variables too so that if we need them for regridding later we have them reindexed - - @timeit @@ -354,7 +415,9 @@ class MAPIES: unique_values = np.unique(num_obs) # Unique values in num_obs levels = np.linspace(unique_values.min(), unique_values.max(), unique_values.max()-unique_values.min()+1) + # Use a discrete colormap + cmap = plt.cm.rainbow norm = mcolors.Normalize(vmin=unique_values.min(), vmax=unique_values.max()) # norm = mcolors.BoundaryNorm(np.arange(unique_values.min()-0.5, unique_values.max()+1, 1), cmap.N) diff --git a/mapies/viirs.py b/mapies/viirs.py index 2725ecb8e817d3ee6d14764d04f1c2fa67316b72..3ede174d97a16f8b276d7c5d496f35c6800ff66b 100644 --- a/mapies/viirs.py +++ b/mapies/viirs.py @@ -54,6 +54,7 @@ def rotate_function(r, lon, lat, obs): raise ValueError("Invalid grid representation") + def process_single_file(r, file, obs_var, lat_var, lon_var, apply_qa, qualityFlags): """ Process a single file and return aggregated results. @@ -103,10 +104,12 @@ def process_batch(r, batch_files, obs_var, lat_var, lon_var, chunk_size, quality lon = ds[lon_var].values.flatten() lon_agg, lat_agg, obs_agg, count_obs = rotate_function(r,lon, lat, obs) + ds.close() return obs_agg, lat_agg, lon_agg, count_obs + def filter_file(args): root, filename, start_dt, end_dt = args try: @@ -136,6 +139,8 @@ class VIIRS(MAPIES): self.datatype="viirs" frequency = kwargs.get("frequency") self.dest = kwargs.get("dest") + if not os.path.exists(self.dest): + raise Exception("Doesn't exist") self.indir = kwargs.get("indir") self.year = int(start_date[:4]) @@ -165,34 +170,9 @@ class VIIRS(MAPIES): """ Read yaml config file """ - module_dir = os.path.dirname(__file__) - - - # Let the user read in their own config - config_file = kwargs.get("config_file") - if config_file: - try: - config = yaml.safe_load(open(config_file)) - except FileNotFoundError: - logging.error("This is not a config file") - else: - config = yaml.safe_load(open(os.path.join(module_dir, "config/satellite_config.yaml"))) - + super().read_config(**kwargs) - variable_dict = config[self.datatype]["variables"] - self.time_var = variable_dict["time_variable"] - self.lon_var = variable_dict["lon_variable"] - self.lat_var = variable_dict["lat_variable"] - - # If obs_var passed then run data analysis on that variable if not pull default - obs_var = kwargs.get("obs_var") - if obs_var: - self.obs_var = obs_var - else: - self.obs_var = variable_dict["obs_variable"] - - - da_dict = config[self.datatype]["da"] + da_dict = self.config[self.datatype]["da"] self.obsid_da = da_dict["obsid"] #int unc_const = kwargs.get("unc_const") @@ -202,24 +182,6 @@ class VIIRS(MAPIES): self.unc_const = da_dict["uncertainty_constants"] #float, int, list, dict - grid_repr = kwargs.get("grid_repr") - if grid_repr: - self.grid_repr = grid_repr - else: - self.grid_repr = da_dict["grid_repr"] #str - - qa_dict = config[self.datatype]["qa"] - #Check if the user wants qa flags - apply_qa = kwargs.get("apply_qa") - - if apply_qa is None: - self.apply_qa = True - else: - self.apply_qa = apply_qa - self.qualityFlags = qa_dict["qa_variables"] - - - @timeit def preprocess_vars(self): """ @@ -276,14 +238,20 @@ class VIIRS(MAPIES): Perform Rotation of Grid representation """ # Calculate the grid representation - #r = RotatedGrid(dlon=.1, dlat=.1, west=-51, south=-35, centre_lon=20, centre_lat=35) - r = RegularGrid(dlon=.1, dlat=.1, west=-180, south=-90) - # Aggregate the the observations to the grid representation - #lon, lat, rlon, rlat, obs, count_obs = r.aggregate(self.lon_values, self.lat_values, self.obs) - lon, lat, obs, count_obs = r.aggregate(self.lon_values, self.lat_values, self.obs) - self.lon_values = lon - self.lat_values = lat - self.obs = obs + if isinstance(self.grid, RotatedGrid): + # Aggregate the the observations to the grid representation + lon_agg, lat_agg, rlon_agg, rlat_agg, obs_agg, count_obs = self.grid.aggregate(self.lon_values, self.lat_values, self.obs) + + elif isinstance(self.grid, RegularGrid): + # Aggregate the the observations to the grid representation + lon_agg, lat_agg, obs_agg, count_obs = self.grid.aggregate(self.lon_values, self.lat_values, self.obs) + + else: + raise ValueError("Invalid grid representation") + + self.lon_values = lon_agg + self.lat_values = lat_agg + self.obs = obs_agg self.count_obs = count_obs @timeit @@ -296,18 +264,19 @@ class VIIRS(MAPIES): for date in self.dates_slice: # Convert to Julian day date = datetime.strptime(date, '%Y%m%d%H%M').strftime('%Y%j') + filepaths = f'{self.indir}/**/AERDB_L2_VIIRS_NOAA20.A{date}*' file_patterns.append(filepaths) # Add pattern to the set files = sorted(get_file_list(file_patterns)) print(f"Total number of files: {len(files)}") + start_dt = datetime.strftime(self.start_date, "%Y%m%d%H%M") end_dt = datetime.strftime(self.end_date, "%Y%m%d%H%M") start_dt = datetime.strptime(start_dt, "%Y%m%d%H%M") end_dt = datetime.strptime(end_dt, "%Y%m%d%H%M") - # print(f'Start date: {start_dt}, End date: {end_dt}') files_filtered = [] for file in files: @@ -353,8 +322,6 @@ class VIIRS(MAPIES): Function that returns all the needed variables for the DA """ - # TODO: Move this - r = RotatedGrid(centre_lon=20, centre_lat=35, dlon=.1, dlat=.1, west=-51, south=-35) # Calculate error self.obserr = error_estimation(self.datatype, self.obs, self.unc_const) @@ -380,7 +347,7 @@ class VIIRS(MAPIES): lat = self.reindex(frequency_index, self.lat_values) obserr = self.reindex(frequency_index, self.obserr) # Run aggregation with grid representation - lon, lat, obs, obserr = r.aggregate(lon, lat, obs, obserr) + lon, lat, obs, obserr = self.grid.aggregate(lon, lat, obs, obserr) rlon, rlat = geo_to_rot(lon, lat, centre_lon=20, centre_lat=35) @@ -422,11 +389,12 @@ class VIIRS(MAPIES): """ Plotting the observations specific to VIIRS """ - if self.grid_repr == "rotated": + if self.grid_repr != "": self.rotate() super().plot_2D_obs_custom() + @timeit def plot_2D_observations(self, months=None, outdir=None): @@ -452,6 +420,7 @@ class VIIRS(MAPIES): # title = f"Observation 2D plot of {self.datatype.upper()} data from {self.start_date} to {self.end_date}" title = f"Observation 2D plot of month {m} from {self.start_date} to {self.end_date}" filename = f"{outdir}/{self.datatype}_2D_obs_month{m}_{self.year}_.png" + super().plot_2D_obs_custom( lon_values=lon_values, lat_values=lat_values, @@ -525,7 +494,8 @@ class VIIRS(MAPIES): ) @timeit - def process_data(self, monthly_avg=True, batch_size=100): + def process_data(self, monthly_avg=True, batch_size=100, regrid=""): + """ Process the data for the specified year and months. @@ -624,6 +594,7 @@ class VIIRS(MAPIES): obs_values = month_data["obs"] count_values = month_data["count"] + # Initialize final arrays on the first valid month if cumulative_obs is None: cumulative_obs = obs_values * count_values @@ -679,8 +650,6 @@ class VIIRS(MAPIES): Returns: - lon_final, lat_final, obs_final: Aggregated longitude, latitude, and observation arrays. """ - #r = RotatedGrid(centre_lon=20, centre_lat=35, dlon=.1, dlat=.1, west=-51, south=-35) - r = RegularGrid(dlon=.1, dlat=.1, west=-180, south=-90) month_files = self.monthly_dict[month]["files"] print(f"Processing {len(month_files)} files for month {month}") @@ -701,7 +670,7 @@ class VIIRS(MAPIES): print(f"Processing a batch of {len(batch)} files...") # Use multiprocessing to process files within the batch - args = [(r, file, self.obs_var, self.lat_var, self.lon_var, self.apply_qa, self.qualityFlags) for file in batch] + args = [(self.grid, file, self.obs_var, self.lat_var, self.lon_var, self.apply_qa, self.qualityFlags) for file in batch] with multiprocessing.Pool(processes=8) as pool: # Adjust number of processes as needed results = pool.starmap(process_single_file, args) @@ -747,4 +716,4 @@ class VIIRS(MAPIES): print(f"Completed processing for month {month}.") - return final_lon, final_lat, final_obs, cumulative_count \ No newline at end of file + return final_lon, final_lat, final_obs, cumulative_count diff --git a/run/run_viirs.py b/run/run_viirs.py index 8685c6cd664b25428ba58eede0061c4727214bb4..b622c73be1bcf7dca9ea6d008eaf3af36936a1be 100644 --- a/run/run_viirs.py +++ b/run/run_viirs.py @@ -5,38 +5,17 @@ import time if __name__ == "__main__": # Change these variables - # 00:30 to 02:36 from 2024-01-01 to 2024-01-03 - # start_date = "202401010030" - # end_date = "202401030236" - # 00:30 to 03:36 of 2024-01-01 - # start_date = "202401010030" - # end_date = "202401010336" - # ONE YEAR - # start_date = "202401300000" - # end_date = "202402022359" - - # ONE MONTH - JANUARY 2024 start_date = "202401010000" - end_date = "202401312359" - - # outdir="/home/cmeikle/Projects/data/" - # indir="/home/cmeikle/Projects/data/VIIRS/original_files/AERDB_L2_VIIRS_NOAA20" - outdir = "/home/cgile/Documents/mapies/figures" - # indir = "/home/cgile/bscearth000/esarchive/obs/nasa/viirs_noaa20_aerdb_l2/original_files/VIIRS" - indir = "/home/cgile/Documents/mapies/VIIRS" - - start_time = time.time() - - c = VIIRS(start_date, end_date, frequency="D", dest=outdir, indir=indir) + end_date = "202401020000" + outdir="/esarchive/scratch/cmeikle/" + indir="/esarchive/obs/nasa/viirs_noaa20_aerdb_l2/original_files/VIIRS" + c = VIIRS(start_date, end_date, frequency="D", dest=outdir, indir=indir, apply_qa=True, grid_repr="regular") c.read_nc() - c.process_data(monthly_avg=True, batch_size = 100) - c.yearly_average() - c.plot_2D_observations(months=[1], outdir=outdir) - c.plot_2D_num_obs(months=[1], outdir=outdir) - end_time = time.time() - elapsed_time = end_time - start_time - print(f"Script executed in {elapsed_time:.2f} seconds.") - + c.process_data(monthly_avg=False, batch_size = 10) + #c.plot_2D_obs() + c.plot_2D_observations(months=[0], outdir=outdir) + c.plot_2D_num_obs(months=[0], outdir=outdir) + diff --git a/setup.py b/setup.py index 9567e38d52605e1c325fc6f162c97c9d43b49170..b147a08a39fef5086f5f11e6d4a98dc1949fd394 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ from setuptools import find_packages from setuptools import setup # Could update this using versioneer -version="0.0.7" +version="0.0.8" setup( name="mapies",