From c204a4ad3692a236a763d160dc4f0246731ffc55 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Fri, 7 Feb 2025 15:10:49 +0100
Subject: [PATCH 1/8] Updating qa to run in process data rather than in the initialisation of the method

---
 mapies/mapies.py | 10 ++--------
 mapies/viirs.py  | 11 ++++++-----
 run/run_viirs.py | 19 ++++++-------------
 3 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/mapies/mapies.py b/mapies/mapies.py
index 206969bc..615e6b2f 100644
--- a/mapies/mapies.py
+++ b/mapies/mapies.py
@@ -40,7 +40,7 @@ class MAPIES:
         self.time_orig = np.datetime64("1900-01-01T00:00:00")
         self.start_date = start_date
         self.end_date = end_date
-
+        self.apply_qa = True
         self.grid_dict = {
             "rotated":RotatedGrid,
@@ -96,13 +96,7 @@ class MAPIES:
         qa_dict = self.config[self.datatype]["qa"]
-        #Check if the user wants qa flags
-        apply_qa = kwargs.get("apply_qa")
-
-        if apply_qa is None:
-            self.apply_qa = True
-        else:
-            self.apply_qa = apply_qa
+
         self.qualityFlags = qa_dict["qa_variables"]
diff --git a/mapies/viirs.py b/mapies/viirs.py
index 50b3c180..69acd2b6 100644
--- a/mapies/viirs.py
+++ b/mapies/viirs.py
@@ -289,7 +289,7 @@ class VIIRS(MAPIES):

     @timeit
-    def process_data(self, monthly_avg=False, batch_size=100):
+    def process_data(self, monthly_avg=False, batch_size=100, apply_qa=True):
         """
         Process the data.
@@ -301,6 +301,7 @@ class VIIRS(MAPIES):
         - if monthly_avg --> updated monthly dictorionary for the processed month.
         - if not monthly_avg --> updated self.lon_values, self.lat_values, self.obs, self.count_obs.
         """
+        #Check if the user wants qa flags
         if monthly_avg:
             for month in self.monthly_dict.keys():
                 # print(f'Month: {self.monthly_dict[month]}')
@@ -312,7 +313,7 @@ class VIIRS(MAPIES):
                 try:
                     # Process the data for the current month
                     final_lon, final_lat, final_obs, cumulative_count = self.process_monthly_data(
-                        batch_size=batch_size, month=month
+                        batch_size=batch_size, month=month, apply_qa=apply_qa
                     )
                     # Update the monthly dictionary
@@ -337,7 +338,7 @@ class VIIRS(MAPIES):
         else:
             try:
                 final_lon, final_lat, final_obs, cumulative_count = self.process_monthly_data(
-                    batch_size=batch_size, month=None
+                    batch_size=batch_size, month=None, apply_qa=apply_qa
                 )
                 self.lon_values = final_lon
                 self.lat_values = final_lat
@@ -347,7 +348,7 @@ class VIIRS(MAPIES):
             print(f"Error processing data for the time period from {self.start_date} to {self.end_date}: {e}")
             return

-    def process_monthly_data(self, batch_size, month=None):
+    def process_monthly_data(self, batch_size, month=None, apply_qa=True):
         """
         Process the data with multiprocessing.
@@ -386,7 +387,7 @@ class VIIRS(MAPIES):
                     self.time_var,
                     self.start_date,
                     self.end_date,
-                    self.apply_qa,
+                    apply_qa,
                     self.qualityFlags,
                     flag
                 )
diff --git a/run/run_viirs.py b/run/run_viirs.py
index d158cf87..e2fb1b9a 100644
--- a/run/run_viirs.py
+++ b/run/run_viirs.py
@@ -22,25 +22,18 @@ if __name__ == "__main__":
     # start_date = "202401010000"
     # end_date = "202401312359"

-    # outdir="/home/cmeikle/Projects/data/"
-    # indir="/home/cmeikle/Projects/data/VIIRS/original_files/AERDB_L2_VIIRS_NOAA20"
-
-    outdir = "/home/cgile/Documents/mapies/figures"
-    # indir = "/home/cgile/bscearth000/esarchive/obs/nasa/viirs_noaa20_aerdb_l2/original_files/VIIRS"
-    # indir = "/home/cgile/Documents/mapies/VIIRS"
-    indir = ''
-    filename_obs = 'carlotta'
-    filename_num_obs = 'carlotta'
+    outdir="/home/cmeikle/Projects/data/"
+    indir="/home/cmeikle/Projects/data/VIIRS/original_files/AERDB_L2_VIIRS_NOAA20"

     start_time = time.time()
-    c = VIIRS(start_date, end_date, dest=outdir, indir=indir, apply_qa=False, grid_repr="rotated")
+    c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 50)
+    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=False)
     # c.yearly_average()
-    c.plot_2D_observations(months=[0], filename=filename_obs, outdir=outdir)
-    c.plot_2D_num_obs(months=[0], filename=filename_num_obs, outdir=outdir)
+    c.plot_2D_observations(months=[0], filename="Viirs_qa_not_applied.png", outdir=outdir)
+    c.plot_2D_num_obs(months=[0], filename="Viirs_num_obsqa_not_applied.png", outdir=outdir)
     # c.process_lazy_data()
     end_time = time.time()
     elapsed_time = end_time - start_time
--
GitLab

From 0317636cab234fe5dd2408ba7ec072d2f7b90919 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Fri, 7 Feb 2025 17:40:10 +0100
Subject: [PATCH 2/8] Updating qa for tropomi

---
 mapies/tropomi.py  |  5 ++++-
 run/run_tropomi.py | 16 ++++++++--------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/mapies/tropomi.py b/mapies/tropomi.py
index 26e3bef7..433eb00a 100644
--- a/mapies/tropomi.py
+++ b/mapies/tropomi.py
@@ -181,7 +181,7 @@ class TROPOMI(MAPIES):

     @timeit
-    def process_data(self, monthly_avg=False, batch_size=100, geospatial_crop: NDArray | None = None):
+    def process_data(self, monthly_avg=False, batch_size=100, apply_qa=True, geospatial_crop: NDArray | None = None):
         """
         Process the data for the specified year and months.
@@ -195,6 +195,9 @@ class TROPOMI(MAPIES):
         - if monthly_avg --> lon_final, lat_final, obs_final: Aggregated longitude, latitude, and observation arrays.
         - if not monthly_avg --> preprocess_vars() is called.
         """
+        #Check if the user wants qa flags
+        if apply_qa:
+            self.apply_qa = True
         if geospatial_crop is not None:
             self.geospatial_crop = np.array(geospatial_crop)
         if monthly_avg:
diff --git a/run/run_tropomi.py b/run/run_tropomi.py
index 4ee763a5..9223cb64 100644
--- a/run/run_tropomi.py
+++ b/run/run_tropomi.py
@@ -5,21 +5,21 @@ import time

 if __name__ == "__main__":

-    start_date = "202212010000"
-    end_date = "202212202359"
+    start_date = "202301010000"
+    end_date = "202301012359"

-    outdir = "/esarchive/scratch/cmeikle/"
-    indir = "/esarchive/obs/sentinel/tropomi/original_files/tropomi_OFFL_NO2"
+    outdir = "/home/cmeikle/Projects/data/"
+    indir = "/home/cmeikle/Projects/data/TROPOMI"

     start_time = time.time()
-    c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, apply_qa=True, grid_repr="rotated")
+    c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="regular")

     c.read_nc()
-    c.process_data(monthly_avg=True, batch_size = 13)
+    c.process_data(monthly_avg=False, batch_size = 13, apply_qa=True)
     # c.yearly_average()
-    c.plot_2D_observations(months=[12], outdir=outdir)
-    c.plot_2D_num_obs(months=[12], outdir=outdir)
+    c.plot_2D_observations(months=[0], outdir=outdir)
+    c.plot_2D_num_obs(months=[0], outdir=outdir)
     #c.process_lazy_data()
     end_time = time.time()
     elapsed_time = end_time - start_time
--
GitLab
From 03eee3eca0aab63e759aca624a677bb4884ccb2b Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Mon, 10 Feb 2025 12:18:42 +0100
Subject: [PATCH 3/8] Added qa greater less than functionality and updated tests

---
 mapies/config/satellite_config.yaml |  3 +-
 mapies/mapies.py                    |  5 +++-
 mapies/tests/test_func_tools.py     |  9 +++---
 mapies/tropomi.py                   |  1 -
 mapies/util/func_tools.py           | 44 ++++++++++++++++++++++++++---
 5 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/mapies/config/satellite_config.yaml b/mapies/config/satellite_config.yaml
index af4837f2..b94f5324 100644
--- a/mapies/config/satellite_config.yaml
+++ b/mapies/config/satellite_config.yaml
@@ -67,8 +67,9 @@ tropomi:
     qa_variables:
       qa_value:
         qa_name: "qa_value"
-        qa_values: [1] #[0.62, 0.65999997, 0.66999996, 0.69, 0.72999996, 0.74, 0.78999996, 0.84, 0.88, 0.9, 0.93, 1.]
+        qa_values: 0.5
         qa_combined: True
+        qa_condition: "greater_than_or_equal" # Examples, greater_than, less_than, equal, greater_than_or_equal, less_than_or_equal
diff --git a/mapies/mapies.py b/mapies/mapies.py
index 453a7efe..a0844f4f 100644
--- a/mapies/mapies.py
+++ b/mapies/mapies.py
@@ -41,6 +41,7 @@ class MAPIES:
         self.start_date = start_date
         self.end_date = end_date
         self.apply_qa = True
+        self.geospatial_crop = None
         self.grid_dict = {
             "rotated":RotatedGrid,
@@ -186,10 +187,12 @@ class MAPIES:
         if self.datatype == "viirs":
             label = "AOD"
             levels = [0.02, 0.04, 0.06, 0.08, 0.10, 0.15, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 1.00]
+            level_format = 2
         elif self.datatype == "tropomi":
             #TODO remove hard coding
             label = "NO2"
             levels = [0.00001, 0.00002, 0.00003, 0.00004, 0.00005, 0.00006, 0.00007, 0.00008, 0.00009, 0.00010, 0.00015, 0.00020, 0.00030, 0.00040]
+            level_format = 6
         # Define the custom color list
         colors = [
             '#290AD8', '#2645FA', '#3A91FF', '#66CBFF', '#99EEFF',
@@ -216,7 +219,7 @@ class MAPIES:
         # Add the color bar
         cbar = fig.colorbar(im, ax=ax, orientation="vertical", pad=0.05, ticks=levels, extend="both")
         cbar.set_label(label)
-        cbar.ax.set_yticklabels([f"{level:.2f}" for level in levels]) # Format tick labels
+        cbar.ax.set_yticklabels([f"{level:.f}" for level in levels]) # Format tick labels

         # Add title and save the plot
         ax.set_title(title)
diff --git a/mapies/tests/test_func_tools.py b/mapies/tests/test_func_tools.py
index 52afee64..da0df3e7 100644
--- a/mapies/tests/test_func_tools.py
+++ b/mapies/tests/test_func_tools.py
@@ -42,6 +42,7 @@ ds = xr.Dataset(
 obs = np.array([1, 2, 3, 4])
 lon = np.array([1, 2, 3, 4])
 lat = np.array([1, 2, 3, 4])
+time = np.array([1, 2, 3, 4])
 qualityFlags1 = {"qa_flag1":{"qa_name":"flag1", "qa_values":[1], "qa_combined":False}, "qa_flag2":{"qa_name":"flag2", "qa_values":[1], "qa_combined":False}}
 qualityFlags2 = {"qa_flag1":{"qa_name":"flag1", "qa_values":[1], "qa_combined":True}, "qa_flag2":{"qa_name":"flag2", "qa_values":[1], "qa_combined":True}}
 qualityFlags3 = {"qa_flag1":{"qa_name":"flag1", "qa_values":[1], "qa_combined":True}, "qa_flag2":{"qa_name":"flag2", "qa_values":[1], "qa_combined":False}}
@@ -49,9 +50,9 @@ qualityFlags3 = {"qa_flag1":{"qa_name":"flag1", "qa_values":[1], "qa_combined":T
 @pytest.mark.parametrize(
     "test_input,expected",
     [
-        ((ds, obs, lat, lon, qualityFlags1), (np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]))),
-        ((ds, obs, lat, lon, qualityFlags2), (np.array([4]), np.array([4]), np.array([4]))),
-        ((ds, obs, lat, lon, qualityFlags3), (np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]))),
+        ((ds, obs, lat, lon, time, qualityFlags1), (np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]))),
+        ((ds, obs, lat, lon, time, qualityFlags2), (np.array([4]), np.array([4]), np.array([4]), np.array([4]))),
+        ((ds, obs, lat, lon, time, qualityFlags3), (np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]))),
     ]
 )
 def test_quality_assurance(test_input, expected):
@@ -60,7 +61,7 @@ def test_quality_assurance(test_input, expected):
     assert quality_assurance(*test_input)[2].all() == expected[2].all()
 """
 @pytest.mark.parametrize(
-    "test_input,expected", 
+    "test_input,expected",
     [
         ([], np.array([])),
     ]
diff --git a/mapies/tropomi.py b/mapies/tropomi.py
index 433eb00a..09c76e9f 100644
--- a/mapies/tropomi.py
+++ b/mapies/tropomi.py
@@ -87,7 +87,6 @@ class TROPOMI(MAPIES):
         self.daily_dict = {} # Dictionary to produce daily netcdf files
         self.datatype = "tropomi"
-        self.geospatial_crop = None

         # Add quality value filter number
diff --git a/mapies/util/func_tools.py b/mapies/util/func_tools.py
index 300f2ec6..29856b6e 100644
--- a/mapies/util/func_tools.py
+++ b/mapies/util/func_tools.py
@@ -81,23 +81,57 @@ def time_converter(date:str) -> pd.Timestamp:
     return date

+
+
 def quality_assurance(ds, obs, lat, lon, time_values, qualityFlags):
     """
     Applying quality assurance flags to mapies data
     """
     mask = np.zeros_like(obs, dtype=bool)
-
+    # Iterate through each qa flag
     for i, (flag, info) in enumerate(qualityFlags.items()):
+        # TODO add logger of flag being applied with condition and value
         flag_mask = np.zeros_like(obs, dtype=bool)
         arr = ds[info["qa_name"]].values.flatten()

-        for value in info["qa_values"]:
-            flag_mask |= (arr == value)
+        # Get qa conditions
+        try:
+            condition = info["qa_condition"]
+        except KeyError:
+            # print("No QA condition passed, putting equal")
+            condition = "equals"
+        # Get qa values
+        try:
+            values = info["qa_values"]
+        except KeyError:
+            raise Exception("No QA values passed, putting equal")
+
+        if isinstance(values, (float, int)):
+            values = [values]
+
+        for value in values:
+            if condition == "greater_than":
+                flag_mask |= (arr > value)
+            elif condition == "less_than":
+                flag_mask |= (arr < value)
+            elif condition == "greater_than_or_equal":
+                flag_mask |= (arr >= value)
+            elif condition == "less_than_or_equal":
+                flag_mask |= (arr <= value)
+            elif condition == "equal":
+                flag_mask |= (arr == value)

         if i == 0:
             mask = flag_mask
         else:
-            if info["qa_combined"]:
+            # If we want to perform an and or or between conditions
+            try:
+                combination = info["qa_combined"]
+            except KeyError:
+                # print("No combination of and or or passed for the condition, presuming or(False)")
+                combination = False
+
+            if combination:
                 mask &= flag_mask
             else:
                 mask |= flag_mask
@@ -110,6 +144,8 @@ def quality_assurance(ds, obs, lat, lon, time_values, qualityFlags):
     return obs, lat, lon, time_values

+
+
 def spatial_domain_selection(obs, lat, lon, time, spatial_bounds):
     """
     A function for cutting the data spatially
--
GitLab
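The qa_condition/qa_values keys introduced in PATCH 3/8 feed straight into quality_assurance(). The following is an illustrative sketch only — the toy dataset, the 0.5 threshold and the point values are invented for the example rather than taken from the mapies defaults — but it uses the signature and config keys added above:

    import numpy as np
    import xarray as xr
    from mapies.util.func_tools import quality_assurance

    # Toy swath: four observations with one qa_value per point
    obs = np.array([1.0, 2.0, 3.0, 4.0])
    lat = np.array([10.0, 20.0, 30.0, 40.0])
    lon = np.array([-5.0, 0.0, 5.0, 10.0])
    time_values = np.array([1, 2, 3, 4])
    ds = xr.Dataset({"qa_value": ("points", np.array([0.3, 0.5, 0.7, 1.0]))})

    # Same keys as the satellite_config.yaml entry above; a scalar qa_values
    # is wrapped into a list inside quality_assurance()
    quality_flags = {
        "qa_value": {
            "qa_name": "qa_value",
            "qa_values": 0.5,
            "qa_condition": "greater_than_or_equal",
        },
    }

    # Keeps only the points whose qa_value is >= 0.5 (here the last three)
    obs, lat, lon, time_values = quality_assurance(ds, obs, lat, lon, time_values, quality_flags)

With PATCH 1/8 and 2/8, whether this filter runs at all is now decided per call, e.g. c.process_data(monthly_avg=False, batch_size=50, apply_qa=True), rather than at construction time.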
From 288109d7490313cf40b43225fa2bf71d9449ad20 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Mon, 10 Feb 2025 14:42:20 +0100
Subject: [PATCH 4/8] Formatting of plot

---
 mapies/mapies.py   |  6 +++---
 mapies/viirs.py    | 11 +++++++++--
 run/run_tropomi.py |  2 +-
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/mapies/mapies.py b/mapies/mapies.py
index a0844f4f..dc40c295 100644
--- a/mapies/mapies.py
+++ b/mapies/mapies.py
@@ -187,12 +187,12 @@ class MAPIES:
         if self.datatype == "viirs":
             label = "AOD"
             levels = [0.02, 0.04, 0.06, 0.08, 0.10, 0.15, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 1.00]
-            level_format = 2
+            level_format = [f"{level:.2f}" for level in levels]
         elif self.datatype == "tropomi":
             #TODO remove hard coding
             label = "NO2"
             levels = [0.00001, 0.00002, 0.00003, 0.00004, 0.00005, 0.00006, 0.00007, 0.00008, 0.00009, 0.00010, 0.00015, 0.00020, 0.00030, 0.00040]
-            level_format = 6
+            level_format = [f"{level:.5f}" for level in levels]
         # Define the custom color list
         colors = [
             '#290AD8', '#2645FA', '#3A91FF', '#66CBFF', '#99EEFF',
@@ -219,7 +219,7 @@ class MAPIES:
         # Add the color bar
         cbar = fig.colorbar(im, ax=ax, orientation="vertical", pad=0.05, ticks=levels, extend="both")
         cbar.set_label(label)
-        cbar.ax.set_yticklabels([f"{level:.f}" for level in levels]) # Format tick labels
+        cbar.ax.set_yticklabels(level_format) # Format tick labels

         # Add title and save the plot
         ax.set_title(title)
diff --git a/mapies/viirs.py b/mapies/viirs.py
index f1bb0ace..d1159d81 100644
--- a/mapies/viirs.py
+++ b/mapies/viirs.py
@@ -445,11 +445,16 @@ class VIIRS(MAPIES):
         print("Yearly average computation completed.")

     @timeit
-    def process_lazy_data(self):
+    def process_lazy_data(self, apply_qa=True, geospatial_crop: NDArray | None = None):
         """
         Process the data for the specified time range. It only retrieves a daily dictionary using dask lazy loading.
         """
         flag = True
+        #Check if the user wants qa flags
+        if apply_qa:
+            self.apply_qa = True
+        if geospatial_crop is not None:
+            self.geospatial_crop = np.array(geospatial_crop)
         for day in self.daily_dict.keys():
             try:
                 files = self.daily_dict[day]["files"]
@@ -465,7 +470,9 @@ class VIIRS(MAPIES):
                         self.end_date,
                         self.apply_qa,
                         self.qualityFlags,
-                        flag
+                        flag,
+                        self.geospatial_crop
+
                     )
                     for file in files
                 ]
diff --git a/run/run_tropomi.py b/run/run_tropomi.py
index 9223cb64..13871692 100644
--- a/run/run_tropomi.py
+++ b/run/run_tropomi.py
@@ -15,7 +15,7 @@ if __name__ == "__main__":
     c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="regular")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 13, apply_qa=True)
+    c.process_data(monthly_avg=False, batch_size = 13, apply_qa=True, geospatial_crop=[[-10, 40], [30, 70]])
     # c.yearly_average()

     c.plot_2D_observations(months=[0], outdir=outdir)
--
GitLab
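The geospatial_crop argument threaded through process_data() and process_lazy_data() in PATCH 2/8 and 4/8 is a [[lon_min, lon_max], [lat_min, lat_max]] window, following the run scripts. A rough sketch of the bounding-box selection this implies — the helper below is a hypothetical stand-in written for illustration, not mapies' actual spatial_domain_selection:

    import numpy as np

    def crop_to_domain(obs, lat, lon, time_values, bounds):
        # bounds follows the run-script convention: [[lon_min, lon_max], [lat_min, lat_max]]
        (lon_min, lon_max), (lat_min, lat_max) = bounds
        mask = (lon >= lon_min) & (lon <= lon_max) & (lat >= lat_min) & (lat <= lat_max)
        return obs[mask], lat[mask], lon[mask], time_values[mask]

    obs = np.array([0.1, 0.2, 0.3])
    lat = np.array([35.0, 10.0, 55.0])
    lon = np.array([5.0, 100.0, -20.0])
    time_values = np.array([1, 2, 3])

    # Roughly the European window used in run_tropomi.py
    obs, lat, lon, time_values = crop_to_domain(obs, lat, lon, time_values, [[-10, 40], [30, 70]])

Keeping the crop as a plain lon/lat box means it can be applied to the flattened swath arrays before any regridding or aggregation.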
From 99e5db601940712caf595874c5fa646132f47b14 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Mon, 10 Feb 2025 17:16:39 +0100
Subject: [PATCH 5/8] Updated gantt chart, removing 0 values in tropomi data too

---
 docs/project-management/gantt.py | 25 +++++++++++++++----------
 mapies/tropomi.py                | 26 ++++++++++++++------------
 mapies/util/func_tools.py        |  2 +-
 mapies/viirs.py                  |  9 ++++++---
 run/run_tropomi.py               |  3 +--
 run/run_viirs.py                 |  4 ++--
 6 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/docs/project-management/gantt.py b/docs/project-management/gantt.py
index eb0faa43..a6f62caa 100644
--- a/docs/project-management/gantt.py
+++ b/docs/project-management/gantt.py
@@ -5,21 +5,26 @@ import plotly.figure_factory as ff
 df = [
     dict(Task="Viirs base processor", Start='2024-08-01', Finish='2024-09-30', Resource='Complete'),
     dict(Task="Add pytests for already created functions (3-9)", Start='2024-08-01', Finish='2024-12-31', Resource='Complete'),
-    dict(Task="Tropomi base processor", Start='2024-08-01', Finish='2025-02-03', Resource='Incomplete'),
-    dict(Task="Tropomi to_plumes functionality (11)", Start='2025-02-03', Finish='2025-03-31', Resource='Not Started'),
+    dict(Task="Tropomi base processor", Start='2024-08-01', Finish='2025-02-03', Resource='Complete'),
+    dict(Task="Tropomi to_plumes functionality (11)", Start='2025-03-01', Finish='2025-03-14', Resource='Not Started'),
     dict(Task="Time evaluation statistics (18)", Start='2024-11-01', Finish='2024-11-28', Resource='Complete'),
-    dict(Task="Test DA implementation for VIIRS", Start='2025-02-03', Finish='2025-02-28', Resource='Not Started'),
+    dict(Task="Test DA implementation for VIIRS", Start='2025-03-01', Finish='2025-03-31', Resource='Not Started'),
     dict(Task="Convert Grid representation to numpy from geopandas", Start='2024-12-01', Finish='2024-12-31', Resource="Complete"),
-    dict(Task="Creating Grid Config for rotated and regular global grids (14)", Start='2024-11-18', Finish='2025-02-03', Resource='Incomplete'),
-    dict(Task="Add original indices of the data to the outputted netcdf (13)", Start='2025-02-03', Finish='2025-01-14', Resource='Not Started'),
-    dict(Task="Add history of processes to the netcdf (10)", Start='2025-01-01', Finish='2025-02-03', Resource='Not Started'),
-    dict(Task="Update grid representations to include Vertical profiling (20)", Start='2025-03-01', Finish='2025-03-31', Resource='Not Started'),
+    dict(Task="Creating Grid Config for rotated and regular global grids (14)", Start='2024-11-18', Finish='2025-02-03', Resource='Complete'),
+    dict(Task="Add original indices of the data to the outputted netcdf (13)", Start='2025-03-01', Finish='2025-03-07', Resource='Not Started'),
+    dict(Task="Add history of processes to the netcdf (10)", Start='2025-01-01', Finish='2025-02-03', Resource='Incomplete'),
+    dict(Task="Update grid representations to include Vertical profiling (20)", Start='2025-04-01', Finish='2025-04-30', Resource='Not Started'),
     dict(Task="Test Viirs data with Providentia Interpolation", Start="2025-03-01", Finish="2025-03-31", Resource="Not Started"),
     dict(Task="Produce Number of counts plots (29)", Start="2025-01-13", Finish="2025-01-20", Resource="Complete"),
-    dict(Task="QA flags (23)", Start="2025-01-13", Finish="2025-02-03", Resource="Incomplete"),
-    dict(Task="Finalise netcdf storage output (30)", Start="2025-01-20", Finish="2025-02-03", Resource="Not Started"),
+    dict(Task="QA flags (23)", Start="2025-01-13", Finish="2025-02-03", Resource="Complete"),
+    dict(Task="Finalise netcdf storage output (30)", Start="2025-01-20", Finish="2025-02-03", Resource="Incomplete"),
     dict(Task="Create conda environment", Start="2025-01-01", Finish="2025-01-14", Resource="Complete"),
-    dict(Task="Create Logging file output", Start="2025-01-01", Finish="2025-02-03", Resource="Incomplete"),
+    dict(Task="Create Logging file output", Start="2025-02-03", Finish="2025-02-24", Resource="Incomplete"),
+    dict(Task="Filter for min number of obs points to be aggregated (32)", Start="2025-02-17", Finish="2025-02-24", Resource="Not Started"),
+    dict(Task="Code refactor (41)", Start="2025-02-17", Finish="2025-03-01", Resource="Not Started"),
+    dict(Task="Update grid config to be in dataclasses instead of yaml files (40)", Start="2025-02-17", Finish="2025-02-24", Resource="Not Started"),
+    dict(Task="Implement process lazy data in Tropomi (50)", Start="2025-02-10", Finish="2025-02-17", Resource="Not Started"),
+    dict(Task="Investigate whether we can include Unit Converter used by Providentia/Ghost into code (51)", Start="2025-04-01", Finish="2025-04-10", Resource="Not Started"),
 ]
diff --git a/mapies/tropomi.py b/mapies/tropomi.py
index 09c76e9f..a8ee9207 100644
--- a/mapies/tropomi.py
+++ b/mapies/tropomi.py
@@ -215,11 +215,12 @@ class TROPOMI(MAPIES):
                     # final_lon, final_lat, final_obs, cumulative_count = self.process_files_in_blocks(month, block_size=300, batch_size=batch_size)

                     # Update the monthly dictionary
+                    valid_obs = final_obs > 0
                     self.monthly_dict[month] = {
-                        "lon": final_lon,
-                        "lat": final_lat,
-                        "obs": final_obs,
-                        "count": cumulative_count,
+                        "lon": final_lon[valid_obs],
+                        "lat": final_lat[valid_obs],
+                        "obs": final_obs[valid_obs],
+                        "count": cumulative_count[valid_obs],
                     }
                     print(f'Updated monthly dictionary for month {month}')
                     print(f'Lon shape: {final_lon.shape}, Lat shape: {final_lat.shape}, Obs shape: {final_obs.shape}')
@@ -238,10 +239,11 @@ class TROPOMI(MAPIES):
                 final_lon, final_lat, final_obs, cumulative_count = self.process_monthly_data(
                     batch_size=batch_size, month=None
                 )
-                self.lon_values = final_lon
-                self.lat_values = final_lat
-                self.obs = final_obs
-                self.count_obs = cumulative_count
+                valid_obs = final_obs > 0
+                self.lon = final_lon[valid_obs]
+                self.lat = final_lat[valid_obs]
+                self.obs = final_obs[valid_obs]
+                self.count_obs = cumulative_count[valid_obs]

             except Exception as e:
                 print(f"Error processing data for the time period from {self.start_date} to {self.end_date}: {e}")
@@ -334,8 +336,8 @@ class TROPOMI(MAPIES):

         if months == [0]:
             try:
-                lon_values = self.lon_values
-                lat_values = self.lat_values
+                lon_values = self.lon
+                lat_values = self.lat
                 obs_values = self.obs
             except Exception as e:
                 print(f"Error plotting all data: {e}")
@@ -394,8 +396,8 @@ class TROPOMI(MAPIES):

         if months == [0]:
             try:
-                lon_values = self.lon_values
-                lat_values = self.lat_values
+                lon_values = self.lon
+                lat_values = self.lat
                 cum_count = self.count_obs
             except Exception as e:
                 print(f"Error plotting all data: {e}")
diff --git a/mapies/util/func_tools.py b/mapies/util/func_tools.py
index 29856b6e..15356703 100644
--- a/mapies/util/func_tools.py
+++ b/mapies/util/func_tools.py
@@ -99,7 +99,7 @@ def quality_assurance(ds, obs, lat, lon, time_values, qualityFlags):
             condition = info["qa_condition"]
         except KeyError:
             # print("No QA condition passed, putting equal")
-            condition = "equals"
+            condition = "equal"
         # Get qa values
         try:
             values = info["qa_values"]
diff --git a/mapies/viirs.py b/mapies/viirs.py
index 7e623b7d..65d95b6b 100644
--- a/mapies/viirs.py
+++ b/mapies/viirs.py
@@ -64,11 +64,11 @@ def process_single_file(
         obs, lat, lon, time_values, time_values_index = new_preprocess_vars(ds, obs_var, lat_var, lon_var, time_var, start_date, end_date)
         # obs = ds[obs_var].values.flatten()
         # lat = ds[lat_var].values.flatten()
-
         if apply_qa:
             obs, lat, lon, time_values = quality_assurance(ds, obs, lat, lon, time_values, qualityFlags)
         if geospatial_crop is not None:
             obs, lat, lon, time_values = spatial_domain_selection(obs, lat, lon, time_values, geospatial_crop)
+
         if not flag:
             # Regrid and aggregate
             if isinstance(r, RotatedGrid):
@@ -250,8 +250,11 @@ class VIIRS(MAPIES):
         """
         #Check if the user wants qa flags
+
         if apply_qa:
             self.apply_qa = True
+        else:
+            self.apply_qa = False
         if geospatial_crop is not None:
             self.geospatial_crop = np.array(geospatial_crop)
         if monthly_avg:
@@ -291,7 +294,7 @@ class VIIRS(MAPIES):
         else:
             try:
                 final_lon, final_lat, final_obs, cumulative_count = self.process_monthly_data(
-                    batch_size=batch_size, month=None, apply_qa=apply_qa
+                    batch_size=batch_size, month=None
                 )
                 valid_obs = final_obs > 0
@@ -313,7 +316,7 @@ class VIIRS(MAPIES):
             return

-    def process_monthly_data(self, batch_size, month=None, apply_qa=True):
+    def process_monthly_data(self, batch_size, month=None):
         """
         Process the data with multiprocessing.
diff --git a/run/run_tropomi.py b/run/run_tropomi.py
index 13871692..b104aed5 100644
--- a/run/run_tropomi.py
+++ b/run/run_tropomi.py
@@ -15,12 +15,11 @@ if __name__ == "__main__":
     c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="regular")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 13, apply_qa=True, geospatial_crop=[[-10, 40], [30, 70]])
+    c.process_data(monthly_avg=False, batch_size = 14, apply_qa=True, geospatial_crop=[[-10, 40], [0, 70]])
     # c.yearly_average()

     c.plot_2D_observations(months=[0], outdir=outdir)
     c.plot_2D_num_obs(months=[0], outdir=outdir)
-    #c.process_lazy_data()
     end_time = time.time()
     elapsed_time = end_time - start_time
     print(f"Script executed in {elapsed_time:.2f} seconds.")
diff --git a/run/run_viirs.py b/run/run_viirs.py
index a859c640..c8141635 100644
--- a/run/run_viirs.py
+++ b/run/run_viirs.py
@@ -23,12 +23,12 @@ if __name__ == "__main__":
     c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=False)
+    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=True)
     # c.yearly_average()
     c.plot_2D_observations(months=[0], filename="Viirs_qa_not_applied.png", outdir=outdir)
     c.plot_2D_num_obs(months=[0], filename="Viirs_num_obsqa_not_applied.png", outdir=outdir)
     # c.process_lazy_data()
-    
+
     end_time = time.time()
     elapsed_time = end_time - start_time
     print(f"Script executed in {elapsed_time:.2f} seconds.")
--
GitLab

From 1170e88ed7568d6881e95a04a584d5b0f110ab51 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Mon, 10 Feb 2025 17:32:37 +0100
Subject: [PATCH 6/8] removing 0 values in tropomi and gantt chart

---
 run/run_viirs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run/run_viirs.py b/run/run_viirs.py
index c8141635..e6e0db2c 100644
--- a/run/run_viirs.py
+++ b/run/run_viirs.py
@@ -23,7 +23,7 @@ if __name__ == "__main__":
     c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=True)
+    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=True, geospatial_crop=[[-10, 40], [0, 70]])
     # c.yearly_average()
     c.plot_2D_observations(months=[0], filename="Viirs_qa_not_applied.png", outdir=outdir)
     c.plot_2D_num_obs(months=[0], filename="Viirs_num_obsqa_not_applied.png", outdir=outdir)
--
GitLab

From 57f73e82c6dcd67e9d6cad5f7822b8c8558c4dfa Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Tue, 11 Feb 2025 11:43:29 +0100
Subject: [PATCH 7/8] Doing some cleaning and commenting for future issues

---
 mapies/mapies.py   | 59 +++-------------------------------------------
 run/run_tropomi.py |  2 +-
 run/run_viirs.py   |  2 +-
 3 files changed, 5 insertions(+), 58 deletions(-)

diff --git a/mapies/mapies.py b/mapies/mapies.py
index dc40c295..d548f422 100644
--- a/mapies/mapies.py
+++ b/mapies/mapies.py
@@ -101,34 +101,7 @@ class MAPIES:
         self.qualityFlags = qa_dict["qa_variables"]

-
-    def preprocess_vars(self):
-        """
-        Preprocessing of the dataset
-        """
-        # Get all info about time columns
-        self.time_dims = self.ds[self.time_var].dims
-        self.time_shape = self.ds[self.time_var].shape
-        self.time_attrs = self.ds[self.time_var].attrs # Useful if we need to convert to datetime
-
-
-        # Get time values flatten and convert to datetime values
-        self.time_values = self.ds[self.time_var].values
-        self.time_values = self.time_values.flatten()
-
-        if self.time_values.dtype == "timedelta64[ns]":
-            #logger.info("Adding time origin to time values as the time variable is in timedelta")
-            self.time_values = np.add(self.time_orig, self.time_values)
-            self.time_values = pd.to_datetime(self.time_values)
-        else:
-            self.time_values = pd.to_datetime(self.time_values)
-
-        # Time domain selection
-        self.time_values, self.time_values_index = time_domain_selection(self.time_values, self.start_date, self.end_date)
-
-
-
+    # TODO this can be removed I believe
     @timeit
     def plot_2D_obs(self, **kwargs):
         """
@@ -172,6 +145,7 @@ class MAPIES:

         #return fig

+    # TODO this can be moved to a plotting function python file
     @timeit
     def plot_2D_obs_custom(self, lon_values=None, lat_values=None, obs_values=None, outdir=None, title=None, filename=None, display_fig=False):
         """
@@ -307,33 +281,6 @@ class MAPIES:
         # return reindexed values
         return independent_var_values

-    def quality_assurance(self):
-        """
-        Applying quality assurance flags to mapies data
-        """
-        mask = np.zeros_like(self.obs, dtype=bool)
-        for i, (flag, info) in enumerate(self.qualityFlags.items()):
-
-            flag_mask = np.zeros_like(self.obs, dtype=bool)
-            arr = self.ds[info["qa_name"]].values.flatten()
-            arr = self.reindex(self.time_values_index, arr)
-
-
-            for value in info["qa_values"]:
-                flag_mask |= (arr == value)
-            if i == 0:
-                mask = flag_mask
-            else:
-                if info["qa_combined"]:
-                    mask &= flag_mask
-                else:
-                    mask |= flag_mask
-
-        self.obs = self.obs[mask]
-        self.lon_values = self.lon_values[mask]
-        self.lat_values = self.lat_values[mask]
-        self.time_values = self.time_values[mask]
-
     @staticmethod
     def to_xarray(coords:dict, data_vars:dict, **kwargs):
@@ -347,7 +294,7 @@ class MAPIES:
         ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)
         return ds

-    
+    # TODO this can be moved to a plotting function python file
     @timeit
     def plot_2D_num_obs_custom(
         self, lon_values=None,
diff --git a/run/run_tropomi.py b/run/run_tropomi.py
index b104aed5..c42eb7d6 100644
--- a/run/run_tropomi.py
+++ b/run/run_tropomi.py
@@ -15,7 +15,7 @@ if __name__ == "__main__":
     c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="regular")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 14, apply_qa=True, geospatial_crop=[[-10, 40], [0, 70]])
+    c.process_data(monthly_avg=False, batch_size = 14, apply_qa=True, geospatial_crop=[[-40, 40], [0, 70]])
     # c.yearly_average()

     c.plot_2D_observations(months=[0], outdir=outdir)
diff --git a/run/run_viirs.py b/run/run_viirs.py
index e6e0db2c..04967a55 100644
--- a/run/run_viirs.py
+++ b/run/run_viirs.py
@@ -23,7 +23,7 @@ if __name__ == "__main__":
     c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=True, geospatial_crop=[[-10, 40], [0, 70]])
+    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=False, geospatial_crop=[[0, 20], [10, 20]])
     # c.yearly_average()
     c.plot_2D_observations(months=[0], filename="Viirs_qa_not_applied.png", outdir=outdir)
     c.plot_2D_num_obs(months=[0], filename="Viirs_num_obsqa_not_applied.png", outdir=outdir)
--
GitLab

From 1aaa3f42bcfb9bb25e50fa01faa60d4008fcfcc2 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Tue, 11 Feb 2025 11:53:16 +0100
Subject: [PATCH 8/8] Updating version and gantt chart

---
 docs/project-management/gantt.py | 6 ++++--
 setup.py                         | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/project-management/gantt.py b/docs/project-management/gantt.py
index a6f62caa..41f03fc8 100644
--- a/docs/project-management/gantt.py
+++ b/docs/project-management/gantt.py
@@ -12,12 +12,12 @@ df = [
     dict(Task="Convert Grid representation to numpy from geopandas", Start='2024-12-01', Finish='2024-12-31', Resource="Complete"),
     dict(Task="Creating Grid Config for rotated and regular global grids (14)", Start='2024-11-18', Finish='2025-02-03', Resource='Complete'),
     dict(Task="Add original indices of the data to the outputted netcdf (13)", Start='2025-03-01', Finish='2025-03-07', Resource='Not Started'),
-    dict(Task="Add history of processes to the netcdf (10)", Start='2025-01-01', Finish='2025-02-03', Resource='Incomplete'),
+    dict(Task="Add history of processes to the netcdf (10)", Start='2025-01-01', Finish='2025-02-03', Resource='Complete'),
     dict(Task="Update grid representations to include Vertical profiling (20)", Start='2025-04-01', Finish='2025-04-30', Resource='Not Started'),
     dict(Task="Test Viirs data with Providentia Interpolation", Start="2025-03-01", Finish="2025-03-31", Resource="Not Started"),
     dict(Task="Produce Number of counts plots (29)", Start="2025-01-13", Finish="2025-01-20", Resource="Complete"),
     dict(Task="QA flags (23)", Start="2025-01-13", Finish="2025-02-03", Resource="Complete"),
-    dict(Task="Finalise netcdf storage output (30)", Start="2025-01-20", Finish="2025-02-03", Resource="Incomplete"),
+    dict(Task="Finalise netcdf storage output (30)", Start="2025-01-20", Finish="2025-02-03", Resource="Complete"),
     dict(Task="Create conda environment", Start="2025-01-01", Finish="2025-01-14", Resource="Complete"),
     dict(Task="Create Logging file output", Start="2025-02-03", Finish="2025-02-24", Resource="Incomplete"),
     dict(Task="Filter for min number of obs points to be aggregated (32)", Start="2025-02-17", Finish="2025-02-24", Resource="Not Started"),
@@ -25,6 +25,8 @@ df = [
     dict(Task="Update grid config to be in dataclasses instead of yaml files (40)", Start="2025-02-17", Finish="2025-02-24", Resource="Not Started"),
     dict(Task="Implement process lazy data in Tropomi (50)", Start="2025-02-10", Finish="2025-02-17", Resource="Not Started"),
     dict(Task="Investigate whether we can include Unit Converter used by Providentia/Ghost into code (51)", Start="2025-04-01", Finish="2025-04-10", Resource="Not Started"),
+    dict(Task="Fix to_da bug (52)", Start="2025-02-10", Finish="2025-02-17", Resource="Not Started"),
+    dict(Task="Error with some datasets outside time range when apply_qa=True bug (49)", Start="2025-02-10", Finish="2025-02-17", Resource="Not Started"),
 ]
diff --git a/setup.py b/setup.py
index e954ab78..054f077e 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@ from setuptools import find_packages
 from setuptools import setup

 # Could update this using versioneer
-version="0.0.9"
+version="0.0.10"

 setup(
     name="mapies",
--
GitLab
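Taken together, the series moves quality filtering and spatial cropping from object construction to processing time. An end-state usage sketch mirroring the run scripts above — the paths are placeholders and the import path is assumed from the module layout, so treat it as illustrative rather than canonical:

    import time
    from mapies.viirs import VIIRS

    start_date = "202401010000"
    end_date = "202401312359"
    outdir = "/path/to/output"   # placeholder
    indir = "/path/to/VIIRS"     # placeholder

    start_time = time.time()
    c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated")
    c.read_nc()
    # QA flags and the lon/lat crop are now options of process_data, not of __init__
    c.process_data(monthly_avg=False, batch_size=50, apply_qa=True,
                   geospatial_crop=[[-10, 40], [30, 70]])
    c.plot_2D_observations(months=[0], filename="viirs_obs.png", outdir=outdir)
    c.plot_2D_num_obs(months=[0], filename="viirs_num_obs.png", outdir=outdir)
    print(f"Script executed in {time.time() - start_time:.2f} seconds.")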