From c204a4ad3692a236a763d160dc4f0246731ffc55 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Fri, 7 Feb 2025 15:10:49 +0100
Subject: [PATCH 1/8] Updating qa to run in process data rather than in the initialisation of the method

---
 mapies/mapies.py | 10 ++--------
 mapies/viirs.py  | 11 ++++++-----
 run/run_viirs.py | 19 ++++++-------------
 3 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/mapies/mapies.py b/mapies/mapies.py
index 206969bc..615e6b2f 100644
--- a/mapies/mapies.py
+++ b/mapies/mapies.py
@@ -40,7 +40,7 @@ class MAPIES:
         self.time_orig = np.datetime64("1900-01-01T00:00:00")
         self.start_date = start_date
         self.end_date = end_date
-
+        self.apply_qa = True
         self.grid_dict = {
             "rotated":RotatedGrid,
@@ -96,13 +96,7 @@ class MAPIES:
         qa_dict = self.config[self.datatype]["qa"]
-        #Check if the user wants qa flags
-        apply_qa = kwargs.get("apply_qa")
-
-        if apply_qa is None:
-            self.apply_qa = True
-        else:
-            self.apply_qa = apply_qa
+
         self.qualityFlags = qa_dict["qa_variables"]
diff --git a/mapies/viirs.py b/mapies/viirs.py
index 50b3c180..69acd2b6 100644
--- a/mapies/viirs.py
+++ b/mapies/viirs.py
@@ -289,7 +289,7 @@ class VIIRS(MAPIES):

     @timeit
-    def process_data(self, monthly_avg=False, batch_size=100):
+    def process_data(self, monthly_avg=False, batch_size=100, apply_qa=True):
         """
         Process the data.
@@ -301,6 +301,7 @@ class VIIRS(MAPIES):
         - if monthly_avg --> updated monthly dictorionary for the processed month.
         - if not monthly_avg --> updated self.lon_values, self.lat_values, self.obs, self.count_obs.
         """
+        #Check if the user wants qa flags
         if monthly_avg:
             for month in self.monthly_dict.keys():
                 # print(f'Month: {self.monthly_dict[month]}')
@@ -312,7 +313,7 @@ class VIIRS(MAPIES):
                 try:
                     # Process the data for the current month
                     final_lon, final_lat, final_obs, cumulative_count = self.process_monthly_data(
-                        batch_size=batch_size, month=month
+                        batch_size=batch_size, month=month, apply_qa=apply_qa
                     )
                     # Update the monthly dictionary
@@ -337,7 +338,7 @@ class VIIRS(MAPIES):
         else:
             try:
                 final_lon, final_lat, final_obs, cumulative_count = self.process_monthly_data(
-                    batch_size=batch_size, month=None
+                    batch_size=batch_size, month=None, apply_qa=apply_qa
                 )
                 self.lon_values = final_lon
                 self.lat_values = final_lat
@@ -347,7 +348,7 @@ class VIIRS(MAPIES):
             print(f"Error processing data for the time period from {self.start_date} to {self.end_date}: {e}")
             return

-    def process_monthly_data(self, batch_size, month=None):
+    def process_monthly_data(self, batch_size, month=None, apply_qa=True):
         """
         Process the data with multiprocessing.
@@ -386,7 +387,7 @@ class VIIRS(MAPIES):
                     self.time_var,
                     self.start_date,
                     self.end_date,
-                    self.apply_qa,
+                    apply_qa,
                     self.qualityFlags,
                     flag
                 )
diff --git a/run/run_viirs.py b/run/run_viirs.py
index d158cf87..e2fb1b9a 100644
--- a/run/run_viirs.py
+++ b/run/run_viirs.py
@@ -22,25 +22,18 @@ if __name__ == "__main__":
     # start_date = "202401010000"
     # end_date = "202401312359"

-    # outdir="/home/cmeikle/Projects/data/"
-    # indir="/home/cmeikle/Projects/data/VIIRS/original_files/AERDB_L2_VIIRS_NOAA20"
-
-    outdir = "/home/cgile/Documents/mapies/figures"
-    # indir = "/home/cgile/bscearth000/esarchive/obs/nasa/viirs_noaa20_aerdb_l2/original_files/VIIRS"
-    # indir = "/home/cgile/Documents/mapies/VIIRS"
-    indir = ''
-    filename_obs = 'carlotta'
-    filename_num_obs = 'carlotta'
+    outdir="/home/cmeikle/Projects/data/"
+    indir="/home/cmeikle/Projects/data/VIIRS/original_files/AERDB_L2_VIIRS_NOAA20"

     start_time = time.time()
-    c = VIIRS(start_date, end_date, dest=outdir, indir=indir, apply_qa=False, grid_repr="rotated")
+    c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 50)
+    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=False)
     # c.yearly_average()
-    c.plot_2D_observations(months=[0], filename=filename_obs, outdir=outdir)
-    c.plot_2D_num_obs(months=[0], filename=filename_num_obs, outdir=outdir)
+    c.plot_2D_observations(months=[0], filename="Viirs_qa_not_applied.png", outdir=outdir)
+    c.plot_2D_num_obs(months=[0], filename="Viirs_num_obsqa_not_applied.png", outdir=outdir)
     # c.process_lazy_data()
     end_time = time.time()
     elapsed_time = end_time - start_time
--
GitLab

From 0317636cab234fe5dd2408ba7ec072d2f7b90919 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Fri, 7 Feb 2025 17:40:10 +0100
Subject: [PATCH 2/8] Updating qa for tropomi

---
 mapies/tropomi.py  |  5 ++++-
 run/run_tropomi.py | 16 ++++++++--------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/mapies/tropomi.py b/mapies/tropomi.py
index 26e3bef7..433eb00a 100644
--- a/mapies/tropomi.py
+++ b/mapies/tropomi.py
@@ -181,7 +181,7 @@ class TROPOMI(MAPIES):

     @timeit
-    def process_data(self, monthly_avg=False, batch_size=100, geospatial_crop: NDArray | None = None):
+    def process_data(self, monthly_avg=False, batch_size=100, apply_qa=True, geospatial_crop: NDArray | None = None):
         """
         Process the data for the specified year and months.
@@ -195,6 +195,9 @@ class TROPOMI(MAPIES):
         - if monthly_avg --> lon_final, lat_final, obs_final: Aggregated longitude, latitude, and observation arrays.
         - if not monthly_avg --> preprocess_vars() is called.
         """
+        #Check if the user wants qa flags
+        if apply_qa:
+            self.apply_qa = True
         if geospatial_crop is not None:
             self.geospatial_crop = np.array(geospatial_crop)
         if monthly_avg:
diff --git a/run/run_tropomi.py b/run/run_tropomi.py
index 4ee763a5..9223cb64 100644
--- a/run/run_tropomi.py
+++ b/run/run_tropomi.py
@@ -5,21 +5,21 @@ import time

 if __name__ == "__main__":

-    start_date = "202212010000"
-    end_date = "202212202359"
+    start_date = "202301010000"
+    end_date = "202301012359"

-    outdir = "/esarchive/scratch/cmeikle/"
-    indir = "/esarchive/obs/sentinel/tropomi/original_files/tropomi_OFFL_NO2"
+    outdir = "/home/cmeikle/Projects/data/"
+    indir = "/home/cmeikle/Projects/data/TROPOMI"

     start_time = time.time()
-    c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, apply_qa=True, grid_repr="rotated")
+    c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="regular")

     c.read_nc()
-    c.process_data(monthly_avg=True, batch_size = 13)
+    c.process_data(monthly_avg=False, batch_size = 13, apply_qa=True)
     # c.yearly_average()
-    c.plot_2D_observations(months=[12], outdir=outdir)
-    c.plot_2D_num_obs(months=[12], outdir=outdir)
+    c.plot_2D_observations(months=[0], outdir=outdir)
+    c.plot_2D_num_obs(months=[0], outdir=outdir)
     #c.process_lazy_data()
     end_time = time.time()
     elapsed_time = end_time - start_time
--
GitLab
From 03eee3eca0aab63e759aca624a677bb4884ccb2b Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Mon, 10 Feb 2025 12:18:42 +0100
Subject: [PATCH 3/8] Added qa greater less than functionality and updated tests

---
 mapies/config/satellite_config.yaml |  3 +-
 mapies/mapies.py                    |  5 +++-
 mapies/tests/test_func_tools.py     |  9 +++---
 mapies/tropomi.py                   |  1 -
 mapies/util/func_tools.py           | 44 ++++++++++++++++++++++++++---
 5 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/mapies/config/satellite_config.yaml b/mapies/config/satellite_config.yaml
index af4837f2..b94f5324 100644
--- a/mapies/config/satellite_config.yaml
+++ b/mapies/config/satellite_config.yaml
@@ -67,8 +67,9 @@ tropomi:
     qa_variables:
       qa_value:
         qa_name: "qa_value"
-        qa_values: [1] #[0.62, 0.65999997, 0.66999996, 0.69, 0.72999996, 0.74, 0.78999996, 0.84, 0.88, 0.9, 0.93, 1.]
+        qa_values: 0.5
         qa_combined: True
+        qa_condition: "greater_than_or_equal" # Examples, greater_than, less_than, equal, greater_than_or_equal, less_than_or_equal
diff --git a/mapies/mapies.py b/mapies/mapies.py
index 453a7efe..a0844f4f 100644
--- a/mapies/mapies.py
+++ b/mapies/mapies.py
@@ -41,6 +41,7 @@ class MAPIES:
         self.start_date = start_date
         self.end_date = end_date
         self.apply_qa = True
+        self.geospatial_crop = None
         self.grid_dict = {
             "rotated":RotatedGrid,
@@ -186,10 +187,12 @@ class MAPIES:
         if self.datatype == "viirs":
             label = "AOD"
             levels = [0.02, 0.04, 0.06, 0.08, 0.10, 0.15, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 1.00]
+            level_format = 2
         elif self.datatype == "tropomi":
             #TODO remove hard coding
             label = "NO2"
             levels = [0.00001, 0.00002, 0.00003, 0.00004, 0.00005, 0.00006, 0.00007, 0.00008, 0.00009, 0.00010, 0.00015, 0.00020, 0.00030, 0.00040]
+            level_format = 6
         # Define the custom color list
         colors = [
             '#290AD8', '#2645FA', '#3A91FF', '#66CBFF', '#99EEFF',
@@ -216,7 +219,7 @@ class MAPIES:
         # Add the color bar
         cbar = fig.colorbar(im, ax=ax, orientation="vertical", pad=0.05, ticks=levels, extend="both")
         cbar.set_label(label)
-        cbar.ax.set_yticklabels([f"{level:.2f}" for level in levels]) # Format tick labels
+        cbar.ax.set_yticklabels([f"{level:.f}" for level in levels]) # Format tick labels

         # Add title and save the plot
         ax.set_title(title)
diff --git a/mapies/tests/test_func_tools.py b/mapies/tests/test_func_tools.py
index 52afee64..da0df3e7 100644
--- a/mapies/tests/test_func_tools.py
+++ b/mapies/tests/test_func_tools.py
@@ -42,6 +42,7 @@ ds = xr.Dataset(
 obs = np.array([1, 2, 3, 4])
 lon = np.array([1, 2, 3, 4])
 lat = np.array([1, 2, 3, 4])
+time = np.array([1, 2, 3, 4])
 qualityFlags1 = {"qa_flag1":{"qa_name":"flag1", "qa_values":[1], "qa_combined":False}, "qa_flag2":{"qa_name":"flag2", "qa_values":[1], "qa_combined":False}}
 qualityFlags2 = {"qa_flag1":{"qa_name":"flag1", "qa_values":[1], "qa_combined":True}, "qa_flag2":{"qa_name":"flag2", "qa_values":[1], "qa_combined":True}}
 qualityFlags3 = {"qa_flag1":{"qa_name":"flag1", "qa_values":[1], "qa_combined":True}, "qa_flag2":{"qa_name":"flag2", "qa_values":[1], "qa_combined":False}}
@@ -49,9 +50,9 @@ qualityFlags3 = {"qa_flag1":{"qa_name":"flag1", "qa_values":[1], "qa_combined":T
 @pytest.mark.parametrize(
     "test_input,expected",
     [
-        ((ds, obs, lat, lon, qualityFlags1), (np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]))),
-        ((ds, obs, lat, lon, qualityFlags2), (np.array([4]), np.array([4]), np.array([4]))),
-        ((ds, obs, lat, lon, qualityFlags3), (np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]))),
+        ((ds, obs, lat, lon, time, qualityFlags1), (np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]))),
+        ((ds, obs, lat, lon, time, qualityFlags2), (np.array([4]), np.array([4]), np.array([4]), np.array([4]))),
+        ((ds, obs, lat, lon, time, qualityFlags3), (np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]), np.array([1, 3, 4]))),
     ]
 )
 def test_quality_assurance(test_input, expected):
@@ -60,7 +61,7 @@ def test_quality_assurance(test_input, expected):
     assert quality_assurance(*test_input)[2].all() == expected[2].all()
 """
 @pytest.mark.parametrize(
-    "test_input,expected", 
+    "test_input,expected",
     [
         ([], np.array([])),
     ]
diff --git a/mapies/tropomi.py b/mapies/tropomi.py
index 433eb00a..09c76e9f 100644
--- a/mapies/tropomi.py
+++ b/mapies/tropomi.py
@@ -87,7 +87,6 @@ class TROPOMI(MAPIES):
         self.daily_dict = {} # Dictionary to produce daily netcdf files
         self.datatype = "tropomi"
-        self.geospatial_crop = None

         # Add quality value filter number
diff --git a/mapies/util/func_tools.py b/mapies/util/func_tools.py
index 300f2ec6..29856b6e 100644
--- a/mapies/util/func_tools.py
+++ b/mapies/util/func_tools.py
@@ -81,23 +81,57 @@ def time_converter(date:str) -> pd.Timestamp:
     return date

+
+
 def quality_assurance(ds, obs, lat, lon, time_values, qualityFlags):
     """
     Applying quality assurance flags to mapies data
     """
     mask = np.zeros_like(obs, dtype=bool)
-
+    # Iterate through each qa flag
     for i, (flag, info) in enumerate(qualityFlags.items()):
+        # TODO add logger of flag being applied with condition and value
         flag_mask = np.zeros_like(obs, dtype=bool)
         arr = ds[info["qa_name"]].values.flatten()

-        for value in info["qa_values"]:
-            flag_mask |= (arr == value)
+        # Get qa conditions
+        try:
+            condition = info["qa_condition"]
+        except KeyError:
+            # print("No QA condition passed, putting equal")
+            condition = "equals"
+        # Get qa values
+        try:
+            values = info["qa_values"]
+        except KeyError:
+            raise Exception("No QA values passed, putting equal")
+
+        if isinstance(values, (float, int)):
+            values = [values]
+
+        for value in values:
+            if condition == "greater_than":
+                flag_mask |= (arr > value)
+            elif condition == "less_than":
+                flag_mask |= (arr < value)
+            elif condition == "greater_than_or_equal":
+                flag_mask |= (arr >= value)
+            elif condition == "less_than_or_equal":
+                flag_mask |= (arr <= value)
+            elif condition == "equal":
+                flag_mask |= (arr == value)

         if i == 0:
             mask = flag_mask
         else:
-            if info["qa_combined"]:
+            # If we want to perform an and or or between conditions
+            try:
+                combination = info["qa_combined"]
+            except KeyError:
+                # print("No combination of and or or passed for the condition, presuming or(False)")
+                combination = False
+
+            if combination:
                 mask &= flag_mask
             else:
                 mask |= flag_mask
@@ -110,6 +144,8 @@ def quality_assurance(ds, obs, lat, lon, time_values, qualityFlags):
     return obs, lat, lon, time_values

+
+
 def spatial_domain_selection(obs, lat, lon, time, spatial_bounds):
     """
     A function for cutting the data spatially
--
GitLab
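The qa_condition/qa_values keys introduced in PATCH 3/8 feed straight into quality_assurance(). The following is an illustrative sketch only — the toy dataset, the 0.5 threshold and the point values are invented for the example rather than taken from the mapies defaults — but it uses the signature and config keys added above:

    import numpy as np
    import xarray as xr
    from mapies.util.func_tools import quality_assurance

    # Toy swath: four observations with one qa_value per point
    obs = np.array([1.0, 2.0, 3.0, 4.0])
    lat = np.array([10.0, 20.0, 30.0, 40.0])
    lon = np.array([-5.0, 0.0, 5.0, 10.0])
    time_values = np.array([1, 2, 3, 4])
    ds = xr.Dataset({"qa_value": ("points", np.array([0.3, 0.5, 0.7, 1.0]))})

    # Same keys as the satellite_config.yaml entry above; a scalar qa_values
    # is wrapped into a list inside quality_assurance()
    quality_flags = {
        "qa_value": {
            "qa_name": "qa_value",
            "qa_values": 0.5,
            "qa_condition": "greater_than_or_equal",
        },
    }

    # Keeps only the points whose qa_value is >= 0.5 (here the last three)
    obs, lat, lon, time_values = quality_assurance(ds, obs, lat, lon, time_values, quality_flags)

With PATCH 1/8 and 2/8, whether this filter runs at all is now decided per call, e.g. c.process_data(monthly_avg=False, batch_size=50, apply_qa=True), rather than at construction time.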
From 288109d7490313cf40b43225fa2bf71d9449ad20 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Mon, 10 Feb 2025 14:42:20 +0100
Subject: [PATCH 4/8] Formatting of plot

---
 mapies/mapies.py   |  6 +++---
 mapies/viirs.py    | 11 +++++++++--
 run/run_tropomi.py |  2 +-
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/mapies/mapies.py b/mapies/mapies.py
index a0844f4f..dc40c295 100644
--- a/mapies/mapies.py
+++ b/mapies/mapies.py
@@ -187,12 +187,12 @@ class MAPIES:
         if self.datatype == "viirs":
             label = "AOD"
             levels = [0.02, 0.04, 0.06, 0.08, 0.10, 0.15, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 1.00]
-            level_format = 2
+            level_format = [f"{level:.2f}" for level in levels]
         elif self.datatype == "tropomi":
             #TODO remove hard coding
             label = "NO2"
             levels = [0.00001, 0.00002, 0.00003, 0.00004, 0.00005, 0.00006, 0.00007, 0.00008, 0.00009, 0.00010, 0.00015, 0.00020, 0.00030, 0.00040]
-            level_format = 6
+            level_format = [f"{level:.5f}" for level in levels]
         # Define the custom color list
         colors = [
             '#290AD8', '#2645FA', '#3A91FF', '#66CBFF', '#99EEFF',
@@ -219,7 +219,7 @@ class MAPIES:
         # Add the color bar
         cbar = fig.colorbar(im, ax=ax, orientation="vertical", pad=0.05, ticks=levels, extend="both")
         cbar.set_label(label)
-        cbar.ax.set_yticklabels([f"{level:.f}" for level in levels]) # Format tick labels
+        cbar.ax.set_yticklabels(level_format) # Format tick labels

         # Add title and save the plot
         ax.set_title(title)
diff --git a/mapies/viirs.py b/mapies/viirs.py
index f1bb0ace..d1159d81 100644
--- a/mapies/viirs.py
+++ b/mapies/viirs.py
@@ -445,11 +445,16 @@ class VIIRS(MAPIES):
         print("Yearly average computation completed.")

     @timeit
-    def process_lazy_data(self):
+    def process_lazy_data(self, apply_qa=True, geospatial_crop: NDArray | None = None):
         """
         Process the data for the specified time range. It only retrieves a daily dictionary using dask lazy loading.
         """
         flag = True
+        #Check if the user wants qa flags
+        if apply_qa:
+            self.apply_qa = True
+        if geospatial_crop is not None:
+            self.geospatial_crop = np.array(geospatial_crop)
         for day in self.daily_dict.keys():
             try:
                 files = self.daily_dict[day]["files"]
@@ -465,7 +470,9 @@ class VIIRS(MAPIES):
                         self.end_date,
                         self.apply_qa,
                         self.qualityFlags,
-                        flag
+                        flag,
+                        self.geospatial_crop
+
                     )
                     for file in files
                 ]
diff --git a/run/run_tropomi.py b/run/run_tropomi.py
index 9223cb64..13871692 100644
--- a/run/run_tropomi.py
+++ b/run/run_tropomi.py
@@ -15,7 +15,7 @@ if __name__ == "__main__":
     c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="regular")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 13, apply_qa=True)
+    c.process_data(monthly_avg=False, batch_size = 13, apply_qa=True, geospatial_crop=[[-10, 40], [30, 70]])
     # c.yearly_average()

     c.plot_2D_observations(months=[0], outdir=outdir)
--
GitLab
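The geospatial_crop argument threaded through process_data() and process_lazy_data() in PATCH 2/8 and 4/8 is a [[lon_min, lon_max], [lat_min, lat_max]] window, following the run scripts. A rough sketch of the bounding-box selection this implies — the helper below is a hypothetical stand-in written for illustration, not mapies' actual spatial_domain_selection:

    import numpy as np

    def crop_to_domain(obs, lat, lon, time_values, bounds):
        # bounds follows the run-script convention: [[lon_min, lon_max], [lat_min, lat_max]]
        (lon_min, lon_max), (lat_min, lat_max) = bounds
        mask = (lon >= lon_min) & (lon <= lon_max) & (lat >= lat_min) & (lat <= lat_max)
        return obs[mask], lat[mask], lon[mask], time_values[mask]

    obs = np.array([0.1, 0.2, 0.3])
    lat = np.array([35.0, 10.0, 55.0])
    lon = np.array([5.0, 100.0, -20.0])
    time_values = np.array([1, 2, 3])

    # Roughly the European window used in run_tropomi.py
    obs, lat, lon, time_values = crop_to_domain(obs, lat, lon, time_values, [[-10, 40], [30, 70]])

Keeping the crop as a plain lon/lat box means it can be applied to the flattened swath arrays before any regridding or aggregation.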
From 99e5db601940712caf595874c5fa646132f47b14 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Mon, 10 Feb 2025 17:16:39 +0100
Subject: [PATCH 5/8] Updated gantt chart, removing 0 values in tropomi data too

---
 docs/project-management/gantt.py | 25 +++++++++++++++----------
 mapies/tropomi.py                | 26 ++++++++++++++------------
 mapies/util/func_tools.py        |  2 +-
 mapies/viirs.py                  |  9 ++++++---
 run/run_tropomi.py               |  3 +--
 run/run_viirs.py                 |  4 ++--
 6 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/docs/project-management/gantt.py b/docs/project-management/gantt.py
index eb0faa43..a6f62caa 100644
--- a/docs/project-management/gantt.py
+++ b/docs/project-management/gantt.py
@@ -5,21 +5,26 @@ import plotly.figure_factory as ff
 df = [
     dict(Task="Viirs base processor", Start='2024-08-01', Finish='2024-09-30', Resource='Complete'),
     dict(Task="Add pytests for already created functions (3-9)", Start='2024-08-01', Finish='2024-12-31', Resource='Complete'),
-    dict(Task="Tropomi base processor", Start='2024-08-01', Finish='2025-02-03', Resource='Incomplete'),
-    dict(Task="Tropomi to_plumes functionality (11)", Start='2025-02-03', Finish='2025-03-31', Resource='Not Started'),
+    dict(Task="Tropomi base processor", Start='2024-08-01', Finish='2025-02-03', Resource='Complete'),
+    dict(Task="Tropomi to_plumes functionality (11)", Start='2025-03-01', Finish='2025-03-14', Resource='Not Started'),
     dict(Task="Time evaluation statistics (18)", Start='2024-11-01', Finish='2024-11-28', Resource='Complete'),
-    dict(Task="Test DA implementation for VIIRS", Start='2025-02-03', Finish='2025-02-28', Resource='Not Started'),
+    dict(Task="Test DA implementation for VIIRS", Start='2025-03-01', Finish='2025-03-31', Resource='Not Started'),
     dict(Task="Convert Grid representation to numpy from geopandas", Start='2024-12-01', Finish='2024-12-31', Resource="Complete"),
-    dict(Task="Creating Grid Config for rotated and regular global grids (14)", Start='2024-11-18', Finish='2025-02-03', Resource='Incomplete'),
-    dict(Task="Add original indices of the data to the outputted netcdf (13)", Start='2025-02-03', Finish='2025-01-14', Resource='Not Started'),
-    dict(Task="Add history of processes to the netcdf (10)", Start='2025-01-01', Finish='2025-02-03', Resource='Not Started'),
-    dict(Task="Update grid representations to include Vertical profiling (20)", Start='2025-03-01', Finish='2025-03-31', Resource='Not Started'),
+    dict(Task="Creating Grid Config for rotated and regular global grids (14)", Start='2024-11-18', Finish='2025-02-03', Resource='Complete'),
+    dict(Task="Add original indices of the data to the outputted netcdf (13)", Start='2025-03-01', Finish='2025-03-07', Resource='Not Started'),
+    dict(Task="Add history of processes to the netcdf (10)", Start='2025-01-01', Finish='2025-02-03', Resource='Incomplete'),
+    dict(Task="Update grid representations to include Vertical profiling (20)", Start='2025-04-01', Finish='2025-04-30', Resource='Not Started'),
     dict(Task="Test Viirs data with Providentia Interpolation", Start="2025-03-01", Finish="2025-03-31", Resource="Not Started"),
     dict(Task="Produce Number of counts plots (29)", Start="2025-01-13", Finish="2025-01-20", Resource="Complete"),
-    dict(Task="QA flags (23)", Start="2025-01-13", Finish="2025-02-03", Resource="Incomplete"),
-    dict(Task="Finalise netcdf storage output (30)", Start="2025-01-20", Finish="2025-02-03", Resource="Not Started"),
+    dict(Task="QA flags (23)", Start="2025-01-13", Finish="2025-02-03", Resource="Complete"),
+    dict(Task="Finalise netcdf storage output (30)", Start="2025-01-20", Finish="2025-02-03", Resource="Incomplete"),
     dict(Task="Create conda environment", Start="2025-01-01", Finish="2025-01-14", Resource="Complete"),
-    dict(Task="Create Logging file output", Start="2025-01-01", Finish="2025-02-03", Resource="Incomplete"),
+    dict(Task="Create Logging file output", Start="2025-02-03", Finish="2025-02-24", Resource="Incomplete"),
+    dict(Task="Filter for min number of obs points to be aggregated (32)", Start="2025-02-17", Finish="2025-02-24", Resource="Not Started"),
+    dict(Task="Code refactor (41)", Start="2025-02-17", Finish="2025-03-01", Resource="Not Started"),
+    dict(Task="Update grid config to be in dataclasses instead of yaml files (40)", Start="2025-02-17", Finish="2025-02-24", Resource="Not Started"),
+    dict(Task="Implement process lazy data in Tropomi (50)", Start="2025-02-10", Finish="2025-02-17", Resource="Not Started"),
+    dict(Task="Investigate whether we can include Unit Converter used by Providentia/Ghost into code (51)", Start="2025-04-01", Finish="2025-04-10", Resource="Not Started"),
 ]
diff --git a/mapies/tropomi.py b/mapies/tropomi.py
index 09c76e9f..a8ee9207 100644
--- a/mapies/tropomi.py
+++ b/mapies/tropomi.py
@@ -215,11 +215,12 @@ class TROPOMI(MAPIES):
                     # final_lon, final_lat, final_obs, cumulative_count = self.process_files_in_blocks(month, block_size=300, batch_size=batch_size)

                     # Update the monthly dictionary
+                    valid_obs = final_obs > 0
                     self.monthly_dict[month] = {
-                        "lon": final_lon,
-                        "lat": final_lat,
-                        "obs": final_obs,
-                        "count": cumulative_count,
+                        "lon": final_lon[valid_obs],
+                        "lat": final_lat[valid_obs],
+                        "obs": final_obs[valid_obs],
+                        "count": cumulative_count[valid_obs],
                     }
                     print(f'Updated monthly dictionary for month {month}')
                     print(f'Lon shape: {final_lon.shape}, Lat shape: {final_lat.shape}, Obs shape: {final_obs.shape}')
@@ -238,10 +239,11 @@ class TROPOMI(MAPIES):
                 final_lon, final_lat, final_obs, cumulative_count = self.process_monthly_data(
                     batch_size=batch_size, month=None
                 )
-                self.lon_values = final_lon
-                self.lat_values = final_lat
-                self.obs = final_obs
-                self.count_obs = cumulative_count
+                valid_obs = final_obs > 0
+                self.lon = final_lon[valid_obs]
+                self.lat = final_lat[valid_obs]
+                self.obs = final_obs[valid_obs]
+                self.count_obs = cumulative_count[valid_obs]

             except Exception as e:
                 print(f"Error processing data for the time period from {self.start_date} to {self.end_date}: {e}")
@@ -334,8 +336,8 @@ class TROPOMI(MAPIES):

         if months == [0]:
             try:
-                lon_values = self.lon_values
-                lat_values = self.lat_values
+                lon_values = self.lon
+                lat_values = self.lat
                 obs_values = self.obs
             except Exception as e:
                 print(f"Error plotting all data: {e}")
@@ -394,8 +396,8 @@ class TROPOMI(MAPIES):

         if months == [0]:
             try:
-                lon_values = self.lon_values
-                lat_values = self.lat_values
+                lon_values = self.lon
+                lat_values = self.lat
                 cum_count = self.count_obs
             except Exception as e:
                 print(f"Error plotting all data: {e}")
diff --git a/mapies/util/func_tools.py b/mapies/util/func_tools.py
index 29856b6e..15356703 100644
--- a/mapies/util/func_tools.py
+++ b/mapies/util/func_tools.py
@@ -99,7 +99,7 @@ def quality_assurance(ds, obs, lat, lon, time_values, qualityFlags):
             condition = info["qa_condition"]
         except KeyError:
             # print("No QA condition passed, putting equal")
-            condition = "equals"
+            condition = "equal"
         # Get qa values
         try:
             values = info["qa_values"]
diff --git a/mapies/viirs.py b/mapies/viirs.py
index 7e623b7d..65d95b6b 100644
--- a/mapies/viirs.py
+++ b/mapies/viirs.py
@@ -64,11 +64,11 @@ def process_single_file(
         obs, lat, lon, time_values, time_values_index = new_preprocess_vars(ds, obs_var, lat_var, lon_var, time_var, start_date, end_date)
         # obs = ds[obs_var].values.flatten()
         # lat = ds[lat_var].values.flatten()
-
         if apply_qa:
             obs, lat, lon, time_values = quality_assurance(ds, obs, lat, lon, time_values, qualityFlags)
         if geospatial_crop is not None:
             obs, lat, lon, time_values = spatial_domain_selection(obs, lat, lon, time_values, geospatial_crop)
+
         if not flag:
             # Regrid and aggregate
             if isinstance(r, RotatedGrid):
@@ -250,8 +250,11 @@ class VIIRS(MAPIES):
         """
         #Check if the user wants qa flags
+
         if apply_qa:
             self.apply_qa = True
+        else:
+            self.apply_qa = False
         if geospatial_crop is not None:
             self.geospatial_crop = np.array(geospatial_crop)
         if monthly_avg:
@@ -291,7 +294,7 @@ class VIIRS(MAPIES):
         else:
             try:
                 final_lon, final_lat, final_obs, cumulative_count = self.process_monthly_data(
-                    batch_size=batch_size, month=None, apply_qa=apply_qa
+                    batch_size=batch_size, month=None
                 )
                 valid_obs = final_obs > 0
@@ -313,7 +316,7 @@ class VIIRS(MAPIES):
             return

-    def process_monthly_data(self, batch_size, month=None, apply_qa=True):
+    def process_monthly_data(self, batch_size, month=None):
         """
         Process the data with multiprocessing.
diff --git a/run/run_tropomi.py b/run/run_tropomi.py
index 13871692..b104aed5 100644
--- a/run/run_tropomi.py
+++ b/run/run_tropomi.py
@@ -15,12 +15,11 @@ if __name__ == "__main__":
     c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="regular")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 13, apply_qa=True, geospatial_crop=[[-10, 40], [30, 70]])
+    c.process_data(monthly_avg=False, batch_size = 14, apply_qa=True, geospatial_crop=[[-10, 40], [0, 70]])
     # c.yearly_average()

     c.plot_2D_observations(months=[0], outdir=outdir)
     c.plot_2D_num_obs(months=[0], outdir=outdir)
-    #c.process_lazy_data()
     end_time = time.time()
     elapsed_time = end_time - start_time
     print(f"Script executed in {elapsed_time:.2f} seconds.")
diff --git a/run/run_viirs.py b/run/run_viirs.py
index a859c640..c8141635 100644
--- a/run/run_viirs.py
+++ b/run/run_viirs.py
@@ -23,12 +23,12 @@ if __name__ == "__main__":
     c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=False)
+    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=True)
     # c.yearly_average()
     c.plot_2D_observations(months=[0], filename="Viirs_qa_not_applied.png", outdir=outdir)
     c.plot_2D_num_obs(months=[0], filename="Viirs_num_obsqa_not_applied.png", outdir=outdir)
     # c.process_lazy_data()
-    
+
     end_time = time.time()
     elapsed_time = end_time - start_time
     print(f"Script executed in {elapsed_time:.2f} seconds.")
--
GitLab

From 1170e88ed7568d6881e95a04a584d5b0f110ab51 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Mon, 10 Feb 2025 17:32:37 +0100
Subject: [PATCH 6/8] removing 0 values in tropomi and gantt chart

---
 run/run_viirs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run/run_viirs.py b/run/run_viirs.py
index c8141635..e6e0db2c 100644
--- a/run/run_viirs.py
+++ b/run/run_viirs.py
@@ -23,7 +23,7 @@ if __name__ == "__main__":
     c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=True)
+    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=True, geospatial_crop=[[-10, 40], [0, 70]])
     # c.yearly_average()
     c.plot_2D_observations(months=[0], filename="Viirs_qa_not_applied.png", outdir=outdir)
     c.plot_2D_num_obs(months=[0], filename="Viirs_num_obsqa_not_applied.png", outdir=outdir)
--
GitLab

From 57f73e82c6dcd67e9d6cad5f7822b8c8558c4dfa Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Tue, 11 Feb 2025 11:43:29 +0100
Subject: [PATCH 7/8] Doing some cleaning and commenting for future issues

---
 mapies/mapies.py   | 59 +++-------------------------------------------
 run/run_tropomi.py |  2 +-
 run/run_viirs.py   |  2 +-
 3 files changed, 5 insertions(+), 58 deletions(-)

diff --git a/mapies/mapies.py b/mapies/mapies.py
index dc40c295..d548f422 100644
--- a/mapies/mapies.py
+++ b/mapies/mapies.py
@@ -101,34 +101,7 @@ class MAPIES:
         self.qualityFlags = qa_dict["qa_variables"]

-
-    def preprocess_vars(self):
-        """
-        Preprocessing of the dataset
-        """
-        # Get all info about time columns
-        self.time_dims = self.ds[self.time_var].dims
-        self.time_shape = self.ds[self.time_var].shape
-        self.time_attrs = self.ds[self.time_var].attrs # Useful if we need to convert to datetime
-
-
-        # Get time values flatten and convert to datetime values
-        self.time_values = self.ds[self.time_var].values
-        self.time_values = self.time_values.flatten()
-
-        if self.time_values.dtype == "timedelta64[ns]":
-            #logger.info("Adding time origin to time values as the time variable is in timedelta")
-            self.time_values = np.add(self.time_orig, self.time_values)
-            self.time_values = pd.to_datetime(self.time_values)
-        else:
-            self.time_values = pd.to_datetime(self.time_values)
-
-        # Time domain selection
-        self.time_values, self.time_values_index = time_domain_selection(self.time_values, self.start_date, self.end_date)
-
-
-
+    # TODO this can be removed I believe
     @timeit
     def plot_2D_obs(self, **kwargs):
         """
@@ -172,6 +145,7 @@ class MAPIES:

         #return fig

+    # TODO this can be moved to a plotting function python file
     @timeit
     def plot_2D_obs_custom(self, lon_values=None, lat_values=None, obs_values=None, outdir=None, title=None, filename=None, display_fig=False):
         """
@@ -307,33 +281,6 @@ class MAPIES:
         # return reindexed values
         return independent_var_values

-    def quality_assurance(self):
-        """
-        Applying quality assurance flags to mapies data
-        """
-        mask = np.zeros_like(self.obs, dtype=bool)
-        for i, (flag, info) in enumerate(self.qualityFlags.items()):
-
-            flag_mask = np.zeros_like(self.obs, dtype=bool)
-            arr = self.ds[info["qa_name"]].values.flatten()
-            arr = self.reindex(self.time_values_index, arr)
-
-
-            for value in info["qa_values"]:
-                flag_mask |= (arr == value)
-            if i == 0:
-                mask = flag_mask
-            else:
-                if info["qa_combined"]:
-                    mask &= flag_mask
-                else:
-                    mask |= flag_mask
-
-        self.obs = self.obs[mask]
-        self.lon_values = self.lon_values[mask]
-        self.lat_values = self.lat_values[mask]
-        self.time_values = self.time_values[mask]
-
     @staticmethod
     def to_xarray(coords:dict, data_vars:dict, **kwargs):
@@ -347,7 +294,7 @@ class MAPIES:
         ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)
         return ds

-    
+    # TODO this can be moved to a plotting function python file
     @timeit
     def plot_2D_num_obs_custom(
         self, lon_values=None,
diff --git a/run/run_tropomi.py b/run/run_tropomi.py
index b104aed5..c42eb7d6 100644
--- a/run/run_tropomi.py
+++ b/run/run_tropomi.py
@@ -15,7 +15,7 @@ if __name__ == "__main__":
     c = TROPOMI(start_date, end_date, dest=outdir, indir=indir, grid_repr="regular")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 14, apply_qa=True, geospatial_crop=[[-10, 40], [0, 70]])
+    c.process_data(monthly_avg=False, batch_size = 14, apply_qa=True, geospatial_crop=[[-40, 40], [0, 70]])
     # c.yearly_average()

     c.plot_2D_observations(months=[0], outdir=outdir)
diff --git a/run/run_viirs.py b/run/run_viirs.py
index e6e0db2c..04967a55 100644
--- a/run/run_viirs.py
+++ b/run/run_viirs.py
@@ -23,7 +23,7 @@ if __name__ == "__main__":
     c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated")

     c.read_nc()
-    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=True, geospatial_crop=[[-10, 40], [0, 70]])
+    c.process_data(monthly_avg=False, batch_size = 50, apply_qa=False, geospatial_crop=[[0, 20], [10, 20]])
     # c.yearly_average()
     c.plot_2D_observations(months=[0], filename="Viirs_qa_not_applied.png", outdir=outdir)
     c.plot_2D_num_obs(months=[0], filename="Viirs_num_obsqa_not_applied.png", outdir=outdir)
--
GitLab

From 1aaa3f42bcfb9bb25e50fa01faa60d4008fcfcc2 Mon Sep 17 00:00:00 2001
From: Calum Meikle
Date: Tue, 11 Feb 2025 11:53:16 +0100
Subject: [PATCH 8/8] Updating version and gantt chart

---
 docs/project-management/gantt.py | 6 ++++--
 setup.py                         | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/project-management/gantt.py b/docs/project-management/gantt.py
index a6f62caa..41f03fc8 100644
--- a/docs/project-management/gantt.py
+++ b/docs/project-management/gantt.py
@@ -12,12 +12,12 @@ df = [
     dict(Task="Convert Grid representation to numpy from geopandas", Start='2024-12-01', Finish='2024-12-31', Resource="Complete"),
     dict(Task="Creating Grid Config for rotated and regular global grids (14)", Start='2024-11-18', Finish='2025-02-03', Resource='Complete'),
     dict(Task="Add original indices of the data to the outputted netcdf (13)", Start='2025-03-01', Finish='2025-03-07', Resource='Not Started'),
-    dict(Task="Add history of processes to the netcdf (10)", Start='2025-01-01', Finish='2025-02-03', Resource='Incomplete'),
+    dict(Task="Add history of processes to the netcdf (10)", Start='2025-01-01', Finish='2025-02-03', Resource='Complete'),
     dict(Task="Update grid representations to include Vertical profiling (20)", Start='2025-04-01', Finish='2025-04-30', Resource='Not Started'),
     dict(Task="Test Viirs data with Providentia Interpolation", Start="2025-03-01", Finish="2025-03-31", Resource="Not Started"),
     dict(Task="Produce Number of counts plots (29)", Start="2025-01-13", Finish="2025-01-20", Resource="Complete"),
     dict(Task="QA flags (23)", Start="2025-01-13", Finish="2025-02-03", Resource="Complete"),
-    dict(Task="Finalise netcdf storage output (30)", Start="2025-01-20", Finish="2025-02-03", Resource="Incomplete"),
+    dict(Task="Finalise netcdf storage output (30)", Start="2025-01-20", Finish="2025-02-03", Resource="Complete"),
     dict(Task="Create conda environment", Start="2025-01-01", Finish="2025-01-14", Resource="Complete"),
     dict(Task="Create Logging file output", Start="2025-02-03", Finish="2025-02-24", Resource="Incomplete"),
     dict(Task="Filter for min number of obs points to be aggregated (32)", Start="2025-02-17", Finish="2025-02-24", Resource="Not Started"),
@@ -25,6 +25,8 @@ df = [
     dict(Task="Update grid config to be in dataclasses instead of yaml files (40)", Start="2025-02-17", Finish="2025-02-24", Resource="Not Started"),
     dict(Task="Implement process lazy data in Tropomi (50)", Start="2025-02-10", Finish="2025-02-17", Resource="Not Started"),
     dict(Task="Investigate whether we can include Unit Converter used by Providentia/Ghost into code (51)", Start="2025-04-01", Finish="2025-04-10", Resource="Not Started"),
+    dict(Task="Fix to_da bug (52)", Start="2025-02-10", Finish="2025-02-17", Resource="Not Started"),
+    dict(Task="Error with some datasets outside time range when apply_qa=True bug (49)", Start="2025-02-10", Finish="2025-02-17", Resource="Not Started"),
 ]
diff --git a/setup.py b/setup.py
index e954ab78..054f077e 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@ from setuptools import find_packages
 from setuptools import setup

 # Could update this using versioneer
-version="0.0.9"
+version="0.0.10"

 setup(
     name="mapies",
--
GitLab
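Taken together, the series moves quality filtering and spatial cropping from object construction to processing time. An end-state usage sketch mirroring the run scripts above — the paths are placeholders and the import path is assumed from the module layout, so treat it as illustrative rather than canonical:

    import time
    from mapies.viirs import VIIRS

    start_date = "202401010000"
    end_date = "202401312359"
    outdir = "/path/to/output"   # placeholder
    indir = "/path/to/VIIRS"     # placeholder

    start_time = time.time()
    c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="rotated")
    c.read_nc()
    # QA flags and the lon/lat crop are now options of process_data, not of __init__
    c.process_data(monthly_avg=False, batch_size=50, apply_qa=True,
                   geospatial_crop=[[-10, 40], [30, 70]])
    c.plot_2D_observations(months=[0], filename="viirs_obs.png", outdir=outdir)
    c.plot_2D_num_obs(months=[0], filename="viirs_num_obs.png", outdir=outdir)
    print(f"Script executed in {time.time() - start_time:.2f} seconds.")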