diff --git a/mapies/grids/rotated.py b/mapies/grids/rotated.py index 6c0801cefabc77b11836cbcf396906a02bfa5102..5aa1993ee9eb5ee53df7749a204df467a31f9947 100644 --- a/mapies/grids/rotated.py +++ b/mapies/grids/rotated.py @@ -178,7 +178,8 @@ class RotatedGrid(Grid): return lon_out, lat_out, rlon_out, rlat_out, obs_out, count_obs else: if aggregated_obs_err is not None: - return self.grid_lon2d, self.grid_lat2d, aggregated_obs, aggregated_obs_err, count_obs + LON2D, LAT2D = rot_to_geo_cartopy(self.center_longitudes, self.center_latitudes, self.centre_lon, self.centre_lat) + return aggregated_obs, aggregated_obs_err, LON2D, LAT2D, count_obs, self.center_latitudes, self.center_longitudes, self.centre_lat, self.centre_lon else: LON2D, LAT2D = rot_to_geo_cartopy(self.center_longitudes, self.center_latitudes, self.centre_lon, self.centre_lat) return aggregated_obs, LON2D, LAT2D, count_obs, self.center_latitudes, self.center_longitudes, self.centre_lat, self.centre_lon diff --git a/mapies/mapies.py b/mapies/mapies.py index b3f82cdb4418c273835cb126d49ff079525bddaf..7bde689c25f5c3df00f1de3fe2e7556fe896759f 100644 --- a/mapies/mapies.py +++ b/mapies/mapies.py @@ -31,7 +31,7 @@ logger.add(sys.stderr, level="INFO") logger.add("../logs/mapies.log", level="DEBUG") @logger.catch -def process_single_date_slice(date, daily_dict, grid, start_date, end_date, freq_hours, save=False, outdir="./"): +def process_single_date_slice(date, daily_dict, grid, start_date, end_date, freq_hours, save=False, save_format="flatten", outdir="./"): logger.debug(f"Processing date slice: {date}") center_time = datetime.strptime(date, "%Y%m%d%H%M") l_border = (datetime.strptime(date, "%Y%m%d%H%M") - timedelta(hours=freq_hours/2)).strftime('%Y%m%d%H%M') @@ -67,13 +67,18 @@ def process_single_date_slice(date, daily_dict, grid, start_date, end_date, freq # Aggregate on the grid if isinstance(grid, RotatedGrid): if save: - obs2D, lat2D, lon2D, count2D, rlat, rlon, centre_lon, centre_lat = grid.aggregate(lon, 
lat, obs, None, flattening=False) - - if obs2D is not None and not np.all(np.isnan(obs2D)) and not np.all(obs2D == 0): - logger.debug(f"Saving data to NetCDF for date: {date}") - - day_start, day_end = day_boundaries(center_time, start_date, end_date) - to_netCDF_gridded_rotated(obs2D, lat2D, lon2D, count2D, rlat, rlon, date, centre_lon, centre_lat, outdir) + if save_format == "gridded": + obs2D, lon2D, lat2D, count2D, rlat, rlon, centre_lat, centre_lon = grid.aggregate(lon, lat, obs, None, flattening=False) + + if obs2D is not None and not np.all(np.isnan(obs2D)) and not np.all(obs2D == 0): + logger.debug(f"Saving data to NetCDF for date: {date}") + + # day_start, day_end = day_boundaries(center_time, start_date, end_date) + to_netCDF_gridded_rotated(obs2D, lat2D, lon2D, count2D, rlat, rlon, date, centre_lon, centre_lat, outdir) + else: + lon, lat, rlon_out, rlat_out, obs, count_obs = grid.aggregate(lon, lat, obs, None, flattening=True) + final_obs, final_lat, final_lon, final_count = filter_observations(obs, lat, lon, count_obs) + return (date, final_lon, final_lat, final_obs, final_count, time_values, center_time.date()) else: lon, lat, rlon_out, rlat_out, obs, count_obs = grid.aggregate(lon, lat, obs, None, flattening=True) final_obs, final_lat, final_lon, final_count = filter_observations(obs, lat, lon, count_obs) @@ -81,13 +86,18 @@ def process_single_date_slice(date, daily_dict, grid, start_date, end_date, freq elif isinstance(grid, RegularGrid): if save: - lon2D, lat2D, obs2D, count2D = grid.aggregate(lon, lat, obs, None, flattening=False) - - if obs2D is not None and not np.all(np.isnan(obs2D)) and not np.all(obs2D == 0): - logger.debug(f"Saving data to NetCDF for date: {date}") - - day_start, day_end = day_boundaries(center_time, start_date, end_date) - to_netCDF_gridded_regular(obs2D, lat2D, lon2D, count2D, time_values, day_start, day_end, date, outdir) + if save_format == "gridded": + lon2D, lat2D, obs2D, count2D = grid.aggregate(lon, lat, obs, None, 
flattening=False) + + if obs2D is not None and not np.all(np.isnan(obs2D)) and not np.all(obs2D == 0): + logger.debug(f"Saving data to NetCDF for date: {date}") + + # day_start, day_end = day_boundaries(center_time, start_date, end_date) + to_netCDF_gridded_regular(obs2D, lat2D, lon2D, count2D, date, outdir) + else: + lon, lat, obs, count_obs = grid.aggregate(lon, lat, obs, None, flattening=True) + final_obs, final_lat, final_lon, final_count = filter_observations(obs, lat, lon, count_obs) + return (date, final_lon, final_lat, final_obs, final_count, time_values, center_time.date()) else: lon, lat, obs, count_obs = grid.aggregate(lon, lat, obs, None, flattening=True) final_obs, final_lat, final_lon, final_count = filter_observations(obs, lat, lon, count_obs) @@ -189,6 +199,7 @@ class MAPIES: min_num_obs: int = 0, batch_size: int = 100, save: bool = False, + save_format: str = "flatten", apply_qa: bool = True, geospatial_crop: NDArray | None = None ): @@ -217,7 +228,7 @@ class MAPIES: if frequency_avg is not None: logger.info(f"Running frequency-based averaging with frequency='{frequency_avg}'") self.process_lazy_data(apply_qa=apply_qa, geospatial_crop=geospatial_crop, save=False) - self.average_by_frequency(frequency=frequency_avg, save_nc=save) + self.average_by_frequency(frequency=frequency_avg, save_nc=save, save_format=save_format) else: final_lon, final_lat, final_obs, cumulative_count = self.process_data_mp( batch_size=batch_size, month=None @@ -227,14 +238,19 @@ class MAPIES: if save: - mid_time_array = generate_mid_time_array(self.start_date, self.end_date, self.obs) - - self.to_netCDF(self.obs, self.lat, self.lon, mid_time_array, self.start_date, self.end_date) - logger.debug(f'Saved processed data to netCDF') + if save_format == "flatten": + mid_time_array = generate_mid_time_array(self.start_date, self.end_date, self.obs) + + self.to_netCDF(self.obs, self.lat, self.lon, mid_time_array, self.start_date, self.end_date) + logger.debug(f'Saved processed data 
to netCDF') + else: + logger.error(f"Error: save_format '{save_format}' is not supported. Use 'flatten' instead.") + raise ValueError(f"Unsupported save_format: {save_format}") @logger.catch - def average_by_frequency(self, frequency="D", save_nc=False): + def average_by_frequency(self, frequency="D", save_nc=False, save_format="flatten"): + freq_hours = frequency_int(frequency) logger.debug(f"Frequency in hours: {freq_hours}") avg_results = [] @@ -242,6 +258,7 @@ class MAPIES: dates_slice = time_slicing(self.start_date, self.end_date, frequency) logger.debug(f"Dates slice: {dates_slice}") save = save_nc + save_format = save_format outdir = self.dest args = [ ( @@ -252,6 +269,7 @@ class MAPIES: self.end_date, freq_hours, save, + save_format, outdir) for date in dates_slice ] @@ -265,18 +283,26 @@ class MAPIES: if result is None: continue date, lon, lat, obs, count_obs, time_values, center_time = result - avg_results.append((date, lon, lat, obs, count_obs)) - - # # Optionally save NetCDF - # if save_nc: - # if obs is not None and len(obs) > 0: - # # Save the data to NetCDF - # logger.debug(f"Saving data to NetCDF for date: {date}") - # # Use the center_time date for boundaries - # day_start, day_end = day_boundaries(center_time, self.start_date, self.end_date) - # self.to_netCDF_for_ncview(obs, lat, lon, time_values, day_start, day_end, n) - # n += 1 + + if save and save_format=="flatten": + # Save the data to netCDF + l_border = pd.Timestamp(center_time - timedelta(hours=freq_hours/2)) + r_border = pd.Timestamp(center_time + timedelta(hours=freq_hours/2)) + if l_border < self.start_date: + day_start = self.start_date + else: + day_start = l_border + if r_border > self.end_date: + day_end = self.end_date + else: + day_end = r_border + + mid_time = np.datetime64(center_time) + N = len(obs) + mid_time_array = np.repeat(mid_time, N) + + self.to_netCDF(obs, lat, lon, mid_time_array, day_start, day_end) self.avg_results = avg_results diff --git 
a/mapies/util/func_tools.py b/mapies/util/func_tools.py index 73caae3eec1aa1a9b70270ab6f5a141e0d739e64..ff84170c4113b43b828b5e18255543024319f776 100644 --- a/mapies/util/func_tools.py +++ b/mapies/util/func_tools.py @@ -578,9 +578,6 @@ def to_netCDF_gridded_regular( lat: NDArray | None = None, lon: NDArray | None = None, count: NDArray | None = None, - time: NDArray | None = None, - start_time: pd.Timestamp | None = None, - end_time: pd.Timestamp | None = None, date: NDArray | None = None, outdir: str = "./", ): @@ -608,8 +605,6 @@ def to_netCDF_gridded_regular( if not np.allclose(grid_lon2d, grid_lon2d[0,:][np.newaxis, :]): raise ValueError("Longitude grid is not regular") - print(f"Type of start and end time: {type(start_time)}, {type(end_time)}") - # Convert date to a datetime object if isinstance(date, str): t = pd.to_datetime(date, format="%Y%m%d%H%M") @@ -711,8 +706,8 @@ def to_netCDF_gridded_rotated( }), "rotated_pole": ((), 0, { "grid_mapping_name": "rotated_latitude_longitude", - "grid_north_pole_latitude": centre_lat, - "grid_north_pole_longitude": centre_lon, + "grid_north_pole_latitude": 90-centre_lat, + "grid_north_pole_longitude": -180+centre_lon, }) }, attrs={"title": f"aod550 averaged around {date}", diff --git a/run/run_viirs.py b/run/run_viirs.py index 129a81af496a885ac33b09d31795699015d25e22..3bfbea609051e9f4f93218fbe29b88a5e15c7b86 100644 --- a/run/run_viirs.py +++ b/run/run_viirs.py @@ -11,13 +11,15 @@ if __name__ == "__main__": - outdir="/home/cmeikle/Projects/data/" - indir="/home/cmeikle/Projects/data/VIIRS" + outdir="/home/cgile/Documents/mapies/figures/averages/11062025" + indir="/home/cgile/Documents/mapies/VIIRS" + start_time = time.time() c = VIIRS(start_date, end_date, dest=outdir, indir=indir, grid_repr="bdrc") + c.gather_nc_files() # geospatial_crop = [[-51,75], [-35,45]] @@ -25,9 +27,11 @@ if __name__ == "__main__": frequency_avg = "D" min_num_obs = 0 - c.process_avg_data(frequency_avg=frequency_avg, min_num_obs = min_num_obs, 
batch_size = 100, save=False, apply_qa = False, geospatial_crop = geospatial_crop) - #c.plot_2D_observations(months=[0], filename="plot_10062025.png", outdir=outdir) - c.plot_2D_observations(months=[1], outdir=outdir) + + c.process_avg_data(frequency_avg=frequency_avg, min_num_obs = min_num_obs, batch_size = 100, save=True, save_format = "gridded", apply_qa = False, geospatial_crop = geospatial_crop) + # c.plot_2D_observations(months=[0], filename="plot_10062025.png", outdir=outdir) + # c.plot_2D_observations(months=[1], outdir=outdir) + c.plot_2D_num_obs(months=[1], outdir=outdir) # c.process_lazy_data(apply_qa=True, save=True)